run_video.py (forked from datitran/face2face-demo), 150 lines (124 loc), 5.91 KB
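"""Run a frozen face2face pix2pix model on a video stream.

Frames are read from a camera or a video file, dlib facial landmarks are drawn
as white polylines on a black canvas, the landmark sketch is fed through the
frozen pix2pix graph, and the generated face is shown next to the input.
"""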
import argparse

import cv2
import dlib
import numpy as np
import tensorflow as tf
from imutils import video

CROP_SIZE = 256          # pix2pix input images are 256x256
DOWNSAMPLE_RATIO = 4     # shrink frames before face detection to speed it up
def reshape_for_polyline(array):
    """Reshape landmark points so that they work with cv2.polylines."""
    return np.array(array, np.int32).reshape((-1, 1, 2))
def resize(image):
    """Crop to a centered square and resize for pix2pix."""
    height, width, _ = image.shape
    if height != width:
        # crop to the correct (square) ratio
        size = min(height, width)
        oh = (height - size) // 2
        ow = (width - size) // 2
        image = image[oh:(oh + size), ow:(ow + size)]
    return cv2.resize(image, (CROP_SIZE, CROP_SIZE))
def load_graph(frozen_graph_filename):
    """Load a (frozen) TensorFlow model into memory."""
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(frozen_graph_filename, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    return graph
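# Note: this script targets the TensorFlow 1.x graph API (tf.GraphDef, tf.gfile,
# tf.Session). Running it under TensorFlow 2.x would require the tf.compat.v1
# equivalents (e.g. tf.compat.v1.Session); that substitution is an assumption and
# not part of the original code.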
def main():
    # TensorFlow: load the frozen pix2pix graph and fetch its input/output tensors
    graph = load_graph(args.frozen_model_file)
    image_tensor = graph.get_tensor_by_name('image_tensor:0')
    output_tensor = graph.get_tensor_by_name('generate_output/output:0')
    sess = tf.Session(graph=graph)

    # OpenCV: open the video source (file path or camera device index)
    cap = cv2.VideoCapture(args.video_source)
    ret, frame = cap.read()
    if frame is None:
        # fall back to interpreting the source as a camera device index
        cap = cv2.VideoCapture(int(args.video_source))
    fps = video.FPS().start()
    counter = 0
    while True:
        ret, frame = cap.read()
        if frame is None:
            # end of the video file (or the camera stopped delivering frames)
            break

        # frame = frame[150:-200, :, :]
        # downscale the frame and detect faces on the smaller grayscale image
        frame_resize = cv2.resize(frame, None, fx=1 / DOWNSAMPLE_RATIO, fy=1 / DOWNSAMPLE_RATIO)
        gray = cv2.cvtColor(frame_resize, cv2.COLOR_BGR2GRAY)
        faces = detector(gray, 1)
        black_image = np.zeros(frame.shape, np.uint8)
        # draw the detected landmarks as white polylines on the black canvas
        for face in faces:
            detected_landmarks = predictor(gray, face).parts()
            landmarks = [[p.x * DOWNSAMPLE_RATIO, p.y * DOWNSAMPLE_RATIO] for p in detected_landmarks]

            # slices follow the dlib 68-point landmark layout
            jaw = reshape_for_polyline(landmarks[0:17])
            left_eyebrow = reshape_for_polyline(landmarks[22:27])
            right_eyebrow = reshape_for_polyline(landmarks[17:22])
            nose_bridge = reshape_for_polyline(landmarks[27:31])
            lower_nose = reshape_for_polyline(landmarks[30:35])
            left_eye = reshape_for_polyline(landmarks[42:48])
            right_eye = reshape_for_polyline(landmarks[36:42])
            outer_lip = reshape_for_polyline(landmarks[48:60])
            inner_lip = reshape_for_polyline(landmarks[60:68])

            color = (255, 255, 255)
            thickness = 3

            cv2.polylines(black_image, [jaw], False, color, thickness)
            cv2.polylines(black_image, [left_eyebrow], False, color, thickness)
            cv2.polylines(black_image, [right_eyebrow], False, color, thickness)
            cv2.polylines(black_image, [nose_bridge], False, color, thickness)
            cv2.polylines(black_image, [lower_nose], True, color, thickness)
            cv2.polylines(black_image, [left_eye], True, color, thickness)
            cv2.polylines(black_image, [right_eye], True, color, thickness)
            cv2.polylines(black_image, [outer_lip], True, color, thickness)
            cv2.polylines(black_image, [inner_lip], True, color, thickness)
        # generate prediction
        combined_image = np.concatenate([resize(black_image), resize(frame_resize)], axis=1)
        image_rgb = cv2.cvtColor(combined_image, cv2.COLOR_BGR2RGB)  # OpenCV uses BGR instead of RGB
        generated_image = sess.run(output_tensor, feed_dict={image_tensor: image_rgb})
        image_bgr = cv2.cvtColor(np.squeeze(generated_image), cv2.COLOR_RGB2BGR)
        image_normal = np.concatenate([resize(frame_resize), image_bgr], axis=1)
        image_landmark = np.concatenate([resize(black_image), image_bgr], axis=1)
        image_all = np.concatenate([resize(frame_resize), resize(black_image), image_bgr], axis=1)

        # for now, just display the result in a window
        if args.display_landmark == 0:
            cv2.imshow('frame', image_normal)
        else:
            cv2.imshow('frame', image_all)
"""
cv2.imwrite('/tmp/image%09d.jpg'%counter,image_all)
cv2.imwrite('/tmp/face/gen/image%09d.jpg'%counter,image_bgr)
cv2.imwrite('/tmp/face/face/image%09d.jpg'%counter,resize(black_image))
cv2.imwrite('/tmp/face/input/image%09d.jpg'%counter,resize(frame_resize))
if len(faces)>0:
cv2.imwrite('/tmp/face/mix/image%09d.jpg'%counter,image_bgr)
else:
cv2.imwrite('/tmp/face/mix/image%09d.jpg'%counter,resize(frame_resize))
"""
        counter += 1
        fps.update()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    fps.stop()
    print('[INFO] elapsed time (total): {:.2f}'.format(fps.elapsed()))
    print('[INFO] approx. FPS: {:.2f}'.format(fps.fps()))

    sess.close()
    cap.release()
    cv2.destroyAllWindows()
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-src', '--source', dest='video_source', type=str,
                        default=0, help='Camera device index or path to a video file.')
    parser.add_argument('--show', dest='display_landmark', type=int, default=0, choices=[0, 1],
                        help='0 shows the input next to the generated output; 1 also shows the landmark image.')
    parser.add_argument('--landmark-model', dest='face_landmark_shape_file', type=str, help='Face landmark model file.')
    parser.add_argument('--tf-model', dest='frozen_model_file', type=str, help='Frozen TensorFlow model file.')
    args = parser.parse_args()

    # Create the face detector and landmark predictor
    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor(args.face_landmark_shape_file)

    main()
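# Example invocation (a sketch, not from the original repo; the model file names
# are placeholders, assuming the standard dlib 68-point predictor and a frozen
# pix2pix model exported as in the upstream face2face-demo):
#
#   python run_video.py --source input.mp4 --show 1 \
#       --landmark-model shape_predictor_68_face_landmarks.dat \
#       --tf-model frozen_model.pb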