-
Notifications
You must be signed in to change notification settings - Fork 34
/
bodypose3d.py
162 lines (129 loc) · 6.11 KB
/
bodypose3d.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import cv2 as cv
import mediapipe as mp
import numpy as np
import sys
from utils import DLT, get_projection_matrix, write_keypoints_to_disk
# Short aliases for the MediaPipe solution modules used in this file.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# Requested camera resolution as [height, width]. run_mp() applies it to both
# capture devices and uses it to centre-crop frames to a height x height square.
frame_shape = [720, 1280]
# MediaPipe Pose landmark indices that run_mp() keeps; add indices here if you
# need more keypoints. Per the MediaPipe Pose landmark numbering these are the
# wrists, elbows, shoulders, hips, knees and ankles.
# NOTE: run_mp() stores the kept keypoints in ascending landmark-index order
# (the order MediaPipe reports them), not in the order they are listed here.
pose_keypoints = [16, 14, 12, 11, 13, 15, 24, 23, 25, 26, 27, 28]
def run_mp(input_stream1, input_stream2, P0, P1):
    """Detect body keypoints in two synchronized streams and triangulate them.

    Frames are read in lockstep from both streams, centre-cropped to a square,
    passed through MediaPipe Pose, and the keypoints selected by
    ``pose_keypoints`` are combined with ``DLT`` into one 3D point each.
    Both annotated frames are displayed; the loop ends when either stream
    runs out of frames or ESC is pressed.

    Args:
        input_stream1: Source for camera 0, passed to ``cv.VideoCapture``
            (camera ID or video file path).
        input_stream2: Source for camera 1, passed to ``cv.VideoCapture``.
        P0: Projection matrix of camera 0, forwarded to ``DLT``.
        P1: Projection matrix of camera 1, forwarded to ``DLT``.

    Returns:
        Tuple ``(kpts_cam0, kpts_cam1, kpts_3d)`` of numpy arrays with one
        entry per processed frame. The 2D arrays hold ``[x, y]`` pixel
        coordinates per keypoint and ``kpts_3d`` holds one 3-vector per
        keypoint. Keypoints of frames without a detection are ``[-1, -1]``
        (2D) and ``[-1, -1, -1]`` (3D).
    """
    # Input video streams.
    cap0 = cv.VideoCapture(input_stream1)
    cap1 = cv.VideoCapture(input_stream2)
    caps = [cap0, cap1]

    pose0 = None
    pose1 = None

    # Per-frame results for each camera. These grow with every frame, so an
    # unbounded run will eventually exhaust memory.
    kpts_cam0 = []
    kpts_cam1 = []
    kpts_3d = []

    try:
        # Request 1280x720 when using a webcam; anything bigger tends to lag.
        for cap in caps:
            cap.set(cv.CAP_PROP_FRAME_WIDTH, frame_shape[1])
            cap.set(cv.CAP_PROP_FRAME_HEIGHT, frame_shape[0])

        # One detector per camera so their tracking states stay independent.
        pose0 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
        pose1 = mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)

        while True:
            # Read one frame from each stream; stop as soon as either ends.
            ret0, frame0 = cap0.read()
            ret1, frame1 = cap1.read()
            if not ret0 or not ret1:
                break

            # Crop to a square. The camera calibration parameters are tied to
            # this resolution; if you change it, update the intrinsics too.
            frame0 = _center_crop_to_square(frame0)
            frame1 = _center_crop_to_square(frame1)

            # MediaPipe expects RGB input, OpenCV delivers BGR.
            frame0 = cv.cvtColor(frame0, cv.COLOR_BGR2RGB)
            frame1 = cv.cvtColor(frame1, cv.COLOR_BGR2RGB)

            # Marking the images read-only lets MediaPipe take them by
            # reference instead of copying.
            frame0.flags.writeable = False
            frame1.flags.writeable = False
            results0 = pose0.process(frame0)
            results1 = pose1.process(frame1)

            # Undo the changes so the frames can be drawn on and displayed.
            frame0.flags.writeable = True
            frame1.flags.writeable = True
            frame0 = cv.cvtColor(frame0, cv.COLOR_RGB2BGR)
            frame1 = cv.cvtColor(frame1, cv.COLOR_RGB2BGR)

            # Extract the selected keypoints (and draw them on the frames).
            frame0_keypoints = _extract_keypoints(results0, frame0)
            kpts_cam0.append(frame0_keypoints)
            frame1_keypoints = _extract_keypoints(results1, frame1)
            kpts_cam1.append(frame1_keypoints)

            # Calculate the 3D position of every keypoint seen by both cameras.
            # NOTE: x == -1 doubles as the "not detected" marker, so a real
            # detection that rounds to x == -1 is also treated as missing.
            frame_p3ds = []
            for uv1, uv2 in zip(frame0_keypoints, frame1_keypoints):
                if uv1[0] == -1 or uv2[0] == -1:
                    _p3d = [-1, -1, -1]
                else:
                    _p3d = DLT(P0, P1, uv1, uv2)
                frame_p3ds.append(_p3d)

            # This holds the 3D position of each keypoint in the current
            # frame; for a real-time application, this is what you want.
            # The row count follows pose_keypoints, so adding keypoints
            # there needs no change here.
            frame_p3ds = np.array(frame_p3ds).reshape((len(pose_keypoints), 3))
            kpts_3d.append(frame_p3ds)

            # Uncomment these if you want to see the full keypoint detections:
            # mp_drawing.draw_landmarks(frame0, results0.pose_landmarks, mp_pose.POSE_CONNECTIONS,
            #     landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
            # mp_drawing.draw_landmarks(frame1, results1.pose_landmarks, mp_pose.POSE_CONNECTIONS,
            #     landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())

            cv.imshow('cam1', frame1)
            cv.imshow('cam0', frame0)

            k = cv.waitKey(1)
            if k & 0xFF == 27:  # 27 is the ESC key.
                break
    finally:
        # Release windows, cameras and detectors even if processing failed.
        cv.destroyAllWindows()
        for cap in caps:
            cap.release()
        for pose in (pose0, pose1):
            if pose is not None:
                pose.close()

    return np.array(kpts_cam0), np.array(kpts_cam1), np.array(kpts_3d)


def _center_crop_to_square(frame):
    """Crop ``frame`` horizontally to frame_shape[0] columns around the centre."""
    side = frame_shape[0]
    if frame.shape[1] == side:
        return frame
    centre = frame_shape[1] // 2
    return frame[:, centre - side // 2:centre + side // 2]


def _extract_keypoints(results, frame):
    """Return [x, y] pixel keypoints selected by pose_keypoints, drawn on ``frame``."""
    if not results.pose_landmarks:
        # No detection: fill the frame data with [-1, -1] for each keypoint.
        # Build a fresh list per entry so the placeholders never alias.
        return [[-1, -1] for _ in pose_keypoints]

    height, width = frame.shape[:2]
    keypoints = []
    for i, landmark in enumerate(results.pose_landmarks.landmark):
        # Only save keypoints that are listed in pose_keypoints.
        if i not in pose_keypoints:
            continue
        # Landmarks are normalized to the image size; convert to pixels.
        pxl_x = int(round(landmark.x * width))
        pxl_y = int(round(landmark.y * height))
        # Mark the detection in the displayed frame.
        cv.circle(frame, (pxl_x, pxl_y), 3, (0, 0, 255), -1)
        keypoints.append([pxl_x, pxl_y])
    return keypoints
if __name__ == '__main__':
    # Load the sample videos if no input sources are given.
    input_stream1 = 'media/cam0_test.mp4'
    input_stream2 = 'media/cam1_test.mp4'

    # Two command line arguments select the input sources. An integer is
    # used as a camera ID; anything else is passed through unchanged so a
    # video file path can be given as well.
    if len(sys.argv) == 3:
        streams = []
        for arg in sys.argv[1:3]:
            try:
                streams.append(int(arg))
            except ValueError:
                streams.append(arg)
        input_stream1, input_stream2 = streams

    # Get the projection matrices of camera 0 and camera 1.
    P0 = get_projection_matrix(0)
    P1 = get_projection_matrix(1)

    kpts_cam0, kpts_cam1, kpts_3d = run_mp(input_stream1, input_stream2, P0, P1)

    # Write the keypoint files; the relative names put them in the current
    # working folder.
    write_keypoints_to_disk('kpts_cam0.dat', kpts_cam0)
    write_keypoints_to_disk('kpts_cam1.dat', kpts_cam1)
    write_keypoints_to_disk('kpts_3d.dat', kpts_3d)