import struct
import math

import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.ndimage import binary_dilation  # scipy.ndimage.morphology is deprecated in recent SciPy

def get_pointcloud(color_img, depth_img, camera_intrinsics):

    # Get depth image size
    im_h = depth_img.shape[0]
    im_w = depth_img.shape[1]

    # Project depth into 3D point cloud in camera coordinates
    pix_x, pix_y = np.meshgrid(np.linspace(0, im_w-1, im_w), np.linspace(0, im_h-1, im_h))
    cam_pts_x = np.multiply(pix_x - camera_intrinsics[0][2], depth_img/camera_intrinsics[0][0])
    cam_pts_y = np.multiply(pix_y - camera_intrinsics[1][2], depth_img/camera_intrinsics[1][1])
    cam_pts_z = depth_img.copy()
    cam_pts_x.shape = (im_h*im_w, 1)
    cam_pts_y.shape = (im_h*im_w, 1)
    cam_pts_z.shape = (im_h*im_w, 1)

    # Reshape image into colors for 3D point cloud
    rgb_pts_r = color_img[:, :, 0]
    rgb_pts_g = color_img[:, :, 1]
    rgb_pts_b = color_img[:, :, 2]
    rgb_pts_r.shape = (im_h*im_w, 1)
    rgb_pts_g.shape = (im_h*im_w, 1)
    rgb_pts_b.shape = (im_h*im_w, 1)

    cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1)
    rgb_pts = np.concatenate((rgb_pts_r, rgb_pts_g, rgb_pts_b), axis=1)

    return cam_pts, rgb_pts
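
# Usage sketch (illustrative only; the intrinsics values below are assumed for the
# example, not taken from this repository): project a 480x640 RGB-D frame into
# (H*W)x3 point and color arrays.
#   K = np.array([[615.0, 0.0, 320.0], [0.0, 615.0, 240.0], [0.0, 0.0, 1.0]])
#   cam_pts, rgb_pts = get_pointcloud(color_img, depth_img, K)  # both (480*640, 3)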

def get_heightmap(color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits, heightmap_resolution):

    # Compute heightmap size
    heightmap_size = np.round(((workspace_limits[1][1] - workspace_limits[1][0])/heightmap_resolution,
                               (workspace_limits[0][1] - workspace_limits[0][0])/heightmap_resolution)).astype(int)

    # Get 3D point cloud from RGB-D images
    surface_pts, color_pts = get_pointcloud(color_img, depth_img, cam_intrinsics)

    # Transform 3D point cloud from camera coordinates to robot coordinates
    surface_pts = np.transpose(np.dot(cam_pose[0:3, 0:3], np.transpose(surface_pts)) + np.tile(cam_pose[0:3, 3:], (1, surface_pts.shape[0])))

    # Sort surface points by z value
    sort_z_ind = np.argsort(surface_pts[:, 2])
    surface_pts = surface_pts[sort_z_ind]
    color_pts = color_pts[sort_z_ind]

    # Filter out surface points outside heightmap boundaries
    heightmap_valid_ind = np.logical_and(np.logical_and(np.logical_and(np.logical_and(
        surface_pts[:, 0] >= workspace_limits[0][0],
        surface_pts[:, 0] < workspace_limits[0][1]),
        surface_pts[:, 1] >= workspace_limits[1][0]),
        surface_pts[:, 1] < workspace_limits[1][1]),
        surface_pts[:, 2] < workspace_limits[2][1])
    surface_pts = surface_pts[heightmap_valid_ind]
    color_pts = color_pts[heightmap_valid_ind]

    # Create orthographic top-down-view RGB-D heightmaps
    color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
    depth_heightmap = np.zeros(heightmap_size)
    heightmap_pix_x = np.floor((surface_pts[:, 0] - workspace_limits[0][0])/heightmap_resolution).astype(int)
    heightmap_pix_y = np.floor((surface_pts[:, 1] - workspace_limits[1][0])/heightmap_resolution).astype(int)
    color_heightmap_r[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [0]]
    color_heightmap_g[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [1]]
    color_heightmap_b[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [2]]
    color_heightmap = np.concatenate((color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2)
    depth_heightmap[heightmap_pix_y, heightmap_pix_x] = surface_pts[:, 2]

    # Subtract the workspace floor height. Cells that received no surface points still
    # equal -z_bottom after the subtraction; mark them as NaN before clamping negative
    # heights to zero, otherwise the NaN assignment can never fire
    z_bottom = workspace_limits[2][0]
    depth_heightmap = depth_heightmap - z_bottom
    depth_heightmap[depth_heightmap == -z_bottom] = np.nan
    depth_heightmap[depth_heightmap < 0] = 0

    return color_heightmap, depth_heightmap
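
# Usage sketch (illustrative; the workspace bounds and resolution below are assumed
# example values in meters, not taken from this repository):
#   workspace_limits = np.asarray([[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]])
#   color_hm, depth_hm = get_heightmap(color_img, depth_img, cam_intrinsics,
#                                      cam_pose, workspace_limits, 0.002)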

# Save a 3D point cloud to a binary .ply file
def pcwrite(xyz_pts, filename, rgb_pts=None):
    assert xyz_pts.shape[1] == 3, 'input XYZ points should be an Nx3 matrix'
    if rgb_pts is None:
        rgb_pts = np.ones(xyz_pts.shape).astype(np.uint8)*255
    assert xyz_pts.shape == rgb_pts.shape, 'input RGB colors should be an Nx3 matrix the same size as the input XYZ points'

    # Write header for .ply file
    pc_file = open(filename, 'wb')
    pc_file.write(bytearray('ply\n', 'utf8'))
    pc_file.write(bytearray('format binary_little_endian 1.0\n', 'utf8'))
    pc_file.write(bytearray(('element vertex %d\n' % xyz_pts.shape[0]), 'utf8'))
    pc_file.write(bytearray('property float x\n', 'utf8'))
    pc_file.write(bytearray('property float y\n', 'utf8'))
    pc_file.write(bytearray('property float z\n', 'utf8'))
    pc_file.write(bytearray('property uchar red\n', 'utf8'))
    pc_file.write(bytearray('property uchar green\n', 'utf8'))
    pc_file.write(bytearray('property uchar blue\n', 'utf8'))
    pc_file.write(bytearray('end_header\n', 'utf8'))

    # Write 3D points to .ply file ('<' forces little-endian to match the header;
    # numpy's tobytes() yields the single byte struct expects for each 'c')
    for i in range(xyz_pts.shape[0]):
        pc_file.write(bytearray(struct.pack("<fffccc", xyz_pts[i][0], xyz_pts[i][1], xyz_pts[i][2],
                                            rgb_pts[i][0].tobytes(), rgb_pts[i][1].tobytes(), rgb_pts[i][2].tobytes())))
    pc_file.close()
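
# Usage sketch (illustrative): write 100 random points, colored white by default.
#   pts = np.random.rand(100, 3).astype(np.float32)
#   pcwrite(pts, 'cloud.ply')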

# Visualize affordance maps in a (num_rotations // 4) x 4 grid, blended over the
# rotated input images, with the best pixel circled in red
def get_affordance_vis(grasp_affordances, input_images, num_rotations, best_pix_ind):
    vis = None
    for vis_row in range(num_rotations // 4):
        tmp_row_vis = None
        for vis_col in range(4):
            rotate_idx = vis_row*4 + vis_col
            affordance_vis = grasp_affordances[rotate_idx, :, :].copy()  # copy to avoid mutating the input
            affordance_vis[affordance_vis < 0] = 0  # assume probability
            # affordance_vis = np.divide(affordance_vis, np.max(affordance_vis))
            affordance_vis[affordance_vis > 1] = 1  # assume probability
            affordance_vis.shape = (grasp_affordances.shape[1], grasp_affordances.shape[2])
            affordance_vis = cv2.applyColorMap((affordance_vis*255).astype(np.uint8), cv2.COLORMAP_JET)
            input_image_vis = (input_images[rotate_idx, :, :, :]*255).astype(np.uint8)
            input_image_vis = cv2.resize(input_image_vis, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST)
            affordance_vis = (0.5*cv2.cvtColor(input_image_vis, cv2.COLOR_RGB2BGR) + 0.5*affordance_vis).astype(np.uint8)
            if rotate_idx == best_pix_ind[0]:
                affordance_vis = cv2.circle(affordance_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0, 0, 255), 2)
            if tmp_row_vis is None:
                tmp_row_vis = affordance_vis
            else:
                tmp_row_vis = np.concatenate((tmp_row_vis, affordance_vis), axis=1)
        if vis is None:
            vis = tmp_row_vis
        else:
            vis = np.concatenate((vis, tmp_row_vis), axis=0)
    return vis
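
# Usage sketch (illustrative shapes, assuming 16 rotations and input images rendered
# at twice the affordance-map resolution, which is what the 0.5x resize above implies):
#   vis = get_affordance_vis(grasp_affordances, input_images, 16, best_pix_ind)
#   cv2.imwrite('affordance_vis.png', vis)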

def get_difference(color_heightmap, color_space, bg_color_heightmap):

    # Append black as a background key color
    color_space = np.concatenate((color_space, np.asarray([[0.0, 0.0, 0.0]])), axis=0)
    color_space.shape = (color_space.shape[0], 1, 1, color_space.shape[1])
    color_space = np.tile(color_space, (1, color_heightmap.shape[0], color_heightmap.shape[1], 1))

    # Normalize color heightmaps
    color_heightmap = color_heightmap.astype(float)/255.0
    color_heightmap.shape = (1, color_heightmap.shape[0], color_heightmap.shape[1], color_heightmap.shape[2])
    color_heightmap = np.tile(color_heightmap, (color_space.shape[0], 1, 1, 1))
    bg_color_heightmap = bg_color_heightmap.astype(float)/255.0
    bg_color_heightmap.shape = (1, bg_color_heightmap.shape[0], bg_color_heightmap.shape[1], bg_color_heightmap.shape[2])
    bg_color_heightmap = np.tile(bg_color_heightmap, (color_space.shape[0], 1, 1, 1))

    # Compute nearest neighbor distances to key colors
    key_color_dist = np.sqrt(np.sum(np.power(color_heightmap - color_space, 2), axis=3))
    # key_color_dist_prob = F.softmax(torch.from_numpy(key_color_dist), dim=0).numpy()
    bg_key_color_dist = np.sqrt(np.sum(np.power(bg_color_heightmap - color_space, 2), axis=3))
    # bg_key_color_dist_prob = F.softmax(torch.from_numpy(bg_key_color_dist), dim=0).numpy()
    key_color_match = np.argmin(key_color_dist, axis=0)
    bg_key_color_match = np.argmin(bg_key_color_dist, axis=0)

    # Re-index background (black) matches in the two maps differently, so that
    # background pixels never count as agreeing
    key_color_match[key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 1
    bg_key_color_match[bg_key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 2

    # Fraction of non-background pixels whose nearest key color is unchanged
    return np.sum(key_color_match == bg_key_color_match).astype(float)/np.sum(bg_key_color_match < color_space.shape[0]).astype(float)
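
# Usage sketch (illustrative): color_space is presumably an Nx3 array of key RGB
# colors in [0, 1] (a black background row is appended internally); the return value
# is the fraction of non-background pixels whose nearest key color is unchanged.
#   similarity = get_difference(color_heightmap, color_space, bg_color_heightmap)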

# Get rotation matrix from euler angles (R = Rz * Ry * Rx, i.e. extrinsic X-Y-Z order)
def euler2rotm(theta):
    R_x = np.array([[1, 0, 0],
                    [0, math.cos(theta[0]), -math.sin(theta[0])],
                    [0, math.sin(theta[0]), math.cos(theta[0])]])
    R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
                    [0, 1, 0],
                    [-math.sin(theta[1]), 0, math.cos(theta[1])]])
    R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
                    [math.sin(theta[2]), math.cos(theta[2]), 0],
                    [0, 0, 1]])
    R = np.dot(R_z, np.dot(R_y, R_x))
    # R = np.dot(R_x, np.dot(R_y, R_z))
    return R

# Get rotation matrix from euler angles, composed in the reverse order (R = Rx * Ry * Rz)
def obj_euler2rotm(theta):
    R_x = np.array([[1, 0, 0],
                    [0, math.cos(theta[0]), -math.sin(theta[0])],
                    [0, math.sin(theta[0]), math.cos(theta[0])]])
    R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
                    [0, 1, 0],
                    [-math.sin(theta[1]), 0, math.cos(theta[1])]])
    R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
                    [math.sin(theta[2]), math.cos(theta[2]), 0],
                    [0, 0, 1]])
    # R = np.dot(R_z, np.dot(R_y, R_x))
    R = np.dot(np.dot(R_x, R_y), R_z)
    return R

# Checks if a matrix is a valid rotation matrix (orthonormality only; det(R) == +1 is not checked)
def isRotm(R):
    Rt = np.transpose(R)
    shouldBeIdentity = np.dot(Rt, R)
    I = np.identity(3, dtype=R.dtype)
    n = np.linalg.norm(I - shouldBeIdentity)
    return n < 1e-6

# Calculates euler angles from a rotation matrix (inverse of euler2rotm)
def rotm2euler(R):
    assert(isRotm(R))

    sy = math.sqrt(R[0, 0] * R[0, 0] + R[1, 0] * R[1, 0])
    singular = sy < 1e-6

    if not singular:
        x = math.atan2(R[2, 1], R[2, 2])
        y = math.atan2(-R[2, 0], sy)
        z = math.atan2(R[1, 0], R[0, 0])
    else:
        # Gimbal lock: pitch is +/-90 degrees, so fix z = 0 and solve for x
        x = math.atan2(-R[1, 2], R[1, 1])
        y = math.atan2(-R[2, 0], sy)
        z = 0
    return np.array([x, y, z])
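
# Round-trip sketch (illustrative; angles chosen arbitrarily within the principal range):
#   theta = np.array([0.1, -0.2, 0.3])
#   assert np.allclose(rotm2euler(euler2rotm(theta)), theta)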

# Get a 4x4 homogeneous transform for a rotation of `angle` radians about `axis`,
# optionally about a `point` other than the origin
def angle2rotm(angle, axis, point=None):
    # Copyright (c) 2006-2018, Christoph Gohlke

    sina = math.sin(angle)
    cosa = math.cos(angle)
    axis = axis/np.linalg.norm(axis)

    # Rotation matrix around unit vector (Rodrigues' formula)
    R = np.diag([cosa, cosa, cosa])
    R += np.outer(axis, axis) * (1.0 - cosa)
    axis *= sina
    R += np.array([[0.0, -axis[2], axis[1]],
                   [axis[2], 0.0, -axis[0]],
                   [-axis[1], axis[0], 0.0]])

    M = np.identity(4)
    M[:3, :3] = R
    if point is not None:
        # Rotation not around origin
        point = np.asarray(point[:3], dtype=np.float64)
        M[:3, 3] = point - np.dot(R, point)
    return M

# Convert a rotation matrix to axis-angle form, returned as [angle, x, y, z]
def rotm2angle(R):
    # From: euclideanspace.com
    epsilon = 0.01   # Margin to allow for rounding errors
    epsilon2 = 0.1   # Margin to distinguish between 0 and 180 degrees

    assert(isRotm(R))

    if (abs(R[0][1]-R[1][0]) < epsilon) and (abs(R[0][2]-R[2][0]) < epsilon) and (abs(R[1][2]-R[2][1]) < epsilon):
        # Singularity found
        # First check for the identity matrix, which must have +1 for all terms in the
        # leading diagonal and zero in the other terms
        if (abs(R[0][1]+R[1][0]) < epsilon2) and (abs(R[0][2]+R[2][0]) < epsilon2) and (abs(R[1][2]+R[2][1]) < epsilon2) and (abs(R[0][0]+R[1][1]+R[2][2]-3) < epsilon2):
            # This singularity is the identity matrix, so angle = 0
            return [0, 1, 0, 0]  # zero angle, arbitrary axis

        # Otherwise this singularity is angle = 180
        angle = np.pi
        xx = (R[0][0]+1)/2
        yy = (R[1][1]+1)/2
        zz = (R[2][2]+1)/2
        xy = (R[0][1]+R[1][0])/4
        xz = (R[0][2]+R[2][0])/4
        yz = (R[1][2]+R[2][1])/4
        if (xx > yy) and (xx > zz):  # R[0][0] is the largest diagonal term
            if xx < epsilon:
                x = 0
                y = 0.7071
                z = 0.7071
            else:
                x = np.sqrt(xx)
                y = xy/x
                z = xz/x
        elif yy > zz:  # R[1][1] is the largest diagonal term
            if yy < epsilon:
                x = 0.7071
                y = 0
                z = 0.7071
            else:
                y = np.sqrt(yy)
                x = xy/y
                z = yz/y
        else:  # R[2][2] is the largest diagonal term, so base the result on it
            if zz < epsilon:
                x = 0.7071
                y = 0.7071
                z = 0
            else:
                z = np.sqrt(zz)
                x = xz/z
                y = yz/z
        return [angle, x, y, z]  # Return 180-degree rotation

    # No singularities were found, so handle normally
    s = np.sqrt((R[2][1] - R[1][2])*(R[2][1] - R[1][2]) + (R[0][2] - R[2][0])*(R[0][2] - R[2][0]) + (R[1][0] - R[0][1])*(R[1][0] - R[0][1]))  # used to normalise
    if abs(s) < 0.001:
        # Prevent divide by zero; should not happen if the matrix is orthogonal, and
        # should be caught by the singularity test above, but kept just in case
        s = 1
    angle = np.arccos((R[0][0] + R[1][1] + R[2][2] - 1)/2)
    x = (R[2][1] - R[1][2])/s
    y = (R[0][2] - R[2][0])/s
    z = (R[1][0] - R[0][1])/s
    return [angle, x, y, z]
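
# Round-trip sketch (illustrative): a rotation of pi/3 about an arbitrary axis.
#   M = angle2rotm(np.pi/3, np.array([1.0, 2.0, 2.0]))
#   angle, x, y, z = rotm2angle(M[:3, :3])  # angle ~ pi/3, axis ~ (1/3, 2/3, 2/3)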

# Get goal mask: rasterize the goal object's contour (robot coordinates, meters)
# into a filled white region of the given mask
def get_goal_mask(obj_contour, mask, workspace_limits, heightmap_resolution):
    obj_contour[:, 0] = (obj_contour[:, 0] - workspace_limits[0][0]) / heightmap_resolution  # drop_x to pixel dimension 2 (column)
    obj_contour[:, 1] = (obj_contour[:, 1] - workspace_limits[1][0]) / heightmap_resolution  # drop_y to pixel dimension 1 (row)
    obj_contour = np.array(obj_contour).astype(int)
    cv2.polylines(mask, [obj_contour], 1, (255, 255, 255), 5)
    cv2.fillPoly(mask, [obj_contour], (255, 255, 255))
    return mask

# Get all-one mask: zero out every object's region in the given (assumed all-one) mask
def get_all_mask(obj_contours, mask, obj_number, workspace_limits, heightmap_resolution):
    for object_idx in range(obj_number):
        obj_contours[object_idx][:, 0] = (obj_contours[object_idx][:, 0] - workspace_limits[0][0]) / heightmap_resolution  # drop_x to pixel dimension 2 (column)
        obj_contours[object_idx][:, 1] = (obj_contours[object_idx][:, 1] - workspace_limits[1][0]) / heightmap_resolution  # drop_y to pixel dimension 1 (row)
        obj_contours[object_idx] = np.array(obj_contours[object_idx]).astype(int)
        cv2.polylines(mask, [obj_contours[object_idx]], 1, (0, 0, 0), 5)
        cv2.fillPoly(mask, [obj_contours[object_idx]], (0, 0, 0))
    return mask

# Get grasp qualities of all objects: mask the dense grasp predictions with each
# object's contour, and accumulate every contour into mask_all
def get_obj_grasp_predictions(grasp_predictions, obj_contours, mask_all, img, obj_number, workspace_limits, heightmap_resolution):
    obj_grasp_predictions = []
    for object_idx in range(obj_number):
        mask = np.zeros(img.shape[:2], np.uint8)
        obj_contours[object_idx][:, 0] = (obj_contours[object_idx][:, 0] - workspace_limits[0][0]) / heightmap_resolution  # drop_x to pixel dimension 2 (column)
        obj_contours[object_idx][:, 1] = (obj_contours[object_idx][:, 1] - workspace_limits[1][0]) / heightmap_resolution  # drop_y to pixel dimension 1 (row)
        obj_contours[object_idx] = np.array(obj_contours[object_idx]).astype(int)
        cv2.polylines(mask, [obj_contours[object_idx]], 1, (255, 255, 255), 3)
        cv2.fillPoly(mask, [obj_contours[object_idx]], (255, 255, 255))
        # The mask is 0/255, so rescale the masked predictions back to their original range
        obj_grasp_prediction = np.multiply(grasp_predictions, mask)
        obj_grasp_prediction = obj_grasp_prediction / 255
        obj_grasp_predictions.append(obj_grasp_prediction)
        # Draw all masks
        cv2.polylines(mask_all, [obj_contours[object_idx]], 1, (255, 255, 255), 3)
        cv2.fillPoly(mask_all, [obj_contours[object_idx]], (255, 255, 255))
    return obj_grasp_predictions, mask_all

# Get occupancy ratio: the fraction of a dilated margin around the goal region that
# is occupied by object surfaces (thresholds assume a depth heightmap in meters)
def get_occupy_ratio(goal_mask_heightmap, depth_heightmap):
    margin_mask = binary_dilation(goal_mask_heightmap / 255, iterations=10).astype(np.float32)
    margin_mask = margin_mask - goal_mask_heightmap / 255
    margin_depth = margin_mask * depth_heightmap
    margin_depth[np.isnan(margin_depth)] = 0
    margin_depth[margin_depth > 0.3] = 0   # ignore implausibly tall readings (> 30 cm)
    margin_depth[margin_depth < 0.02] = 0  # ignore heights below 2 cm (table surface, noise)
    margin_depth[margin_depth > 0] = 1
    margin_value = np.sum(margin_depth)
    occupy_ratio = margin_value / np.sum(margin_mask)
    return occupy_ratio
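
# Usage sketch (illustrative): goal_mask_heightmap is a 0/255 uint8 mask and
# depth_heightmap comes from get_heightmap (meters, NaN where no data).
#   ratio = get_occupy_ratio(goal_mask_heightmap, depth_heightmap)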

# Change detection: count heightmap pixels whose depth changed by more than 1 cm
# (differences above 30 cm are treated as noise); mutates depth_diff in place
def get_change_value(depth_diff):
    depth_diff[np.isnan(depth_diff)] = 0
    depth_diff[depth_diff > 0.3] = 0
    depth_diff[depth_diff < 0.01] = 0
    depth_diff[depth_diff > 0] = 1
    change_value = np.sum(depth_diff)
    return change_value
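
# Usage sketch (illustrative): count pixels changed by a push, e.g. on the absolute
# difference of consecutive depth heightmaps (pass a copy if the diff is needed
# afterwards, since this helper mutates its argument):
#   change = get_change_value(np.abs(depth_hm_after - depth_hm_before))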

# Cross entropy loss for 2D outputs
class CrossEntropyLoss2d(nn.Module):

    def __init__(self, weight=None, size_average=True):
        super(CrossEntropyLoss2d, self).__init__()
        # nn.NLLLoss accepts (N, C, H, W) inputs directly, replacing the deprecated
        # nn.NLLLoss2d; reduction replaces the deprecated size_average flag
        self.nll_loss = nn.NLLLoss(weight, reduction='mean' if size_average else 'sum')

    def forward(self, inputs, targets):
        return self.nll_loss(F.log_softmax(inputs, dim=1), targets)
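
# Usage sketch (illustrative shapes): per-pixel classification over 3 classes.
#   criterion = CrossEntropyLoss2d()
#   logits = torch.randn(2, 3, 8, 8)          # (batch, classes, H, W)
#   labels = torch.randint(0, 3, (2, 8, 8))   # (batch, H, W)
#   loss = criterion(logits, labels)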