Merge pull request #4 from anDoer/dev

Merge baselines into main branch.
anDoer · Aug 13, 2022 · c35fea9 · c35fea9
2 parents abf182c + 649df5b
commit c35fea9
Show file tree

Hide file tree

Showing 40 changed files with 5,947 additions and 0 deletions.
diff --git a/baselines/common/utils/data_utils.py b/baselines/common/utils/data_utils.py
@@ -0,0 +1,32 @@
def get_posetrack_eval_dummy():
    """Build an empty result skeleton for PoseTrack evaluation.

    Returns:
        dict: A new dictionary on every call with empty ``'images'`` and
        ``'annotations'`` lists and a ``'categories'`` list that holds
        exactly one entry describing the 17-keypoint person structure.
    """
    # Order matters: consumers index keypoints by position in this list.
    keypoint_names = [
        "nose",
        "head_bottom",  # listed where the original comment notes "left_eye"
        "head_top",  # listed where the original comment notes "right_eye"
        "left_ear",  # will be left zeroed out
        "right_ear",  # will be left zeroed out
    ]
    # The remaining joints come in left/right pairs, upper body first.
    for joint in ("shoulder", "elbow", "wrist", "hip", "knee", "ankle"):
        keypoint_names.append("left_" + joint)
        keypoint_names.append("right_" + joint)

    person_category = {
        'name': 'person',
        'keypoints': keypoint_names,
    }

    # categories must be a list containing precisely one item,
    # describing the person structure
    return {
        'images': [],
        'annotations': [],
        'categories': [person_category],
    }
+
diff --git a/baselines/common/utils/generate_bbox_files_from_tracktor_detections.py b/baselines/common/utils/generate_bbox_files_from_tracktor_detections.py
@@ -0,0 +1,98 @@
+import os
+import json
+import numpy as np
+import argparse 
+
+from pycocotools.coco import COCO
+from tqdm import tqdm
+
def _str2bool(value):
    """Convert a command-line token such as ``'false'`` or ``'1'`` to a bool."""
    if isinstance(value, bool):
        return value

    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False

    raise argparse.ArgumentTypeError(
        f"expected a boolean value (true/false), got {value!r}"
    )


def build_args():
    """Parse the command-line options of the bbox-file generator.

    Options:
        --dataset_path   (required) root folder of the dataset.
        --anno_path      annotation folder relative to ``dataset_path``
                         (default ``posetrack_data/val``).
        --bbox_file      (required) JSON file with the detections.
        --save_path      (required) output folder.
        --bbox_thresh    minimum detection score to keep (default 0.5).
        --keep_track_id  whether detections carry a leading track id that
                         should be kept (default False).

    Returns:
        argparse.Namespace: The parsed arguments (read from ``sys.argv``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, required=True)
    parser.add_argument('--anno_path', type=str, default='posetrack_data/val')
    parser.add_argument('--bbox_file', type=str, required=True)
    parser.add_argument('--save_path', type=str, required=True)
    parser.add_argument('--bbox_thresh', type=float, default=0.5)
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True, so the value is parsed explicitly. ``nargs='?'`` with
    # ``const=True`` additionally allows the bare ``--keep_track_id`` flag.
    parser.add_argument(
        '--keep_track_id',
        type=_str2bool,
        nargs='?',
        const=True,
        default=False,
        help='For MOT approaches, we want to keep the track id',
    )

    return parser.parse_args()
+
def const():
    """Bind a hard-coded detections path to a local name.

    The value is neither returned nor stored anywhere, so calling this
    function has no observable effect; it returns ``None``.
    """
    detections_json = '/media/work2/doering/2020/code/tracking_wo_bnw/output/faster_rcnn_fpn/new_dataset/faster_rcnn_fpn_training_mot_posetrack_PRETRAINED_test/det/detections.json'
    del detections_json  # never used beyond this point
    return None
+
def main():
    """Convert per-sequence detections into a flat list of bbox records.

    Reads the detections JSON given by ``--bbox_file``, walks every
    annotation file found in ``<dataset_path>/<anno_path>``, keeps the
    detections whose score exceeds ``--bbox_thresh``, converts each box from
    ``x1, y1, x2, y2`` to ``x1, y1, w, h`` and writes all records to
    ``<save_path>/PoseTrack21_tracktor_bb_thres_<thresh>.json``.

    The detections file is indexed as
    ``detections[sequence_name][str(frame_number)][0]`` with 1-based frame
    numbers; every box row ends with its score and, when
    ``--keep_track_id`` is set, starts with the track id.
    """
    args = build_args()
    assert os.path.exists(args.dataset_path), \
        f"dataset path does not exist: {args.dataset_path}"
    assert os.path.exists(args.bbox_file), \
        f"bbox file does not exist: {args.bbox_file}"

    with open(args.bbox_file, 'r') as f:
        sequence_bbs = json.load(f)

    rec = []

    annotation_path = os.path.join(args.dataset_path, args.anno_path)
    for file in tqdm(os.listdir(annotation_path)):
        seq_name = os.path.splitext(file)[0]

        api = COCO(os.path.join(annotation_path, file))
        imgs = api.loadImgs(api.getImgIds())

        for img_idx, img in enumerate(imgs):
            # Frames are keyed by 1-based index in the detections file.
            estimated_bbs = np.array(sequence_bbs[seq_name][str(img_idx + 1)])[0]
            if len(estimated_bbs) == 0:
                continue

            # The detection score is stored in the last column.
            estimated_bb_scores = np.asarray(estimated_bbs[:, -1])
            keep = estimated_bb_scores > args.bbox_thresh
            estimated_bbs = estimated_bbs[keep]
            estimated_bb_scores = estimated_bb_scores[keep]

            for bb_idx, bb in enumerate(estimated_bbs):
                # faster rcnn provides boxes with x1, y1, x2, y2
                # posetrack gt boxes provided as x1, y1, w, h

                track_id = -1
                if args.keep_track_id:
                    # bb contains track id at first position!
                    track_id = int(bb[0])
                    bb = bb[1:]

                # Turn the bottom-right corner into width / height.
                bb[2] -= bb[0]
                bb[3] -= bb[1]

                # Skip boxes whose larger side is exactly zero.
                if np.maximum(bb[2], bb[3]) == 0:
                    continue

                # Clamp the top-left corner to the image origin.
                if bb[0] < 0:
                    bb[0] = 0

                if bb[1] < 0:
                    bb[1] = 0

                if bb[2] < 0 or bb[3] < 0:
                    continue

                rec.append({
                    'image_location': img['file_name'],
                    # The original referenced an undefined name here; the
                    # dataset root passed on the command line is used.
                    'image': os.path.join(args.dataset_path, 'posetrack_data', img['file_name']),
                    'keypoints': [],
                    # Note: the trailing score column is part of this list.
                    'bbox': bb.tolist(),
                    # Plain float so that json.dump never sees a NumPy scalar.
                    'bbox_score': float(estimated_bb_scores[bb_idx]),
                    'img': img['id'],
                    'track_id': track_id,
                    'file_id': file,
                    'vid_id': img['vid_id'],
                    'seq_name': img['file_name'].split('/')[-2],
                    'frame_idx': img_idx,
                    'tot_frames': len(imgs)
                })

    os.makedirs(args.save_path, exist_ok=True)

    with open(f"{args.save_path}/PoseTrack21_tracktor_bb_thres_{args.bbox_thresh}.json", 'w') as write_file:
        json.dump(rec, write_file)
diff --git a/baselines/common/utils/inference_utils.py b/baselines/common/utils/inference_utils.py
@@ -0,0 +1,88 @@
+import numpy as np
+import torch
+from torch import nn
+from torch.autograd import Variable
+
+
def get_preds_for_pose(prs, mat, sr, output_size, joint_scores=False):
    """Decode 17 keypoints from heatmaps and map them through ``mat``.

    For every joint the heatmap is reduced to its local maxima (3x3
    max-pooling), the strongest peak is located, scaled by the heatmap
    stride of 4, refined with the per-pixel offsets in ``sr`` and finally
    transformed with ``mat``.

    Args:
        prs: CPU tensor with the joint heatmaps. It must hold
            ``17 * (output_size[1] // 4) * (output_size[0] // 4)`` elements
            (assumed layout ``(17, H/4, W/4)`` -- TODO confirm with caller).
        mat: Matrix applied to the homogeneous points ``(x, y, 1)``; rows 0
            and 1 of the product are used as output coordinates.
        sr: Offset maps indexed as ``sr[j][y][x]``; entries ``0..16`` are the
            x offsets and entries ``17..33`` the y offsets of joint ``j``.
        output_size: ``(width, height)`` of the network input in pixels.
        joint_scores: If True return one score per joint, otherwise a single
            averaged score.

    Returns:
        tuple: ``(keypoints, score)`` where ``keypoints`` is a flat list
        ``[x0, y0, 1, x1, y1, 1, ...]`` of length 51. ``score`` is either a
        list with each joint's peak value divided by 2 or the mean peak
        value divided by 2.
    """
    num_joints = 17
    stride = 4

    # Pool on the GPU when one is present, otherwise stay on the CPU so the
    # function also works on CPU-only machines; results are identical.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    pool = nn.MaxPool2d(3, 1, 1)

    xoff = sr[0:num_joints]
    yoff = sr[num_joints:2 * num_joints]

    # Keep only local maxima: positions that equal their 3x3 neighbourhood max.
    pooled = pool(prs.to(device)).detach().cpu()
    prs = prs * torch.eq(pooled, prs).float()

    res_w = output_size[0] // stride
    res_h = output_size[1] // stride

    flat = prs.view(num_joints, res_h * res_w)
    peak_vals, peak_idx = flat.topk(1, dim=1)
    peak_vals = peak_vals[:, 0]

    points = torch.zeros(num_joints, 2)
    for j, flat_index in enumerate(peak_idx[:, 0].tolist()):
        # Exact integer arithmetic for the row / column of the peak.
        y, x = divmod(flat_index, res_w)
        points[j][0] = x * stride + xoff[j][y][x].item()
        points[j][1] = y * stride + yoff[j][y][x].item()

    # Apply ``mat`` to all homogeneous points at once: (17, 3) @ mat^T.
    xy = points.numpy()
    homogeneous = np.concatenate(
        [xy, np.ones((num_joints, 1), dtype=xy.dtype)], axis=1
    )
    transformed = np.matmul(homogeneous, np.asarray(mat).T)

    keypoints = []
    for px, py in transformed[:, :2]:
        # The third entry per joint is a constant flag of 1.
        keypoints.extend((float(px), float(py), 1))

    if joint_scores:
        return keypoints, [value / 2.0 for value in peak_vals.tolist()]

    return keypoints, peak_vals.mean().item() / 2.0
+
+
def get_transform(param, crop_pos, output_size, scales):
    """Compose the 3x3 homogeneous transform for a crop.

    The result is ``shift_to_center @ linear @ shift_to_upper_left``:
    points are first translated by ``-crop_pos + (tx, ty)``, then passed
    through the scaled rotation / flip matrix and finally moved so that the
    origin lands on the centre of the output.

    Args:
        param: Mapping with the keys ``'scale_x'``, ``'scale_y'``, ``'rot'``
            (radians, fed to ``np.cos`` / ``np.sin``), ``'flip'``, ``'tx'``
            and ``'ty'``.
        crop_pos: ``(x, y)`` position subtracted before the linear part.
        output_size: ``(width, height)``; half of each is added at the end.
        scales: Two extra scale factors; ``scales[0]`` multiplies the cosine
            term and ``scales[1]`` the sine term.

    Returns:
        numpy.ndarray: The 3x3 transformation matrix.
    """
    angle = param['rot']
    cos_term = scales[0] * param['scale_x'] * np.cos(angle)
    sin_term = scales[1] * param['scale_y'] * np.sin(angle)

    # Flipping only negates the top-left entry of the linear part.
    top_left = -cos_term if param['flip'] else cos_term

    linear = np.array(
        [
            [top_left, -sin_term, 0.0],
            [sin_term, cos_term, 0.0],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )

    to_upper_left = np.array(
        [
            [1.0, 0.0, -crop_pos[0] + param['tx']],
            [0.0, 1.0, -crop_pos[1] + param['ty']],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )

    to_center = np.array(
        [
            [1.0, 0.0, output_size[0] / 2],
            [0.0, 1.0, output_size[1] / 2],
            [0.0, 0.0, 1.0],
        ],
        dtype=np.float64,
    )

    return np.matmul(to_center, np.matmul(linear, to_upper_left))