-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathprocess_dataset.py
59 lines (53 loc) · 2.2 KB
/
process_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# This code has been acquired from the TRN-pytorch repository
# 'https://github.com/metalbubble/TRN-pytorch/blob/master/process_dataset.py'
# which is prepared by Bolei Zhou
#
# Processing the raw dataset of Jester
#
# generate the meta files:
# category.txt: the list of categories.
# train_videofolder.txt: each row contains [videoname num_frames classIDX]
# val_videofolder.txt: same as above
#
# Created by Bolei Zhou, Dec.2 2017
import os
import pdb
# Dataset locations. The label/split CSV files are read from, and the generated
# meta files are written to, ROOT_DATASET_JESTER; per-video folders are listed
# under ROOT_DATASET.
ROOT_DATASET_JESTER = '/usr/home/sut/datasets/jester/'
ROOT_DATASET = '/usr/home/sut/datasets/jester/rgb'
# The something-something paths are not referenced by the code below.
ROOT_DATASET_STH = '/usr/home/sut/datasets/something-something-v2/'
ROOT_DATASET_STH_RGB = '/usr/home/sut/datasets/something-something-v2/extracted-frames'
dataset_name = 'jester-v1'


def _read_nonblank_lines(path):
    """Return the lines of *path* with trailing whitespace stripped.

    Blank lines are skipped: a stray empty line in the labels file would
    otherwise become an empty category (shifting every class index), and in
    the split files it would fail when the label column is looked up.
    """
    with open(path) as f:
        stripped = (line.rstrip() for line in f)
        return [line for line in stripped if line]


# Build the sorted category list and write it out, one category per line.
categories = sorted(_read_nonblank_lines(
    os.path.join(ROOT_DATASET_JESTER, '%s-labels.csv' % dataset_name)))
with open(os.path.join(ROOT_DATASET_JESTER, 'category.txt'), 'w') as f:
    f.write('\n'.join(categories))

# Map each category name to its position in the sorted list (the class index).
dict_categories = {category: i for i, category in enumerate(categories)}

files_input = [
    os.path.join(ROOT_DATASET_JESTER, '%s-validation.csv' % dataset_name),
    os.path.join(ROOT_DATASET_JESTER, '%s-train.csv' % dataset_name),
]
files_output = ['val_videofolder.txt', 'train_videofolder.txt']

for filename_input, filename_output in zip(files_input, files_output):
    # Each row is "<video folder>;<category name>".
    rows = [line.split(';') for line in _read_nonblank_lines(filename_input)]
    total = len(rows)

    output = []
    for i, items in enumerate(rows):
        curFolder = items[0]
        # Raises KeyError if the label is not present in the labels file.
        curIDX = dict_categories[items[1]]
        # Count the entries in the video folder; every entry is counted as a
        # frame, so the folder is assumed to contain only frame files.
        dir_files = os.listdir(os.path.join(ROOT_DATASET, curFolder))
        output.append('%s %d %d' % (curFolder, len(dir_files), curIDX))
        # Progress indicator (0-based index of the row just processed).
        print('%d/%d' % (i, total))

    # One "<video folder> <num frames> <class index>" row per video.
    with open(os.path.join(ROOT_DATASET_JESTER, filename_output), 'w') as f:
        f.write('\n'.join(output))