-
Notifications
You must be signed in to change notification settings - Fork 1
/
tsn_dataset.py
103 lines (86 loc) · 3.55 KB
/
tsn_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import torch
import torch.utils.data as data
from PIL import Image
import os
import os.path
import numpy as np
from numpy.random import randint
class VideoRecord(object):
    """Read-only view over one parsed row of the video list file.

    The row is an indexable sequence whose first three entries are read as
    the frame directory, the frame count and the class label.
    """

    def __init__(self, row):
        # Keep the raw row; fields are converted lazily by the properties.
        self._data = row

    @property
    def path(self):
        """Entry 0 of the row, returned as stored."""
        frame_dir = self._data[0]
        return frame_dir

    @property
    def num_frames(self):
        """Entry 1 of the row converted to ``int``."""
        frame_count = self._data[1]
        return int(frame_count)

    @property
    def label(self):
        """Entry 2 of the row converted to ``int``."""
        class_id = self._data[2]
        return int(class_id)
class TSNDataSet(data.Dataset):
    """Dataset that samples fixed-length frame clips from pre-extracted videos.

    Every non-blank line of ``list_file`` describes one video as
    space-separated fields that are wrapped in a ``VideoRecord``
    (frame directory, number of frames, label).  For each video,
    ``num_segments`` clips of ``new_length`` consecutive frames are loaded
    (frame numbers are 1-based) and concatenated into one flat list which is
    then passed through ``transform`` when one is given.

    :param root_path: stored on the instance; frame paths in this class are
        built from the record's own path only
    :param list_file: path of the text file listing the videos
    :param num_segments: number of clips sampled per video
    :param new_length: number of consecutive frames per clip
    :param modality: only ``'RGB'`` can be loaded by ``_load_image``
    :param image_tmpl: ``str.format`` template producing a frame file name
        from the 1-based frame number
    :param transform: optional callable applied to the list of loaded images
    :param force_grayscale: stored for interface compatibility; it is not
        applied when loading frames
    :param random_shift: when not in test mode, use random sampling
        (``True``) or deterministic centred sampling (``False``)
    :param test_mode: when ``True`` always use the deterministic test sampling
    """

    def __init__(self, root_path, list_file,
                 num_segments=3, new_length=16, modality='RGB',
                 image_tmpl='frame{:06d}.jpg', transform=None,
                 force_grayscale=False, random_shift=True, test_mode=False):
        self.root_path = root_path
        self.list_file = list_file
        self.num_segments = num_segments
        self.new_length = new_length
        self.modality = modality
        self.image_tmpl = image_tmpl
        self.transform = transform
        self.force_grayscale = force_grayscale
        self.random_shift = random_shift
        self.test_mode = test_mode

        self._parse_list()

    def _load_image(self, directory, idx):
        """Load frame number ``idx`` from ``directory`` as an RGB PIL image.

        :param directory: directory holding the extracted frames
        :param idx: frame number substituted into ``image_tmpl``
        :return: PIL image converted to ``'RGB'``
        :raises ValueError: if ``modality`` is not ``'RGB'`` (previously this
            silently returned ``None``)
        """
        if self.modality != 'RGB':
            raise ValueError(
                'Unsupported modality {!r}; only \'RGB\' is implemented'.format(self.modality))
        frame_path = os.path.join(directory, self.image_tmpl.format(idx))
        return Image.open(frame_path).convert('RGB')

    def _parse_list(self):
        """Read ``list_file`` into ``self.video_list``, skipping blank lines."""
        # The context manager closes the file handle deterministically.
        with open(self.list_file) as list_fh:
            self.video_list = [
                VideoRecord(line.strip().split(' '))
                for line in list_fh
                if line.strip()
            ]

    def _sample_indices(self, record):
        """Randomly pick the 1-based start frame of every clip.

        :param record: VideoRecord
        :return: integer array of ``num_segments`` start frames
        """
        # Number of start positions that leave room for a full clip.
        valid_starts = record.num_frames - self.new_length + 1
        average_duration = valid_starts // self.num_segments
        if average_duration > 0:
            # One random start inside each of num_segments equal chunks.
            offsets = np.arange(self.num_segments) * average_duration \
                + randint(average_duration, size=self.num_segments)
        elif record.num_frames > self.num_segments and valid_starts > 0:
            # Fewer valid starts than segments: draw sorted starts with
            # repetition.  The valid_starts guard avoids calling randint with
            # a non-positive bound on videos shorter than new_length.
            offsets = np.sort(randint(valid_starts, size=self.num_segments))
        else:
            # Video too short: every clip starts at the first frame.
            offsets = np.zeros((self.num_segments,), dtype=int)
        return offsets + 1

    def _get_val_indices(self, record):
        """Deterministically pick centred 1-based start frames.

        :param record: VideoRecord
        :return: integer array of ``num_segments`` start frames
        """
        if record.num_frames > self.num_segments + self.new_length - 1:
            tick = (record.num_frames - self.new_length + 1) / float(self.num_segments)
            offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])
        else:
            offsets = np.zeros((self.num_segments,), dtype=int)
        return offsets + 1

    def _get_test_indices(self, record):
        """Deterministically pick centred 1-based start frames for testing.

        :param record: VideoRecord
        :return: integer array of ``num_segments`` start frames
        """
        valid_starts = record.num_frames - self.new_length + 1
        if valid_starts > 0:
            tick = valid_starts / float(self.num_segments)
            offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)])
        else:
            # A video shorter than one clip would otherwise yield zero or
            # negative frame numbers; start every clip at the first frame.
            offsets = np.zeros((self.num_segments,), dtype=int)
        return offsets + 1

    def __getitem__(self, index):
        """Return ``(frames, label)`` for the video at position ``index``."""
        record = self.video_list[index]

        if self.test_mode:
            segment_indices = self._get_test_indices(record)
        elif self.random_shift:
            segment_indices = self._sample_indices(record)
        else:
            segment_indices = self._get_val_indices(record)

        return self.get(record, segment_indices)

    def get(self, record, indices):
        """Load ``new_length`` frames starting at each index in ``indices``.

        :param record: VideoRecord
        :param indices: iterable of 1-based clip start frames
        :return: tuple ``(frames, label)`` where ``frames`` is the flat list
            of loaded images, passed through ``transform`` if one is set
        """
        video = []
        for seg_ind in indices:
            p = int(seg_ind)
            for _ in range(self.new_length):
                video.append(self._load_image(record.path, p))
                # Stop advancing at the last frame so short videos repeat it.
                if p < record.num_frames:
                    p += 1

        # transform defaults to None; only call it when one was supplied.
        if self.transform is not None:
            video = self.transform(video)
        return video, record.label

    def __len__(self):
        """Return the number of videos listed in ``list_file``."""
        return len(self.video_list)