-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluate.py
160 lines (127 loc) · 5.98 KB
/
evaluate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import os
import sys
from pathlib import Path
from easydict import EasyDict as edict
import argparse

# Make the repository root (two levels above this file) importable so the
# `lib` and `model` packages below resolve regardless of the working directory.
# (`__file__` is a built-in module attribute, similar to a predefined macro in C++.)
p = str(Path(__file__).absolute().parents[2])
if p not in sys.path:
    sys.path.append(p)

# Must be set before torch initializes CUDA so device ids line up.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # Match nvidia-smi and CUDA device ids

import torch
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True  # let cuDNN autotune kernels for fixed-size inputs

# Project-local imports (require the sys.path entry added above).
from lib.datasets import DAVISDataset, YouTubeVOSDataset
from model.tracker import Tracker
from model.augmenter import ImageAugmenter
from model.feature_extractor import ResnetFeatureExtractor
from model.seg_network import SegNetwork
from lib.evaluation import evaluate_dataset
class Parameters:
    """Inference-time configuration for the FAMINet/FRTM-style tracker.

    Holds optimizer schedules, data-augmentation settings and network
    hyper-parameters, and constructs the full model via :meth:`get_model`.
    """

    def __init__(self, weights, fast=False, device="cuda:0"):
        """
        :param weights: checkpoint state dict; also used to autodetect which
            backbone the checkpoint was trained with.
        :param fast: if True, run fewer discriminator optimizer iterations
            (faster inference, slightly lower accuracy).
        :param device: torch device string to run on.
        """
        self.device = device
        self.weights = weights
        self.num_aug = 5
        self.train_skipping = 8
        self.learning_rate = 0.1

        # Autodetect the feature extractor from the checkpoint: the input
        # width of the refiner's layer4 TSE reduce conv equals the backbone's
        # layer4 output channel count (1024 -> resnet101, 256 -> resnet18).
        self.in_channels = weights['refiner.TSE.layer4.reduce.0.weight'].shape[1]
        if self.in_channels == 1024:
            self.feature_extractor = "resnet101"
        elif self.in_channels == 256:
            self.feature_extractor = "resnet18"
        else:
            raise ValueError(
                "Cannot infer feature extractor from checkpoint: unexpected "
                "refiner input width %d (expected 1024 or 256)" % self.in_channels)

        # Discriminator optimization schedule: iterations for the initial
        # frame (per optimization round) and for each online update.
        if fast:
            self.init_iters = (5, 10, 10, 10)
            self.update_iters = (5,)
        else:
            self.init_iters = (5, 10, 10, 10, 10)
            self.update_iters = (10,)

        # First-frame augmentation settings for foreground and background.
        self.aug_params = edict(
            num_aug=self.num_aug,
            min_px_count=1,
            fg_aug_params=edict(
                rotation=[5, -5, 10, -10, 20, -20, 30, -30, 45, -45],
                fliplr=[False, False, False, False, True],
                scale=[0.5, 0.7, 1.0, 1.5, 2.0, 2.5],
                skew=[(0.0, 0.0), (0.0, 0.0), (0.1, 0.1)],
                blur_size=[0.0, 0.0, 0.0, 2.0],
                blur_angle=[0, 45, 90, 135],
            ),
            bg_aug_params=edict(
                tcenter=[(0.5, 0.5)],
                rotation=[0, 0, 0],
                fliplr=[False],
                scale=[1.0, 1.0, 1.2],
                skew=[(0.0, 0.0)],
                blur_size=[0.0, 0.0, 1.0, 2.0, 5.0],
                blur_angle=[0, 45, 90, 135],
            ),
        )

        # Target-model (discriminator) parameters; in_channels is refined in
        # get_model() from the actual extractor output widths.
        self.disc_params = edict(
            layer="layer4", in_channels=self.in_channels, c_channels=96, out_channels=1,
            init_iters=self.init_iters, update_iters=self.update_iters,
            memory_size=80, train_skipping=self.train_skipping, learning_rate=self.learning_rate,
            pixel_weighting=dict(method='hinge', tf=0.1),
            filter_reg=(1e-4, 1e-2), precond=(1e-4, 1e-2), precond_lr=0.1, CG_forgetting_rate=750,
            device=self.device, update_filters=True,
        )

        # Refinement (segmentation) network parameters.
        self.refnet_params = edict(
            layers=("layer5", "layer4", "layer3", "layer2"),
            nchannels=64, use_batch_norm=True,
        )

    def get_model(self):
        """Construct the tracker, load the checkpoint weights and move it to the target device."""
        augmenter = ImageAugmenter(self.aug_params)
        extractor = ResnetFeatureExtractor(self.feature_extractor).to(self.device)
        # Separate (fixed resnet18) backbone for flow features.
        extractor_flow = ResnetFeatureExtractor('resnet18').to(self.device)
        self.disc_params.in_channels = extractor.get_out_channels()[self.disc_params.layer]
        p = self.refnet_params
        refinement_layers_channels = {L: nch for L, nch in extractor.get_out_channels().items() if L in p.layers}
        refiner = SegNetwork(1, p.nchannels, refinement_layers_channels, p.use_batch_norm)
        mdl = Tracker(augmenter, extractor, extractor_flow, self.disc_params, refiner, self.device)
        mdl.load_state_dict(self.weights, strict=True)
        mdl.to(self.device)
        return mdl
if __name__ == '__main__':

    # Edit these paths for your local setup
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    paths = dict(
        models=Path(__file__).parent,       # The .pth weight files should be under this directory
        davis="./data/DAVIS",               # DAVIS dataset root
        yt2018="./data/YouTubeVOS/2018",    # YouTubeVOS 2018 root
        output="./results",                 # Output path
    )

    # Dataset name -> (dataset class, constructor kwargs).
    datasets = dict(dv2016val=(DAVISDataset, dict(path=paths['davis'], year='2016', split='val')),
                    dv2017val=(DAVISDataset, dict(path=paths['davis'], year='2017', split='val')),
                    yt2018val=(YouTubeVOSDataset, dict(path=paths['yt2018'], year='2018', split='valid_all_frames')),
                    )

    args_parser = argparse.ArgumentParser(description='Evaluate FRTM on a validation dataset')
    args_parser.add_argument('--model', type=str, default='ckpts/FAMINet-2F.pth', help='name of model weights file', required=False)
    args_parser.add_argument('--dset', type=str, default='dv2017val', choices=datasets.keys(), help='Dataset name.')
    args_parser.add_argument('--dev', type=str, default="cuda:0", help='Target device to run on.')
    args_parser.add_argument('--fast', action='store_true', default=False, help='Whether to use fewer optimizer steps.')
    args = args_parser.parse_args()

    # Setup
    model_path = Path(paths['models']).expanduser().resolve() / args.model
    if not model_path.exists():
        print("Model file '%s' not found." % model_path)
        # sys.exit instead of quit(): quit() is a site-module convenience and
        # is not guaranteed to exist (e.g. under `python -S`).
        sys.exit(1)
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints from trusted sources.
    weights = torch.load(model_path, map_location='cpu')['model']

    dset = datasets[args.dset]
    dset = dset[0](**dset[1])  # instantiate the selected dataset

    # Experiment name encodes dataset, checkpoint and fast-mode flag.
    ex_name = dset.name + "-" + model_path.stem + ("_fast" if args.fast else "")
    out_path = Path(paths['output']).expanduser().resolve() / ex_name
    out_path.mkdir(exist_ok=True, parents=True)

    # Run the tracker and evaluate the results
    p = Parameters(weights, device=args.dev)
    tracker = p.get_model()
    tracker.run_dataset(dset, out_path, speedrun=args.dset == 'dv2016val')

    # Score against every annotated frame, not just the tracked subset.
    dset.all_annotations = True
    print()
    print("Computing J-scores")
    evaluate_dataset(dset, out_path, measure='J')
    print()
    print("Computing F-scores")
    evaluate_dataset(dset, out_path, measure='F')