# Ax-based hyperparameter-search template (ICLR 2020 experiments).
# NOTE(review): the ___dataset___ / ___model___ / ___models___ /
# ___train_type___ / ___loss___ tokens look like placeholders substituted by a
# generation script before this config is consumed — confirm against that
# script. The inline "#train_type: ..." markers below appear to be machine-read
# filters and are kept verbatim.
job.type: search
search.type: ax
dataset.name: ___dataset___

# training settings (fixed)
train:
  max_epochs: 400
  auto_correct: True

# this is faster for smaller datasets, but does not work for some models (e.g.,
# TransE due to a pytorch issue) or for larger datasets. Change to spo in such
# cases (either here or in ax section of model config), results will not be
# affected.
negative_sampling.implementation: sp_po

# validation/evaluation settings (fixed)
valid:
  every: 5
  metric: mean_reciprocal_rank_filtered_with_test
  filter_with_test: True
  early_stopping:
    patience: 10
    min_threshold.epochs: 50
    min_threshold.metric_value: 0.05

eval:
  batch_size: 256
  metrics_per.relation_type: True

# settings for reciprocal relations (if used)
import: [___model___, reciprocal_relations_model]
reciprocal_relations_model.base_model.type: ___model___

# ax settings: hyperparameter search space
ax_search:
  num_trials: 30
  num_sobol_trials: 30
  parameters:
    # model-specific parameter entries are spliced in here
    ___models___

    # training hyperparameters
    - name: train.batch_size
      type: choice
      values: [128, 256, 512, 1024]
      is_ordered: True
    - name: train.type
      type: fixed
      value: ___train_type___
    - name: train.optimizer
      type: choice
      values: [Adam, Adagrad]
    - name: train.loss
      type: fixed
      value: ___loss___
    - name: train.optimizer_args.lr
      type: range
      bounds: [0.0003, 1.0]
      log_scale: True
    - name: train.lr_scheduler
      type: fixed
      value: ReduceLROnPlateau
    - name: train.lr_scheduler_args.mode
      type: fixed
      value: max
    - name: train.lr_scheduler_args.factor
      type: fixed
      value: 0.95
    - name: train.lr_scheduler_args.threshold
      type: fixed
      value: 0.0001
    - name: train.lr_scheduler_args.patience
      type: range
      bounds: [0, 10]

    # embedding dimension
    - name: lookup_embedder.dim
      type: choice
      values: [128, 256, 512]
      is_ordered: True

    # embedding initialization
    - name: lookup_embedder.initialize
      type: choice
      values: [xavier_normal_, xavier_uniform_, normal_, uniform_]
    - name: lookup_embedder.initialize_args.normal_.mean
      type: fixed
      value: 0.0
    - name: lookup_embedder.initialize_args.normal_.std
      type: range
      bounds: [0.00001, 1.0]
      log_scale: True
    - name: lookup_embedder.initialize_args.uniform_.a
      type: range
      bounds: [-1.0, -0.00001]
    - name: lookup_embedder.initialize_args.xavier_uniform_.gain
      type: fixed
      value: 1.0
    - name: lookup_embedder.initialize_args.xavier_normal_.gain
      type: fixed
      value: 1.0

    # embedding regularization
    # NOTE(review): '' presumably means "no regularization" — confirm in the
    # embedder implementation.
    - name: lookup_embedder.regularize
      type: choice
      values: ['', 'l3', 'l2', 'l1']
      is_ordered: True
    - name: lookup_embedder.regularize_args.weighted
      type: choice
      values: [True, False]
    - name: ___model___.entity_embedder.regularize_weight
      type: range
      bounds: [1.0e-20, 1.0e-01]
      log_scale: True
    - name: ___model___.relation_embedder.regularize_weight
      type: range
      bounds: [1.0e-20, 1.0e-01]
      log_scale: True

    # embedding dropout
    # NOTE(review): negative lower bounds look deliberate (likely clamped to
    # "disabled" downstream, so part of the search mass turns the option off)
    # — confirm in the training code before "fixing" them.
    - name: ___model___.entity_embedder.dropout
      type: range
      bounds: [-0.5, 0.5]
    - name: ___model___.relation_embedder.dropout
      type: range
      bounds: [-0.5, 0.5]

    # training-type specific hyperparameters
    - name: KvsAll.label_smoothing  #train_type: KvsAll
      type: range  #train_type: KvsAll
      bounds: [-0.3, 0.3]  #train_type: KvsAll
    - name: negative_sampling.num_negatives_s  #train_type: negative_sampling
      type: range  #train_type: negative_sampling
      bounds: [1, 1000]  #train_type: negative_sampling
      log_scale: True  #train_type: negative_sampling
    - name: negative_sampling.num_negatives_o  #train_type: negative_sampling
      type: range  #train_type: negative_sampling
      bounds: [1, 1000]  #train_type: negative_sampling
      log_scale: True  #train_type: negative_sampling