# ************************************************************************
# *********** DEFAULT CONFIGURATION: DO NOT CHANGE THIS FILE *************
# ************************************************************************
## c_clause handler options #############################################
# Example:
# from c_clause import Loader, QAHandler
# from clause.config.options import Options
# opts = Options()
# opts.set("loader.load_zero_rules", False)
# opts.set("loader.b_min_support", 10)
# loader = Loader(opts.get("loader"))
# qa = QAHandler(opts.get("qa_handler"))
### Loader loads rulesets and datasets
### the options can be used to filter/ignore some rules
### and to modify some of the loaded rule behavior
# 'r_num_unseen' option:
# Laplace smoothing applied to rule type 'r' for calculating confidence
# conf = support / (num_unseen + num_body_groundings)
# 'r_min_support' option:
# don't load rules of type 'r' if their support is lower than 'r_min_support'
# 'r_min_preds' option: as above for the overall number of triple predictions
# 'r_min_confidence': as above; note: confidence here is without Laplace smoothing
# 'r_max_length' option: as above; length refers to the number of body atoms
loader:
# only has effect when rules are loaded from disk
# if set >1 then rules will be first loaded and then parsed by multiple threads;
# if set to -1 then min{5, all_available_threads} is used; set to 1 to turn off
num_threads: -1
# set to False to display less output information
verbose: True
### Rule options
## B rules
# parsable example: "h(X,Y) <= b1(A,X), b2(A,Y)"
load_b_rules: True
b_num_unseen: 5
b_min_support: -1
b_min_preds: -1
b_min_conf: 0.0001
b_max_length: -1
# this value is set by default in AnyBURL for rule application of B-rules
# during rule grounding, whenever a new branch of the DFS leads to more
# than max_branching_factor new nodes (entities), the branch is not visited
# note that this only has an effect for large KGs such as Wd5m
b_max_branching_factor: 1000 #-1 for off
## U_c rules
# parsable examples (c and d being entities in the graph):
# "h(X,c) <= b1(X,d)"
# "h(c,Y) <= b1(Y,A), b2(d,A)"
load_u_c_rules: True
c_num_unseen: 5
c_min_support: -1
c_min_preds: -1
c_min_conf: 0.0001
c_max_length: -1
## U_d rules
# parsable examples (c being an entity in the graph):
# "h(X,c) <= b1(A,X), b2(A,B)"
# "h(X,c) <= b1(X,A)"
load_u_d_rules: True
# weight that is multiplied with the confidence
# we use AnyBURL default here
d_weight: 0.1
d_max_branching_factor: -1 #-1 for off
d_num_unseen: 5
d_min_support: -1
d_min_preds: -1
d_min_conf: 0.0001
d_max_length: -1
## Z rules
# parsable example (c being an entity in the graph): "h(X,c) <="
# can only be used for QA or ranking, not for the PredictionHandler
# always has length 1
load_zero_rules: True
# weight that is multiplied with the confidence
# we use AnyBURL default here
z_weight: 0.01
z_num_unseen: 5
z_min_support: -1
z_min_preds: -1
z_min_conf: 0.0001
## U_xxc rules
# parsable example: "h(X,X) <= b1(X,c)" (c being an entity in the graph)
# always has length 1
load_u_xxc_rules: True
xxc_num_unseen: 5
xxc_min_support: -1
xxc_min_preds: -1
xxc_min_conf: 0.0001
## U_xxd rules
# parsable example: "h(X,X) <= b1(X,A)"
# always has length 1
load_u_xxd_rules: True
xxd_num_unseen: 5
xxd_min_support: -1
xxd_min_preds: -1
xxd_min_conf: 0.0001
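# The loader options above can be set programmatically on an Options object
# before constructing the Loader, following the example at the top of this file.
# A minimal sketch; the option keys mirror this section, the concrete values
# and file paths are illustrative only:
# from c_clause import Loader
# from clause.config.options import Options
# opts = Options()
# opts.set("loader.load_zero_rules", False)  # skip Z rules entirely
# opts.set("loader.b_min_support", 10)       # drop B rules with low support
# opts.set("loader.b_max_length", 3)         # keep only short B rules
# loader = Loader(opts.get("loader"))
# loader.load_data(data="train.txt", filter="valid.txt", target="test.txt")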
# calculates rankings based on the "target" KG loaded into DataLoader
# e.g. target argument from loader.load_data(data=.., filter=.., target=..)
# candidate rankings are calculated for every query (s, p, ?) and (?, p, o)
# of every triple (s, p, o) from "target"
ranking_handler:
# whether to cache the rules that predicted the query candidates,
# can be retrieved with handler.get_rules(bool, string: "head" or "tail")
# turn off for efficiency.
collect_rules: False
# number of candidates to calculate for every query
# the actual number of returned candidates will often deviate
# it might be fewer because no more candidates exist
# it might be more because we do not cut off the predictions of a rule
# e.g., when there are 90 candidates already calculated and the next
# rule predicts 30 new candidates, we allow a maximum of 120 candidates
topk: 100
# select from "maxplus" / "noisyor"
# maxplus: the score of a candidate is the confidence of its highest predicting rule
# candidate discrimination is based on comparing the sequences of the predicting
# rules' confidences lexicographically; note that the outputted scores
# are only the ones of the rule with the highest confidence
# noisyor: scores and ranking are based on sorting by the noisy-or product
# the noisy-or sorting is based on -\sum_i(log(1-conf_i)) and is transformed
# before being outputted; this mitigates floating point issues
aggregation_function: "maxplus"
# don't add candidate proposals c for queries (h, r, ?) or (?, r, t) if
# (h, r, c) or (c, r, t) exists in the KG given by the "data" argument of the loader
filter_w_data: True
# same as above but this only has an effect for ranker.write_ranking()
# all other results remain query-based, e.g. (h, r, ?) with candidates [c_1, c_2, c_3]
# independent of which true answers exist in the target for the query
filter_w_target: True
# choose between "random" / "frequency"
# frequency: if two candidates cannot be discriminated because they are predicted
# by exactly the same rules, discriminate them according to their number of appearances
# in the 'data' argument KG (e.g., the train data)
# random: random tie handling
tie_handling: "frequency"
# -1 for using ALL available threads
num_threads: -1
# if True, checks how many true answers (num_true) exist in "target" for a query
# and sets new_top_k = topk + num_true
# this is helpful when calculating rankings on large target files so as not to lose
# true answers due to topk; say you have topk=100 but 150 true answers
# the rule application module does not filter with "target" during the application
# therefore, you would never be able to calculate 50 of the true answers
# of course, during the application it is not checked if an answer is true
# e.g. you might end up with 250 false answers
# the same effect is achieved by setting topk globally to a higher value (but slower)
adapt_topk: True
# set to False to display less output information
verbose: True
### stopping criteria for rule application
# stop rule application for a query if topk candidates are calculated AND at least disc_at_least
# of the best candidates are fully discriminated, i.e., they are pairwise distinct with regard
# to their predicting rules
# recommended values: 10 or 20 under topk=100 and maxplus
# the performance improvement decreases when using more threads
disc_at_least: 10 # -1 for off, must not be bigger than topk
# stop rule application for a query as soon as hard_stop_at candidates
# are found (ignoring topk); set the value to topk under maxplus to achieve max-aggregation
# scores for all candidates very fast without getting a properly discriminated ranking
# recommended value: -1
hard_stop_at: -1 #-1 for off
# stops adding predicting rules to a candidate of a query if already num_top_rules
# predicted the candidate; if all candidates are predicted by num_top_rules, rule
# application is stopped; can be used in conjunction with "noisyor" to achieve
# noisy-or top-h (https://arxiv.org/pdf/2309.00306.pdf)
# recommended values: -1 under "maxplus"; 5 under "noisyor"
num_top_rules: -1
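# A sketch of typical ranking usage under the options above. The Options/Loader
# pattern, loader.load_data(...), handler.get_rules(...), and ranker.write_ranking()
# are taken from the comments in this file; the RankingHandler construction, the
# rule-loading call, and the method that triggers the ranking are assumptions
# (check the project documentation for the exact API):
# from c_clause import Loader, RankingHandler
# from clause.config.options import Options
# opts = Options()
# opts.set("ranking_handler.topk", 100)
# opts.set("ranking_handler.collect_rules", True)
# loader = Loader(opts.get("loader"))
# loader.load_data(data="train.txt", filter="valid.txt", target="test.txt")
# loader.load_rules("rules.txt")                # exact signature assumed
# ranker = RankingHandler(opts.get("ranking_handler"))
# ranker.calculate_ranking(loader)              # method name assumed
# ranker.write_ranking("ranking.txt")           # mentioned above; argument assumed
# head_rules = ranker.get_rules(True, "head")   # see collect_rules above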
# calculates answer candidates and scores based on
# questions (h, r, ?) and (?, r, t)
qa_handler:
# same as ranking_handler; can be retrieved with handler.get_rules(bool)
# see documentation for the data structure
collect_rules: False
# number of candidates to calculate for a query
# see ranking_handler for a detailed description; it behaves identically
topk: 100
# select from "maxplus" / "noisyor"; see ranking handler
aggregation_function: "maxplus"
# don't add candidate proposals c for queries (h, r, ?) or (?, r, t) if
# (h, r, c) or (c, r, t) exists in the KG given by the "data" argument of the loader
filter_w_data: True
# choose between "random" / "frequency", see ranking_handler
tie_handling: "frequency"
# -1 for using ALL available threads
num_threads: -1
# set to False to display less output information
verbose: True
### stopping criteria for rule application
# see ranking_handler for detailed description
disc_at_least: 10 # -1 for off, must not be bigger than topk
hard_stop_at: -1 #-1 for off
num_top_rules: -1
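# A sketch of answering (h, r, ?) questions with the QAHandler, continuing the
# example at the top of this file. The query-answering and answer-retrieval
# method names are assumptions; handler.get_rules(bool) is mentioned above:
# opts.set("qa_handler.topk", 10)
# opts.set("qa_handler.collect_rules", True)
# qa = QAHandler(opts.get("qa_handler"))
# queries = [("entity_a", "relation_r"), ("entity_b", "relation_r")]
# qa.calculate_answers(queries, loader, direction="tail")  # method name assumed
# answers = qa.get_answers(as_string=True)                 # method name assumed
# rules = qa.get_rules(True)                                # mentioned above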
# given input rules, calculates materialization (predictions)
# and stats (num_pred, num_true_preds)
rules_handler:
# whether to store triple predictions
# and stats (num_pred, num_true)
# to be obtained with handler.get_predictions(bool), handler.get_statistics()
collect_predictions: True
collect_statistics: True
num_threads: -1 #-1 for ALL available threads
# set to False to display less output information
verbose: True
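# A sketch of materializing a ruleset with the RulesHandler, continuing the
# examples above (the class is referred to as c_clause.RulesHandler further below).
# get_predictions(bool) and get_statistics() are mentioned above; the constructor
# pattern and the method that triggers the computation are assumptions:
# from c_clause import RulesHandler
# rh = RulesHandler(opts.get("rules_handler"))
# rules = ["h(X,Y) <= b1(A,X), b2(A,Y)"]   # rule strings, see the B-rule example above
# rh.calculate_predictions(rules, loader)   # method name assumed
# predictions = rh.get_predictions(True)    # mentioned above
# stats = rh.get_statistics()               # mentioned above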
# given input triples, calculates triple scores and can output all
# predicting rules + their groundings (explanations)
prediction_handler:
collect_explanations: False
# select from "maxplus" / "noisyor"
# note that, as we are not calculating candidate rankings, selecting maxplus simply
# results in "max-aggregation" scores
aggregation_function: "maxplus"
# for a given triple, stop rule application if it was predicted by the
# num_top_rules rules with the highest confidences
# set to -1 to not apply any stopping criterion, i.e., to apply all rules
# if you want to only collect the best explanation for each triple, set to 1
# note that the noisyor score is influenced by this parameter
# and results in noisyor-top-h scores (https://arxiv.org/pdf/2309.00306.pdf)
num_top_rules: 5
num_threads: -1 #-1 for ALL available threads
# set to False to display less output information
verbose: True
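# A sketch of scoring triples and collecting explanations with the PredictionHandler
# (the class name is mentioned in the Z-rule comment above), continuing the examples
# above. The scoring and retrieval method names are assumptions:
# from c_clause import PredictionHandler
# opts.set("prediction_handler.collect_explanations", True)
# ph = PredictionHandler(opts.get("prediction_handler"))
# triples = [("entity_a", "relation_r", "entity_b")]
# ph.calculate_scores(triples, loader)                 # method name assumed
# scores = ph.get_scores(as_string=True)               # method name assumed
# explanations = ph.get_explanations(as_string=True)   # method name assumed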
## clause options ############################################################
### Learns rules with AnyBURL or AMIE
learner:
# choose between: "anyburl" / "amie"
mode: "amie"
anyburl:
# add any java VM parameters as list elements
# e.g. in config: java_options: ["-Dfile.encoding=UTF-8", "-Xmx6g"]
# or opts.set("learner.anyburl.java_options", ["-Dfile.encoding=UTF-8", "-Xmx6g"])
java_options: []
# learning time in seconds for AnyBURL
time: 60
# any raw key/value option supported by AnyBURL can be set under raw
raw:
# max length of B-rules
MAX_LENGTH_CYCLIC: 3
# don't learn rules with support < 2
THRESHOLD_CORRECT_PREDICTIONS: 2
# don't learn rules with confidence smaller than 0.0001
THRESHOLD_CONFIDENCE: 0.0001
# num threads
WORKER_THREADS: 3
# for learning rules with only particular relations in the head
# use, e.g., SINGLE_RELATIONS: rel1,rel2
amie:
# add any java VM parameters as list elements
# e.g. in config, java_options: ["-Dfile.encoding=UTF-8", "-Xmx6g"]
# or opts.set("learner.amie.java_options", ["-Dfile.encoding=UTF-8", "-Xmx6g"])
java_options: []
# any raw key/value option supported by AMIE can be set under raw
raw:
# for PyClause support, don't modify "bias"
# and don't modify "ofmt"
bias: amie.mining.assistant.pyclause.AnyBurlMiningAssistant
ofmt: anyburl
# some important parameters with their AMIE default values
mins: 100
minc: 0.0
maxad: 3 # number of atoms = 1 head atom + body atoms
minhc: 0.01
minpca: 0.0
# this is special notation, which adds the parameter to the call (as a flag) and omits the value of the param
# by default constants are deactivated in AMIE
# const: "*flag*"
# maxadc: 2 # number of atoms in rules with constants = 1 head atom + body atoms
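# The learner options can also be set programmatically, following the
# opts.set("learner.anyburl.java_options", ...) example above. The values are
# illustrative, and the dotted path into the 'raw' keys is an assumption:
# opts.set("learner.mode", "anyburl")
# opts.set("learner.anyburl.time", 120)
# opts.set("learner.anyburl.java_options", ["-Xmx6g"])
# opts.set("learner.anyburl.raw.WORKER_THREADS", 4)  # raw AnyBURL option (path assumed)
# the learner itself is started via the clause package, see the project documentation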
### An experimental rule miner that efficiently mines multiple rule types simultaneously
### it is very efficient for mining all U_c rules of length 1; less efficient for cyclical rules (B-rules)
torm_learner:
# choose between "hybrid" / "torm"
# 'hybrid' calculates rule confidences for cyclical rules (B-rules) with AnyBURL confidence sampling;
# when 'hybrid' is selected, the torm options and the learner.anyburl options apply; use anyburl.time
# for the learning time of B-rules
# 'torm' calculates rule confidences for B-rules with c_clause.RulesHandler materialization
mode: "torm"
torm:
# if set to false, rules that do not make any wrong prediction are suppressed
tautology: False
# rule mining options
# all options are minimum requirements
# e.g. confidence: 0.1 means learn only rules with confidence 0.1 or higher
# only B rules of length > 1 are supported
# all other rule types are of length 1
b:
active: True
confidence: 0.0001
support: 2
length: 3
batchsize: 1000
uc:
active: True
confidence: 0.0001
support: 2
ud:
active: True
confidence: 0.0001
support: 2
z:
active: True
confidence: 0.0001
support: 2
xx_uc:
active: True
confidence: 0.0001
support: 2
xx_ud:
active: True
confidence: 0.0001
support: 2
io:
rule_format: "PyClause"
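# The torm_learner options follow the same Options pattern; the values below are
# illustrative and the dotted option paths are assumed to mirror this section:
# opts.set("torm_learner.mode", "hybrid")
# opts.set("torm_learner.torm.b.length", 2)      # mine shorter B rules
# opts.set("torm_learner.torm.z.active", False)  # do not mine Z rules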