# ************************************************************************
# *********** DEFAULT CONFIGURATION: DO NOT CHANGE THIS FILE *************
# ************************************************************************
## c_clause handler options #############################################
# Example:
# from c_clause import Loader, QAHandler
# from clause.config.options import Options
# opts = Options()
# opts.set("loader.load_zero_rules", False)
# opts.set("loader.b_min_support", 10)
# loader = Loader(opts.get("loader"))
# qa = QAHandler(opts.get("qa_handler"))
### Loader loads rulesets and datasets
### the options can be used to filter/ignore some rules
### and to modify some of the loaded rule behavior
# 'r_num_unseen' option:
# Laplace smoothing applied to rule type 'r' for calculating confidence
# conf = support / (num_unseen + num_body_groundings)
# 'r_min_support' option:
# don't load rules of type 'r' if their support is lower than 'r_min_support'
# 'r_min_preds' option: as above for the overall number of triple predictions
# 'r_min_confidence': as above; note: confidence here is without Laplace smoothing
# 'r_max_length' option: as above; length refers to the number of body atoms
loader:
# only has effect when rules are loaded from disk
# if set >1 then rules will be first loaded and then parsed by multiple threads;
# if set to -1 then min{5, all_available_threads} is used; set to 1 to turn off
num_threads: -1
# set to False to display less output information
verbose: True
### Rule options
## B rules
# parsable example: "h(X,Y) <= b1(A,X), b2(A,Y)"
load_b_rules: True
b_num_unseen: 5
b_min_support: -1
b_min_preds: -1
b_min_conf: 0.0001
b_max_length: -1
# this value is set by default in AnyBURL for rule application of B-rules
# during rule grounding, whenever a new branch of the DFS leads to more
# than max_branching_factor new nodes (entities), the branch is not visited
# note that this only has an effect for large KGs such as Wd5m
b_max_branching_factor: 1000 #-1 for off
## U_c rules
# parsable examples (c and d being entities in the graph):
# "h(X,c) <= b1(X,d)"
# "h(c,Y) <= b1(Y,A), b2(d,A)"
load_u_c_rules: True
c_num_unseen: 5
c_min_support: -1
c_min_preds: -1
c_min_conf: 0.0001
c_max_length: -1
## U_d rules
# parsable examples (c being an entity in the graph):
# "h(X,c) <= b1(A,X), b2(A,B)"
# "h(X,c) <= b1(X,A)"
load_u_d_rules: True
# weight that is multiplied with the confidence
# we use AnyBURL default here
d_weight: 0.1
d_max_branching_factor: -1 #-1 for off
d_num_unseen: 5
d_min_support: -1
d_min_preds: -1
d_min_conf: 0.0001
d_max_length: -1
## Z rules
# parsable example (c being an entity in the graph): "h(X,c) <="
# can only be used for QA or ranking, not for the PredictionHandler
# always has length 1
load_zero_rules: True
# weight that is multiplied with the confidence
# we use AnyBURL default here
z_weight: 0.01
z_num_unseen: 5
z_min_support: -1
z_min_preds: -1
z_min_conf: 0.0001
## U_xxc rules
# parsable example: "h(X,X) <= b1(X,c)" (c being an entity in the graph)
# always has length 1
load_u_xxc_rules: True
xxc_num_unseen: 5
xxc_min_support: -1
xxc_min_preds: -1
xxc_min_conf: 0.0001
## U_xxd rules
# parsable example: "h(X,X) <= b1(X,A)"
# always has length 1
load_u_xxd_rules: True
xxd_num_unseen: 5
xxd_min_support: -1
xxd_min_preds: -1
xxd_min_conf: 0.0001
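# The loader options above can be set programmatically on an Options object
# before constructing the Loader, following the example at the top of this file.
# A minimal sketch; the option keys mirror this section, the concrete values
# and file paths are illustrative only:
# from c_clause import Loader
# from clause.config.options import Options
# opts = Options()
# opts.set("loader.load_zero_rules", False)  # skip Z rules entirely
# opts.set("loader.b_min_support", 10)       # drop B rules with low support
# opts.set("loader.b_max_length", 3)         # keep only short B rules
# loader = Loader(opts.get("loader"))
# loader.load_data(data="train.txt", filter="valid.txt", target="test.txt")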
# calculates rankings based on the "target" KG loaded into DataLoader
# e.g. target argument from loader.load_data(data=.., filter=.., target=..)
# candidate rankings are calculated for every query (s, p, ?) and (?, p, o)
# of every triple (s, p, o) from "target"
ranking_handler:
# whether to cache the rules that predicted the query candidates,
# can be retrieved with handler.get_rules(bool, string: "head" or "tail")
# turn off for efficiency.
collect_rules: False
# number of candidates to calculate for every query
# the actual number of returned candidates will often deviate
# it might be fewer because no more candidates exist
# it might be more because we do not cut off the predictions of a rule
# e.g., when there are 90 candidates already calculated and the next
# rule predicts 30 new candidates, we allow a maximum of 120 candidates
topk: 100
# select from "maxplus" / "noisyor"
# maxplus: the score of a candidate is the confidence of its highest predicting rule
# candidate discrimination is based on comparing the sequences of the predicting
# rules' confidences lexicographically; note that the outputted scores
# are only the ones of the rule with the highest confidence
# noisyor: scores and ranking are based on sorting by the noisy-or product
# the noisy-or sorting is based on -\sum_i(log(1-conf_i)) and is transformed
# before being outputted; this mitigates floating point issues
aggregation_function: "maxplus"
# don't add candidate proposals c for queries (h, r, ?) or (?, r, t) if
# (h, r, c) or (c, r, t) exists in the KG given by the "data" argument of the loader
filter_w_data: True
# same as above but this only has an effect for ranker.write_ranking()
# all other results remain query-based, e.g. (h, r, ?) with candidates [c_1, c_2, c_3]
# independent of which true answers exist in the target for the query
filter_w_target: True
# choose between "random" / "frequency"
# frequency: if two candidates cannot be discriminated because they are predicted
# by exactly the same rules, discriminate them according to their number of appearances
# in the 'data' argument KG (e.g., the train data)
# random: random tie handling
tie_handling: "frequency"
# -1 for using ALL available threads
num_threads: -1
# if True, checks how many true answers (num_true) exist in "target" for a query
# and sets new_top_k = topk + num_true
# this is helpful when calculating rankings on large target files so as not to lose
# true answers due to topk; say you have topk=100 but 150 true answers
# the rule application module does not filter with "target" during the application
# therefore, you would never be able to calculate 50 of the true answers
# of course, during the application it is not checked if an answer is true
# e.g. you might end up with 250 false answers
# the same effect is achieved by setting topk globally to a higher value (but slower)
adapt_topk: True
# set to False to display less output information
verbose: True
### stopping criteria for rule application
# stop rule application for a query if topk candidates are calculated AND at least disc_at_least
# of the best candidates are fully discriminated, i.e., they are pairwise distinct with regard
# to their predicting rules
# recommended values: 10 or 20 under topk=100 and maxplus
# the performance improvement decreases when using more threads
disc_at_least: 10 # -1 for off, must not be bigger than topk
# stop rule application for a query as soon as hard_stop_at candidates
# are found (ignoring topk); set the value to topk under maxplus to achieve max-aggregation
# scores for all candidates very fast without getting a properly discriminated ranking
# recommended value: -1
hard_stop_at: -1 #-1 for off
# stops adding predicting rules to a candidate of a query if already num_top_rules
# predicted the candidate; if all candidates are predicted by num_top_rules, rule
# application is stopped; can be used in conjunction with "noisyor" to achieve
# noisy-or top-h (https://arxiv.org/pdf/2309.00306.pdf)
# recommended values: -1 under "maxplus"; 5 under "noisyor"
num_top_rules: -1
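# A sketch of typical ranking usage under the options above. The Options/Loader
# pattern, loader.load_data(...), handler.get_rules(...), and ranker.write_ranking()
# are taken from the comments in this file; the RankingHandler construction, the
# rule-loading call, and the method that triggers the ranking are assumptions
# (check the project documentation for the exact API):
# from c_clause import Loader, RankingHandler
# from clause.config.options import Options
# opts = Options()
# opts.set("ranking_handler.topk", 100)
# opts.set("ranking_handler.collect_rules", True)
# loader = Loader(opts.get("loader"))
# loader.load_data(data="train.txt", filter="valid.txt", target="test.txt")
# loader.load_rules("rules.txt")                # exact signature assumed
# ranker = RankingHandler(opts.get("ranking_handler"))
# ranker.calculate_ranking(loader)              # method name assumed
# ranker.write_ranking("ranking.txt")           # mentioned above; argument assumed
# head_rules = ranker.get_rules(True, "head")   # see collect_rules above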
# calculates answer candidates and scores based on
# questions (h, r, ?) and (?, r, t)
qa_handler:
# same as ranking_handler; can be retrieved with handler.get_rules(bool)
# see documentation for the data structure
collect_rules: False
# number of candidates to calculate for a query
# see ranking_handler for a detailed description; it behaves identically
topk: 100
# select from "maxplus" / "noisyor"; see ranking handler
aggregation_function: "maxplus"
# don't add candidate proposals c for queries (h, r, ?) or (?, r, t) if
# (h, r, c) or (c, r, t) exists in the KG given by the "data" argument of the loader
filter_w_data: True
# choose between "random" / "frequency", see ranking_handler
tie_handling: "frequency"
# -1 for using ALL available threads
num_threads: -1
# set to False to display less output information
verbose: True
### stopping criteria for rule application
# see ranking_handler for detailed description
disc_at_least: 10 # -1 for off, must not be bigger than topk
hard_stop_at: -1 #-1 for off
num_top_rules: -1
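# A sketch of answering (h, r, ?) questions with the QAHandler, continuing the
# example at the top of this file. The query-answering and answer-retrieval
# method names are assumptions; handler.get_rules(bool) is mentioned above:
# opts.set("qa_handler.topk", 10)
# opts.set("qa_handler.collect_rules", True)
# qa = QAHandler(opts.get("qa_handler"))
# queries = [("entity_a", "relation_r"), ("entity_b", "relation_r")]
# qa.calculate_answers(queries, loader, direction="tail")  # method name assumed
# answers = qa.get_answers(as_string=True)                 # method name assumed
# rules = qa.get_rules(True)                                # mentioned above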
# given input rules, calculates materialization (predictions)
# and stats (num_pred, num_true_preds)
rules_handler:
# whether to store triple predictions
# and stats (num_pred, num_true)
# to be obtained with handler.get_predictions(bool), handler.get_statistics()
collect_predictions: True
collect_statistics: True
num_threads: -1 #-1 for ALL available threads
# set to False to display less output information
verbose: True
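# A sketch of materializing a ruleset with the RulesHandler, continuing the
# examples above (the class is referred to as c_clause.RulesHandler further below).
# get_predictions(bool) and get_statistics() are mentioned above; the constructor
# pattern and the method that triggers the computation are assumptions:
# from c_clause import RulesHandler
# rh = RulesHandler(opts.get("rules_handler"))
# rules = ["h(X,Y) <= b1(A,X), b2(A,Y)"]   # rule strings, see the B-rule example above
# rh.calculate_predictions(rules, loader)   # method name assumed
# predictions = rh.get_predictions(True)    # mentioned above
# stats = rh.get_statistics()               # mentioned above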
# given input triples, calculates triple scores and can output all
# predicting rules + their groundings (explanations)
prediction_handler:
collect_explanations: False
# select from "maxplus" / "noisyor"
# note that, as we are not calculating candidate rankings, selecting maxplus simply
# results in "max-aggregation" scores
aggregation_function: "maxplus"
# for a given triple, stop rule application if it was predicted by the
# num_top_rules rules with the highest confidences
# set to -1 to not apply any stopping criterion, i.e., to apply all rules
# if you want to only collect the best explanation for each triple, set to 1
# note that the noisyor score is influenced by this parameter
# and results in noisyor-top-h scores (https://arxiv.org/pdf/2309.00306.pdf)
num_top_rules: 5
num_threads: -1 #-1 for ALL available threads
# set to False to display less output information
verbose: True
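# A sketch of scoring triples and collecting explanations with the PredictionHandler
# (the class name is mentioned in the Z-rule comment above), continuing the examples
# above. The scoring and retrieval method names are assumptions:
# from c_clause import PredictionHandler
# opts.set("prediction_handler.collect_explanations", True)
# ph = PredictionHandler(opts.get("prediction_handler"))
# triples = [("entity_a", "relation_r", "entity_b")]
# ph.calculate_scores(triples, loader)                 # method name assumed
# scores = ph.get_scores(as_string=True)               # method name assumed
# explanations = ph.get_explanations(as_string=True)   # method name assumed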
## clause options ############################################################
### Learns rules with AnyBURL or AMIE
learner:
# choose between: "anyburl" / "amie"
mode: "amie"
anyburl:
# add any java VM parameters as list elements
# e.g. in config: java_options: ["-Dfile.encoding=UTF-8", "-Xmx6g"]
# or opts.set("learner.anyburl.java_options", ["-Dfile.encoding=UTF-8", "-Xmx6g"])
java_options: []
# learning time in seconds for AnyBURL
time: 60
# any raw key/value option supported by AnyBURL can be set under raw
raw:
# max length of B-rules
MAX_LENGTH_CYCLIC: 3
# don't learn rules with support < 2
THRESHOLD_CORRECT_PREDICTIONS: 2
# don't learn rules with confidence smaller than 0.0001
THRESHOLD_CONFIDENCE: 0.0001
# num threads
WORKER_THREADS: 3
# for learning rules with only particular relations in the head
# use, e.g., SINGLE_RELATIONS: rel1,rel2
amie:
# add any java VM parameters as list elements
# e.g. in config, java_options: ["-Dfile.encoding=UTF-8", "-Xmx6g"]
# or opts.set("learner.amie.java_options", ["-Dfile.encoding=UTF-8", "-Xmx6g"])
java_options: []
# any raw key/value option supported by AMIE can be set under raw
raw:
# for PyClause support, don't modify "bias"
# and don't modify "ofmt"
bias: amie.mining.assistant.pyclause.AnyBurlMiningAssistant
ofmt: anyburl
# some important parameters with their AMIE default values
mins: 100
minc: 0.0
maxad: 3 # number of atoms = 1 head atom + body atoms
minhc: 0.01
minpca: 0.0
# this is special notation, which adds the parameter to the call (as a flag) and omits the value of the param
# by default constants are deactivated in AMIE
# const: "*flag*"
# maxadc: 2 # number of atoms in rules with constants = 1 head atom + body atoms
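# The learner options can also be set programmatically, following the
# opts.set("learner.anyburl.java_options", ...) example above. The values are
# illustrative, and the dotted path into the 'raw' keys is an assumption:
# opts.set("learner.mode", "anyburl")
# opts.set("learner.anyburl.time", 120)
# opts.set("learner.anyburl.java_options", ["-Xmx6g"])
# opts.set("learner.anyburl.raw.WORKER_THREADS", 4)  # raw AnyBURL option (path assumed)
# the learner itself is started via the clause package, see the project documentation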
### An experimental rule miner that efficiently mines multiple rule types simultaneously
### it is very efficient for mining all U_c rules of length 1; less efficient for cyclical rules (B-rules)
torm_learner:
# choose between "hybrid" / "torm"
# 'hybrid' calculates rule confidences for cyclical rules (B-rules) with AnyBURL confidence sampling;
# when 'hybrid' is selected, the torm options and the learner.anyburl options apply; use anyburl.time
# for the learning time of B-rules
# 'torm' calculates rule confidences for B-rules with c_clause.RulesHandler materialization
mode: "torm"
torm:
# if set to false, rules that do not make any wrong prediction are suppressed
tautology: False
# rule mining options
# all options are minimum requirements
# e.g. confidence: 0.1 means learn only rules with confidence 0.1 or higher
# only B rules of length > 1 are supported
# all other rule types are of length 1
b:
active: True
confidence: 0.0001
support: 2
length: 3
batchsize: 1000
uc:
active: True
confidence: 0.0001
support: 2
ud:
active: True
confidence: 0.0001
support: 2
z:
active: True
confidence: 0.0001
support: 2
xx_uc:
active: True
confidence: 0.0001
support: 2
xx_ud:
active: True
confidence: 0.0001
support: 2
io:
rule_format: "PyClause"
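# The torm_learner options follow the same Options pattern; the values below are
# illustrative and the dotted option paths are assumed to mirror this section:
# opts.set("torm_learner.mode", "hybrid")
# opts.set("torm_learner.torm.b.length", 2)      # mine shorter B rules
# opts.set("torm_learner.torm.z.active", False)  # do not mine Z rules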