-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.yaml
53 lines (47 loc) · 1.6 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Extrinsic section: paths for a source corpus, a suspicious corpus, an index
# file, and an output CSV.
# NOTE(review): nesting was reconstructed from a flattened copy in which `dir`
# and `pth` each appeared twice at the same level (duplicate keys). Placing
# `index` and `save` directly under `extrinsic` is an assumption - confirm
# against the code that reads this file.
extrinsic:
  source:
    # Source directories, one per subset (subsets 1-3).
    dir:
      - dataset/subset/subset_1/sou_1
      - dataset/subset/subset_2/sou_2
      - dataset/subset/subset_3/sou_3
    # CSV path for the source side; presumably sentence-level data built from
    # the directories above - TODO confirm.
    pth: dataset/source_sent_all_three_subset.csv
  suspicious:
    # Only the subset_1 suspicious directory is listed here.
    dir:
      - dataset/subset/subset_1/sus_1
    # CSV path for the suspicious side.
    pth: dataset/suspicious_sent.csv
  # Index file path; the file name refers to subsets 1, 2 and 3.
  index: dataset/output/se_index_subset_1_2_3.index
  # Output CSV path for this section.
  save: dataset/output/set1/SE/se_output_subset_1_with_all_three_source.csv
# Intrinsic section: suspicious-document directories, an intermediate CSV, an
# output CSV, and a list of feature names.
# NOTE(review): nesting was reconstructed from a flattened copy. Placing
# `save` and `features` directly under `intrinsic` (rather than under
# `suspicious`) is an assumption - confirm against the code that reads this
# file.
intrinsic:
  suspicious:
    # Three directories, each ending in
    # intrinsic-analysis-corpus/suspicious-documents (part3, part2, and the
    # unsuffixed pan-plagiarism-corpus-2009 tree).
    dir:
      - dataset/pan-plagiarism-corpus-2009.part3/pan-plagiarism-corpus-2009/intrinsic-analysis-corpus/suspicious-documents
      - dataset/pan-plagiarism-corpus-2009.part2/pan-plagiarism-corpus-2009/intrinsic-analysis-corpus/suspicious-documents
      - dataset/pan-plagiarism-corpus-2009/intrinsic-analysis-corpus/suspicious-documents
    # CSV path for the suspicious side of the intrinsic run.
    pth: dataset/suspicious_sent_intrinsic_1.csv
  # Output CSV path for this section.
  save: dataset/output/intrinsic_output.csv
  # Feature names, listed alphabetically. Presumably each one must match an
  # identifier known to the consuming code - TODO confirm before renaming or
  # adding entries.
  features:
    - automated_readability_index
    - average_sentence_length_chars
    - average_sentence_length_words
    - average_syllables_per_word
    - average_word_frequency_class
    - average_word_length
    - coleman_liau_index
    - flesch_reading_ease
    - functionword_frequency
    - linsear_write_formula
    - most_common_words_without_stopwords
    - number_frequency
    - punctuation_frequency
    - sentence_length_distribution
    - special_character_frequency
    - stopword_ratio
    - top_3_gram_frequency
    - top_bigram_frequency
    - top_word_bigram_frequency
    - uppercase_frequency
    - word_length_distribution
    - yule_k_metric
# Evaluation section: two CSV paths, one for results and one for ground truth.
# NOTE(review): both keys are nested under `evaluation` here; in the flattened
# copy they sat at the same level as `evaluation`, leaving it null - confirm
# that the reader expects the nested form.
evaluation:
  results: dataset/results.csv
  ground_truth: dataset/ground_truth.csv