-
Notifications
You must be signed in to change notification settings - Fork 21
/
preprocess_1.0.sh
executable file
·43 lines (36 loc) · 1.57 KB
/
preprocess_1.0.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/env bash
#
# Preprocess the AMR v1.0 data: clean the inputs, recategorize subgraphs,
# remove senses, then rename the results to *.preproc.

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u, catches typos in the path variables below), and on a failure
# in any stage of a pipeline (pipefail).
set -euo pipefail

############### AMR v1.0 ################
# Directory where intermediate utils will be saved to speed up processing.
util_dir="data/AMR/amr_1.0_utils"

# AMR data with **features**.
data_dir="data/AMR/amr_1.0"
train_data="${data_dir}/train.txt.features"
dev_data="${data_dir}/dev.txt.features"
test_data="${data_dir}/test.txt.features"
# ========== Set the above variables correctly ==========
# Step 1: run the input cleaner over the train, dev and test feature files.
# Later steps read "<file>.input_clean" for each file passed here.
# The timestamp is passed as a %s argument so that `date` output is never
# interpreted as part of the printf format string.
printf 'Cleaning inputs...%s\n' "$(date)"
# Paths are quoted so user-configured locations containing spaces or glob
# characters reach the module as single arguments.
python -u -m stog.data.dataset_readers.amr_parsing.preprocess.input_cleaner \
  --amr_files "${train_data}" "${dev_data}" "${test_data}"
printf 'Done.%s\n\n' "$(date)"
# Step 2: recategorize subgraphs.
# The recategorizer is run on the cleaned train and dev files with
# --dump_dir pointing at the utils directory; the cleaned test file is run
# through the text anonymizer with --util_dir pointing at the same directory.
# NOTE(review): presumably the anonymizer consumes what the recategorizer
# dumped, and both write "<file>.input_clean.recategorize" (the next step
# reads files with that suffix) -- confirm against the Python modules.
printf 'Recategorizing subgraphs...%s\n' "$(date)"
python -u -m stog.data.dataset_readers.amr_parsing.preprocess.recategorizer \
  --dump_dir "${util_dir}" \
  --amr_files "${train_data}.input_clean" "${dev_data}.input_clean"
python -u -m stog.data.dataset_readers.amr_parsing.preprocess.text_anonymizor \
  --amr_file "${test_data}.input_clean" \
  --util_dir "${util_dir}"
printf 'Done.%s\n\n' "$(date)"
# Step 3: run the sense remover over the recategorized train, dev and test
# files. The renaming step afterwards expects one
# "<file>.input_clean.recategorize.nosense" per input.
printf 'Removing senses...%s\n' "$(date)"
# Each path is quoted so it is passed as exactly one argument even if the
# configured directories contain spaces or glob characters.
python -u -m stog.data.dataset_readers.amr_parsing.preprocess.sense_remover \
  --util_dir "${util_dir}" \
  --amr_files "${train_data}.input_clean.recategorize" \
  "${dev_data}.input_clean.recategorize" \
  "${test_data}.input_clean.recategorize"
printf 'Done.%s\n\n' "$(date)"
# Step 4: give the final outputs their ".preproc" names, then delete the
# remaining intermediate "*.input_clean*" files from the data directory.
printf 'Renaming preprocessed files...%s\n' "$(date)"
# "--" ends option parsing so a path starting with "-" is not read as a flag.
mv -- "${test_data}.input_clean.recategorize.nosense" "${test_data}.preproc"
mv -- "${train_data}.input_clean.recategorize.nosense" "${train_data}.preproc"
mv -- "${dev_data}.input_clean.recategorize.nosense" "${dev_data}.preproc"
# ${data_dir:?} aborts if data_dir is unset or empty, so the glob can never
# degrade to "/*.input_clean*". The glob itself stays unquoted so it expands.
rm -- "${data_dir:?}"/*.input_clean*