-
Notifications
You must be signed in to change notification settings - Fork 1
/
MultiQA_workflow_runner.sh
63 lines (55 loc) · 1.71 KB
/
MultiQA_workflow_runner.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# Slurm batch job for the MultiQA workflow: the script body below prepares the
# QUASAR-T train/dev data and then runs scripts/reader/train.py on it.
# The body reads SLURM_ARRAY_TASK_ID, so the job is meant to be submitted as a
# job array (e.g. `sbatch --array=<n> <this script>`).
#
# Resource request: one node, one task with 8 CPUs, 64GB of memory, 2 GPUs and
# a 30-hour wall-clock limit.
#SBATCH --verbose
#SBATCH --job-name=MultiQA_testRun
#SBATCH --time=30:00:00
#SBATCH --nodes=1
#SBATCH --mem=64GB
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=8
#SBATCH --gres=gpu:2
# Mail is sent to the address below when the job ends or fails.
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=ry708@nyu.edu
# One stdout and one stderr log per array task: %A expands to the job array's
# master job ID and %a to the array task index.
#SBATCH --output=/scratch/ry708/log/multiQA_%A_%a.out
#SBATCH --error=/scratch/ry708/log/multiQA_%A_%a.err
################## Configs ##################
# Strict mode: stop at the first failing command, unset variable or broken
# pipeline instead of carrying on with missing inputs on a long GPU allocation.
# Placed after the #SBATCH header because sbatch stops reading directives at
# the first executable line.
set -euo pipefail

dataset="QUASAR-T"
data_dir="data/custom_datasets"
length="long"

# The array task index is used as num_paragraphs: it is handed to the data
# preparation step and embedded in the data/model file names. Fail fast when
# the job was not submitted as an array, otherwise every name would silently
# start with an empty paragraph count.
num_paragraphs="${SLURM_ARRAY_TASK_ID:?SLURM_ARRAY_TASK_ID is not set; submit with sbatch --array=<num_paragraphs>}"

model_dir="saved_model/${dataset}"
today="$(date +%Y%m%d)"
model_name="${num_paragraphs}_${dataset}_${today}"
declare -a data_types=("train" "dev")

# All relative paths used by this script (data_dir, model_dir, scripts/...)
# are resolved from the project checkout, so a failed cd must abort the job.
project_dir="/scratch/ry708/CSCI/dsga_1012/project/MultiQA"
cd -- "${project_dir}" || {
  printf 'cannot cd to %s\n' "${project_dir}" >&2
  exit 1
}

# mkdir -p is a no-op when the directory already exists, so no -d test needed.
mkdir -p -- "${model_dir}"
# Generate dataset
# For each split in data_types: run the QUASAR data-prep jar on that split's
# context/question files, then run preprocess.py. Both steps are pointed at
# ${SLURM_JOBTMP} (the jar's third argument and both positional arguments of
# preprocess.py).
: "${SLURM_JOBTMP:?SLURM_JOBTMP is not set}"

for dt in "${data_types[@]}"; do
  context_file="${data_dir}/${dataset}_${dt}_${length}_contexts.json"
  question_file="${data_dir}/${dataset}_${dt}_questions.json"

  java -jar scripts/preprocessing/quasar_data_prep.jar \
    "${context_file}" \
    "${question_file}" \
    "${SLURM_JOBTMP}" \
    "${num_paragraphs}" \
    || {
      printf 'quasar_data_prep.jar failed for split "%s"\n' "${dt}" >&2
      exit 1
    }

  # The split name must carry the actual split (${dt}); the original literal
  # "{dt}" gave train and dev the same name. The stray leading "p" was dropped
  # so the name lines up with the "<prefix>-processed-corenlp.txt" files the
  # training step reads.
  # NOTE(review): assumes preprocess.py derives its output file name from
  # --split in that form, and that the jar writes "<prefix>.json" -- confirm.
  prefix="${num_paragraphs}_${dataset}_${length}_${dt}"

  python scripts/reader/preprocess.py \
    "${SLURM_JOBTMP}" \
    "${SLURM_JOBTMP}" \
    --split "${prefix}" \
    || {
      printf 'preprocess.py failed for split "%s"\n' "${prefix}" >&2
      exit 1
    }
done
# Data generated from previous steps
# File names (relative to --data-dir) of the processed train/dev splits.
train_data="${num_paragraphs}_${dataset}_${length}_train-processed-corenlp.txt"
dev_data="${num_paragraphs}_${dataset}_${length}_dev-processed-corenlp.txt"

# Train on HPC
# Runs the reader training script for a single epoch, reading the processed
# data from ${SLURM_JOBTMP} and saving the model as ${model_name} under
# ${model_dir}. --train-file/--dev-file are passed once each; the original
# command repeated both flags with identical values.
python scripts/reader/train.py \
  --embedding-file glove.840B.300d.txt \
  --data-dir "${SLURM_JOBTMP:?SLURM_JOBTMP is not set}" \
  --model-dir "${model_dir}" \
  --model-name "${model_name}" \
  --train-file "${train_data}" \
  --dev-file "${dev_data}" \
  --tune-partial 1000 \
  --num-epochs 1