forked from ufal/hamledt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
release_hamledt_2.0.sh
executable file
·49 lines (49 loc) · 1.81 KB
/
release_hamledt_2.0.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/bin/bash
# Release script for HamleDT 2.0: the locations and language lists below are
# shared by all later steps of this script.

# Root of the shared project space; the other locations derive from it.
SHARE='/net/projects/tectomt_shared'
# Working data that the script copies from.
WORK="${SHARE}/data/resources/hamledt"
# Archive folder that receives the copy of the working data.
ARCH="${SHARE}/data/archive/hamledt/2.0_2014-05-24_treex-r12700"
# Distribution folder that is packed into the release tarball.
DIST="${SHARE}/hamledt/2.0"

# Languages whose archived data is copied to the distribution as a whole.
FREELANGS='ar cs da et fa fi grc la nl pt ro sv ta'
# Languages for which only the output of create_conll_patch.pl is distributed.
PATCHLANGS='bg bn ca de el en es eu hi hu it ja ru sk sl te tr'
# Every language handled by this release (space-separated).
LANGS="${FREELANGS} ${PATCHLANGS}"
# Copy the working data of every language into the archive folder.
# $LANGS is left unquoted on purpose: it is a space-separated list.
for i in $LANGS ; do
  echo "$i"
  for dataset in train test ; do
    mkdir -p "$ARCH/$i/treex/001_pdtstyle/$dataset" \
             "$ARCH/$i/conll/$dataset" \
             "$ARCH/$i/stanford/$dataset"
    cp "$WORK/$i/treex/001_pdtstyle/$dataset"/*.treex.gz "$ARCH/$i/treex/001_pdtstyle/$dataset"
    cp "$WORK/$i/conll/$dataset"/*.conll.gz "$ARCH/$i/conll/$dataset"
    # The stanford folder holds compressed treex files plus uncompressed
    # conll and stanford files; the latter two are compressed after copying.
    cp "$WORK/$i/stanford/$dataset"/*.treex.gz "$ARCH/$i/stanford/$dataset"
    cp "$WORK/$i/stanford/$dataset"/*.conll "$ARCH/$i/stanford/$dataset"
    cp "$WORK/$i/stanford/$dataset"/*.stanford "$ARCH/$i/stanford/$dataset"
    # -f: when the script is run again, replace the .gz left by the previous
    # run instead of prompting/skipping and keeping a stale compressed file.
    gzip -f "$ARCH/$i/stanford/$dataset"/*.conll "$ARCH/$i/stanford/$dataset"/*.stanford
  done
done
# Copy the complete archived data of the free languages to the distribution.
mkdir -p "$DIST"
for i in $FREELANGS ; do
  echo free "$i"
  if [[ ! -d "$ARCH/$i" ]]; then
    echo "WARNING: $ARCH/$i does not exist, nothing copied for $i" >&2
    continue
  fi
  # Copy the contents ("/.") into an existing target folder. A plain
  # "cp -r src dst" would create dst/<lang>/<lang> when dst already exists,
  # e.g. when the script is run a second time.
  mkdir -p "$DIST/$i"
  cp -r "$ARCH/$i/." "$DIST/$i/"
done
# For the remaining languages, do not copy the archived data. Instead, pipe
# every archived *.conll.gz file through create_conll_patch.pl and store the
# compressed result under the same file name in the distribution folder.
create_patch=/net/work/people/zeman/tectomt/treex/devel/hamledt/create_conll_patch.pl
for i in $PATCHLANGS ; do
  echo patch "$i"
  for style in conll stanford ; do
    for dataset in train test ; do
      srcdir="$ARCH/$i/$style/$dataset"
      outdir="$DIST/$i/$style/$dataset"
      mkdir -p "$outdir"
      # Work with absolute paths instead of cd: a failed cd would silently
      # make the glob below run in an unrelated directory.
      if [[ ! -d "$srcdir" ]]; then
        echo "WARNING: $srcdir does not exist, nothing to patch" >&2
        continue
      fi
      for path in "$srcdir"/*.conll.gz ; do
        # An unmatched glob stays literal; skip it so that no output file
        # named '*.conll.gz' is created.
        [[ -e "$path" ]] || continue
        file=${path##*/}
        gunzip -c "$path" | "$create_patch" | gzip -c > "$outdir/$file"
        # Without this check only the exit status of the final gzip would be
        # visible, hiding a failure of gunzip or of the patch script.
        status="${PIPESTATUS[*]}"
        if [[ "$status" != "0 0 0" ]]; then
          echo "WARNING: creating $outdir/$file failed (exit codes: $status)" >&2
        fi
      done
    done
  done
done
# Pack the distribution folder; the tarball is created next to it.
echo "Packing $SHARE/hamledt/hamledt-2.0-free.tar"
# Abort if the folder cannot be entered: otherwise tar would run in (and could
# overwrite a tarball in) whatever directory happens to be current.
cd "$DIST" || exit 1
cd .. || exit 1
tar cf hamledt-2.0-free.tar 2.0