-
Notifications
You must be signed in to change notification settings - Fork 0
/
wc-all.sh
executable file
·42 lines (33 loc) · 1.03 KB
/
wc-all.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env bash
#
# Compare the line counts of the ID lists with the line counts of the record files.
#
# Reset bash's built-in timer so $SECONDS measures the runtime of this script.
SECONDS=0

# The export version to check is the first command-line argument,
# e.g. v2019-08. Without it the data paths below would silently collapse
# to "$DATA_EXPORT_DIR//ids/...", so refuse to run instead.
if [[ $# -lt 1 || -z "$1" ]]; then
  echo "usage: ${0##*/} VERSION   (e.g. v2019-08)" >&2
  exit 2
fi
VERSION=$1

# set-variables.sh is expected to provide DATA_EXPORT_DIR, which is used
# below to locate the ID and record files. It is looked up the same way as
# before (bash `source` path search / current directory).
source set-variables.sh || {
  echo "error: cannot load set-variables.sh" >&2
  exit 1
}
if [[ -z "${DATA_EXPORT_DIR:-}" ]]; then
  echo "error: DATA_EXPORT_DIR is not set after loading set-variables.sh" >&2
  exit 1
fi
# Walk over the 16 shard groups part-000* .. part-015* and store the
# `wc -l` output of the ID files and of the record files in one pair of
# text files per group (wc-csv-NN.txt / wc-json-NN.txt).
for i in {0..15}; do
  # zero-padded group number: 00, 01, ..., 15
  j=$(printf "%02d" "$i")
  time=$(date +%T)
  echo "$time> $j"

  echo "count lines of ID"
  # The directory part is quoted so a path with spaces or glob characters
  # is passed intact; only the trailing * is left to the shell to expand.
  wc -l "$DATA_EXPORT_DIR/$VERSION/ids/part-0${j}"* > "wc-csv-${j}.txt"

  echo "count lines of records"
  wc -l "$DATA_EXPORT_DIR/$VERSION/full/part-0${j}"* > "wc-json-${j}.txt"

  echo "now wait a bit"
  # plain seconds: the "s" suffix is not accepted by every sleep implementation
  sleep 5
done
# Turn `wc -l` output read from stdin into "name count" lines.
#   $1 - directory prefix removed from the front of each file name; it is
#        compared literally, so regex characters in the path are harmless
#   $2 - optional literal extension cut out of the name (e.g. ".json")
# The "<n> total" summary line that wc appends when it is given several
# files is skipped; other lines are kept even if the name contains "total".
# Note: awk -v interprets backslash escapes in the two arguments.
clean_wc_listing() {
  awk -v prefix="$1" -v ext="${2:-}" '
    NF == 2 && $2 == "total" { next }
    {
      name = $2
      if (prefix != "" && index(name, prefix) == 1) {
        name = substr(name, length(prefix) + 1)
      }
      if (ext != "") {
        pos = index(name, ext)
        if (pos > 0) {
          name = substr(name, 1, pos - 1) substr(name, pos + length(ext))
        }
      }
      print name, $1
    }
  '
}

# aggregate and clean ID lists
cat wc-csv-*.txt \
  | clean_wc_listing "$DATA_EXPORT_DIR/$VERSION/ids/" > wc-csv.txt

# aggregate and clean record list; ".json" is removed so that the names
# line up with the names in the ID list
cat wc-json-*.txt \
  | clean_wc_listing "$DATA_EXPORT_DIR/$VERSION/full/" ".json" > wc-json.txt

# compare: wc-diff.txt stays empty when every file pair has the same count
diff wc-csv.txt wc-json.txt > wc-diff.txt
# Print a duration as "HH:MM:SS elapsed."
#   $1 - duration in whole seconds
format_elapsed() {
  local total=$1
  printf "%02d:%02d:%02d elapsed.\n" \
    "$(( total / 3600 ))" "$(( total % 3600 / 60 ))" "$(( total % 60 ))"
}

# $SECONDS is maintained by bash and counts the seconds since it was last
# assigned.
duration=$SECONDS

# Take a fresh timestamp for the final message. Previously the message
# reused $time from the last loop iteration, so it showed when the last
# shard group started rather than when the check finished.
time=$(date +%T)
echo "$time> check IDs DONE"
format_elapsed "$duration"