-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrun-analytics-next-month
executable file
·80 lines (64 loc) · 3.25 KB
/
run-analytics-next-month
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
#
# Run analytics on the first day of next month.
# (Even if it fails, at least the snapshot should be ready.)
#
# Abort on the first unhandled command failure.  This is enabled with `set`
# rather than on the shebang line so that it still applies when the script is
# started as `bash run-analytics-next-month`, which ignores shebang options.
set -e
# Print the current local time as "<weekday> <HH:MM> <timezone>" (for example
# "Monday 09:15 UTC") with no trailing newline, for use as a log-line prefix.
# Arguments: none
# Outputs:   the timestamp on stdout
logdate() {
  printf '%s' "$(date '+%A %H:%M %Z')"
}
# The CLB species-match web service is used in
# hive/normalize/create_tmp_interp_tables.sh, so probe it with a sample query
# before doing anything else.  curl --fail returns non-zero on an HTTP error
# response; with errexit active that stops the script here.
printf '%s\n' "Checking for CLB on http://ws2.gbif.org:7000/"
curl --silent --show-error --fail 'http://ws2.gbif.org:7000/species/match?name=Abies'
# Poll every 15 minutes until the UTC day-of-month is 1, logging each check.
until [[ $(date -u +%-d) -eq 1 ]]; do
  echo "$(logdate) Date is $(date -u +%Y-%m-%d), too soon for analytics."
  sleep 15m
done
# Check the HDFS build has completed: wait until the first file of the
# occurrence table has a modification time at or after 00:00 UTC today.
occurrence_file=/user/hive/warehouse/prod_h.db/occurrence/000000_0
startTime=$(date -u +%s -d 'today 00:00:00')
# Human-readable form of the same threshold.  It is derived from startTime
# (not recomputed from "today") so the log keeps showing the value actually
# being compared against even if the wait runs past the next midnight.
threshold=$(date -u -d "@${startTime}" '+%Y-%m-%d %H:%M %Z')

# `hdfs dfs -stat %Y` reports the modification time in milliseconds, hence the
# "000" appended to the seconds value.  The stat calls are kept inside the
# test and the echo arguments so that a failing hdfs command does not trip
# errexit; an empty result compares as 0 and the loop simply keeps waiting.
while [[ $(hdfs dfs -stat %Y "$occurrence_file") -lt ${startTime}000 ]]; do
  echo "$(logdate)" "occurrence (hdfs) is still old, from" "$(hdfs dfs -stat %y "$occurrence_file")" \
    "(waiting until" "$threshold" "or later)"
  sleep 5m
done
echo "$(logdate)" "occurrence (hdfs) is fresh, from" "$(hdfs dfs -stat %y "$occurrence_file")"

# Pause a further 15 minutes (the reason is not recorded in this script), then
# print a blank line and the current UTC time in RFC 5322 format.
sleep 15m
echo
date -u -R
#######################################
# Suspend one Oozie coordinator by name; e-mail a notice if that fails.
# Arguments: $1 - coordinator name given to the oozie `name=` filter
#            $2 - description used in the "Suspending ..." log line
#            $3 - description used in the failure message and mail subject
# Outputs:   a progress line on stdout; on failure a mail to mblissett@gbif.org
# Returns:   0 if the coordinator was suspended or none was listed; otherwise
#            the exit status of the notification pipeline
#######################################
suspend_coordinator() {
  local filter_name=$1 activity=$2 failed_what=$3
  local oozie_url=http://c5oozie.gbif.org:11000/oozie/
  local listing coordinator_id

  echo "$(logdate)" "Suspending ${activity}"

  # The listing is captured on its own, not piped straight into awk, so that
  # a failing `oozie jobs` (e.g. server unreachable) is reported instead of
  # looking like "no such coordinator".  Running the commands as `if`
  # conditions keeps errexit from aborting the script: suspension is
  # best-effort.
  if listing=$(oozie jobs -oozie "$oozie_url" -jobtype coordinator -filter "name=${filter_name}") &&
      coordinator_id=$(awk 'NR==3 {print $1}' <<<"$listing"); then
    # The job id is read from the first column of the third output line
    # (presumably the first row after two header lines -- TODO confirm against
    # the oozie CLI output).  An empty id means nothing to suspend.
    if [[ -z "$coordinator_id" ]] ||
        oozie job -oozie "$oozie_url" -suspend "$coordinator_id"; then
      return 0
    fi
  fi

  echo "$(logdate)" "Failed to suspend ${failed_what}" |
    mail -s "Failed to suspend ${failed_what}." mblissett@gbif.org
}

# Suspend the coordinators before analytics starts.  The script itself never
# resumes them.
suspend_coordinator MapBuild-Tiles "map tiles build" "map tiles build"
suspend_coordinator Clustering "clustering" "clustering coordinator"
suspend_coordinator Grscicoll-cache "GRSciColl cache" "GRSciColl cache"
echo "$(logdate)" "Starting analytics!"
cd /home/hdfs/analytics/hive/import/hdfs

# Fix the snapshot date once, in UTC, and reuse it for both the snapshot and
# the notification.  Recomputing it in local time after the snapshot has
# finished could name a different day than the snapshot that was created.
snapshot_date=$(date -u +%Y%m%d)

echo "$(logdate)" "Creating snapshot"
./create_new_snapshot.sh snapshot "$snapshot_date" prod_h occurrence
echo "$(logdate)" "Snapshot completed"

sleep 1m
echo "$(logdate)" "Snapshot ${snapshot_date} (probably) is ready for backup." | mail -s "Quarterly snapshot ready for backup" mblissett@gbif.org
cd /home/hdfs/analytics

echo "$(logdate)" "Running Registry analytics"
./build.sh -registryStatistics

echo "$(logdate)" "Waiting 6 hours to allow monthly downloads some time."
sleep 6h

echo "$(logdate)" "Running Occurrence analytics"
# Capture build.sh's exit status before logging.  Expanding $(logdate) on the
# echo line runs another command first and overwrites $?, so reading $? there
# would always report logdate's status (0) instead of the build's.
# A failure is logged but deliberately does not abort the script.
build_status=0
./build.sh -interpretSnapshots -summarizeSnapshots -downloadCsvs -processCsvs -makeFigures -queryDownloads || build_status=$?
if [[ $build_status -ne 0 ]]; then
  echo "$(logdate)" "Analytics exited with a failure code: ${build_status}"
fi

echo "$(logdate)" "Analytics completed"
# NOTE(review): the subject only mentions map tiles, but the clustering and
# GRSciColl cache coordinators were suspended earlier as well -- confirm
# whether they also need resuming by hand.
echo "$(logdate)" "Analytics script exited." | mail -s "Analytics script exited -- map tiles needs resuming." mblissett@gbif.org