-
Notifications
You must be signed in to change notification settings - Fork 0
/
collate_priors_posteriors.sh
executable file
·82 lines (70 loc) · 2.69 KB
/
collate_priors_posteriors.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
# T. Carman, Dec 2020
# NOTE: Not designed to be run as a script! Use by copying relevant
# bits of code to the appropriate interpreter...
# Collate a posteriors table or priors table.
# This code works the same for priors if you substitute
# "prior.distns.Rdata" for "post.distns.MA.Rdata"
#
# The posterior distribution data exists in .Rdata files in
# the run folders like this:
# $ ngee_dhs_runs/dhs_1_cmt04/pft/CMT04-Betula/post.distns.MA.Rdata
# goal here is to pull all the data out and into a single table
# with a column for the PFT name (pft + cmt)
# Copy all the data into the pecan directory which is mounted
# in the pecan docker stack, so that I can use Rstudio
# to muck around with it.
# had to install gnu coreutils (port install coreutils) to get
# gcp command and use explicit path because gcp is aliased to
# "git cherry pick"...
# Choose which distribution file to collate based on the first argument.
#   $1 containing "post"  -> posteriors (post.distns.MA.Rdata)
#   $1 containing "prior" -> priors     (prior.distns.Rdata)
# "post" is tested first, so an argument containing both selects posteriors.
# Sets: WHICH (the raw argument) and BASEFILEPATTERN (file name to search for).
WHICH=${1:-}
case "$WHICH" in
  *post*)
    BASEFILEPATTERN="post.distns.MA.Rdata"
    ;;
  *prior*)
    BASEFILEPATTERN="prior.distns.Rdata"
    ;;
  *)
    # Diagnostics belong on stderr; exit statuses must be in 0-255, so use
    # a plain 1 instead of -1.
    printf '%s\n' "Hmmm - unrecognized ARG: $WHICH" >&2
    exit 1
    ;;
esac
# Copy every file named $BASEFILEPATTERN found under the run folders into
# the PEcAn scratch directory.
# --parents is not provided by the macOS cp, hence the explicit path to the
# GNU cp (gcp); it keeps the source directory structure under the
# destination.
# find hands each path straight to gcp via -exec, so paths containing
# whitespace or glob characters are passed through intact (one gcp call
# per file, as before).
find /Users/tobeycarman/Documents/SEL/ngee_dhs_code/ngee_dhs_runs \
  -name "$BASEFILEPATTERN" \
  -exec /opt/local/bin/gcp --parents {} ~/Documents/SEL/PEcAn/PECAN/tbc-scratch \;
# Turns out the above was not necessary as I have R installed
# on my host. So the following script will run, but I guess
# I used Rstudio to view the .Rdata files and poke around
# to see how everything is laid out...
# Convert .Rdata to csv so I can work with it in python.
# For every file named $BASEFILEPATTERN under tbc-scratch, generate a tiny
# R script (oneoffR.R, in the current directory) that loads the .Rdata file
# and writes the contained table next to it as <name>.csv, then run it.
#
# The file list is read NUL-delimited on fd 3 so that paths with whitespace
# survive and Rscript keeps the shell's normal stdin.
while IFS= read -r -d '' F <&3; do
  rdata_dir=$(dirname -- "$F")
  rdata_name=$(basename -- "$F")

  # Name of the R object stored in the file. Match on the file name only,
  # so a directory called e.g. "posteriors" cannot mis-classify a priors
  # file.
  case "$rdata_name" in
    *post*) DATA="post.distns" ;;
    *prior*) DATA="prior.distns" ;;
    *)
      printf '%s\n' "Hmmmm" >&2
      exit 1
      ;;
  esac

  # Write the R script to convert to csv.
  # NOTE(review): paths are embedded in R string literals unescaped; a path
  # containing a double quote or backslash would need escaping.
  {
    printf 'setwd("%s")\n' "$rdata_dir"
    printf 'load("%s")\n' "$rdata_name"
    printf 'write.table(%s, "%s", sep=",")\n' "$DATA" "${rdata_name}.csv"
  } > oneoffR.R

  # Call the script; report a failure but keep going with the other files.
  Rscript --vanilla oneoffR.R \
    || printf 'warning: Rscript failed for %s\n' "$F" >&2
done 3< <(find tbc-scratch -name "$BASEFILEPATTERN" -print0)
# Pull the CMT/PFT name out of the file path and
# prepend it to every line in the file
#
# Figure out if we are working on priors or posteriors.
# Sets: OUTFILE     - the collated table to write
#       FINDPATTERN - name of the per-PFT csv files to collect
case "$BASEFILEPATTERN" in
  *prior*)
    OUTFILE="tbc-scratch/priors_table.csv"
    FINDPATTERN="prior.distns.Rdata.csv"
    ;;
  *post*)
    OUTFILE="tbc-scratch/posteriors_table.csv"
    FINDPATTERN="post.distns.MA.Rdata.csv"
    ;;
  *)
    # Diagnostics go to stderr; exit statuses must be in 0-255.
    printf '%s\n' "Hmmm..." >&2
    exit 1
    ;;
esac
# Build the collated table: start from an empty $OUTFILE, then append every
# csv named $FINDPATTERN found under tbc-scratch/, with the quoted name of
# its parent directory (the CMT/PFT name) prepended to every line.
#
# Truncate with ':' rather than 'echo ""' so the table does not begin with
# a stray blank line.
: > "$OUTFILE"

while IFS= read -r -d '' f; do
  # f name looks something like this:
  # kougorak_cmt04/pft/CMT04-Salix/post.distns.MA.Rdata.csv
  # or kougorak_cmt04/pft/CMT04-Salix/prior.distns.Rdata.csv
  # The CMT/PFT name is the second-to-last path component, i.e. the name of
  # the directory that holds the file.
  parent_dir=${f%/*}
  CMT=${parent_dir##*/}
  printf '%s\n' "$CMT"

  # Prefix each line with "<CMT>",  -- the value is passed through the
  # environment so characters such as '&', '/', spaces or backslashes in
  # the directory name are taken literally.
  CMT="$CMT" awk '{ printf "\"%s\",%s\n", ENVIRON["CMT"], $0 }' "$f" >> "$OUTFILE"
done < <(find tbc-scratch/ -name "$FINDPATTERN" -print0)