Skip to content

Commit

Permalink
Merge pull request #50 from NCAR/develop
Browse files Browse the repository at this point in the history
Update master for 0.4 tag
  • Loading branch information
Joe Hamman committed Mar 13, 2017
2 parents d96026d + 1fcb993 commit b82532f
Show file tree
Hide file tree
Showing 38 changed files with 2,800 additions and 1,272 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ test_regression
test_calendar
test_qm
test_config
downscale
test_random
gard
32 changes: 20 additions & 12 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
sudo: false
sudo: false # use container-based build
language: fortran
compiler:
- gfortran
os:
- linux
notifications:
email: false

compiler: gfortran-6
os: linux
env: TESTID='gard_linux'
addons:
apt:
source:
sources:
- ubuntu-toolchain-r-test
packages:
- gfortran
- libnetcdf-dev
- liblapack-dev
- libnetcdf-dev
- gfortran-6
before_install:
- source ci/gard_install_utils
- gard_before_install
install:
- gard_install
script:
- sed -i "s|NCDF_PATH = /usr/local|NCDF_PATH = /usr|" src/makefile
- sed -i "s|LAPACK_PATH = /usr/local|LAPACK_PATH = /usr|" src/makefile
- make -C src -j4 test
- make -C src -j4
- gard_script
after_success:
- gard_after_success
after_failure:
- gard_after_failure
115 changes: 115 additions & 0 deletions ci/gard_install_utils
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# CI helper utilities for building GARD's dependency stack (szip, HDF5,
# NetCDF-C, NetCDF-Fortran) and GARD itself. Intended to be sourced by the
# Travis build, then driven via gard_before_install / gard_install / ...

set -e  # abort on the first failing command
set -x  # trace every command for the CI log

# Toolchain used for all dependency builds (matches the travis gcc-6 addon).
export CC=/usr/bin/gcc-6
export FC=/usr/bin/gfortran-6

# Scratch area for downloaded tarballs and builds; honor a caller-provided
# override. ${WORKDIR:-} keeps the test safe if the caller runs 'set -u'.
if [ -z "${WORKDIR:-}" ]; then
    export WORKDIR=$HOME/workdir
    mkdir -p "$WORKDIR"
fi

# Installation prefix shared by all the dependency builds below.
if [ -z "${INSTALLDIR:-}" ]; then
    export INSTALLDIR=$HOME/installdir
    mkdir -p "$INSTALLDIR"
fi

function install_szip {
    # Build and install szip 2.1 (compression library required by HDF5)
    # into $INSTALLDIR. Relies on the caller's 'set -e' to abort on failure.
    echo install_szip
    cd "$WORKDIR"
    # NOTE(review): the hdfgroup FTP layout has changed over the years —
    # confirm this URL still resolves before relying on it.
    wget --no-check-certificate -q http://www.hdfgroup.org/ftp/lib-external/szip/2.1/src/szip-2.1.tar.gz
    tar -xzf szip-2.1.tar.gz
    cd szip-2.1
    ./configure --prefix="$INSTALLDIR" &> config.log
    make &> make.log
    make install
    # Make the freshly installed headers/libs visible to subsequent builds.
    # ${VAR:-} keeps these appends safe if the flags were previously unset.
    export CPPFLAGS="${CPPFLAGS:-} -I${INSTALLDIR}/include"
    export LDFLAGS="${LDFLAGS:-} -L${INSTALLDIR}/lib"
}

function install_hdf5 {
    # Build and install HDF5 1.10.0-patch1 (NetCDF-4 backend) into
    # $INSTALLDIR. Assumes install_szip already ran (CPPFLAGS/LDFLAGS set).
    echo install_hdf5
    cd "$WORKDIR"
    wget --no-check-certificate -q http://www.hdfgroup.org/ftp/HDF5/current/src/hdf5-1.10.0-patch1.tar.gz
    tar -xzf hdf5-1.10.0-patch1.tar.gz
    cd hdf5-1.10.0-patch1
    ./configure --prefix="$INSTALLDIR" &> config.log
    make &> make.log
    make install
    # LIBDIR is consumed by the NetCDF configure steps that follow.
    export LIBDIR=${INSTALLDIR}/lib
}

function install_netcdf_c {
    # Build and install NetCDF-C 4.4.1 into $INSTALLDIR on top of the
    # locally built HDF5/szip.
    echo install_netcdf_c
    cd "$WORKDIR"
    wget --no-check-certificate -q ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-4.4.1.tar.gz
    tar -xzf netcdf-4.4.1.tar.gz
    cd netcdf-4.4.1
    ./configure --prefix="$INSTALLDIR" &> config.log
    make &> make.log
    make install
    # Prepend (rather than clobber) so any pre-existing library paths in the
    # CI environment remain visible to later steps.
    export LD_LIBRARY_PATH="${INSTALLDIR}/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
}

function install_netcdf_fortran {
    # Build and install NetCDF-Fortran 4.4.4 (the bindings GARD links
    # against) into $INSTALLDIR. Requires install_netcdf_c to have run.
    echo install_netcdf_fortran
    cd "$WORKDIR"
    wget --no-check-certificate -q ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-fortran-4.4.4.tar.gz
    tar -xzf netcdf-fortran-4.4.4.tar.gz
    cd netcdf-fortran-4.4.4
    ./configure --prefix="$INSTALLDIR" &> config.log
    make &> make.log
    make install
}

function gard_before_install {
    # Build GARD's dependency chain bottom-up:
    # szip (needed by HDF5) -> HDF5 -> NetCDF-C -> NetCDF-Fortran.
    echo gard_before_install
    local step
    for step in install_szip install_hdf5 install_netcdf_c install_netcdf_fortran; do
        "$step"
    done
}

function gard_install {
    # Point the GARD makefile at the CI library locations, then build every
    # supported MODE from a clean tree to verify they all compile.
    echo gard_install
    cd "${TRAVIS_BUILD_DIR}"
    # Use the locally built NetCDF and the distro-packaged LAPACK.
    sed -i "s|NCDF_PATH = /usr/local|NCDF_PATH = ${INSTALLDIR}|" src/makefile
    sed -i "s|LAPACK_PATH = /usr/local|LAPACK_PATH = /usr|" src/makefile
    make -C src clean; make -C src -j4 test
    make -C src clean; make -C src -j4 MODE=debugslow
    make -C src clean; make -C src -j4 MODE=debug
    make -C src clean; make -C src -j4 MODE=debugompslow
    make -C src clean; make -C src -j4 MODE=debugomp
    make -C src clean; make -C src -j4 MODE=profile
    make -C src clean; make -C src -j4 MODE=fast
    make -C src clean; make -C src -j4
    echo "GARD install succeeded"
}

function gard_script {
    # Smoke-test the freshly built binaries; the subshell keeps the caller's
    # working directory untouched (the original cd'd in and back out).
    (
        cd ./src
        ./gard --version
        ./gard -h
        ./test_calendar
        # ./test_random
        ./test_regression
        # ./test_config ../run/downscale_options.txt
    )
    echo "GARD script succeeded"
}

function gard_after_success {
    # Travis after_success hook: just log that the build passed.
    printf '%s\n' gard_after_success "GARD build succeeded"
}

function gard_after_failure {
    # Travis after_failure hook: just log that the build failed.
    printf '%s\n' gard_after_failure "GARD build failed"
}
81 changes: 47 additions & 34 deletions docs/config.md

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions docs/running.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ GARD has the following dependencies:
1. LAPACK — Linear Algebra PACKage.
1. netCDF4 - Network Common Data Form.

*Note: GARD allocates memory on the stack. Users should set the maximum stack size to "unlimited" prior to building/running GARD: `ulimit -s unlimited`*

## Building GARD

GARD is built using a standard `makefile`. From the command line, simply run the following command:
Expand Down Expand Up @@ -43,3 +45,10 @@ After building GARD, it is run on the command line following this syntax:
Use the following to generate a list of e.g. GEFS precipitation files for input.

ls -1 gefs/2010/*/apcp_sfc_*_mean.nc | sed 's/*//g;s/$/"/g;s/^/"/g'>gefs_pr_file.txt

## Common Errors

1. Segmentation Fault
- GARD allocates memory on the stack. Users should set the maximum stack size to "unlimited" prior to building/running GARD: `ulimit -s unlimited`
2. Random errors (e.g. debug not staying set at False)
- Make sure all filenames in the namelist are in quotations.
58 changes: 50 additions & 8 deletions run/downscale_options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
! observation_file = "downscale_options.nml"
! prediction_file = "downscale_options.nml"

output_file = "log_reg_output/"
output_file = "output/gard_out_" ! prefix for output files

start_date = "2000-01-01 00:00:00" ! start time for the output data (only needs to exist in the prediction dataset)
end_date = "2000-01-31 23:00:00" ! end time for the output data
Expand All @@ -15,23 +15,35 @@
start_transform = "1990-01-01 00:00:00" ! start time for any transformations (e.g. quantile mapping) (must exist in both the training and the prediction datasets)
end_transform = "1999-01-01 23:00:00" ! end time for the transformation period

start_post = "1990-01-01 00:00:00" ! start time for any post processing transformations (e.g. quantile mapping) (must exist in the prediction dataset)
end_post = "1999-01-01 23:00:00" ! end time for the post-proc transformation period


! model types
pure_regression = False ! set to true (and others to false) to compute a single regression (no analogs) between training variables and observations to be applied to all predictor data
pure_analog = False ! set to true (and others to false) to use the selected analogs (no regression) to predict the output
analog_regression = True ! set to true (and others to false) to perform a regression only on the selected analog days when predicting the output
pure_regression = False ! set to true (and others to false) to compute a single regression (no analogs) between training variables and observations to be applied to all predictor data
pure_analog = False ! set to true (and others to false) to use the selected analogs (no regression) to predict the output
analog_regression = True ! set to true (and others to false) to perform a regression only on the selected analog days when predicting the output

! Non-model type
pass_through = False ! set to true to simply pass a predictor variable through instead of computing regressions or analogs
pass_through_var = 1 ! Option to select which predictor variable is passed through

! analog selection parameters
n_analogs = 200 ! set the number of analogs to find for each time step
! n_log_analogs = 20 ! set to the number of analogs to use for the logistic_from_analog_exceedance calculation if using something other than n_analogs
! analog_threshold = 0.25 ! set to use a threshold distance when selecting analogs instead of a fixed number (in units of standard deviations averaged across input variables)
n_analogs = 200 ! set the number of analogs to find for each time step
! n_log_analogs = 20 ! set to the number of analogs to use for the logistic_from_analog_exceedance calculation if using something other than n_analogs
! analog_threshold = 0.25 ! set to use a threshold distance when selecting analogs instead of a fixed number (in units of standard deviations averaged across input variables)

! model options
sample_analog = False ! when using pure_analog this will sample randomly from the selection of analogs instead of using the mean
logistic_from_analog_exceedance = False ! when true the logistic probability is calculated from the number of analogs that exceed the threshold instead of from a logistic regression
logistic_threshold = 0 ! when set, the probability of exceeding this value will be calculated and the standard output will be trained only for the data that exceed this value
weight_analogs = True ! when calculating analog_mean, analog_error, or logistic_from_analog, weight each analog by its inverse square distance from the current input

! output post processing transformation
post_correction_transform = 0 ! post processing to apply (e.g. quantile mapping to entire input obs period) (one entry for every output variable)

debug = False ! prints more output at runtime and outputs files including the coefficients used in each analog regression (or analog values) as well as the predictor data
interactive = False ! Print downscaling status as a percentage on the command line
/

! define the input atmospheric model data to be used when training the model
Expand All @@ -45,6 +57,13 @@
lon_name = "XLONG" ! name of a variable in the input data that contains the longitude of each grid cell (can be a 1D or 2D variable)
time_name = "XTIME" ! name of a variable in the input data that contains the time data (must be a number ideally with a units attribute such as "days since YYYY-MM-DD hh:mm:ss")
nfiles = 21 ! the number of files to be read
selected_level = -1 ! If the input data have 4 dimensions (e.g. one dimension is elevation) this is the index that will be used
! this assumes that the z dimension is the 2nd (time, z, y, x) in the NetCDF file
! if -1, the first level will be used.

! Controls normalization applied to training data.
! 0 = don't normalize, 1 = normalize based on the training data mean and standard deviation
normalization_method = 1

! the following are arrays with one element for each input variable
input_transformations = 0, 0, 3, 3 ! transformations to apply to each input variable (0=none, 1=qm?, 2=log, 3=cuberoot, 4=fifthroot)
Expand All @@ -55,6 +74,13 @@

! calendar to be used when interpreting the time variable, "gregorian", "standard", "noleap", "365-day","360-day" all work
calendar = "gregorian"
calendar_start_year = 1900 ! set this to the year the time data use as time 0, will be read from the NetCDF units attribute if possible
timezone_offset = 0 ! offset (in hours) to add to the time data to permit a better match with the local obs. data

! the following options are primarily for forecasting
! GEFS data have n time-steps per file, corresponding to the n-forecast lead times
selected_time = -1 ! if set, only this time step will be read from each input file
time_indices = -1, -1, -1 ! this can be a list of time steps to read and average over instead
/

! Define the input atmospheric model data to be used when applying the model for future predictions
Expand All @@ -72,13 +98,23 @@

input_transformations = 0, 0, 3, 3

! this is the main difference between training and prediction data, permits an additional transformation to e.g. quantile map (transformation=1) each variable to match the training data
! Here is the main difference between training and prediction data
! This permits an additional transformation to e.g. quantile map (transformation=1) each variable to match the training data
! this is primarily for climate simulations
transformations = 0, 0, 0, 0
! Also, if normalization_method = 2, the means and standard deviations from the training data will be used to normalize the predictors
! this is primarily for forecasting applications. 1=Normalize based on prediction data, 0=no normalization applied
normalization_method = 1

var_names = "T2", "PSFC", "PREC_ACC_NC", "PREC_ACC_C"
file_list = "filelists/erai_files_200x.txt", "filelists/erai_files_200x.txt", "filelists/erai_files_200x.txt", "filelists/erai_files_200x.txt"

calendar = "gregorian"
calendar_start_year = 1900
timezone_offset = 0

selected_time = -1
time_indices = -1, -1, -1
/

! Define the input observation data to be used when training the model
Expand All @@ -99,4 +135,10 @@
file_list = "filelists/obs_files_complete.txt"

calendar = "gregorian"
calendar_start_year = 1900

! specify a variable to use to find which grid cells should be masked
mask_variable = 1
! specify a value to use to in that variable to define masked gridcells
mask_value = 1e20
/
Loading

0 comments on commit b82532f

Please sign in to comment.