Skip to content

Commit

Permalink
Merge pull request #79 from goldingn/dev
Browse files Browse the repository at this point in the history
v0.2.0 candidate
  • Loading branch information
goldingn committed Jun 26, 2017
2 parents a8839fa + ad33a5f commit 72bdc23
Show file tree
Hide file tree
Showing 97 changed files with 2,454 additions and 2,551 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,9 @@ logos
.travis.yml
docs
man/figures/plot_greta_legend.R
cran-comments.md
vignettes/build_vignettes.R
vignettes/example_models_cache
vignettes/get_started_cache
vignettes/technical_details_cache
vignettes/get_started_files
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
.Rhistory
.RData
.Ruserdata

README_cache
vignettes/getting_started_cache
vignettes/how_does_this_work_cache
vignettes/example_models_cache
vignettes/get_started_cache
vignettes/technical_details_cache

vignettes/figures

*.pdf
docs/*.Rmd
docs/examples
docs/*_cache
cran-comments.md
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ r_packages:
- knitr
- rmarkdown
- rsvg
- MCMCvis
- bayesplot
- extraDistr

r_github_packages:
Expand Down
32 changes: 12 additions & 20 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
Package: greta
Type: Package
Title: Probabilistic Modelling with TensorFlow
Version: 0.1.9
Date: 2017-05-28
Title: Simple and Scalable Statistical Modelling in R
Version: 0.2.0
Date: 2017-06-26
Authors@R: person("Nick", "Golding", role = c("aut", "cre"),
email = "nick.golding.research@gmail.com")
Description: Existing tools for fitting bespoke statistical models (such as
BUGS, JAGS and STAN) are very effective for moderately-sized problems, but
don't scale so well to large datasets. These tools also require users to learn
a domain-specific language and fix errors at compile time. greta enables users
to construct probabilistic models interactively in native R code, then sample
from those models efficiently using Hamiltonian Monte Carlo. TensorFlow is used
to perform all of the calculations, so greta is particularly fast where the
model contains large linear algebra operations. greta can also be run across
distributed machines or on GPUs, just by installing the relevant version of
TensorFlow. This package is in the early stages of development. Future releases
will likely enable fitting models with fast approximate inference schemes,
different samplers, and more distributions and operations.
Description: Write statistical models in R and fit them by MCMC on CPUs and GPUs, using Google TensorFlow (see <https://goldingn.github.io/greta> for more information).
License: Apache License 2.0
URL: https://github.com/goldingn/greta
BugReports: https://github.com/goldingn/greta/issues
Expand All @@ -27,27 +16,29 @@ LazyData: true
Depends:
R (>= 3.0)
Collate:
'greta_package.R'
'package.R'
'overloaded.R'
'node_class.R'
'node_types.R'
'variable.R'
'distributions.R'
'probability_distributions.R'
'unknowns_class.R'
'greta_array_class.R'
'as_data.R'
'utils.R'
'syntax.R'
'distribution.R'
'operators.R'
'functions.R'
'transformations.R'
'transforms.R'
'structures.R'
'extract_replace_combine.R'
'dynamics_module.R'
'dag_class.R'
'greta_model_class.R'
'progress_bar.R'
'inference.R'
'samplers.R'
'install_tensorflow.R'
Imports:
R6,
tensorflow,
Expand All @@ -58,7 +49,8 @@ Suggests:
knitr,
rmarkdown,
DiagrammeR,
MCMCvis,
bayesplot,
lattice,
testthat,
mvtnorm,
MCMCpack,
Expand Down
7 changes: 5 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ S3method(as.greta_array,logical)
S3method(as.greta_array,matrix)
S3method(as.greta_array,node)
S3method(as.greta_array,numeric)
S3method(as.matrix,greta_array)
S3method(as_data,default)
S3method(as_data,greta_array)
S3method(asin,greta_array)
Expand Down Expand Up @@ -75,7 +76,6 @@ S3method(tail,greta_array)
S3method(tan,greta_array)
export("%*%")
export("distribution<-")
export("likelihood<-")
export(as_data)
export(bernoulli)
export(beta)
Expand All @@ -91,16 +91,17 @@ export(distribution)
export(dynamics)
export(exponential)
export(f)
export(free)
export(gamma)
export(greta_array)
export(hypergeometric)
export(icauchit)
export(icloglog)
export(ilogit)
export(install_tensorflow)
export(inverse_gamma)
export(iprobit)
export(laplace)
export(lkj_correlation)
export(log1pe)
export(logistic)
export(lognormal)
Expand All @@ -111,6 +112,7 @@ export(multivariate_normal)
export(negative_binomial)
export(normal)
export(ones)
export(opt)
export(pareto)
export(poisson)
export(stashed_samples)
Expand All @@ -132,6 +134,7 @@ importFrom(reticulate,py_set_attr)
importFrom(stats,na.omit)
importFrom(stats,rnorm)
importFrom(stats,runif)
importFrom(tensorflow,install_tensorflow)
importFrom(utils,head)
importFrom(utils,setTxtProgressBar)
importFrom(utils,tail)
Expand Down
2 changes: 1 addition & 1 deletion R/as_data.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @name as_data
#' @title Declare R Objects as Data
#' @title convert other objects to greta arrays
#' @description define an object in an R session as a data greta array for use
#' as data in a greta model.
#' @param x an R object that can be coerced to a greta_array (see details).
Expand Down
94 changes: 61 additions & 33 deletions R/dag_class.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ dag_class <- R6Class(
# define all nodes, node densities and free states in the environment
lapply(self$node_list, function (x) x$define_tf(self))

# define an overall log density and relevant gradients there
# define an overall log density and gradients, plus adjusted versions
self$define_joint_density()
self$define_gradients()

Expand All @@ -132,6 +132,42 @@ dag_class <- R6Class(

},

# define tensor for overall log density and gradients
# build a TF tensor for the model's overall log density, plus a version with
# the variables' jacobian adjustments added, and stash both ('joint_density'
# and 'joint_density_adj') in the dag's tf_environment for later sess$run calls
define_joint_density = function () {

# get names of densities for all distribution nodes
density_names <- self$get_tf_names(types = 'distribution')

# get the TF log-density tensors for all distributions
densities <- lapply(density_names, get, envir = self$tf_environment)

# reduce_sum each density tensor down to a scalar
summed_densities <- lapply(densities, tf$reduce_sum)

# remove their names and sum them together
names(summed_densities) <- NULL
joint_density <- tf$add_n(summed_densities)

# assign overall density to environment
assign('joint_density', joint_density, envir = self$tf_environment)

# define adjusted joint density

# get names of adjustment tensors for all variable nodes
# (the '_adj' suffix is the convention used when the variables define
# their jacobian-adjustment tensors — TODO confirm against node classes)
adj_names <- paste0(self$get_tf_names(types = 'variable'), '_adj')

# get the TF jacobian-adjustment tensors for all variables
adj <- lapply(adj_names, get, envir = self$tf_environment)

# remove their names and sum them together
names(adj) <- NULL
total_adj <- tf$add_n(adj)

# assign the adjusted overall density to the environment
assign('joint_density_adj', joint_density + total_adj, envir = self$tf_environment)

},

define_gradients = function () {

# get names of free states for all variable nodes
Expand All @@ -144,47 +180,36 @@ dag_class <- R6Class(
# names of tensors
free_name <- paste0(name, '_free')
gradient_name <- paste0(name, '_gradient')
gradient_adj_name <- paste0(name, '_gradient_adj')

# raw gradients
gradient <- tf$gradients(self$tf_environment$joint_density,
self$tf_environment[[free_name]])
gradient_reshape <- tf$reshape(gradient, shape(-1))

self$tf_environment[[gradient_name]] <- gradient_reshape

# adjusted gradients
gradient_adj <- tf$gradients(self$tf_environment$joint_density_adj,
self$tf_environment[[free_name]])
gradient_adj_reshape <- tf$reshape(gradient_adj, shape(-1))
self$tf_environment[[gradient_adj_name]] <- gradient_adj_reshape

}

# combine the gradients into one tensor
gradient_names <- paste0(variable_tf_names, '_gradient')

gradient_list <- lapply(gradient_names,
get,
envir = self$tf_environment)

self$tf_environment$gradients <- tf$concat(gradient_list, 0L)

},

# define tensor for overall log density and gradients
define_joint_density = function () {

# get names of densities for all distribution nodes
density_names <- self$get_tf_names(types = 'distribution')

# get TF density tensors for all distribution
densities <- lapply(density_names, get, envir = self$tf_environment)
# same for adjusted gradients
gradient_adj_names <- paste0(variable_tf_names, '_gradient_adj')
gradient_adj_list <- lapply(gradient_adj_names,
get,
envir = self$tf_environment)

# convert to double precision floats
densities_double <- lapply(densities, tf$cast, tf$float64)

# reduce_sum them
summed_densities <- lapply(densities_double, tf$reduce_sum)

# remove their names and sum them together
names(summed_densities) <- NULL
sum_total <- tf$add_n(summed_densities)

# assign overall density to environment
assign('joint_density', sum_total, envir = self$tf_environment)
self$tf_environment$gradients_adj <- tf$concat(gradient_adj_list, 0L)

},

Expand Down Expand Up @@ -217,17 +242,20 @@ dag_class <- R6Class(

},

log_density = function() {
# get log density and gradient of joint density w.r.t. free states of all
# variable nodes, with or without applying the jacobian adjustment
log_density = function(adjusted = TRUE) {

with(self$tf_environment,
sess$run(joint_density, feed_dict = parameter_dict))
cleanly(with(self$tf_environment,
sess$run(joint_density_adj, feed_dict = parameter_dict)))

},

# get gradient of joint density w.r.t. free states of all variable nodes
gradients = function () {
with(self$tf_environment,
sess$run(gradients, feed_dict = parameter_dict))
gradients = function (adjusted = TRUE) {

cleanly(with(self$tf_environment,
sess$run(gradients_adj, feed_dict = parameter_dict)))

},

# return the current values of the traced nodes, as a named vector
Expand Down
Loading

0 comments on commit 72bdc23

Please sign in to comment.