Merge pull request #15 from SilviaTerra/edit2019

Edit2019
ncx-co · May 3, 2019 · a2572f5 · a2572f5
2 parents 0d17456 + 7f37f9c
commit a2572f5
Show file tree

Hide file tree

Showing 38 changed files with 1,221 additions and 882 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -1,2 +1,4 @@
+^Meta$
+^doc$
 ^.*\.Rproj$
 ^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
+Meta
+doc
 .Rproj.user
 .Rhistory
 .RData

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,4 +1,4 @@
-Package: forestSampling
+Package: forestsamplr
 Title: Standard Forest Sampling Design Workups
 Version: 0.0.0.9000
 Authors@R: c(
@@ -11,9 +11,10 @@ Depends:
 License: MIT
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 6.0.1
+RoxygenNote: 6.1.1
 Suggests: testthat,
     knitr,
     rmarkdown
 VignetteBuilder: knitr
 Imports: dplyr
+Requires: dplyr
diff --git a/R/all_cluster.R b/R/all_cluster.R
@@ -1,21 +1,21 @@
 #' @title Summarize all cluster sample
 #' @description Summarizes population-level statistics for
-#' cluster sample data. This function has two options: (1) 
-#' Cluster sample with a normal distribution and (2) Cluster 
+#' cluster sample data. This function has two options: (1)
+#' Cluster sample with a normal distribution and (2) Cluster
 #' sample with a Bernoulli distribution.
-#' @usage summarize_all_cluster(data, attribute = NA, element = TRUE, 
-#'                              plotTot = NA, 
+#' @usage summarize_all_cluster(data, attribute = NA, element = TRUE,
+#'                              plotTot = NA,
 #'                              desiredConfidence = 0.95,
 #'                              bernoulli = F)
 #' @param data data frame containing observations of variable of
 #' interest for either cluster-level or plot-level data.
 #' @param attribute character name of attribute to be summarized.
 #' @param element logical true if parameter data is plot-level, false if
 #' parameter data is cluster-level. Default is True.
-#' @param plotTot numeric population size. Equivalent to the 
+#' @param plotTot numeric population size. Equivalent to the
 #' total number of possible elements in the population.
 #' @param desiredConfidence numeric desired confidence level (e.g. 0.9).
-#' @param bernoulli logical TRUE if data fitting the Bernoulli 
+#' @param bernoulli logical TRUE if data fitting the Bernoulli
 #' distribution is used.
 #' @return data frame of stand-level statistics including
 #' standard error and confidence interval limits.
@@ -28,55 +28,66 @@
 #' 
 #' # Plot level data can be expressed as:
 #' 
-#' plotLevelDataExample <- data.frame(clusterID = c(1, 1, 1, 1, 1, 2,  
-#'                                                  2, 3, 4, 4, 4, 4, 
-#'                                                  4, 4, 5, 5, 5, 5, 
-#'                                                  5), 
-#'                                    attr = c(1000, 1250, 950, 900, 
-#'                                             1005, 1000, 1250, 950,
-#'                                             900, 1005, 1000, 1250, 
-#'                                             950, 900, 1005, 1000, 
-#'                                             1250, 950, 900), 
-#'                                    isUsed = c(T, T, T, T, T, T, T, 
-#'                                               T, T, T, T, T, T, T, 
-#'                                               F, F, F, F, F))
+#' plotLevelDataExample <- data.frame(
+#'   clusterID = c(
+#'     1, 1, 1, 1, 1, 2,
+#'     2, 3, 4, 4, 4, 4,
+#'     4, 4, 5, 5, 5, 5,
+#'     5
+#'   ),
+#'   attr = c(
+#'     1000, 1250, 950, 900,
+#'     1005, 1000, 1250, 950,
+#'     900, 1005, 1000, 1250,
+#'     950, 900, 1005, 1000,
+#'     1250, 950, 900
+#'   ),
+#'   isUsed = c(
+#'     T, T, T, T, T, T, T,
+#'     T, T, T, T, T, T, T,
+#'     F, F, F, F, F
+#'   )
+#' )
 #' 
-#' # Cluster level data can be expressed as: 
+#' # Cluster level data can be expressed as:
 #' 
-#' clusterLevelDataExample <- data.frame(clusterID = c(1, 2, 3, 4, 5), 
-#'                                       clusterElements = c(4, 2, 9, 
-#'                                                           4, 10), 
-#'                                       sumAttr = c(1000, 1250, 950,
-#'                                                   900, 1005), 
-#'                                       isUsed = c(T, T, F, T, T))
+#' clusterLevelDataExample <- data.frame(
+#'   clusterID = c(1, 2, 3, 4, 5),
+#'   clusterElements = c(
+#'     4, 2, 9,
+#'     4, 10
+#'   ),
+#'   sumAttr = c(
+#'     1000, 1250, 950,
+#'     900, 1005
+#'   ),
+#'   isUsed = c(T, T, F, T, T)
+#' )
 #' # Set element = FALSE
 #' 
 #' 
-#' # Bernoulli data can be expressed as: 
+#' # Bernoulli data can be expressed as:
 #' 
-#' bernoulliData <- data.frame(plots = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
-#'                             propAlive = c(0.75, 0.80, 0.80, 0.85, 
-#'                                           0.70, 0.90, 0.70, 0.75, 
-#'                                           0.80, 0.65))
+#' bernoulliData <- data.frame(
+#'   plots = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
+#'   propAlive = c(
+#'     0.75, 0.80, 0.80, 0.85,
+#'     0.70, 0.90, 0.70, 0.75,
+#'     0.80, 0.65
+#'   )
+#' )
 #' # Set parameter bernoulli = TRUE
-#' 
 #' }
 #' @export
 
 
-summarize_all_cluster <- function(data, attribute = NA, element = TRUE, 
-                                  plotTot = NA, desiredConfidence = 0.95, bernoulli = F) { 
-
+summarize_all_cluster <- function(data, attribute = NA, element = TRUE,
+                                  plotTot = NA, desiredConfidence = 0.95, bernoulli = F) {
   if (bernoulli == F) {
-
     out <- summarize_cluster(data, element, attribute, desiredConfidence)
-
   } else {
-
     out <- summarize_cluster_discrete(data, attribute, plotTot, desiredConfidence)
-
   }
-  
+
   return(out)
-
 }
diff --git a/R/all_srs.R b/R/all_srs.R
@@ -1,11 +1,11 @@
 #' @title Summarize all simple random sample
 #' @description Summarizes population-level statistics for
-#' simple random sample data. This function has three options: (1) SRS 
+#' simple random sample data. This function has three options: (1) SRS
 #' of a finite population or sampled without replacement,
 #' (2) SRS of an infinite population or sampled with replacement,
 #' and (3) SRS with a Bernoulli distribution.
 #' @usage summarize_all_srs(data, attribute = 'attr',
-#'                          popSize = NA, desiredConfidence = 0.95, 
+#'                          popSize = NA, desiredConfidence = 0.95,
 #'                          infiniteReplacement = F, bernoulli = F)
 #' @param data data frame or vector containing observations of
 #' variable of interest. Variable of interest must already be expanded
@@ -17,7 +17,7 @@
 #' @param infiniteReplacement logical true if sample was done with replacement
 #' or from an infinite population. False if sampled without replacement,
 #' from a finite population. Defaults to False.
-#' @param bernoulli logical TRUE if data fitting the Bernoulli 
+#' @param bernoulli logical TRUE if data fitting the Bernoulli
 #' distribution is used.
 #' @return a data frame of population mean, variance, standard error, and
 #' high and low confidence limits.
@@ -35,35 +35,34 @@
 #' 
 #' # Data frame data example:
 #' 
-#' data <- data.frame(bapa = c(120, 140, 160, 110, 100, 90), 
-#'                    plots = c(1, 2, 3, 4, 5, 6))
-#' attribute <- 'bapa'
+#' data <- data.frame(
+#'   bapa = c(120, 140, 160, 110, 100, 90),
+#'   plots = c(1, 2, 3, 4, 5, 6)
+#' )
+#' attribute <- "bapa"
 #' 
 #' 
 #' # Bernoulli data example:
 #' 
-#' data <- data.frame(alive = c(T, T, F, T, F, F), 
-#'                    plots = c(1, 2, 3, 4, 5, 6))
-#' attribute <- 'alive'
-#' 
+#' data <- data.frame(
+#'   alive = c(T, T, F, T, F, F),
+#'   plots = c(1, 2, 3, 4, 5, 6)
+#' )
+#' attribute <- "alive"
 #' }
 #' @export
 
 
-summarize_all_srs <- function(data, attribute = 'attr', popSize = NA,
-                              desiredConfidence = 0.95, infiniteReplacement = F, bernoulli = F) { 
-
+summarize_all_srs <- function(data, attribute = "attr", popSize = NA,
+                              desiredConfidence = 0.95, infiniteReplacement = F, bernoulli = F) {
   if (bernoulli == F) {
-
-    out <- summarize_simple_random(data, attribute, popSize, 
-                                   desiredConfidence, infiniteReplacement)
-
+    out <- summarize_simple_random(
+      data, attribute, popSize,
+      desiredConfidence, infiniteReplacement
+    )
   } else {
-
     out <- summarize_simple_random_discrete(data, attribute, popTot = popSize, desiredConfidence)
-
   }
-  
+
   return(out)
-
 }
diff --git a/R/cluster.R b/R/cluster.R
@@ -5,7 +5,7 @@
 #' variance terms refer to the variance of the mean.
 #' @param data data frame containing observations of variable of
 #' interest for either cluster-level or element-level data.
-#' @param element logical true if parameter data is element-level 
+#' @param element logical true if parameter data is element-level
 #' (plot-level), false if parameter data is cluster-level. Default is True.
 #' @param attribute character name of attribute to be summarized.
 #' @param desiredConfidence numeric desired confidence level (e.g. 0.9).
@@ -15,12 +15,16 @@
 #' @import dplyr
 #' @examples
 #' \dontrun{
-#' dataPlot <- data.frame(clusterID = c(1, 1, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5),
-#'                        attr = c(1000, 1250, 950, 900, 1005, 1000, 1250, 950, 900, 1005, 1000,
-#'                                 1250, 950, 900, 1005, 1000, 1250, 950, 900),
-#'                        isUsed = c(T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F))
-#' element = TRUE
-#' attribute = 'attr'
+#' dataPlot <- data.frame(
+#'   clusterID = c(1, 1, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5),
+#'   attr = c(
+#'     1000, 1250, 950, 900, 1005, 1000, 1250, 950, 900, 1005, 1000,
+#'     1250, 950, 900, 1005, 1000, 1250, 950, 900
+#'   ),
+#'   isUsed = c(T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F)
+#' )
+#' element <- TRUE
+#' attribute <- "attr"
 #' }
 #' @export
 
@@ -39,36 +43,31 @@ summarize_cluster <- function(data, element = TRUE, attribute = NA, desiredConfi
     if (element) {
       # change variable of interest name to attr, unsummarized
       data$attr <- attrTemp
-
     } else {
       # change variable of interest name to sumAttr, summarized by element
       data$sumAttr <- attrTemp
-
     }
-
   }
 
-if (element) {
-
-  # calculates cluster values from element data
+  if (element) {
 
-  cluster <- data %>%
-    group_by(clusterID) %>%
-    summarize(sumAttr = sum(attr), # sum attributes by cluster
-              clusterElements = n()) %>% # tally of elements in each cluster
-    left_join(distinct(data, clusterID, .keep_all = TRUE)) # maintain isUsed for each cluster
+    # calculates cluster values from element data
 
-} else {
+    cluster <- data %>%
+      group_by(clusterID) %>%
+      summarize(
+        sumAttr = sum(attr), # sum attributes by cluster
+        clusterElements = n()
+      ) %>% # tally of elements in each cluster
+      left_join(distinct(data, clusterID, .keep_all = TRUE)) # maintain isUsed for each cluster
+  } else {
 
-  # reassigns data as cluster, if input data is cluster-level data
-  cluster <- data
-
-}
+    # reassigns data as cluster, if input data is cluster-level data
+    cluster <- data
+  }
 
   if (length(cluster$clusterID) == 1) {
-
     stop("Must have multiple clusters. Consider other analysis.")
-
   }
 
   # basic values: sample-level
@@ -77,38 +76,42 @@ if (element) {
     mutate(nSamp = n()) %>% # num clusters
     mutate(mSampBar = sum(clusterElements) / nSamp) %>% # avg num elements in a cluster
     mutate(df = sum(clusterElements) - 1)
-  
+
 
   # basic values: population-level
   popValues <- cluster %>%
-  summarize(mPop = sum(clusterElements),
-            nPop = n(), # num clusters
-            mPopBar = mPop / nPop)
+    summarize(
+      mPop = sum(clusterElements),
+      nPop = n(), # num clusters
+      mPopBar = mPop / nPop
+    )
   if (is.na(popValues$mPopBar) | popValues$mPopBar == sampValues$mSampBar[[1]]) {
     # if Mbar (pop) is unknown, approximate it with mbar (samp)
 
     popValues$mPopBar <- sum(sampValues$mSampBar[[1]])
-
   }
-  
+
   clusterSummary <- sampValues %>%
     mutate(yBar = sum(sumAttr) / sum(clusterElements)) %>%
-    mutate(ySETempNum = (sumAttr - yBar * clusterElements) ^ 2) %>%
-    summarize(ySE = sqrt(((popValues$nPop - nSamp[[1]]) /
-                            (popValues$nPop * nSamp[[1]] * (popValues$mPopBar ^ 2)))
-                         * (sum(ySETempNum) / (nSamp[[1]] - 1))),
-              yBar = mean(yBar),
-              nSamp = mean(nSamp),
-              mSampBar = mean(mSampBar),
-              df = df[[1]]) %>%
+    mutate(ySETempNum = (sumAttr - yBar * clusterElements)^2) %>%
+    summarize(
+      ySE = sqrt(
+        ((popValues$nPop - nSamp[[1]]) / (popValues$nPop * nSamp[[1]] *
+          (popValues$mPopBar^2))) * (sum(ySETempNum) / (nSamp[[1]] - 1))
+      ),
+      yBar = mean(yBar),
+      nSamp = mean(nSamp),
+      mSampBar = mean(mSampBar),
+      df = df[[1]]
+    ) %>%
     mutate(highCL = yBar + qt(1 - ((1 - desiredConfidence) / 2), df) * ySE) %>%
     mutate(lowCL = yBar - qt(1 - ((1 - desiredConfidence) / 2), df) * ySE) %>%
-    select(standardError = ySE, lowerLimitCI = lowCL, upperLimitCI = highCL, 
-           mean = yBar, nSamp, mSampBar) %>%
+    select(
+      standardError = ySE, lowerLimitCI = lowCL, upperLimitCI = highCL,
+      mean = yBar, nSamp, mSampBar
+    ) %>%
     bind_cols(popValues)
 
   # return data frame of stand-level statistics
   return(clusterSummary)
-
 }
-