Skip to content

Commit

Permalink
Merge pull request #15 from SilviaTerra/edit2019
Browse files Browse the repository at this point in the history
Edit2019
  • Loading branch information
nancpond authored May 3, 2019
2 parents 0d17456 + 7f37f9c commit a2572f5
Show file tree
Hide file tree
Showing 38 changed files with 1,221 additions and 882 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
^Meta$
^doc$
^.*\.Rproj$
^\.Rproj\.user$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Meta
doc
.Rproj.user
.Rhistory
.RData
Expand Down
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Package: forestSampling
Package: forestsamplr
Title: Standard Forest Sampling Design Workups
Version: 0.0.0.9000
Authors@R: c(
Expand All @@ -11,9 +11,10 @@ Depends:
License: MIT
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
RoxygenNote: 6.1.1
Suggests: testthat,
knitr,
rmarkdown
VignetteBuilder: knitr
Imports: dplyr
Requires: dplyr
91 changes: 51 additions & 40 deletions R/all_cluster.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#' @title Summarize all cluster sample
#' @description Summarizes population-level statistics for
#' cluster sample data. This function has two options: (1)
#' Cluster sample with a normal distribution and (2) Cluster
#' cluster sample data. This function has two options: (1)
#' Cluster sample with a normal distribution and (2) Cluster
#' sample with a Bernoulli distribution.
#' @usage summarize_all_cluster(data, attribute = NA, element = TRUE,
#' plotTot = NA,
#' @usage summarize_all_cluster(data, attribute = NA, element = TRUE,
#' plotTot = NA,
#' desiredConfidence = 0.95,
#' bernoulli = F)
#' @param data data frame containing observations of variable of
#' interest for either cluster-level or plot-level data.
#' @param attribute character name of attribute to be summarized.
#' @param element logical true if parameter data is plot-level, false if
#' parameter data is cluster-level. Default is True.
#' @param plotTot numeric population size. Equivalent to the
#' @param plotTot numeric population size. Equivalent to the
#' total number of possible elements in the population.
#' @param desiredConfidence numeric desired confidence level (e.g. 0.9).
#' @param bernoulli logical TRUE if data fitting the Bernoulli
#' @param bernoulli logical TRUE if data fitting the Bernoulli
#' distribution is used.
#' @return data frame of stand-level statistics including
#' standard error and confidence interval limits.
Expand All @@ -28,55 +28,66 @@
#'
#' # Plot level data can be expressed as:
#'
#' plotLevelDataExample <- data.frame(clusterID = c(1, 1, 1, 1, 1, 2,
#' 2, 3, 4, 4, 4, 4,
#' 4, 4, 5, 5, 5, 5,
#' 5),
#' attr = c(1000, 1250, 950, 900,
#' 1005, 1000, 1250, 950,
#' 900, 1005, 1000, 1250,
#' 950, 900, 1005, 1000,
#' 1250, 950, 900),
#' isUsed = c(T, T, T, T, T, T, T,
#' T, T, T, T, T, T, T,
#' F, F, F, F, F))
#' plotLevelDataExample <- data.frame(
#' clusterID = c(
#' 1, 1, 1, 1, 1, 2,
#' 2, 3, 4, 4, 4, 4,
#' 4, 4, 5, 5, 5, 5,
#' 5
#' ),
#' attr = c(
#' 1000, 1250, 950, 900,
#' 1005, 1000, 1250, 950,
#' 900, 1005, 1000, 1250,
#' 950, 900, 1005, 1000,
#' 1250, 950, 900
#' ),
#' isUsed = c(
#' T, T, T, T, T, T, T,
#' T, T, T, T, T, T, T,
#' F, F, F, F, F
#' )
#' )
#'
#' # Cluster level data can be expressed as:
#' # Cluster level data can be expressed as:
#'
#' clusterLevelDataExample <- data.frame(clusterID = c(1, 2, 3, 4, 5),
#' clusterElements = c(4, 2, 9,
#' 4, 10),
#' sumAttr = c(1000, 1250, 950,
#' 900, 1005),
#' isUsed = c(T, T, F, T, T))
#' clusterLevelDataExample <- data.frame(
#' clusterID = c(1, 2, 3, 4, 5),
#' clusterElements = c(
#' 4, 2, 9,
#' 4, 10
#' ),
#' sumAttr = c(
#' 1000, 1250, 950,
#' 900, 1005
#' ),
#' isUsed = c(T, T, F, T, T)
#' )
#' # Set element = FALSE
#'
#'
#' # Bernoulli data can be expressed as:
#' # Bernoulli data can be expressed as:
#'
#' bernoulliData <- data.frame(plots = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
#' propAlive = c(0.75, 0.80, 0.80, 0.85,
#' 0.70, 0.90, 0.70, 0.75,
#' 0.80, 0.65))
#' bernoulliData <- data.frame(
#' plots = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
#' propAlive = c(
#' 0.75, 0.80, 0.80, 0.85,
#' 0.70, 0.90, 0.70, 0.75,
#' 0.80, 0.65
#' )
#' )
#' # Set parameter bernoulli = TRUE
#'
#' }
#' @export


summarize_all_cluster <- function(data, attribute = NA, element = TRUE,
plotTot = NA, desiredConfidence = 0.95, bernoulli = F) {

summarize_all_cluster <- function(data, attribute = NA, element = TRUE,
plotTot = NA, desiredConfidence = 0.95, bernoulli = F) {
if (bernoulli == F) {

out <- summarize_cluster(data, element, attribute, desiredConfidence)

} else {

out <- summarize_cluster_discrete(data, attribute, plotTot, desiredConfidence)

}

return(out)

}
41 changes: 20 additions & 21 deletions R/all_srs.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#' @title Summarize all simple random sample
#' @description Summarizes population-level statistics for
#' simple random sample data. This function has three options: (1) SRS
#' simple random sample data. This function has three options: (1) SRS
#' of a finite population or sampled without replacement,
#' (2) SRS of an infinite population or sampled with replacement,
#' and (3) SRS with a Bernoulli distribution.
#' @usage summarize_all_srs(data, attribute = 'attr',
#' popSize = NA, desiredConfidence = 0.95,
#' popSize = NA, desiredConfidence = 0.95,
#' infiniteReplacement = F, bernoulli = F)
#' @param data data frame or vector containing observations of
#' variable of interest. Variable of interest must already be expanded
Expand All @@ -17,7 +17,7 @@
#' @param infiniteReplacement logical true if sample was done with replacement
#' or from an infinite population. False if sampled without replacement,
#' from a finite population. Defaults to False.
#' @param bernoulli logical TRUE if data fitting the Bernoulli
#' @param bernoulli logical TRUE if data fitting the Bernoulli
#' distribution is used.
#' @return a data frame of population mean, variance, standard error, and
#' high and low confidence limits.
Expand All @@ -35,35 +35,34 @@
#'
#' # Data frame data example:
#'
#' data <- data.frame(bapa = c(120, 140, 160, 110, 100, 90),
#' plots = c(1, 2, 3, 4, 5, 6))
#' attribute <- 'bapa'
#' data <- data.frame(
#' bapa = c(120, 140, 160, 110, 100, 90),
#' plots = c(1, 2, 3, 4, 5, 6)
#' )
#' attribute <- "bapa"
#'
#'
#' # Bernoulli data example:
#'
#' data <- data.frame(alive = c(T, T, F, T, F, F),
#' plots = c(1, 2, 3, 4, 5, 6))
#' attribute <- 'alive'
#'
#' data <- data.frame(
#' alive = c(T, T, F, T, F, F),
#' plots = c(1, 2, 3, 4, 5, 6)
#' )
#' attribute <- "alive"
#' }
#' @export


summarize_all_srs <- function(data, attribute = 'attr', popSize = NA,
desiredConfidence = 0.95, infiniteReplacement = F, bernoulli = F) {

summarize_all_srs <- function(data, attribute = "attr", popSize = NA,
desiredConfidence = 0.95, infiniteReplacement = F, bernoulli = F) {
if (bernoulli == F) {

out <- summarize_simple_random(data, attribute, popSize,
desiredConfidence, infiniteReplacement)

out <- summarize_simple_random(
data, attribute, popSize,
desiredConfidence, infiniteReplacement
)
} else {

out <- summarize_simple_random_discrete(data, attribute, popTot = popSize, desiredConfidence)

}

return(out)

}
89 changes: 46 additions & 43 deletions R/cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' variance terms refer to the variance of the mean.
#' @param data data frame containing observations of variable of
#' interest for either cluster-level or element-level data.
#' @param element logical true if parameter data is element-level
#' @param element logical true if parameter data is element-level
#' (plot-level), false if parameter data is cluster-level. Default is True.
#' @param attribute character name of attribute to be summarized.
#' @param desiredConfidence numeric desired confidence level (e.g. 0.9).
Expand All @@ -15,12 +15,16 @@
#' @import dplyr
#' @examples
#' \dontrun{
#' dataPlot <- data.frame(clusterID = c(1, 1, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5),
#' attr = c(1000, 1250, 950, 900, 1005, 1000, 1250, 950, 900, 1005, 1000,
#' 1250, 950, 900, 1005, 1000, 1250, 950, 900),
#' isUsed = c(T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F))
#' element = TRUE
#' attribute = 'attr'
#' dataPlot <- data.frame(
#' clusterID = c(1, 1, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5),
#' attr = c(
#' 1000, 1250, 950, 900, 1005, 1000, 1250, 950, 900, 1005, 1000,
#' 1250, 950, 900, 1005, 1000, 1250, 950, 900
#' ),
#' isUsed = c(T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F)
#' )
#' element <- TRUE
#' attribute <- "attr"
#' }
#' @export

Expand All @@ -39,36 +43,31 @@ summarize_cluster <- function(data, element = TRUE, attribute = NA, desiredConfi
if (element) {
# change variable of interest name to attr, unsummarized
data$attr <- attrTemp

} else {
# change variable of interest name to sumAttr, summarized by element
data$sumAttr <- attrTemp

}

}

if (element) {

# calculates cluster values from element data
if (element) {

cluster <- data %>%
group_by(clusterID) %>%
summarize(sumAttr = sum(attr), # sum attributes by cluster
clusterElements = n()) %>% # tally of elements in each cluster
left_join(distinct(data, clusterID, .keep_all = TRUE)) # maintain isUsed for each cluster
# calculates cluster values from element data

} else {
cluster <- data %>%
group_by(clusterID) %>%
summarize(
sumAttr = sum(attr), # sum attributes by cluster
clusterElements = n()
) %>% # tally of elements in each cluster
left_join(distinct(data, clusterID, .keep_all = TRUE)) # maintain isUsed for each cluster
} else {

# reassigns data as cluster, if input data is cluster-level data
cluster <- data

}
# reassigns data as cluster, if input data is cluster-level data
cluster <- data
}

if (length(cluster$clusterID) == 1) {

stop("Must have multiple clusters. Consider other analysis.")

}

# basic values: sample-level
Expand All @@ -77,38 +76,42 @@ if (element) {
mutate(nSamp = n()) %>% # num clusters
mutate(mSampBar = sum(clusterElements) / nSamp) %>% # avg num elements in a cluster
mutate(df = sum(clusterElements) - 1)


# basic values: population-level
popValues <- cluster %>%
summarize(mPop = sum(clusterElements),
nPop = n(), # num clusters
mPopBar = mPop / nPop)
summarize(
mPop = sum(clusterElements),
nPop = n(), # num clusters
mPopBar = mPop / nPop
)
if (is.na(popValues$mPopBar) | popValues$mPopBar == sampValues$mSampBar[[1]]) {
# if Mbar (pop) is unknown, approximate it with mbar (samp)

popValues$mPopBar <- sum(sampValues$mSampBar[[1]])

}

clusterSummary <- sampValues %>%
mutate(yBar = sum(sumAttr) / sum(clusterElements)) %>%
mutate(ySETempNum = (sumAttr - yBar * clusterElements) ^ 2) %>%
summarize(ySE = sqrt(((popValues$nPop - nSamp[[1]]) /
(popValues$nPop * nSamp[[1]] * (popValues$mPopBar ^ 2)))
* (sum(ySETempNum) / (nSamp[[1]] - 1))),
yBar = mean(yBar),
nSamp = mean(nSamp),
mSampBar = mean(mSampBar),
df = df[[1]]) %>%
mutate(ySETempNum = (sumAttr - yBar * clusterElements)^2) %>%
summarize(
ySE = sqrt(
((popValues$nPop - nSamp[[1]]) / (popValues$nPop * nSamp[[1]] *
(popValues$mPopBar^2))) * (sum(ySETempNum) / (nSamp[[1]] - 1))
),
yBar = mean(yBar),
nSamp = mean(nSamp),
mSampBar = mean(mSampBar),
df = df[[1]]
) %>%
mutate(highCL = yBar + qt(1 - ((1 - desiredConfidence) / 2), df) * ySE) %>%
mutate(lowCL = yBar - qt(1 - ((1 - desiredConfidence) / 2), df) * ySE) %>%
select(standardError = ySE, lowerLimitCI = lowCL, upperLimitCI = highCL,
mean = yBar, nSamp, mSampBar) %>%
select(
standardError = ySE, lowerLimitCI = lowCL, upperLimitCI = highCL,
mean = yBar, nSamp, mSampBar
) %>%
bind_cols(popValues)

# return data frame of stand-level statistics
return(clusterSummary)

}

Loading

0 comments on commit a2572f5

Please sign in to comment.