Skip to content

Commit

Permalink
Fixed minor bug in the transformation from hidden to observed traits …
Browse files Browse the repository at this point in the history
…for sample.clade.traits. Cleaned up binarize and fixed small bug where it was binning fossil times wrong.
  • Loading branch information
brpetrucci committed Jul 6, 2023
1 parent 614e3a0 commit bee6887
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 33 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ S3method(summary,sim)
S3method(tail,sim)
export(bd.sim)
export(bd.sim.traits)
export(binarize)
export(binner)
export(draw.sim)
export(find.lineages)
Expand Down
68 changes: 42 additions & 26 deletions R/binarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,71 @@
#' Given the output of \code{sample.clade(..., returnTrue = FALSE)}, returns
#' the occurrence counts in each bin (i.e., the same as
#' \code{sample.clade(..., returnTrue = TRUE)}). This helps to trace
#' perfect parallels between both output formats of \code{sample.clade()}.
#' perfect parallels between both output formats of \code{sample.clade}.
#'
#' @param fossil A \code{data.frame} exactly as returned
#' by sample.clade(..., returnTrue = FALSE). See \code{help(sample.clade)}
#' @param fossils A \code{data.frame} exactly as returned
#' by sample.clade(..., returnTrue = FALSE). See \code{?sample.clade)}
#' for details.
#'
#' @param bins A vector of time intervals corresponding to geological time
#' ranges.
#'
#' @return A \code{data.frame} exactly as returned
#' by sample.clade(..., returnTrue = TRUE). See \code{help(sample.clade)}
#' for details.
#' @return A \code{data.frame} exactly as returned by
#' \code{sample.clade(..., returnTrue = TRUE)}. See \code{?sample.clade)} for
#' details.
#'
#' @author Matheus Januario and Bruno do Rosario Petrucci
#' @author Matheus Januario.
#'
#' @details This function helps a user to bins "true occurrences" directly into
#' binned occurrences, allowing for comparisons among "perfectly known" fossils
#' records and records that have a certain resolution (given by the \code{bins}
#' inputted).
#' @details This function helps a user bin "true occurrences" directly into
#' binned occurrences, allowing for comparisons among "perfectly known"
#' fossil records and records that have a certain resolution (given by the
#' \code{bins} parameter).
#'
#' @examples
#'
#' ###
#' # run a birth-death simulation:
#' sim= bd.sim(n0 = 1, lambda = .1, mu = .05, tMax = 50)
#' sim <- bd.sim(n0 = 1, lambda = 0.1, mu = 0.05, tMax = 50)
#'
#' # choose bins
#' bins = seq(0,50,by=1)
#' bins <- seq(0, 50, by = 1)
#'
#' # generate "true" fossil occurrences
#' fsls1 = sample.clade(sim, rho = 1, tMax=50, returnTrue = T)
#' fossils_true <- sample.clade(sim, rho = 1, tMax = 50, returnTrue = TRUE)
#'
#' # bin the true occurrences
#' fossils_binned <- binarize(fossils_true, bins)
#'
#' # bin the true occurrences:
#' fsls2 = binarize(fsls1, bins)
#' # compare
#' fossils_true
#' fossils_binned
#'
#' @name binarize
#' @rdname binarize
#' @export
#'

binarize=function(fossil, bins){

bins = sort(bins, decreasing = T)
binarize <- function(fossils, bins) {
# sort bins in decreasing order
bins <- sort(bins, decreasing = TRUE)

res=fossil[-ncol(fossil)]
res$MaxT=NA
res$MinT=NA
for(i in 1:nrow(fossil)){
id = hist(fossil$SampT[i], breaks=bins, plot=FALSE)$counts
res$MaxT[i]= bins[which(id==1)+1]
res$MinT[i]= bins[which(id==1)]
# start results data frame
res <- fossils[-ncol(fossils)]

# initialize max and min columns
res$MaxT <- NA
res$MinT <- NA

# iterate through all fossils
for (i in 1:nrow(fossils)) {
# find the lowest bin with higher value
id <- max(which(bins >= fossils$SampT[i]))

# set max to that bin
res$MaxT[i] <- bins[id]

# and min to the one right after
res$MinT[i] <- bins[id + 1]
}
return(res)
}
Expand Down
12 changes: 6 additions & 6 deletions R/sample.clade.traits.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
#'
#' # run the simulation
#' sim <- bd.sim.traits(n0, lambda, mu, tMax, nTraits = nTraits,
#' nStates = nStates,,
#' nStates = nStates,
#' X0 = X0, Q = Q, nFinal = c(2, Inf))
#' # note how sim$TRAITS contains states 2 and 3, even though this
#' # is a HiSSE model, because we need that information to run hidden states
Expand Down Expand Up @@ -276,7 +276,7 @@ sample.clade.traits <- function(sim, rho, tMax, traits,
traitsSp$max <- tMax - maxT
traitsSp$min <- tMax - minT
names(traitsSp) <- c("value", "min", "max")

# initialize vector
sampled <- c()

Expand Down Expand Up @@ -305,7 +305,7 @@ sample.clade.traits <- function(sim, rho, tMax, traits,
if (nHidden > 1) {
# set them to normal states
traitsSp$value <- traitsSp$value %% nStates

if (nrow(traitsSp) > 1) {
# duplicate rows
dup <- c()
Expand All @@ -324,7 +324,7 @@ sample.clade.traits <- function(sim, rho, tMax, traits,
} else {
# if count > 0, change the max of count rows ago to max of last row
if (count > 0) {
traitsSp$min[i - 1 - count] <- traitsSp$min[i - 1]
traitsSp$max[i - 1 - count] <- traitsSp$max[i - 1]

# return count to 0
count <- 0
Expand All @@ -334,13 +334,13 @@ sample.clade.traits <- function(sim, rho, tMax, traits,

# need to do a last check in case the last row is a duplicate
if (count > 0) {
traitsSp$min[i - count] <- traitsSp$min[i]
traitsSp$max[i - count] <- traitsSp$max[i]
}

# delete duplicates
if (!is.null(dup)) traitsSp <- traitsSp[-dup, ]
}

# invert time for max and min
traitsSp$max <- tMax - traitsSp$max
traitsSp$min <- tMax - traitsSp$min
Expand Down
56 changes: 56 additions & 0 deletions man/binarize.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/sample.clade.traits.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bee6887

Please sign in to comment.