fix bugs in 16S vignettes

claraqin · Apr 9, 2024 · 1eaab2a · 1eaab2a
1 parent 1fa2c6a
commit 1eaab2a
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 21 deletions.
diff --git a/R/dirs.R b/R/dirs.R
@@ -37,7 +37,9 @@ setBaseDirectory <- function(dir = getwd()) {
 #' }
 makeDataDirectories <- function(check_location = TRUE) {
  if(identical(check_location, TRUE)) {
- resp <- readline(paste("Create new data directory structure in current working directory (", getwd(), ")? y/n: ", sep=" "))
+ resp <- readline(paste("Create new data directory structure in base director (",
+ get("NEONMICROBE_DIR_BASE", envir = neonmicrobe_env),
+ ")? y/n: ", sep=" "))
  if(!(resp %in% c("y","Y"))) return(invisible(NULL))
  }
 
@@ -80,15 +82,22 @@ makeDataDirectories <- function(check_location = TRUE) {
  createDirIfNotExist(file.path(outputs_dir, "mid_process", "ITS", c("1_filtN",
  "2_trimmed",
  "3_filtered",
- "4_seqtabs")))
+ "4_seqtabs",
+ "5_tax")))
  createDirIfNotExist(file.path(outputs_dir, "mid_process", "16S", c("1_trimmed",
  "2_filtered",
- "3_seqtabs")))
+ "3_seqtabs",
+ "4_collapsed",
+ "5_tax")))
 
  # Also create directories for read-tracking tables
  createDirIfNotExist(file.path(outputs_dir, "track_reads", c("ITS",
  "16S")))
 
+ # Also create directories for Phyloseq objects
+ createDirIfNotExist(file.path(outputs_dir, "phyloseq", c("ITS",
+ "16S")))
+
  # If preset directory for soil data does not exist, create it
  createDirIfNotExist(soil_dir)
 

diff --git a/vignettes/add-environmental-variables-16s.Rmd b/vignettes/add-environmental-variables-16s.Rmd
@@ -12,8 +12,9 @@ knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
 )
-devtools::load_all() # TODO: Switch this to library(neonMicrobe) before publishing
-setBaseDirectory(dirname(getwd()))
+library(neonMicrobe)
+# devtools::load_all() # TODO: Switch this to library(neonMicrobe) before publishing
+# setBaseDirectory(dirname(getwd()))
 knitr::opts_knit$set(
  root.dir = NEONMICROBE_DIR_BASE()
 )
@@ -50,19 +51,19 @@ setBaseDirectory()
 
 # Load outputs from DADA2
 
-Load in the outputs from dada2. These would normally consist of the ASV table and the associated taxonomy table, but for the purposes of demonstration, we load only the ASV table in this vignette. In particular, we'll us the ASV table that came pre-loaded with `neonMicrobe` for use in examples.
+Load in the outputs from dada2. These would normally consist of the ASV table and the associated taxonomy table, but for the purposes of demonstration, we load only the ASV table in this vignette. In particular, we'll use the ASV table that came pre-loaded with `neonMicrobe` for use in examples.
 
 ```{r}
 data("seqtab_greatplains_16s")
 seqtab_orig <- seqtab_greatplains_16s
 ```
 
-For reference, this is what loading your own data into this workflow might look like, if you have previously saved them to an Rds file:
+For reference, this is what loading your own data into this workflow might look like, if you have previously saved them to an Rds file. Please note that if you actually run the following code chunk after having run the "process-16s-sequences" vignette, then this will yield a sequence table `seqtab_orig` containing only 1 sample. This is expected behavior. To include more samples or a different set of samples, use the code in the "process-16s-sequences" vignette as a template, modifying the sample selection under the "Process reads into ASV tables" section. Alternatively, simply use the preloaded `seqtab_orig` to follow along in the vignette for now.
 
 ```{r, eval=FALSE}
 seqtab_orig <- readRDS(file.path(NEONMICROBE_DIR_MIDPROCESS(), "16S", "4_collapsed",
  "NEON_16S_seqtab_nochim_grasslands_COLLAPSED.Rds"))
-taxa_orig <- readRDS(file.path(NEONMICROBE_DIR_OUTPUTS(), "16S", "5_tax", 
+taxa_orig <- readRDS(file.path(NEONMICROBE_DIR_MIDPROCESS(), "16S", "5_tax", 
  "NEON_16S_tax_grasslands_COLLAPSED.Rds"))
 ```
 
@@ -154,7 +155,7 @@ ps
 Don't forget to save!
 
 ```{r, eval = FALSE}
-saveRDS(ps, file.path(NEONMICROBE_DIR_OUTPUTS(), "phyloseq", "NEON_16S_phyloseq_greatplains.Rds"))
+saveRDS(ps, file.path(NEONMICROBE_DIR_OUTPUTS(), "phyloseq", "16S", "NEON_16S_phyloseq_greatplains.Rds"))
 ```
 
 If the saved object still takes up too much memory, consider removing or consolidating additional taxa, using agglomeration functions in `dada2`. The `speedyseq` [package](https://github.com/mikemc/speedyseq) is also useful for manipulating large Phyloseq objects
@@ -165,5 +166,5 @@ If the saved object still takes up too much memory, consider removing or consoli
 # This protects against an OTU with small mean & trivially large C.V. 
 # (from phyloseq tutorial)
 ps_subset = filter_taxa(ps, function(x) sum(x > 3) > 10, TRUE)
-saveRDS(ps_subset, file.path(newest_outputs, "NEON_16S_phyloseq_greatplains_subset.Rds"))
+saveRDS(ps_subset, file.path(NEONMICROBE_DIR_OUTPUTS(), "phyloseq", "16S", "NEON_16S_phyloseq_greatplains_subset.Rds"))
 ```
diff --git a/vignettes/add-environmental-variables-its.Rmd b/vignettes/add-environmental-variables-its.Rmd
@@ -1,8 +1,8 @@
 ---
-title: "Convert ITS data to Phyloseq with environmental variables"
+title: "Add Environmental Variables to ITS Data"
 output: rmarkdown::html_vignette
 vignette: >
- %\VignetteIndexEntry{dada2-to-phyloseq-its}
+ %\VignetteIndexEntry{Add Environmental Variables to ITS Data}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
 ---
@@ -171,4 +171,4 @@ If the saved object still takes up too much memory, consider removing or consoli
 # (from phyloseq tutorial)
 ps_subset = filter_taxa(ps, function(x) sum(x > 3) > 10, TRUE)
 saveRDS(ps_subset, file.path(newest_outputs, "NEON_ITS_phyloseq_subset.rds"))
-```
+```
diff --git a/vignettes/process-16s-sequences.Rmd b/vignettes/process-16s-sequences.Rmd
@@ -12,8 +12,9 @@ knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
 )
-devtools::load_all() # TODO: Switch this to library(neonMicrobe) before publishing
-setBaseDirectory(dirname(getwd()))
+library(neonMicrobe)
+# devtools::load_all() # TODO: Switch this to library(neonMicrobe) before publishing
+# setBaseDirectory(dirname(getwd()))
 knitr::opts_knit$set(
  root.dir = NEONMICROBE_DIR_BASE()
 )
@@ -211,9 +212,10 @@ While `mergeSequenceTables()` checks for duplicate samples, it performs no ASV c
 
 ```{r, eval=FALSE}
 seqtab_joined_collapsed <- collapseNoMismatch(seqtab_joined)
-saveRDS(seqtab_joined_collapse, 
- file.path(NEONMICROBE_PATH_MIDPROCESS(), "16S", "4_collapsed",
+saveRDS(seqtab_joined_collapsed, 
+ file.path(NEONMICROBE_DIR_MIDPROCESS(), "16S", "4_collapsed",
  "NEON_16S_seqtab_nochim_grasslands_COLLAPSED.Rds"))
+seqtab_joined_collapsed <- readRDS(file.path(NEONMICROBE_DIR_MIDPROCESS(), "16S", "4_collapsed", "NEON_16S_seqtab_nochim_grasslands_COLLAPSED.Rds"))
 ```
 
 
@@ -225,8 +227,8 @@ In this final section of the vignette, we assign taxonomy to each of the identif
 # Remove low-abundance taxa and low-quality samples
 keep_taxa <- which(colSums(seqtab_joined_collapsed) > 10)
 keep_samples <- which(rowSums(seqtab_joined_collapsed) > 1000)
-seqtab_joined_collapsed <- seqtab_joined_collapsed[,keep_taxa]
-seqtab_joined_collapsed <- seqtab_joined_collapsed[keep_samples,]
+seqtab_joined_collapsed <- seqtab_joined_collapsed[,keep_taxa,drop=FALSE]
+seqtab_joined_collapsed <- seqtab_joined_collapsed[keep_samples,,drop=FALSE]
 
 # Check size of dataset
 print(dim(seqtab_joined_collapsed))
@@ -236,7 +238,7 @@ We will be assigning taxonomy using the SILVA reference database. Download the S
 
 ```{r, eval=FALSE}
 silva.url <- "https://zenodo.org/record/3986799/files/silva_nr99_v138_train_set.fa.gz"
-download.file(silva.url, NEONMICROBE_DIR_TAXREF(), basename(silva.url))
+download.file(silva.url, file.path(NEONMICROBE_DIR_TAXREF(), basename(silva.url)))
 ```
 
 Assign taxonomy using the SILVA reference database:
@@ -253,7 +255,7 @@ Save the taxonomy table to file:
 ```{r, eval=FALSE}
 saveRDS(
  tax, 
- file.path(NEONMICROBE_DIR_OUTPUTS(), "16S", "5_tax", 
+ file.path(NEONMICROBE_DIR_MIDPROCESS(), "16S", "5_tax", 
  "NEON_16S_tax_grasslands_COLLAPSED.Rds")
 )
 ```