diff --git a/01-getting-started/getting-started.html b/01-getting-started/getting-started.html index 5c21a721..644caaac 100644 --- a/01-getting-started/getting-started.html +++ b/01-getting-started/getting-started.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1838,7 +1834,7 @@

    References

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/02-microarray/00-intro-to-microarray.html b/02-microarray/00-intro-to-microarray.html index ecc7b9fb..22c51e34 100644 --- a/02-microarray/00-intro-to-microarray.html +++ b/02-microarray/00-intro-to-microarray.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1719,7 +1715,7 @@

    CCDL for ALSF

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/02-microarray/clustering_microarray_01_heatmap.html b/02-microarray/clustering_microarray_01_heatmap.html index d9e34b5e..184e75dc 100644 --- a/02-microarray/clustering_microarray_01_heatmap.html +++ b/02-microarray/clustering_microarray_01_heatmap.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1901,7 +1897,7 @@

    4.4 Create a heatmap

    ))(25), scale = "row" # Scale values in the direction of genes (rows) ) -

    +

    We’ve created a heatmap but although our genes and samples are clustered, there is not much information that we can gather here because we did not provide the pheatmap() function with annotation labels for our samples.

    First let’s save our clustered heatmap.

    @@ -1918,8 +1914,8 @@

    4.4.1 Save heatmap as a PNG

    # Close the png file: dev.off() -
    ## png 
    -##   2
    +
    ## quartz_off_screen 
    +##                 2

    Now, let’s add some annotation bars to our heatmap.

    @@ -1963,7 +1959,7 @@

    4.5.1 Create annotated heatmap -

    +

    Now that we have annotation bars on our heatmap, we have a better idea of the cell line and treatment groups that appear to cluster together. More specifically, we can see that the samples seem to cluster by their cell lines of origin, but not necessarily as much by whether or not they received the PLX4302 treatment.

    Let’s save our annotated heatmap.

    @@ -1981,8 +1977,8 @@

    4.5.2 Save annotated heatmap as a # Close the png file: dev.off() -
    ## png 
    -##   2
    +
    ## quartz_off_screen 
    +##                 2
    @@ -1998,40 +1994,35 @@

    6 Print session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
     ## 
     ## other attached packages:
    -## [1] magrittr_1.5    pheatmap_1.0.12 optparse_1.6.6 
    +## [1] magrittr_1.5    pheatmap_1.0.12 optparse_1.6.2 
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] Rcpp_1.0.5         pillar_1.4.6       compiler_4.0.2     RColorBrewer_1.1-2
    -##  [5] R.methodsS3_1.8.0  R.utils_2.9.2      tools_4.0.2        digest_0.6.25     
    -##  [9] gtable_0.3.0       evaluate_0.14      lifecycle_0.2.0    tibble_3.0.3      
    -## [13] R.cache_0.14.0     pkgconfig_2.0.3    rlang_0.4.7        cli_2.0.2         
    -## [17] rstudioapi_0.11    yaml_2.2.1         xfun_0.16          dplyr_1.0.0       
    -## [21] styler_1.3.2       stringr_1.4.0      knitr_1.29         generics_0.0.2    
    -## [25] vctrs_0.3.2        hms_0.5.3          tidyselect_1.1.0   grid_4.0.2        
    -## [29] getopt_1.20.3      glue_1.4.1         R6_2.4.1           fansi_0.4.1       
    -## [33] rmarkdown_2.3      farver_2.0.3       purrr_0.3.4        readr_1.3.1       
    -## [37] rematch2_2.1.2     scales_1.1.1       backports_1.1.9    ellipsis_0.3.1    
    -## [41] htmltools_0.5.0    assertthat_0.2.1   colorspace_1.4-1   utf8_1.1.4        
    -## [45] stringi_1.4.6      munsell_0.5.0      crayon_1.3.4       R.oo_1.23.0
    +## [1] Rcpp_1.0.4.6 pillar_1.4.4 compiler_3.6.1 RColorBrewer_1.1-2 +## [5] tools_3.6.1 digest_0.6.25 evaluate_0.14 lifecycle_0.2.0 +## [9] tibble_3.0.1 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.6 +## [13] cli_2.0.2 rstudioapi_0.11 yaml_2.2.1 xfun_0.14 +## [17] dplyr_0.8.3 withr_2.2.0 styler_1.1.1 stringr_1.4.0 +## [21] knitr_1.28 vctrs_0.3.1 hms_0.5.3 tidyselect_0.2.5 +## [25] grid_3.6.1 getopt_1.20.3 glue_1.4.1 R6_2.4.1 +## [29] fansi_0.4.1 rmarkdown_1.14 readr_1.3.1 purrr_0.3.4 +## [33] rematch2_2.1.0 backports_1.1.7 scales_1.0.0 ellipsis_0.3.1 +## [37] htmltools_0.3.6 assertthat_0.2.1 colorspace_1.4-1 utf8_1.1.4 +## [41] stringi_1.4.6 munsell_0.5.0 crayon_1.3.4

    Gu Z., R. Eils, and M. Schlesner, 2016 Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics.

    @@ -2095,7 +2086,7 @@

    6 Print session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/02-microarray/dimension-reduction_microarray_01_pca.html b/02-microarray/dimension-reduction_microarray_01_pca.html index de92dfa0..912e8f66 100644 --- a/02-microarray/dimension-reduction_microarray_01_pca.html +++ b/02-microarray/dimension-reduction_microarray_01_pca.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -2179,12 +2175,12 @@

    4.3 Perform Principal Components ## GSM917288 -0.3817511 -0.4095233 -0.6309443 0.5985548 -0.4305740 0.3042307 ## GSM917230 -1.0901163 -2.1937892 -1.9281503 -1.1493521 -0.4783177 0.6360478 ## PC284 PC285 -## GSM917111 9.52572473 -2.012539e-14 -## GSM917250 1.33152530 -1.168683e-14 -## GSM917281 -0.25177843 -1.847134e-14 -## GSM917062 1.11478570 2.137179e-15 -## GSM917288 -0.07702027 -1.411024e-14 -## GSM917230 -1.67762146 -2.067790e-14 +## GSM917111 9.52572473 -4.615319e-14 +## GSM917250 1.33152530 -6.445799e-15 +## GSM917281 -0.25177843 1.736458e-15 +## GSM917062 1.11478570 1.382228e-14 +## GSM917288 -0.07702027 -3.591745e-14 +## GSM917230 -1.67762146 -4.780139e-15

    We can see that we now have 285 principal component values for each of our samples.

    @@ -2236,7 +2232,7 @@

    4.6 Plot PCA Results

    # Print out plot here pca_plot -

    +

    Looks like Group 4 and SHH groups somewhat cluster with each other but Group 3 seems to be less distinct as there are some samples clustering with Group 4 as well.

    We can add another label to our plot to get more information about our dataset. Let’s also label the data points based on the histological subtype that each sample belongs to.

    # Make a scatterplot with ggplot2
    @@ -2254,7 +2250,7 @@ 

    4.6 Plot PCA Results

    # Print out plot here pca_plot
    -

    +

    Adding the histological subtype label to our plot made our plot more informative, but the diffuse Group 3 data doesn’t appear to be related to a histology subtype. We could test out other variables as annotation labels to get a further understanding of the cluster behavior of each subgroup.

    @@ -2285,40 +2281,35 @@

    6 Session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
     ## 
     ## other attached packages:
    -## [1] magrittr_1.5   ggplot2_3.3.2  optparse_1.6.6
    +## [1] magrittr_1.5   ggplot2_3.2.1  optparse_1.6.2
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] Rcpp_1.0.5        pillar_1.4.6      compiler_4.0.2    R.methodsS3_1.8.0
    -##  [5] R.utils_2.9.2     tools_4.0.2       digest_0.6.25     evaluate_0.14    
    -##  [9] lifecycle_0.2.0   tibble_3.0.3      gtable_0.3.0      R.cache_0.14.0   
    -## [13] pkgconfig_2.0.3   rlang_0.4.7       cli_2.0.2         rstudioapi_0.11  
    -## [17] yaml_2.2.1        xfun_0.16         withr_2.2.0       dplyr_1.0.0      
    -## [21] styler_1.3.2      stringr_1.4.0     knitr_1.29        generics_0.0.2   
    -## [25] vctrs_0.3.2       hms_0.5.3         tidyselect_1.1.0  grid_4.0.2       
    -## [29] getopt_1.20.3     glue_1.4.1        R6_2.4.1          fansi_0.4.1      
    -## [33] rmarkdown_2.3     farver_2.0.3      purrr_0.3.4       readr_1.3.1      
    -## [37] rematch2_2.1.2    scales_1.1.1      backports_1.1.9   ellipsis_0.3.1   
    -## [41] htmltools_0.5.0   assertthat_0.2.1  colorspace_1.4-1  labeling_0.3     
    -## [45] stringi_1.4.6     munsell_0.5.0     crayon_1.3.4      R.oo_1.23.0
    +## [1] Rcpp_1.0.4.6 pillar_1.4.4 compiler_3.6.1 tools_3.6.1 +## [5] digest_0.6.25 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.1 +## [9] gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.6 cli_2.0.2 +## [13] rstudioapi_0.11 yaml_2.2.1 xfun_0.14 dplyr_0.8.3 +## [17] withr_2.2.0 styler_1.1.1 stringr_1.4.0 knitr_1.28 +## [21] vctrs_0.3.1 hms_0.5.3 tidyselect_0.2.5 grid_3.6.1 +## [25] getopt_1.20.3 glue_1.4.1 R6_2.4.1 fansi_0.4.1 +## [29] rmarkdown_1.14 readr_1.3.1 purrr_0.3.4 rematch2_2.1.0 +## [33] backports_1.1.7 scales_1.0.0 ellipsis_0.3.1 htmltools_0.3.6 +## [37] assertthat_0.2.1 colorspace_1.4-1 labeling_0.3 stringi_1.4.6 +## [41] lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4

    Brems M., 2017 A one-stop shop for principal component analysis

    @@ -2391,7 +2382,7 @@

    6 Session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/02-microarray/dimension-reduction_microarray_02_umap.html b/02-microarray/dimension-reduction_microarray_02_umap.html index 55e6261c..ceae7f5f 100644 --- a/02-microarray/dimension-reduction_microarray_02_umap.html +++ b/02-microarray/dimension-reduction_microarray_02_umap.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1868,13 +1864,13 @@

    4.3 Perform Uniform Manifold Appr )

    Let’s take a preview at the data frame we created in the chunk above.

    head(umap_df)
    -
    ##   refinebio_accession_code         X1         X2       histology subgroup
    -## 1                GSM917111 3.34604756  0.7845626         Classic  Group 4
    -## 2                GSM917250 2.33114827  4.0503256         Classic  Group 4
    -## 3                GSM917281 2.52365294  2.4093729         Classic  Group 4
    -## 4                GSM917062 0.02614889  1.3860845         Classic  Group 3
    -## 5                GSM917288 4.15696395 -1.0232268         Classic  Group 4
    -## 6                GSM917230 1.20599505  1.2974269 Medulloblastoma  Group 4
    +
    ##   refinebio_accession_code           X1       X2       histology subgroup
    +## 1                GSM917111  0.980478303 3.498827         Classic  Group 4
    +## 2                GSM917250  0.007078067 0.164898         Classic  Group 4
    +## 3                GSM917281  0.871498571 1.661061         Classic  Group 4
    +## 4                GSM917062 -2.389040189 2.977578         Classic  Group 3
    +## 5                GSM917288  2.706804399 4.452762         Classic  Group 4
    +## 6                GSM917230 -0.750989064 2.847606 Medulloblastoma  Group 4

    Now, let’s plot the UMAP scores.

    # Make a scatterplot using `ggplot2` functionality
     umap_plot <- ggplot(
    @@ -1889,7 +1885,7 @@ 

    4.3 Perform Uniform Manifold Appr # Print out plot here umap_plot

    -

    +

    It’s hard to interpret our UMAP results without some metadata labels on our plot.

    Let’s label the data points based on their genotype subgroup since this is central to the subgroup specific based hypothesis in the original paper (Northcott et al. 2012).

    # Make a scatterplot with ggplot2
    @@ -1906,7 +1902,7 @@ 

    4.3 Perform Uniform Manifold Appr # Print out plot here umap_plot

    -

    +

    It looks like Group 4 and SHH groups somewhat cluster with each other but Group 3 seems to also be clustering with Group 4.

    We can add another label to our plot to potentially gain insight on the clustering behavior of our data. Let’s also label the data points based on the histological subtype that each sample belongs to.

    # Make a scatterplot with ggplot2
    @@ -1924,7 +1920,7 @@ 

    4.3 Perform Uniform Manifold Appr # Print out plot here umap_plot

    -

    +

    Our histological subtype groups don’t appear to be clustering in a discernible pattern. We could test out other variables as annotation labels to get a further understanding of the cluster behavior of each subgroup.

    @@ -1953,42 +1949,37 @@

    6 Session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
     ## 
     ## other attached packages:
    -## [1] magrittr_1.5   ggplot2_3.3.2  umap_0.2.6.0   optparse_1.6.6
    +## [1] magrittr_1.5   ggplot2_3.2.1  umap_0.2.3     optparse_1.6.2
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] reticulate_1.16   styler_1.3.2      tidyselect_1.1.0  xfun_0.16        
    -##  [5] rematch2_2.1.2    purrr_0.3.4       lattice_0.20-41   colorspace_1.4-1 
    -##  [9] vctrs_0.3.2       generics_0.0.2    htmltools_0.5.0   getopt_1.20.3    
    -## [13] yaml_2.2.1        rlang_0.4.7       R.oo_1.23.0       pillar_1.4.6     
    -## [17] glue_1.4.1        withr_2.2.0       R.utils_2.9.2     R.cache_0.14.0   
    -## [21] lifecycle_0.2.0   stringr_1.4.0     munsell_0.5.0     gtable_0.3.0     
    -## [25] R.methodsS3_1.8.0 evaluate_0.14     labeling_0.3      knitr_1.29       
    -## [29] fansi_0.4.1       Rcpp_1.0.5        readr_1.3.1       openssl_1.4.2    
    -## [33] backports_1.1.9   scales_1.1.1      jsonlite_1.7.0    farver_2.0.3     
    -## [37] RSpectra_0.16-0   hms_0.5.3         askpass_1.1       digest_0.6.25    
    -## [41] stringi_1.4.6     dplyr_1.0.0       grid_4.0.2        cli_2.0.2        
    -## [45] tools_4.0.2       tibble_3.0.3      crayon_1.3.4      pkgconfig_2.0.3  
    -## [49] ellipsis_0.3.1    Matrix_1.2-18     assertthat_0.2.1  rmarkdown_2.3    
    -## [53] rstudioapi_0.11   R6_2.4.1          compiler_4.0.2
    +## [1] Rcpp_1.0.4.6 RSpectra_0.15-0 pillar_1.4.4 compiler_3.6.1 +## [5] tools_3.6.1 digest_0.6.25 gtable_0.3.0 jsonlite_1.7.0 +## [9] lattice_0.20-38 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.1 +## [13] pkgconfig_2.0.3 rlang_0.4.6 Matrix_1.2-17 cli_2.0.2 +## [17] rstudioapi_0.11 yaml_2.2.1 xfun_0.14 dplyr_0.8.3 +## [21] withr_2.2.0 styler_1.1.1 stringr_1.4.0 knitr_1.28 +## [25] vctrs_0.3.1 askpass_1.1 hms_0.5.3 tidyselect_0.2.5 +## [29] grid_3.6.1 getopt_1.20.3 reticulate_1.13 glue_1.4.1 +## [33] R6_2.4.1 fansi_0.4.1 rmarkdown_1.14 readr_1.3.1 +## [37] purrr_0.3.4 rematch2_2.1.0 scales_1.0.0 backports_1.1.7 +## [41] ellipsis_0.3.1 htmltools_0.3.6 assertthat_0.2.1 colorspace_1.4-1 +## [45] labeling_0.3 stringi_1.4.6 lazyeval_0.2.2 munsell_0.5.0 +## [49] openssl_1.4.1 crayon_1.3.4

    Konopka T., 2020 Uniform manifold approximation and projection.

    @@ -2061,7 +2052,7 @@

    6 Session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/02-microarray/gene-id-annotation_microarray_01_ensembl.Rmd b/02-microarray/gene-id-annotation_microarray_01_ensembl.Rmd new file mode 100644 index 00000000..0613c248 --- /dev/null +++ b/02-microarray/gene-id-annotation_microarray_01_ensembl.Rmd @@ -0,0 +1,352 @@ +--- +title: "Obtaining Annotation for Ensembl IDs - Microarray" +author: "CCDL for ALSF" +date: "`r format(Sys.time(), '%B %Y')`" +output: + html_notebook: + toc: true + toc_float: true + number_sections: true +--- + +# Purpose of this analysis + +The purpose of this notebook is to provide an example of mapping gene IDs for microarray data obtained from refine.bio using `AnnotationDbi` packages [@Carlson2020-package]. + +⬇️ [**Jump to the analysis code**](#analysis) ⬇️ + +# How to run this example + +For general information about our tutorials and the basic software packages you will need, please see our ['Getting Started' section](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#how-this-tutorial-is-structured). +We recommend taking a look at our [Resources for Learning R](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#resources-for-learning-r) if you have not written code in R before. + +## Obtain the `.Rmd` file + +To run this example yourself, [download the `.Rmd` for this analysis by clicking this link](https://alexslemonade.github.io/refinebio-examples/02-microarray/gene-id-convert_microarray_01_ensembl.Rmd). + +You can open this `.Rmd` file in RStudio and follow the rest of these steps from there. (See our [section about getting started with R notebooks](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#how-to-get-and-use-rmds) if you are unfamiliar with `.Rmd` files.) +Clicking this link will most likely send this to your downloads folder on your computer. +Move this `.Rmd` file to where you would like this example and its files to be stored. + +## Set up your analysis folders + +Good file organization is helpful for keeping your data analysis project on track! +We have set up some code that will automatically set up a folder structure for you. +Run this next chunk to set up your folders! + +If you have trouble running this chunk, see our [introduction to using `.Rmd`s](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#how-to-get-and-use-rmds) for more resources and explanations. + +```{r} +# Create the data folder if it doesn't exist +if (!dir.exists("data")) { + dir.create("data") +} + +# Define the file path to the plots directory +plots_dir <- "plots" # Can replace with path to desired output plots directory + +# Create the plots folder if it doesn't exist +if (!dir.exists(plots_dir)) { + dir.create(plots_dir) +} + +# Define the file path to the results directory +results_dir <- "results" # Can replace with path to desired output results directory + +# Create the results folder if it doesn't exist +if (!dir.exists(results_dir)) { + dir.create(results_dir) +} +``` + +In the same place you put this `.Rmd` file, you should now have three new empty folders called `data`, `plots`, and `results`! + +## Obtain the dataset from refine.bio + +For general information about downloading data for these examples, see our ['Getting Started' section](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#how-to-get-the-data). + +Go to this [dataset's page on refine.bio](https://www.refine.bio/experiments/GSE13490/cancer-stem-cells-are-enriched-in-the-side-population-cells-in-a-mouse-model-of-glioma). + +Click the "Download Now" button on the right side of this screen. + + + +Fill out the pop up window with your email and our Terms and Conditions: + + + +It may take a few minutes for the dataset to process. +You will get an email when it is ready. + +## About the dataset we are using for this example + +For this example analysis, we will use this [mouse glioma stem cells dataset](https://www.refine.bio/experiments/GSE13490/cancer-stem-cells-are-enriched-in-the-side-population-cells-in-a-mouse-model-of-glioma). + +This dataset has 15 microarray mouse glioma model samples. +The samples were obtained from parental biological replicates and from resistant sub-line biological replicates that were transplanted into recipient mice. + +## Place the dataset in your new `data/` folder + +refine.bio will send you a download button in the email when it is ready. +Follow the prompt to download a zip file that has a name with a series of letters and numbers and ends in `.zip`. +Double clicking should unzip this for you and create a folder of the same name. + + + +For more details on the contents of this folder see [these docs on refine.bio](http://docs.refine.bio/en/latest/main_text.html#downloadable-files). + +The `` folder has the data and metadata TSV files you will need for this example analysis. +Experiment accession IDs usually look something like `GSE1235` or `SRP12345`. + +Copy and paste the `GSE13490` folder into your newly created `data/` folder. + +## Check out our file structure! + +Your new analysis folder should contain: + +- The example analysis `.Rmd` you downloaded +- A folder called "data" which contains: + - The `GSE13490` folder which contains: + - The gene expression + - The metadata TSV +- A folder for `plots` (currently empty) +- A folder for `results` (currently empty) + +Your example analysis folder should now look something like this (except with respective experiment accession ID and analysis notebook name you are using): + + + +In order for our example here to run without a hitch, we need these files to be in these locations so we've constructed a test to check before we get started with the analysis. +Run this chunk to double check that your files are in the right place. + +```{r} +# Define the file path to the data directory +data_dir <- file.path("data", "GSE13490") + +# Check if the gene expression matrix file is in the data directory stored as `data_dir` +file.exists(file.path(data_dir, "GSE13490.tsv")) + +# Check if the metadata file is in the data directory stored as `data_dir` +file.exists(file.path(data_dir, "metadata_GSE13490.tsv")) +``` + +If the chunk above printed out `FALSE` to either of those tests, you won't be able to run this analysis _as is_ until those files are in the appropriate place. + +If the concept of a "file path" is unfamiliar to you; we recommend taking a look at our [section about file paths](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#an-important-note-about-file-paths-and-Rmds). + +# Using a different refine.bio dataset with this analysis? + +If you'd like to adapt an example analysis to use a different dataset from [refine.bio](https://www.refine.bio/), we recommend placing the files in the `data/` directory you created and changing the filenames and paths in the notebook to match these files (we've put comments to signify where you would need to change the code). +We suggest saving plots and results to `plots/` and `results/` directories, respectively, as these are automatically created by the notebook. +From here you can customize this analysis example to fit your own scientific questions and preferences. + +refine.bio data comes with gene level data with Ensembl IDs. +Although this example notebook uses Ensembl IDs from Mouse, (Mus musculus), to obtain gene symbols this notebook can be easily converted for use with different species or annotation types eg protein IDs, gene ontology, accession numbers. + +For different species, wherever the abbreviation `org.Mm.eg.db` or `Mm` is written, it must be replaced with the respective species abbreviation eg for Homo sapiens `org.Hs.eg.db` or `Hs` would be used. +In the case of our [RNA-seq gene identifier annotation example notebook](https://github.com/AlexsLemonade/refinebio-examples/blob/master/03-rnaseq/gene-id-annotation_rnaseq_01_ensembl.html), a Zebrafish (Danio rerio) dataset is used, meaning `org.Dr.eg.db` or `Dr` would also need to be used there. +A full list of the annotation R packages from Bioconductor is at this [link](https://bioconductor.org/packages/release/BiocViews.html#___AnnotationData) [@annotation-packages]. + +*** + +   + + +# Obtaining Annotation for Ensembl IDs - Microarray + +Ensembl IDs can be used to obtain various different annotations at the gene/transcript level. +Let's get ready to use the Ensembl IDs from our mouse dataset to obtain the associated gene symbols. + +## Install libraries + +See our Getting Started page with [instructions for package installation](https://alexslemonade.github.io/refinebio-examples/01-getting-started/getting-started.html#what-you-need-to-install) for a list of the other software you will need, as well as more tips and resources. + +In this analysis, we will be using the `org.Mm.eg.db` R package [@Carlson2019]. +[Other species can be used](#using-a-different-refinebio-dataset-with-this-analysis). + +```{r} +# Install the mouse package +if (!("org.Mm.eg.db" %in% installed.packages())) { + # Install this package if it isn't installed yet + BiocManager::install("org.Mm.eg.db", update = FALSE) +} +``` + +Attach the packages we need for this analysis. + +```{r} +# Attach the library +library(org.Mm.eg.db) + +# We will need this so we can use the pipe: %>% +library(magrittr) +``` + +## Import and set up data + +Data downloaded from refine.bio include a metadata tab separated values (TSV) file and a data TSV file. +This chunk of code will read the both TSV files and add them as data frames to your environment. + +```{r} +# Read in metadata TSV file +metadata <- readr::read_tsv(file.path( + data_dir, # Replace with path to your metadata file + "metadata_GSE13490.tsv" # Replace with the name of your metadata file +)) + +# Read in data TSV file +df <- readr::read_tsv(file.path( + data_dir, # Replace with path to your data file + "GSE13490.tsv" # Replace with the name of your data file +)) %>% + # Tuck away the Gene ID column as rownames + tibble::column_to_rownames("Gene") +``` + +Let's ensure that the metadata and data are in the same sample order. + +```{r} +# Make the data in the order of the metadata +df <- df %>% + dplyr::select(metadata$geo_accession) + +# Check if this is in the same order +all.equal(colnames(df), metadata$geo_accession) + +# Bring back the "Gene" column in preparation for mapping +df <- df %>% + tibble::rownames_to_column("Gene") +``` + + +## Map Ensembl IDs to gene symbols + +The `mapIds()` function has a `multiVals` argument which denotes what to do when there are multiple mapped values for a single gene identifier. +The default behavior is to return just the first mapped value. +It is good to keep in mind that various downstream analyses may benefit from varied strategies at this step. +Use `?mapIds` to see more options or strategies. + +In the next chunk, we will run the `mapIds()` function and supply the `multiVals` argument with the `"list"` option in order to get a large list with all the mapped values found for each gene identifier. + +```{r} +# Map ensembl IDs to their associated gene symbols +mapped_list <- mapIds( + org.Mm.eg.db, # Replace with annotation package for the organism relevant to your data + keys = df$Gene, + column = "SYMBOL", # Replace with the type of gene identifiers you would like to map to + keytype = "ENSEMBL", # Replace with the type of gene identifiers in your data + multiVals = "list" +) +``` + +## Explore gene ID conversion + +Now, let's take a look at our `mapped_list` to see how the mapping went. + +```{r} +# Let's use the `head()` function to take a preview at our mapped list +head(mapped_list) +``` + +It looks like we have gene symbols that were successfully mapped to the Ensembl IDs we provided. +However, the data is now in a `list` object, making it a little more difficult to explore. +We are going to turn our list object into a data frame object in the next chunk. + +```{r} +# Let's make our object a bit more manageable for exploration by turning it into a data frame using the `reshape2::melt()` function +mapped_df <- mapped_list %>% + reshape2::melt(value.name = "Symbol") %>% + dplyr::rename("Ensembl" = "L1") # Here we are renaming the column holding the Ensembl IDs as good practice +``` + +Now let's take a peek at our data frame. + +```{r} +head(mapped_df) +``` + +We can see that our data frame has a new column `Symbol`. +Let's get a summary of the gene symbols returned in the `Symbol` column of our mapped data frame. + +```{r} +# We can use the `summary()` function to get a better idea of the distribution of symbols in the `Symbol` column +summary(mapped_df$Symbol) +``` + +There are 998 NA's in our data frame, which means that 998 out of the 17918 Ensembl IDs did not map to gene symbols. +998 out of 17918 is not too bad a rate, in our opinion, but note that different gene identifier types will have different mapping rates and that is to be expected. +Regardless, it is always good to be aware of how many genes you are potentially "losing" if you rely on this new gene identifier you've mapped to for downstream analyses. + +However, if you have almost all NA's it is possible that the function was executed incorrectly or you may want to consider using a different gene identifier, if possible. + +Now let's check to see if we have any genes that were mapped to multiple symbols. + +```{r} +multi_mapped <- mapped_df %>% + # Let's group by the Ensembl IDs in the `Ensembl` column + dplyr::group_by(Ensembl) %>% + # Create a new variable containing the number of symbols mapped to each ID + dplyr::mutate(gene_symbol_count = dplyr::n()) %>% + # Arrange by the genes with the highest number of symbols mapped + dplyr::arrange(desc(gene_symbol_count)) %>% + # Filter to include only the rows with multi mappings + dplyr::filter(gene_symbol_count > 1) + +# Let's look at the first 6 rows of our `multi_mapped` object +head(multi_mapped) +``` + +Looks like we have some cases where 3 gene symbols mapped to a single Ensembl ID. +We have a total of 130 out of 17984 Ensembl IDs with multiple mappings to gene symbols. +If we are not too worried about the 130 IDs with multiple mappings, we can filter them out for the purpose of having 1:1 mappings for our downstream analysis. + +## Map Ensembl IDs to gene symbols -- filtering out multi mappings + +The next code chunk we will rerun the `mapIds()` function, this time supplying the `"filter"` option to the `multiVals` argument. +This will remove all instances of multiple mappings and return a list of only the gene identifiers and symbols that had 1:1 mapping. +Use `?mapIds` to see more options or strategies. + +```{r} +# Map ensembl IDs to their associated gene symbols +filtered_mapped_df <- data.frame( + "Symbol" = mapIds( + org.Mm.eg.db, # Replace with annotation package for the organism relevant to your data + keys = df$Gene, + column = "SYMBOL", # Replace with the type of gene identifiers you would like to map to + keytype = "ENSEMBL", # Replace with the type of gene identifiers in your data + multiVals = "filter" # This will drop any `Gene`s that have multiple matches + ) +) %>% + # Make an `Ensembl` column to store the rownames + tibble::rownames_to_column("Ensembl") %>% + # Join the remaining data from `df` using the Ensembl IDs + dplyr::inner_join(df, by = c("Ensembl" = "Gene")) +``` + +Now, let's write our filtered and mapped results to file! + +## Write mapped results to file + +```{r Write to file} +# Write mapped and annotated data frame to output file +readr::write_tsv(filtered_mapped_df, file.path( + results_dir, + "GSE13490_Gene_Symbols.tsv" # Replace with a relevant output file name +)) +``` + + +# Resources for further learning + +- Marc Carlson has prepared a nice [Introduction to Bioconductor Annotation Packages](https://bioconductor.org/packages/release/bioc/vignettes/AnnotationDbi/inst/doc/IntroToAnnotationPackages.pdf) [@Carlson2020-vignette] +- See our [RNA-seq gene ID conversion notebook](https://github.com/AlexsLemonade/refinebio-examples/blob/master/03-rnaseq/gene-id-annotation_rnaseq_01_ensembl.html) as another applicable example, since the steps for this workflow do not change with technology [@gene-id-annotation-rna-seq]. + +# Session info + +At the end of every analysis, before saving your notebook, we recommend printing out your session info. +This helps make your code more reproducible by recording what versions of softwares and packages you used to run this. + +```{r} +# Print session info +sessionInfo() +``` diff --git a/02-microarray/gene-id-annotation_microarray_01_ensembl.html b/02-microarray/gene-id-annotation_microarray_01_ensembl.html new file mode 100644 index 00000000..ecc1dfbe --- /dev/null +++ b/02-microarray/gene-id-annotation_microarray_01_ensembl.html @@ -0,0 +1,2169 @@ + + + + + + + + + + + + + + + +Obtaining Annotation for Ensembl IDs - Microarray + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + +
    +
    +
    +
    +
    + +
    + + + + + + + + + +
    +

    1 Purpose of this analysis

    +

    The purpose of this notebook is to provide an example of mapping gene IDs for microarray data obtained from refine.bio using AnnotationDbi packages (Carlson 2020a).

    +

    ⬇️ Jump to the analysis code ⬇️

    +
    +
    +

    2 How to run this example

    +

    For general information about our tutorials and the basic software packages you will need, please see our ‘Getting Started’ section. We recommend taking a look at our Resources for Learning R if you have not written code in R before.

    +
    +

    2.1 Obtain the .Rmd file

    +

    To run this example yourself, download the .Rmd for this analysis by clicking this link.

    +

    You can open this .Rmd file in RStudio and follow the rest of these steps from there. (See our section about getting started with R notebooks if you are unfamiliar with .Rmd files.) Clicking this link will most likely send this to your downloads folder on your computer. Move this .Rmd file to where you would like this example and its files to be stored.

    +
    +
    +

    2.2 Set up your analysis folders

    +

    Good file organization is helpful for keeping your data analysis project on track! We have set up some code that will automatically set up a folder structure for you. Run this next chunk to set up your folders!

    +

    If you have trouble running this chunk, see our introduction to using .Rmds for more resources and explanations.

    +
    # Create the data folder if it doesn't exist
    +if (!dir.exists("data")) {
    +  dir.create("data")
    +}
    +
    +# Define the file path to the plots directory
    +plots_dir <- "plots" # Can replace with path to desired output plots directory
    +
    +# Create the plots folder if it doesn't exist
    +if (!dir.exists(plots_dir)) {
    +  dir.create(plots_dir)
    +}
    +
    +# Define the file path to the results directory
    +results_dir <- "results" # Can replace with path to desired output results directory
    +
    +# Create the results folder if it doesn't exist
    +if (!dir.exists(results_dir)) {
    +  dir.create(results_dir)
    +}
    +

    In the same place you put this .Rmd file, you should now have three new empty folders called data, plots, and results!

    +
    +
    +

    2.3 Obtain the dataset from refine.bio

    +

    For general information about downloading data for these examples, see our ‘Getting Started’ section.

    +

    Go to this dataset’s page on refine.bio.

    +

    Click the “Download Now” button on the right side of this screen.

    +

    +

    Fill out the pop up window with your email and our Terms and Conditions:

    +

    +

    It may take a few minutes for the dataset to process. You will get an email when it is ready.

    +
    +
    +

    2.4 About the dataset we are using for this example

    +

    For this example analysis, we will use this mouse glioma stem cells dataset.

    +

    This dataset has 15 microarray mouse glioma model samples. The samples were obtained from parental biological replicates and from resistant sub-line biological replicates that were transplanted into recipient mice.

    +
    +
    +

    2.5 Place the dataset in your new data/ folder

    +

    refine.bio will send you a download button in the email when it is ready. Follow the prompt to download a zip file that has a name with a series of letters and numbers and ends in .zip. Double clicking should unzip this for you and create a folder of the same name.

    +

    +

    For more details on the contents of this folder see these docs on refine.bio.

    +

    The <experiment_accession_id> folder has the data and metadata TSV files you will need for this example analysis. Experiment accession IDs usually look something like GSE1235 or SRP12345.

    +

    Copy and paste the GSE13490 folder into your newly created data/ folder.

    +
    +
    +

    2.6 Check out our file structure!

    +

    Your new analysis folder should contain:

    +
      +
    • The example analysis .Rmd you downloaded
      +
    • +
    • A folder called “data” which contains: +
        +
      • The GSE13490 folder which contains: +
          +
        • The gene expression
          +
        • +
        • The metadata TSV
          +
        • +
      • +
    • +
    • A folder for plots (currently empty)
      +
    • +
    • A folder for results (currently empty)
    • +
    +

    Your example analysis folder should now look something like this (except with respective experiment accession ID and analysis notebook name you are using):

    +

    +

    In order for our example here to run without a hitch, we need these files to be in these locations so we’ve constructed a test to check before we get started with the analysis. Run this chunk to double check that your files are in the right place.

    +
    # Define the file path to the data directory
    +data_dir <- file.path("data", "GSE13490")
    +
    +# Check if the gene expression matrix file is in the data directory stored as `data_dir`
    +file.exists(file.path(data_dir, "GSE13490.tsv"))
    +
    ## [1] TRUE
    +
    # Check if the metadata file is in the data directory stored as `data_dir`
    +file.exists(file.path(data_dir, "metadata_GSE13490.tsv"))
    +
    ## [1] TRUE
    +

    If the chunk above printed out FALSE to either of those tests, you won’t be able to run this analysis as is until those files are in the appropriate place.

    +

    If the concept of a “file path” is unfamiliar to you; we recommend taking a look at our section about file paths.

    +
    +
    +
    +

    3 Using a different refine.bio dataset with this analysis?

    +

    If you’d like to adapt an example analysis to use a different dataset from refine.bio, we recommend placing the files in the data/ directory you created and changing the filenames and paths in the notebook to match these files (we’ve put comments to signify where you would need to change the code). We suggest saving plots and results to plots/ and results/ directories, respectively, as these are automatically created by the notebook. From here you can customize this analysis example to fit your own scientific questions and preferences.

    +

    refine.bio data comes with gene level data with Ensembl IDs. Although this example notebook uses Ensembl IDs from Mouse, (Mus musculus), to obtain gene symbols this notebook can be easily converted for use with different species or annotation types eg protein IDs, gene ontology, accession numbers.

    +

    For different species, wherever the abbreviation org.Mm.eg.db or Mm is written, it must be replaced with the respective species abbreviation eg for Homo sapiens org.Hs.eg.db or Hs would be used. In the case of our RNA-seq gene identifier annotation example notebook, a Zebrafish (Danio rerio) dataset is used, meaning org.Dr.eg.db or Dr would also need to be used there. A full list of the annotation R packages from Bioconductor is at this link (R Bioconductor Team 2003).

    +
    + +

     

    +
    +
    +

    4 Obtaining Annotation for Ensembl IDs - Microarray

    +

    Ensembl IDs can be used to obtain various different annotations at the gene/transcript level. Let’s get ready to use the Ensembl IDs from our mouse dataset to obtain the associated gene symbols.

    +
    +

    4.1 Install libraries

    +

    See our Getting Started page with instructions for package installation for a list of the other software you will need, as well as more tips and resources.

    +

    In this analysis, we will be using the org.Mm.eg.db R package (Carlson 2019). Other species can be used.

    +
    # Install the mouse package
    +if (!("org.Mm.eg.db" %in% installed.packages())) {
    +  # Install this package if it isn't installed yet
    +  BiocManager::install("org.Mm.eg.db", update = FALSE)
    +}
    +

    Attach the packages we need for this analysis.

    +
    # Attach the library
    +library(org.Mm.eg.db)
    +
    ## Loading required package: AnnotationDbi
    +
    ## Loading required package: stats4
    +
    ## Loading required package: BiocGenerics
    +
    ## Loading required package: parallel
    +
    ## 
    +## Attaching package: 'BiocGenerics'
    +
    ## The following objects are masked from 'package:parallel':
    +## 
    +##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    +##     clusterExport, clusterMap, parApply, parCapply, parLapply,
    +##     parLapplyLB, parRapply, parSapply, parSapplyLB
    +
    ## The following objects are masked from 'package:stats':
    +## 
    +##     IQR, mad, sd, var, xtabs
    +
    ## The following objects are masked from 'package:base':
    +## 
    +##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    +##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    +##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    +##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
    +##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
    +##     union, unique, unsplit, which, which.max, which.min
    +
    ## Loading required package: Biobase
    +
    ## Welcome to Bioconductor
    +## 
    +##     Vignettes contain introductory material; view with
    +##     'browseVignettes()'. To cite Bioconductor, see
    +##     'citation("Biobase")', and for packages 'citation("pkgname")'.
    +
    ## Loading required package: IRanges
    +
    ## Loading required package: S4Vectors
    +
    ## 
    +## Attaching package: 'S4Vectors'
    +
    ## The following object is masked from 'package:base':
    +## 
    +##     expand.grid
    +
    ## 
    +
    # We will need this so we can use the pipe: %>%
    +library(magrittr)
    +
    +
    +

    4.2 Import and set up data

    +

    Data downloaded from refine.bio include a metadata tab separated values (TSV) file and a data TSV file. This chunk of code will read the both TSV files and add them as data frames to your environment.

    +
    # Read in metadata TSV file
    +metadata <- readr::read_tsv(file.path(
    +  data_dir, # Replace with path to your metadata file
    +  "metadata_GSE13490.tsv" # Replace with the name of your metadata file
    +))
    +
    ## Parsed with column specification:
    +## cols(
    +##   .default = col_character(),
    +##   refinebio_age = col_logical(),
    +##   refinebio_cell_line = col_logical(),
    +##   refinebio_compound = col_logical(),
    +##   refinebio_disease = col_logical(),
    +##   refinebio_disease_stage = col_logical(),
    +##   refinebio_genetic_information = col_logical(),
    +##   refinebio_processed = col_logical(),
    +##   refinebio_race = col_logical(),
    +##   refinebio_sex = col_logical(),
    +##   refinebio_source_archive_url = col_logical(),
    +##   refinebio_specimen_part = col_logical(),
    +##   refinebio_subject = col_logical(),
    +##   refinebio_time = col_logical(),
    +##   refinebio_treatment = col_logical(),
    +##   channel_count = col_double(),
    +##   data_row_count = col_double(),
    +##   taxid_ch1 = col_double()
    +## )
    +
    ## See spec(...) for full column specifications.
    +
    # Read in data TSV file
    +df <- readr::read_tsv(file.path(
    +  data_dir, # Replace with path to your data file
    +  "GSE13490.tsv" # Replace with the name of your data file
    +)) %>%
    +  # Tuck away the Gene ID column as rownames
    +  tibble::column_to_rownames("Gene")
    +
    ## Parsed with column specification:
    +## cols(
    +##   Gene = col_character(),
    +##   GSM340064 = col_double(),
    +##   GSM340065 = col_double(),
    +##   GSM340066 = col_double(),
    +##   GSM340067 = col_double(),
    +##   GSM340068 = col_double(),
    +##   GSM340069 = col_double(),
    +##   GSM340070 = col_double(),
    +##   GSM340071 = col_double(),
    +##   GSM340072 = col_double(),
    +##   GSM340073 = col_double(),
    +##   GSM340074 = col_double(),
    +##   GSM340075 = col_double(),
    +##   GSM340076 = col_double(),
    +##   GSM340077 = col_double(),
    +##   GSM340078 = col_double()
    +## )
    +

    Let’s ensure that the metadata and data are in the same sample order.

    +
    # Make the data in the order of the metadata
    +df <- df %>%
    +  dplyr::select(metadata$geo_accession)
    +
    +# Check if this is in the same order
    +all.equal(colnames(df), metadata$geo_accession)
    +
    ## [1] TRUE
    +
    # Bring back the "Gene" column in preparation for mapping
    +df <- df %>%
    +  tibble::rownames_to_column("Gene")
    +
    +
    +

    4.3 Map Ensembl IDs to gene symbols

    +

    The mapIds() function has a multiVals argument which denotes what to do when there are multiple mapped values for a single gene identifier. The default behavior is to return just the first mapped value. It is good to keep in mind that various downstream analyses may benefit from varied strategies at this step. Use ?mapIds to see more options or strategies.

    +

    In the next chunk, we will run the mapIds() function and supply the multiVals argument with the "list" option in order to get a large list with all the mapped values found for each gene identifier.

    +
    # Map ensembl IDs to their associated gene symbols
    +mapped_list <- mapIds(
    +  org.Mm.eg.db, # Replace with annotation package for the organism relevant to your data
    +  keys = df$Gene,
    +  column = "SYMBOL", # Replace with the type of gene identifiers you would like to map to
    +  keytype = "ENSEMBL", # Replace with the type of gene identifiers in your data
    +  multiVals = "list"
    +)
    +
    ## 'select()' returned 1:many mapping between keys and columns
    +
    +
    +

    4.4 Explore gene ID conversion

    +

    Now, let’s take a look at our mapped_list to see how the mapping went.

    +
    # Let's use the `head()` function to take a preview at our mapped list
    +head(mapped_list)
    +
    ## $ENSMUSG00000000001
    +## [1] "Gnai3"
    +## 
    +## $ENSMUSG00000000003
    +## [1] "Pbsn"
    +## 
    +## $ENSMUSG00000000028
    +## [1] "Cdc45"
    +## 
    +## $ENSMUSG00000000031
    +## [1] "H19"
    +## 
    +## $ENSMUSG00000000037
    +## [1] "Scml2"
    +## 
    +## $ENSMUSG00000000049
    +## [1] "Apoh"
    +

    It looks like we have gene symbols that were successfully mapped to the Ensembl IDs we provided. However, the data is now in a list object, making it a little more difficult to explore. We are going to turn our list object into a data frame object in the next chunk.

    +
    # Let's make our object a bit more manageable for exploration by turning it into a data frame using the `reshape2::melt()` function
    +mapped_df <- mapped_list %>%
    +  reshape2::melt(value.name = "Symbol") %>%
    +  dplyr::rename("Ensembl" = "L1") # Here we are renaming the column holding the Ensembl IDs as good practice
    +

    Now let’s take a peek at our data frame.

    +
    head(mapped_df)
    +
    ##   Symbol            Ensembl
    +## 1  Gnai3 ENSMUSG00000000001
    +## 2   Pbsn ENSMUSG00000000003
    +## 3  Cdc45 ENSMUSG00000000028
    +## 4    H19 ENSMUSG00000000031
    +## 5  Scml2 ENSMUSG00000000037
    +## 6   Apoh ENSMUSG00000000049
    +

    We can see that our data frame has a new column Symbol. Let’s get a summary of the gene symbols returned in the Symbol column of our mapped data frame.

    +
    # We can use the `summary()` function to get a better idea of the distribution of symbols in the `Symbol` column
    +summary(mapped_df$Symbol)
    +
    ##  Gm13023   Nudt10     Pms2    Gnai3     Pbsn    Cdc45      H19    Scml2 
    +##        2        2        2        1        1        1        1        1 
    +##     Apoh     Narf     Cav2     Klf6    Scmh1    Cox5a     Tbx2     Tbx4 
    +##        1        1        1        1        1        1        1        1 
    +##     Zfy2     Ngfr     Wnt3    Wnt9a      Fer     Xpo6     Tfe3    Axin2 
    +##        1        1        1        1        1        1        1        1 
    +##    Brat1    Gna12 Slc22a18   Itgb2l    Igsf5   Pih1d2     Dlat     Sdhd 
    +##        1        1        1        1        1        1        1        1 
    +##    Fgf23     Fgf6    Ccnd2   Gpr107    Nalcn   Btbd17    Slfn4       Th 
    +##        1        1        1        1        1        1        1        1 
    +##     Ins2   Scnn1g     Drp2  Tspan32     Lhx2   Clec2g     Gmpr    Glra1 
    +##        1        1        1        1        1        1        1        1 
    +##     Mid2   Trim25     Dgke   Scpep1      Mnt    Itgb2    Hddc2  Tpd52l1 
    +##        1        1        1        1        1        1        1        1 
    +##     Pemt     Cdh1     Cdh4    Ckmt1    Bcl6b  Clec10a   Alox12    Arvcf 
    +##        1        1        1        1        1        1        1        1 
    +##     Comt     Rtca      Dbt   Dazap2    Mcts1     Rem1    Rnf17 Trappc10 
    +##        1        1        1        1        1        1        1        1 
    +##     Ccm2      Wap    Tbrg4  Tmprss2      Mx1      Fap      Gcg   Ndufa9 
    +##        1        1        1        1        1        1        1        1 
    +##    Egfl6      Lck    Tssk3  Cttnbp2   Galnt1     Myf5    Mkrn2    Pparg 
    +##        1        1        1        1        1        1        1        1 
    +##     Raf1   Gm4532    Sept1    Pdgfb   Acvrl1    Grasp   Acvr1b   Tom1l2 
    +##        1        1        1        1        1        1        1        1 
    +##    Gpa33  Zfp385a  (Other)     NA's 
    +##        1        1    16885      998
    +

    There are 998 NA’s in our data frame, which means that 998 out of the 17918 Ensembl IDs did not map to gene symbols. 998 out of 17918 is not too bad a rate, in our opinion, but note that different gene identifier types will have different mapping rates and that is to be expected. Regardless, it is always good to be aware of how many genes you are potentially “losing” if you rely on this new gene identifier you’ve mapped to for downstream analyses.

    +

    However, if you have almost all NA’s it is possible that the function was executed incorrectly or you may want to consider using a different gene identifier, if possible.

    +

    Now let’s check to see if we have any genes that were mapped to multiple symbols.

    +
    multi_mapped <- mapped_df %>%
    +  # Let's group by the Ensembl IDs in the `Ensembl` column
    +  dplyr::group_by(Ensembl) %>%
    +  # Create a new variable containing the number of symbols mapped to each ID
    +  dplyr::mutate(gene_symbol_count = dplyr::n()) %>%
    +  # Arrange by the genes with the highest number of symbols mapped
    +  dplyr::arrange(desc(gene_symbol_count)) %>%
    +  # Filter to include only the rows with multi mappings
    +  dplyr::filter(gene_symbol_count > 1)
    +
    +# Let's look at the first 6 rows of our `multi_mapped` object
    +head(multi_mapped)
    +
    ## # A tibble: 6 x 3
    +## # Groups:   Ensembl [2]
    +##   Symbol       Ensembl            gene_symbol_count
    +##   <fct>        <chr>                          <int>
    +## 1 Rpl23        ENSMUSG00000071415                 3
    +## 2 LOC100044627 ENSMUSG00000071415                 3
    +## 3 LOC100862455 ENSMUSG00000071415                 3
    +## 4 Ppp2r3d      ENSMUSG00000093803                 3
    +## 5 LOC108167320 ENSMUSG00000093803                 3
    +## 6 LOC108167694 ENSMUSG00000093803                 3
    +

    Looks like we have some cases where 3 gene symbols mapped to a single Ensembl ID. We have a total of 130 out of 17984 Ensembl IDs with multiple mappings to gene symbols. If we are not too worried about the 130 IDs with multiple mappings, we can filter them out for the purpose of having 1:1 mappings for our downstream analysis.

    +
    +
    +

    4.5 Map Ensembl IDs to gene symbols – filtering out multi mappings

    +

    The next code chunk we will rerun the mapIds() function, this time supplying the "filter" option to the multiVals argument. This will remove all instances of multiple mappings and return a list of only the gene identifiers and symbols that had 1:1 mapping. Use ?mapIds to see more options or strategies.

    +
    # Map ensembl IDs to their associated gene symbols
    +filtered_mapped_df <- data.frame(
    +  "Symbol" = mapIds(
    +    org.Mm.eg.db, # Replace with annotation package for the organism relevant to your data
    +    keys = df$Gene,
    +    column = "SYMBOL", # Replace with the type of gene identifiers you would like to map to
    +    keytype = "ENSEMBL", # Replace with the type of gene identifiers in your data
    +    multiVals = "filter" # This will drop any `Gene`s that have multiple matches
    +  )
    +) %>%
    +  # Make an `Ensembl` column to store the rownames
    +  tibble::rownames_to_column("Ensembl") %>%
    +  # Join the remaining data from `df` using the Ensembl IDs
    +  dplyr::inner_join(df, by = c("Ensembl" = "Gene"))
    +
    ## 'select()' returned 1:many mapping between keys and columns
    +

    Now, let’s write our filtered and mapped results to file!

    +
    +
    +

    4.6 Write mapped results to file

    +
    # Write mapped and annotated data frame to output file
    +readr::write_tsv(filtered_mapped_df, file.path(
    +  results_dir,
    +  "GSE13490_Gene_Symbols.tsv" # Replace with a relevant output file name
    +))
    +
    +
    +
    +

    5 Resources for further learning

    + +
    +
    +

    6 Session info

    +

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    +
    # Print session info
    +sessionInfo()
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
    +## 
    +## Matrix products: default
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
    +## 
    +## locale:
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
    +## 
    +## attached base packages:
    +## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
    +## [8] methods   base     
    +## 
    +## other attached packages:
    +## [1] magrittr_1.5         org.Mm.eg.db_3.8.2   AnnotationDbi_1.47.0
    +## [4] IRanges_2.19.10      S4Vectors_0.23.23    Biobase_2.45.0      
    +## [7] BiocGenerics_0.31.5  optparse_1.6.2      
    +## 
    +## loaded via a namespace (and not attached):
    +##  [1] Rcpp_1.0.4.6     plyr_1.8.4       pillar_1.4.4     compiler_3.6.1  
    +##  [5] tools_3.6.1      bit_1.1-14       digest_0.6.25    memoise_1.1.0   
    +##  [9] RSQLite_2.1.2    evaluate_0.14    lifecycle_0.2.0  tibble_3.0.1    
    +## [13] pkgconfig_2.0.3  rlang_0.4.6      DBI_1.0.0        cli_2.0.2       
    +## [17] rstudioapi_0.11  yaml_2.2.1       xfun_0.14        dplyr_0.8.3     
    +## [21] withr_2.2.0      styler_1.1.1     stringr_1.4.0    knitr_1.28      
    +## [25] vctrs_0.3.1      hms_0.5.3        tidyselect_0.2.5 bit64_0.9-7     
    +## [29] getopt_1.20.3    glue_1.4.1       R6_2.4.1         fansi_0.4.1     
    +## [33] rmarkdown_1.14   reshape2_1.4.3   blob_1.2.0       readr_1.3.1     
    +## [37] purrr_0.3.4      rematch2_2.1.0   backports_1.1.7  ellipsis_0.3.1  
    +## [41] htmltools_0.3.6  assertthat_0.2.1 utf8_1.1.4       stringi_1.4.6   
    +## [45] crayon_1.3.4
    +
    +
    +

    Carlson M., 2019 Genome wide annotation for mouse

    +
    +
    +

    Carlson M., 2020a AnnotationDbi

    +
    +
    +

    Carlson M., 2020b AnnotationDbi: Introduction to bioconductor annotation packages

    +
    +
    +

    CCDL, 2020 Obtaining annotation for ensembl ids - rna-seq.

    +
    +
    +

    R Bioconductor Team, 2003 Packages found under annotationdata

    +
    +
    +
    + + + +
    +
    + +
    + + + + + + + + + + + + + + + + diff --git a/03-rnaseq/00-intro-to-rnaseq.html b/03-rnaseq/00-intro-to-rnaseq.html index 6d34aecc..6e947fbd 100644 --- a/03-rnaseq/00-intro-to-rnaseq.html +++ b/03-rnaseq/00-intro-to-rnaseq.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1629,7 +1625,7 @@ @@ -1839,6 +1835,7 @@

    4.1 Install libraries

    ## The following objects are masked from 'package:Biobase':
     ## 
     ##     anyMissing, rowMedians
    +
    ## Loading required package: BiocParallel
    ## 
     ## Attaching package: 'DelayedArray'
    ## The following objects are masked from 'package:matrixStats':
    @@ -1893,12 +1890,12 @@ 

    4.2 Import and set up data

    ## # A tibble: 6 x 20
     ##   refinebio_acces… experiment_acce… refinebio_age refinebio_cell_…
     ##   <chr>            <chr>            <lgl>         <lgl>           
    -## 1 SRR3189694       SRP070849        NA            NA              
    -## 2 SRR3189680       SRP070849        NA            NA              
    -## 3 SRR3189690       SRP070849        NA            NA              
    -## 4 SRR3189684       SRP070849        NA            NA              
    -## 5 SRR3189683       SRP070849        NA            NA              
    -## 6 SRR3189691       SRP070849        NA            NA              
    +## 1 SRR3189684       SRP070849        NA            NA              
    +## 2 SRR3189690       SRP070849        NA            NA              
    +## 3 SRR3189691       SRP070849        NA            NA              
    +## 4 SRR3189683       SRP070849        NA            NA              
    +## 5 SRR3189693       SRP070849        NA            NA              
    +## 6 SRR3189692       SRP070849        NA            NA              
     ## # … with 16 more variables: refinebio_compound <lgl>, refinebio_disease <lgl>,
     ## #   refinebio_disease_stage <lgl>, refinebio_genetic_information <lgl>,
     ## #   refinebio_organism <chr>, refinebio_platform <chr>,
    @@ -1977,7 +1974,7 @@ 

    4.7 Create a heatmap

    ), scale = "row" # Scale values in the direction of genes (rows) )
    -

    +

    We’ve created a heatmap but although our genes and samples are clustered, there is not much information that we can gather here because we did not provide the pheatmap() function with annotation labels for our samples.

    First let’s save our clustered heatmap.

    @@ -1994,8 +1991,8 @@

    4.7.1 Save heatmap as a png

    # Close the png file: dev.off()
    -
    ## png 
    -##   2
    +
    ## quartz_off_screen 
    +##                 2

    Now, let’s add some annotation bars to our heatmap.

    @@ -2041,7 +2038,7 @@

    4.8.1 Create annotated heatmap -

    +

    Now that we have annotation bars on our heatmap, we have a better idea of the sample variable groups that appear to cluster together.

    Let’s save our annotated heatmap.

    @@ -2059,8 +2056,8 @@

    4.8.2 Save annotated heatmap as a # Close the png file: dev.off() -
    ## png 
    -##   2
    +
    ## quartz_off_screen 
    +##                 2

    @@ -2076,59 +2073,58 @@

    6 Session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
     ## [8] methods   base     
     ## 
     ## other attached packages:
    -##  [1] magrittr_1.5                DESeq2_1.28.1              
    -##  [3] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
    -##  [5] matrixStats_0.56.0          Biobase_2.48.0             
    -##  [7] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
    -##  [9] IRanges_2.22.2              S4Vectors_0.26.1           
    -## [11] BiocGenerics_0.34.0         pheatmap_1.0.12            
    -## [13] optparse_1.6.6             
    +##  [1] magrittr_1.5                DESeq2_1.26.0              
    +##  [3] SummarizedExperiment_1.15.6 DelayedArray_0.11.4        
    +##  [5] BiocParallel_1.19.2         matrixStats_0.54.0         
    +##  [7] Biobase_2.45.0              GenomicRanges_1.37.14      
    +##  [9] GenomeInfoDb_1.21.1         IRanges_2.19.10            
    +## [11] S4Vectors_0.23.23           BiocGenerics_0.31.5        
    +## [13] pheatmap_1.0.12             optparse_1.6.2             
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] bit64_0.9-7.1          splines_4.0.2          R.utils_2.9.2         
    -##  [4] assertthat_0.2.1       blob_1.2.1             GenomeInfoDbData_1.2.3
    -##  [7] yaml_2.2.1             pillar_1.4.6           RSQLite_2.2.0         
    -## [10] backports_1.1.9        lattice_0.20-41        glue_1.4.1            
    -## [13] digest_0.6.25          RColorBrewer_1.1-2     XVector_0.28.0        
    -## [16] colorspace_1.4-1       htmltools_0.5.0        Matrix_1.2-18         
    -## [19] R.oo_1.23.0            XML_3.99-0.5           pkgconfig_2.0.3       
    -## [22] genefilter_1.70.0      zlibbioc_1.34.0        purrr_0.3.4           
    -## [25] xtable_1.8-4           scales_1.1.1           getopt_1.20.3         
    -## [28] BiocParallel_1.22.0    tibble_3.0.3           annotate_1.66.0       
    -## [31] styler_1.3.2           farver_2.0.3           generics_0.0.2        
    -## [34] ggplot2_3.3.2          ellipsis_0.3.1         cli_2.0.2             
    -## [37] survival_3.1-12        crayon_1.3.4           memoise_1.1.0         
    -## [40] evaluate_0.14          R.methodsS3_1.8.0      fansi_0.4.1           
    -## [43] R.cache_0.14.0         tools_4.0.2            hms_0.5.3             
    -## [46] lifecycle_0.2.0        stringr_1.4.0          locfit_1.5-9.4        
    -## [49] munsell_0.5.0          AnnotationDbi_1.50.3   compiler_4.0.2        
    -## [52] rlang_0.4.7            grid_4.0.2             RCurl_1.98-1.2        
    -## [55] rstudioapi_0.11        bitops_1.0-6           rmarkdown_2.3         
    -## [58] gtable_0.3.0           DBI_1.1.0              rematch2_2.1.2        
    -## [61] R6_2.4.1               knitr_1.29             dplyr_1.0.0           
    -## [64] utf8_1.1.4             bit_1.1-15.2           readr_1.3.1           
    -## [67] stringi_1.4.6          Rcpp_1.0.5             vctrs_0.3.2           
    -## [70] geneplotter_1.66.0     tidyselect_1.1.0       xfun_0.16
    +## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2 +## [4] tools_3.6.1 backports_1.1.7 utf8_1.1.4 +## [7] R6_2.4.1 rpart_4.1-15 Hmisc_4.2-0 +## [10] DBI_1.0.0 lazyeval_0.2.2 colorspace_1.4-1 +## [13] nnet_7.3-12 withr_2.2.0 tidyselect_0.2.5 +## [16] gridExtra_2.3 bit_1.1-14 compiler_3.6.1 +## [19] cli_2.0.2 htmlTable_1.13.1 scales_1.0.0 +## [22] checkmate_1.9.4 readr_1.3.1 genefilter_1.67.1 +## [25] stringr_1.4.0 digest_0.6.25 foreign_0.8-72 +## [28] rmarkdown_1.14 XVector_0.25.0 base64enc_0.1-3 +## [31] pkgconfig_2.0.3 htmltools_0.3.6 styler_1.1.1 +## [34] htmlwidgets_1.3 rlang_0.4.6 rstudioapi_0.11 +## [37] RSQLite_2.1.2 acepack_1.4.1 dplyr_0.8.3 +## [40] RCurl_1.95-4.12 GenomeInfoDbData_1.2.1 Formula_1.2-3 +## [43] Matrix_1.2-17 Rcpp_1.0.4.6 munsell_0.5.0 +## [46] fansi_0.4.1 lifecycle_0.2.0 stringi_1.4.6 +## [49] yaml_2.2.1 zlibbioc_1.31.0 grid_3.6.1 +## [52] blob_1.2.0 crayon_1.3.4 lattice_0.20-38 +## [55] splines_3.6.1 annotate_1.63.0 hms_0.5.3 +## [58] locfit_1.5-9.1 knitr_1.28 pillar_1.4.4 +## [61] geneplotter_1.63.0 XML_3.98-1.20 glue_1.4.1 +## [64] evaluate_0.14 latticeExtra_0.6-28 data.table_1.12.2 +## [67] vctrs_0.3.1 gtable_0.3.0 getopt_1.20.3 +## [70] purrr_0.3.4 rematch2_2.1.0 assertthat_0.2.1 +## [73] ggplot2_3.2.1 xfun_0.14 xtable_1.8-4 +## [76] survival_2.44-1.1 tibble_3.0.1 memoise_1.1.0 +## [79] AnnotationDbi_1.47.0 cluster_2.1.0 ellipsis_0.3.1

    Gu Z., R. Eils, and M. Schlesner, 2016 Complex heatmaps reveal patterns and correlations in multidimensional genomic data. Bioinformatics.

    @@ -2195,7 +2191,7 @@

    6 Session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/03-rnaseq/differential-expression_rnaseq_01.html b/03-rnaseq/differential-expression_rnaseq_01.html index a2975f9f..6509daeb 100644 --- a/03-rnaseq/differential-expression_rnaseq_01.html +++ b/03-rnaseq/differential-expression_rnaseq_01.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1629,7 +1625,7 @@

    4.3 Set up metadata

    @@ -1935,16 +1932,16 @@

    4.3 Set up metadata

    ## # A tibble: 6 x 2
     ##   refinebio_title      asxl_mutation_status
     ##   <chr>                <chr>               
    -## 1 CBF54-BM-ASXLwt      no_mutation         
    -## 2 49060-2010-BM-ASXLwt no_mutation         
    +## 1 CBF369-Blood-ASXL2   asxl_mutation       
    +## 2 41267-BM-ASXL2       asxl_mutation       
     ## 3 CBF234-BM-ASXLwt     no_mutation         
    -## 4 CBF124-BM-ASXLwt     no_mutation         
    -## 5 41267-BM-ASXL2       asxl_mutation       
    -## 6 45565-BM-ASXL1       asxl_mutation
    +## 4 40810-BM-ASXLwt no_mutation +## 5 49060-2010-BM-ASXLwt no_mutation +## 6 CBF77-Blood-ASXL2 asxl_mutation

    Before we set up our model in the next step, we want to check if our modeling variable is set correctly. We want our “control” to to be set as the first level in the variable we provide as our experimental variable. Here we will use the str() function to print out a preview of the structure of our variable

    # Print out a preview of `asxl_mutation_status`
     str(metadata$asxl_mutation_status)
    -
    ##  chr [1:16] "no_mutation" "no_mutation" "no_mutation" "no_mutation" ...
    +
    ##  chr [1:16] "asxl_mutation" "asxl_mutation" "no_mutation" "no_mutation" ...

    Currently, asxl_mutation_status is a character. To make sure it is set how we want for the DESeq object and subsequent testing, let’s mutate it to a factor so we can explicitly set the levels.

    # Make asxl_mutation_status a factor and set the levels appropriately
     metadata <- metadata %>%
    @@ -2000,23 +1997,32 @@ 

    4.6 Run differential expression a coef = 2, # This is based on what log fold change coefficient was used in DESeq(), the default is 2. res = deseq_results # This needs to be the DESeq results table )

    -
    ## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
    -##     Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
    -##     sequence count data: removing the noise and preserving large differences.
    -##     Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
    +
    ## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
    +## 
    +## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
    +## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
    +## Reference: https://doi.org/10.1093/bioinformatics/bty895

    Now let’s take a peek at what our results table looks like.

    head(deseq_results)
    ## log2 fold change (MAP): asxl mutation status asxl mutation vs no mutation 
     ## Wald test p-value: asxl mutation status asxl mutation vs no mutation 
    -## DataFrame with 6 rows and 5 columns
    -##                    baseMean log2FoldChange      lfcSE    pvalue      padj
    -##                   <numeric>      <numeric>  <numeric> <numeric> <numeric>
    -## ENSG00000000003   52.852059    9.64776e-07 0.00144269 0.6604581  0.998525
    -## ENSG00000000005    0.260056    2.03308e-07 0.00144269 0.5791283        NA
    -## ENSG00000000419  406.161355   -1.38160e-06 0.00144268 0.7076357  0.998525
    -## ENSG00000000457  564.784021   -2.36497e-06 0.00144268 0.3719972  0.998525
    -## ENSG00000000460  401.130684    3.87280e-06 0.00144269 0.1627644  0.998525
    -## ENSG00000000938 1500.527448    3.07435e-06 0.00144269 0.0354596  0.720601
    +## DataFrame with 6 rows and 6 columns +## baseMean log2FoldChange lfcSE +## <numeric> <numeric> <numeric> +## ENSG00000000003 52.8520587655854 0.12593859821352 0.292427834964723 +## ENSG00000000005 0.260055561421088 0.0412286839932318 0.143323523204828 +## ENSG00000000419 406.161355152407 -0.0872933065549999 0.232613912574037 +## ENSG00000000457 564.784020901503 -0.239724224515893 0.26943713590588 +## ENSG00000000460 401.130684469072 0.373394326554735 0.267898911424576 +## ENSG00000000938 1500.52744835278 0.623523965932518 0.304354167130984 +## stat pvalue padj +## <numeric> <numeric> <numeric> +## ENSG00000000003 0.439280729657335 0.660458135481068 0.998525105718721 +## ENSG00000000005 -0.554658424129523 0.579128318966208 NA +## ENSG00000000419 -0.375033246094286 0.707635741349663 0.998525105718721 +## ENSG00000000457 -0.892738568914621 0.371997190749564 0.998525105718721 +## ENSG00000000460 1.39583439943176 0.162764368904366 0.998525105718721 +## ENSG00000000938 2.10307054824782 0.0354596030191536 0.720600898549108

    Note it is not filtered or sorted, so we will use tidyverse to do this before saving our results to a file. Sort and filter the results.

    # this is of class DESeqResults -- we want a data frame
     deseq_df <- deseq_results %>%
    @@ -2031,25 +2037,25 @@ 

    4.6 Run differential expression a dplyr::arrange(dplyr::desc(log2FoldChange))

    Let’s print out what the top results are.

    head(deseq_df)
    -
    ##              Gene   baseMean log2FoldChange     lfcSE       pvalue      padj
    -## 1 ENSG00000049247  19.204037       6.174579 2.2940036 0.0167489785 0.5761216
    -## 2 ENSG00000162551 179.261210       5.824919 1.2427084 0.0008553285 0.1690009
    -## 3 ENSG00000273079  26.113327       4.172007 0.7078167 0.0203873911 0.6263925
    -## 4 ENSG00000148926  96.538019       3.952953 0.8327353 0.0284780003 0.6768306
    -## 5 ENSG00000251139   5.018181       3.867049 0.8624662 0.0270431735        NA
    -## 6 ENSG00000108244  17.988423       3.731020 0.9987549 0.0012635570 0.2025067
    -##   threshold
    -## 1     FALSE
    -## 2     FALSE
    -## 3     FALSE
    -## 4     FALSE
    -## 5        NA
    -## 6     FALSE
    +
    ##              Gene    baseMean log2FoldChange     lfcSE       stat       pvalue
    +## 1 ENSG00000115590  152.279126       1.873505 0.3037832  3.3030575 0.0009563678
    +## 2 ENSG00000273079   26.113327       1.687493 0.3045453  2.3191410 0.0203873911
    +## 3 ENSG00000154589   51.259590       1.686259 0.3031344  1.9123102 0.0558364219
    +## 4 ENSG00000164588    9.276832       1.659394 0.2705837 -0.3576455 0.7206086031
    +## 5 ENSG00000103569 1218.109594       1.636066 0.3035410  3.3306457 0.0008664481
    +## 6 ENSG00000244115   92.694075       1.571892 0.2753955  1.3663784 0.1718202238
    +##        padj threshold
    +## 1 0.1782265     FALSE
    +## 2 0.6263925     FALSE
    +## 3 0.8208543     FALSE
    +## 4        NA        NA
    +## 5 0.1690009     FALSE
    +## 6 0.9985251     FALSE

    4.6.1 Check results by plotting one gene

    To double check what a differentially expressed gene looks like, we can plot one with DESeq2::plotCounts() function.

    plotCounts(ddset, gene = "ENSG00000196074", intgroup = "asxl_mutation_status")
    -

    +

    The mutation group samples have higher expression of this gene than the control group, which helps assure us that the results are showing us what we are looking for.

    @@ -2073,7 +2079,7 @@

    4.8 Create a volcano plot

    x = "log2FoldChange", # The variable in `deseq_df` you want to be plotted on the x axis y = "padj" # The variable in `deseq_df` you want to be plotted on the y axis ) -

    +

    Here the red point is the gene that meets both the default p value and log2 fold change cutoff (which are 10e-6 and 1 respectively).

    We used the adjusted p values for our plot above, so you may want to loosen this cutoff with the pCutoff argument (Take a look at all the options for tailoring this plot using ?EnhancedVolcano).

    Let’s make the same plot again, but adjust the pCutoff since we are using multiple-testing corrected p values and this time we will assign the plot to our environment as volcano_plot.

    @@ -2088,7 +2094,7 @@

    4.8 Create a volcano plot

    # Print out plot here volcano_plot -

    +

    This looks pretty good. Let’s save it to a PNG.

    ggsave(
       plot = volcano_plot,
    @@ -2112,63 +2118,59 @@ 

    6 Session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
     ## [8] methods   base     
     ## 
     ## other attached packages:
    -##  [1] magrittr_1.5                ggplot2_3.3.2              
    -##  [3] DESeq2_1.28.1               SummarizedExperiment_1.18.2
    -##  [5] DelayedArray_0.14.1         matrixStats_0.56.0         
    -##  [7] Biobase_2.48.0              GenomicRanges_1.40.0       
    -##  [9] GenomeInfoDb_1.24.2         IRanges_2.22.2             
    -## [11] S4Vectors_0.26.1            BiocGenerics_0.34.0        
    -## [13] optparse_1.6.6             
    +##  [1] magrittr_1.5                ggplot2_3.2.1              
    +##  [3] DESeq2_1.26.0               SummarizedExperiment_1.15.6
    +##  [5] DelayedArray_0.11.4         BiocParallel_1.19.2        
    +##  [7] matrixStats_0.54.0          Biobase_2.45.0             
    +##  [9] GenomicRanges_1.37.14       GenomeInfoDb_1.21.1        
    +## [11] IRanges_2.19.10             S4Vectors_0.23.23          
    +## [13] BiocGenerics_0.31.5         optparse_1.6.2             
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] bitops_1.0-6           bit64_0.9-7.1          RColorBrewer_1.1-2    
    -##  [4] numDeriv_2016.8-1.1    R.cache_0.14.0         tools_4.0.2           
    -##  [7] backports_1.1.9        utf8_1.1.4             R6_2.4.1              
    -## [10] DBI_1.1.0              colorspace_1.4-1       apeglm_1.10.0         
    -## [13] withr_2.2.0            tidyselect_1.1.0       bit_1.1-15.2          
    -## [16] compiler_4.0.2         cli_2.0.2              labeling_0.3          
    -## [19] scales_1.1.1           mvtnorm_1.1-1          readr_1.3.1           
    -## [22] genefilter_1.70.0      stringr_1.4.0          digest_0.6.25         
    -## [25] rmarkdown_2.3          R.utils_2.9.2          XVector_0.28.0        
    -## [28] pkgconfig_2.0.3        htmltools_0.5.0        styler_1.3.2          
    -## [31] bbmle_1.0.23.1         rlang_0.4.7            rstudioapi_0.11       
    -## [34] RSQLite_2.2.0          farver_2.0.3           generics_0.0.2        
    -## [37] BiocParallel_1.22.0    dplyr_1.0.0            R.oo_1.23.0           
    -## [40] RCurl_1.98-1.2         GenomeInfoDbData_1.2.3 Matrix_1.2-18         
    -## [43] Rcpp_1.0.5             munsell_0.5.0          fansi_0.4.1           
    -## [46] lifecycle_0.2.0        R.methodsS3_1.8.0      stringi_1.4.6         
    -## [49] yaml_2.2.1             MASS_7.3-51.6          zlibbioc_1.34.0       
    -## [52] plyr_1.8.6             grid_4.0.2             blob_1.2.1            
    -## [55] ggrepel_0.8.2          bdsmatrix_1.3-4        crayon_1.3.4          
    -## [58] lattice_0.20-41        splines_4.0.2          annotate_1.66.0       
    -## [61] hms_0.5.3              locfit_1.5-9.4         knitr_1.29            
    -## [64] pillar_1.4.6           geneplotter_1.66.0     XML_3.99-0.5          
    -## [67] glue_1.4.1             evaluate_0.14          EnhancedVolcano_1.6.0 
    -## [70] vctrs_0.3.2            gtable_0.3.0           getopt_1.20.3         
    -## [73] purrr_0.3.4            rematch2_2.1.2         assertthat_0.2.1      
    -## [76] emdbook_1.3.12         xfun_0.16              xtable_1.8-4          
    -## [79] coda_0.19-3            survival_3.1-12        tibble_3.0.3          
    -## [82] AnnotationDbi_1.50.3   memoise_1.1.0          ellipsis_0.3.1
    +## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2 +## [4] tools_3.6.1 backports_1.1.7 utf8_1.1.4 +## [7] R6_2.4.1 rpart_4.1-15 Hmisc_4.2-0 +## [10] DBI_1.0.0 lazyeval_0.2.2 colorspace_1.4-1 +## [13] nnet_7.3-12 withr_2.2.0 tidyselect_0.2.5 +## [16] gridExtra_2.3 bit_1.1-14 compiler_3.6.1 +## [19] cli_2.0.2 htmlTable_1.13.1 labeling_0.3 +## [22] scales_1.0.0 checkmate_1.9.4 readr_1.3.1 +## [25] genefilter_1.67.1 stringr_1.4.0 digest_0.6.25 +## [28] foreign_0.8-72 rmarkdown_1.14 XVector_0.25.0 +## [31] base64enc_0.1-3 pkgconfig_2.0.3 htmltools_0.3.6 +## [34] styler_1.1.1 htmlwidgets_1.3 rlang_0.4.6 +## [37] rstudioapi_0.11 RSQLite_2.1.2 acepack_1.4.1 +## [40] dplyr_0.8.3 RCurl_1.95-4.12 GenomeInfoDbData_1.2.1 +## [43] Formula_1.2-3 Matrix_1.2-17 Rcpp_1.0.4.6 +## [46] munsell_0.5.0 fansi_0.4.1 lifecycle_0.2.0 +## [49] stringi_1.4.6 yaml_2.2.1 zlibbioc_1.31.0 +## [52] grid_3.6.1 blob_1.2.0 ggrepel_0.8.1 +## [55] crayon_1.3.4 lattice_0.20-38 splines_3.6.1 +## [58] annotate_1.63.0 hms_0.5.3 locfit_1.5-9.1 +## [61] knitr_1.28 pillar_1.4.4 geneplotter_1.63.0 +## [64] XML_3.98-1.20 glue_1.4.1 evaluate_0.14 +## [67] latticeExtra_0.6-28 data.table_1.12.2 EnhancedVolcano_1.4.0 +## [70] vctrs_0.3.1 gtable_0.3.0 getopt_1.20.3 +## [73] purrr_0.3.4 rematch2_2.1.0 assertthat_0.2.1 +## [76] xfun_0.14 xtable_1.8-4 survival_2.44-1.1 +## [79] tibble_3.0.1 AnnotationDbi_1.47.0 memoise_1.1.0 +## [82] cluster_2.1.0 ellipsis_0.3.1

    Blighe K., S. Rana, and M. Lewis, 2020 EnhancedVolcano: Publication-ready volcano plots with enhanced colouring and labeling.

    @@ -2235,7 +2237,7 @@

    6 Session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/03-rnaseq/dimension-reduction_rnaseq_01_pca.html b/03-rnaseq/dimension-reduction_rnaseq_01_pca.html index 442ee0ed..f3ae1756 100644 --- a/03-rnaseq/dimension-reduction_rnaseq_01_pca.html +++ b/03-rnaseq/dimension-reduction_rnaseq_01_pca.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1833,6 +1829,7 @@

    4.1 Install libraries

    ## The following objects are masked from 'package:Biobase':
     ## 
     ##     anyMissing, rowMedians
    +
    ## Loading required package: BiocParallel
    ## 
     ## Attaching package: 'DelayedArray'
    ## The following objects are masked from 'package:matrixStats':
    @@ -1945,13 +1942,13 @@ 

    4.6 Create PCA plot using DESeq2<
    plotPCA(dds_norm,
       intgroup = "refinebio_treatment"
     )
    -

    +

    In this chunk, we are going to add another variable to our plot for annotation.

    Now we’ll plot the PCA using both refinebio_treatment and refinebio_disease variables for labels since they are central to the androgen deprivation therapy (ADT) based hypothesis in the original paper (Sharma et al. 2018).

    plotPCA(dds_norm,
       intgroup = c("refinebio_treatment", "refinebio_disease") # Note that we are able to add another variable to the intgroup argument here by providing a vector of the variable names with  `c()` function
     )
    -

    +

    In the plot above, it is hard to distinguish the different refinebio_treatment values which contain the data on whether or not samples have been treated with ADT versus the refinebio_disease values which refer to the method by which the samples were obtained from patients (i.e. biopsy).

    Let’s use the ggplot2 package functionality to customize our plot further and make the annotation labels better distinguishable.

    First let’s use plotPCA() to receive and store the PCA values for plotting.

    @@ -1998,59 +1995,58 @@

    6 Print session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
     ## [8] methods   base     
     ## 
     ## other attached packages:
    -##  [1] magrittr_1.5                ggplot2_3.3.2              
    -##  [3] DESeq2_1.28.1               SummarizedExperiment_1.18.2
    -##  [5] DelayedArray_0.14.1         matrixStats_0.56.0         
    -##  [7] Biobase_2.48.0              GenomicRanges_1.40.0       
    -##  [9] GenomeInfoDb_1.24.2         IRanges_2.22.2             
    -## [11] S4Vectors_0.26.1            BiocGenerics_0.34.0        
    -## [13] optparse_1.6.6             
    +##  [1] magrittr_1.5                ggplot2_3.2.1              
    +##  [3] DESeq2_1.26.0               SummarizedExperiment_1.15.6
    +##  [5] DelayedArray_0.11.4         BiocParallel_1.19.2        
    +##  [7] matrixStats_0.54.0          Biobase_2.45.0             
    +##  [9] GenomicRanges_1.37.14       GenomeInfoDb_1.21.1        
    +## [11] IRanges_2.19.10             S4Vectors_0.23.23          
    +## [13] BiocGenerics_0.31.5         optparse_1.6.2             
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] bit64_0.9-7.1          splines_4.0.2          R.utils_2.9.2         
    -##  [4] assertthat_0.2.1       blob_1.2.1             GenomeInfoDbData_1.2.3
    -##  [7] yaml_2.2.1             pillar_1.4.6           RSQLite_2.2.0         
    -## [10] backports_1.1.9        lattice_0.20-41        glue_1.4.1            
    -## [13] digest_0.6.25          RColorBrewer_1.1-2     XVector_0.28.0        
    -## [16] colorspace_1.4-1       htmltools_0.5.0        Matrix_1.2-18         
    -## [19] R.oo_1.23.0            XML_3.99-0.5           pkgconfig_2.0.3       
    -## [22] genefilter_1.70.0      zlibbioc_1.34.0        purrr_0.3.4           
    -## [25] xtable_1.8-4           scales_1.1.1           getopt_1.20.3         
    -## [28] BiocParallel_1.22.0    tibble_3.0.3           annotate_1.66.0       
    -## [31] styler_1.3.2           farver_2.0.3           generics_0.0.2        
    -## [34] ellipsis_0.3.1         withr_2.2.0            cli_2.0.2             
    -## [37] survival_3.1-12        crayon_1.3.4           memoise_1.1.0         
    -## [40] evaluate_0.14          R.methodsS3_1.8.0      fansi_0.4.1           
    -## [43] R.cache_0.14.0         tools_4.0.2            hms_0.5.3             
    -## [46] lifecycle_0.2.0        stringr_1.4.0          locfit_1.5-9.4        
    -## [49] munsell_0.5.0          AnnotationDbi_1.50.3   compiler_4.0.2        
    -## [52] rlang_0.4.7            grid_4.0.2             RCurl_1.98-1.2        
    -## [55] rstudioapi_0.11        labeling_0.3           bitops_1.0-6          
    -## [58] rmarkdown_2.3          gtable_0.3.0           DBI_1.1.0             
    -## [61] rematch2_2.1.2         R6_2.4.1               dplyr_1.0.0           
    -## [64] knitr_1.29             bit_1.1-15.2           readr_1.3.1           
    -## [67] stringi_1.4.6          Rcpp_1.0.5             vctrs_0.3.2           
    -## [70] geneplotter_1.66.0     tidyselect_1.1.0       xfun_0.16
    +## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2 +## [4] tools_3.6.1 backports_1.1.7 R6_2.4.1 +## [7] rpart_4.1-15 Hmisc_4.2-0 DBI_1.0.0 +## [10] lazyeval_0.2.2 colorspace_1.4-1 nnet_7.3-12 +## [13] withr_2.2.0 tidyselect_0.2.5 gridExtra_2.3 +## [16] bit_1.1-14 compiler_3.6.1 cli_2.0.2 +## [19] htmlTable_1.13.1 labeling_0.3 scales_1.0.0 +## [22] checkmate_1.9.4 readr_1.3.1 genefilter_1.67.1 +## [25] stringr_1.4.0 digest_0.6.25 foreign_0.8-72 +## [28] rmarkdown_1.14 XVector_0.25.0 base64enc_0.1-3 +## [31] pkgconfig_2.0.3 htmltools_0.3.6 styler_1.1.1 +## [34] htmlwidgets_1.3 rlang_0.4.6 rstudioapi_0.11 +## [37] RSQLite_2.1.2 acepack_1.4.1 dplyr_0.8.3 +## [40] RCurl_1.95-4.12 GenomeInfoDbData_1.2.1 Formula_1.2-3 +## [43] Matrix_1.2-17 Rcpp_1.0.4.6 munsell_0.5.0 +## [46] fansi_0.4.1 lifecycle_0.2.0 stringi_1.4.6 +## [49] yaml_2.2.1 zlibbioc_1.31.0 grid_3.6.1 +## [52] blob_1.2.0 crayon_1.3.4 lattice_0.20-38 +## [55] splines_3.6.1 annotate_1.63.0 hms_0.5.3 +## [58] locfit_1.5-9.1 knitr_1.28 pillar_1.4.4 +## [61] geneplotter_1.63.0 XML_3.98-1.20 glue_1.4.1 +## [64] evaluate_0.14 latticeExtra_0.6-28 data.table_1.12.2 +## [67] vctrs_0.3.1 gtable_0.3.0 getopt_1.20.3 +## [70] purrr_0.3.4 rematch2_2.1.0 assertthat_0.2.1 +## [73] xfun_0.14 xtable_1.8-4 survival_2.44-1.1 +## [76] tibble_3.0.1 AnnotationDbi_1.47.0 memoise_1.1.0 +## [79] cluster_2.1.0 ellipsis_0.3.1

    Freytag S., 2019 Workshop: Dimension reduction with r

    @@ -2123,7 +2119,7 @@

    6 Print session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/03-rnaseq/dimension-reduction_rnaseq_02_umap.html b/03-rnaseq/dimension-reduction_rnaseq_02_umap.html index 32c16294..cd43504e 100644 --- a/03-rnaseq/dimension-reduction_rnaseq_02_umap.html +++ b/03-rnaseq/dimension-reduction_rnaseq_02_umap.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1837,6 +1833,7 @@

    4.1 Install libraries

    ## The following objects are masked from 'package:Biobase':
     ## 
     ##     anyMissing, rowMedians
    +
    ## Loading required package: BiocParallel
    ## 
     ## Attaching package: 'DelayedArray'
    ## The following objects are masked from 'package:matrixStats':
    @@ -1983,7 +1980,7 @@ 

    4.10 Create UMAP plot

    ) ) + geom_point() # This tells R that we want a scatterplot
    -

    +

    Let’s try adding a variable to our plot for annotation.

    In this code chunk, the variable refinebio_treatment is given to the ggplot() function so we can label by androgen deprivation therapy (ADT) status.

    # Plot using `ggplot()` function
    @@ -1996,7 +1993,7 @@ 

    4.10 Create UMAP plot

    ) ) + geom_point() # This tells R that we want a scatterplot
    -

    +

    In the next code chunk, we are going to add another variable to our plot for annotation.

    We’ll plot using both refinebio_treatment and refinebio_disease variables for labels since they are central to the androgen deprivation therapy (ADT) based hypothesis in the original paper (Sharma et al. 2018).

    # Plot using `ggplot()` function
    @@ -2013,7 +2010,7 @@ 

    4.10 Create UMAP plot

    # Display the plot that we saved above final_annotated_umap_plot
    -

    +

    Although it does appear that majority of the pre-ADT and post-ADT appear to cluster together, there are still questions remaining as we look at outliers.

    4.10.1 Interpretation of UMAP plot and results

    @@ -2053,61 +2050,61 @@

    6 Print session info

    At the end of every analysis, before saving your notebook, we recommend printing out your session info. This helps make your code more reproducible by recording what versions of softwares and packages you used to run this.

    # Print session info
     sessionInfo()
    -
    ## R version 4.0.2 (2020-06-22)
    -## Platform: x86_64-pc-linux-gnu (64-bit)
    -## Running under: Ubuntu 20.04 LTS
    +
    ## R version 3.6.1 (2019-07-05)
    +## Platform: x86_64-apple-darwin15.6.0 (64-bit)
    +## Running under: macOS Mojave 10.14.6
     ## 
     ## Matrix products: default
    -## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
    +## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
    +## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
     ## 
     ## locale:
    -##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
    -##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
    -##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
    -##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
    -##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    -## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    +## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
     ## 
     ## attached base packages:
     ## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
     ## [8] methods   base     
     ## 
     ## other attached packages:
    -##  [1] magrittr_1.5                ggplot2_3.3.2              
    -##  [3] umap_0.2.6.0                DESeq2_1.28.1              
    -##  [5] SummarizedExperiment_1.18.2 DelayedArray_0.14.1        
    -##  [7] matrixStats_0.56.0          Biobase_2.48.0             
    -##  [9] GenomicRanges_1.40.0        GenomeInfoDb_1.24.2        
    -## [11] IRanges_2.22.2              S4Vectors_0.26.1           
    -## [13] BiocGenerics_0.34.0         optparse_1.6.6             
    +##  [1] magrittr_1.5                ggplot2_3.2.1              
    +##  [3] umap_0.2.3                  DESeq2_1.26.0              
    +##  [5] SummarizedExperiment_1.15.6 DelayedArray_0.11.4        
    +##  [7] BiocParallel_1.19.2         matrixStats_0.54.0         
    +##  [9] Biobase_2.45.0              GenomicRanges_1.37.14      
    +## [11] GenomeInfoDb_1.21.1         IRanges_2.19.10            
    +## [13] S4Vectors_0.23.23           BiocGenerics_0.31.5        
    +## [15] optparse_1.6.2             
     ## 
     ## loaded via a namespace (and not attached):
    -##  [1] bitops_1.0-6           bit64_0.9-7.1          RColorBrewer_1.1-2    
    -##  [4] R.cache_0.14.0         tools_4.0.2            backports_1.1.9       
    -##  [7] R6_2.4.1               DBI_1.1.0              colorspace_1.4-1      
    -## [10] withr_2.2.0            tidyselect_1.1.0       bit_1.1-15.2          
    -## [13] compiler_4.0.2         cli_2.0.2              labeling_0.3          
    -## [16] scales_1.1.1           readr_1.3.1            genefilter_1.70.0     
    -## [19] askpass_1.1            stringr_1.4.0          digest_0.6.25         
    -## [22] rmarkdown_2.3          R.utils_2.9.2          XVector_0.28.0        
    -## [25] pkgconfig_2.0.3        htmltools_0.5.0        styler_1.3.2          
    -## [28] rlang_0.4.7            rstudioapi_0.11        RSQLite_2.2.0         
    -## [31] farver_2.0.3           generics_0.0.2         jsonlite_1.7.0        
    -## [34] BiocParallel_1.22.0    dplyr_1.0.0            R.oo_1.23.0           
    -## [37] RCurl_1.98-1.2         GenomeInfoDbData_1.2.3 Matrix_1.2-18         
    -## [40] Rcpp_1.0.5             munsell_0.5.0          fansi_0.4.1           
    -## [43] reticulate_1.16        lifecycle_0.2.0        R.methodsS3_1.8.0     
    -## [46] stringi_1.4.6          yaml_2.2.1             zlibbioc_1.34.0       
    -## [49] grid_4.0.2             blob_1.2.1             crayon_1.3.4          
    -## [52] lattice_0.20-41        splines_4.0.2          annotate_1.66.0       
    -## [55] hms_0.5.3              locfit_1.5-9.4         knitr_1.29            
    -## [58] pillar_1.4.6           geneplotter_1.66.0     XML_3.99-0.5          
    -## [61] glue_1.4.1             evaluate_0.14          vctrs_0.3.2           
    -## [64] gtable_0.3.0           openssl_1.4.2          getopt_1.20.3         
    -## [67] purrr_0.3.4            rematch2_2.1.2         assertthat_0.2.1      
    -## [70] xfun_0.16              xtable_1.8-4           RSpectra_0.16-0       
    -## [73] survival_3.1-12        tibble_3.0.3           AnnotationDbi_1.50.3  
    -## [76] memoise_1.1.0          ellipsis_0.3.1
    +## [1] bitops_1.0-6 bit64_0.9-7 RColorBrewer_1.1-2 +## [4] tools_3.6.1 backports_1.1.7 R6_2.4.1 +## [7] rpart_4.1-15 Hmisc_4.2-0 DBI_1.0.0 +## [10] lazyeval_0.2.2 colorspace_1.4-1 nnet_7.3-12 +## [13] withr_2.2.0 tidyselect_0.2.5 gridExtra_2.3 +## [16] bit_1.1-14 compiler_3.6.1 cli_2.0.2 +## [19] htmlTable_1.13.1 labeling_0.3 scales_1.0.0 +## [22] checkmate_1.9.4 readr_1.3.1 genefilter_1.67.1 +## [25] askpass_1.1 stringr_1.4.0 digest_0.6.25 +## [28] foreign_0.8-72 rmarkdown_1.14 XVector_0.25.0 +## [31] base64enc_0.1-3 pkgconfig_2.0.3 htmltools_0.3.6 +## [34] styler_1.1.1 htmlwidgets_1.3 rlang_0.4.6 +## [37] rstudioapi_0.11 RSQLite_2.1.2 jsonlite_1.7.0 +## [40] acepack_1.4.1 dplyr_0.8.3 RCurl_1.95-4.12 +## [43] GenomeInfoDbData_1.2.1 Formula_1.2-3 Matrix_1.2-17 +## [46] Rcpp_1.0.4.6 munsell_0.5.0 fansi_0.4.1 +## [49] reticulate_1.13 lifecycle_0.2.0 stringi_1.4.6 +## [52] yaml_2.2.1 zlibbioc_1.31.0 grid_3.6.1 +## [55] blob_1.2.0 crayon_1.3.4 lattice_0.20-38 +## [58] splines_3.6.1 annotate_1.63.0 hms_0.5.3 +## [61] locfit_1.5-9.1 knitr_1.28 pillar_1.4.4 +## [64] geneplotter_1.63.0 XML_3.98-1.20 glue_1.4.1 +## [67] evaluate_0.14 latticeExtra_0.6-28 data.table_1.12.2 +## [70] vctrs_0.3.1 openssl_1.4.1 gtable_0.3.0 +## [73] getopt_1.20.3 purrr_0.3.4 rematch2_2.1.0 +## [76] assertthat_0.2.1 xfun_0.14 xtable_1.8-4 +## [79] RSpectra_0.15-0 survival_2.44-1.1 tibble_3.0.1 +## [82] AnnotationDbi_1.47.0 memoise_1.1.0 cluster_2.1.0 +## [85] ellipsis_0.3.1

    CCDL, 2020 ScRNA-seq dimension reduction.

    @@ -2189,7 +2186,7 @@

    6 Print session info

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/04-advanced-topics/00-intro-to-advanced-topics.html b/04-advanced-topics/00-intro-to-advanced-topics.html index 355455d4..4fad9f01 100644 --- a/04-advanced-topics/00-intro-to-advanced-topics.html +++ b/04-advanced-topics/00-intro-to-advanced-topics.html @@ -1,10 +1,11 @@ - + + @@ -1343,6 +1344,7 @@ } img { max-width:100%; + height: auto; } .tabbed-pane { padding-top: 12px; @@ -1491,7 +1493,6 @@ border: none; display: inline-block; border-radius: 4px; - background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -1520,12 +1521,6 @@ } } -@media print { -.toc-content { - /* see https://github.com/w3c/csswg-drafts/issues/4434 */ - float: right; -} -} .toc-content { padding-left: 30px; @@ -1619,6 +1614,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • @@ -1719,7 +1715,7 @@

    CCDL for ALSF

    theme: "bootstrap3", context: '.toc-content', hashGenerator: function (text) { - return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_'); + return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase(); }, ignoreSelector: ".toc-ignore", scrollTo: 0 diff --git a/Dockerfile b/Dockerfile index b174de1f..ef393f81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,6 +51,7 @@ RUN R -e "BiocManager::install(c('affy', 'apeglm', 'Biobase', 'ComplexHeatmap', # Installs packages needed for plottings # treemap, interactive plots, and hex plots # Rtsne and umap are required for dimension reduction analyses +# org.Mm.eg.db is required for gene mapping RUN install2.r --error --deps TRUE \ ggfortify \ ggsignif \ @@ -58,7 +59,8 @@ RUN install2.r --error --deps TRUE \ pheatmap \ Rtsne \ umap \ - VennDiagram + VennDiagram \ + org.Mm.eg.db ########################## # Install packages from github diff --git a/Snakefile b/Snakefile index afbd64a2..418343ee 100644 --- a/Snakefile +++ b/Snakefile @@ -6,6 +6,7 @@ rule target: "02-microarray/clustering_microarray_01_heatmap.html", "02-microarray/dimension-reduction_microarray_01_pca.html", "02-microarray/dimension-reduction_microarray_02_umap.html", + "02-microarray/gene-id-annotation_microarray_01_ensembl.html", "03-rnaseq/clustering_rnaseq_01_heatmap.html", "03-rnaseq/differential-expression_rnaseq_01.html", "03-rnaseq/00-intro-to-rnaseq.html", diff --git a/components/_navbar.html b/components/_navbar.html index 84903bf0..aa887da1 100644 --- a/components/_navbar.html +++ b/components/_navbar.html @@ -26,6 +26,7 @@
  • Differential Expression
  • Dimension Reduction - PCA
  • Dimension Reduction - UMAP
  • +
  • Ensembl Gene ID Annotation
  • diff --git a/references.bib b/references.bib index 35439d5d..f956d6b1 100644 --- a/references.bib +++ b/references.bib @@ -1,3 +1,10 @@ +@Website{annotation-packages, + title = {Packages found under AnnotationData}, + author = {{R Bioconductor Team}}, + year = {2003}, + url = {https://bioconductor.org/packages/release/BiocViews.html#___AnnotationData}, +} + @Manual{Blighe2020, title = {EnhancedVolcano: Publication-ready volcano plots with enhanced colouring and labeling}, @@ -14,6 +21,27 @@ @Article{Brems2017 url = {https://towardsdatascience.com/a-one-stop-shop-for-principal-component-analysis-5582fb7e0a9c}, } +@Website{Carlson2019, + title = {Genome wide annotation for Mouse}, + author = {Marc Carlson}, + year = {2019}, + url = {https://bioconductor.org/packages/release/data/annotation/html/org.Mm.eg.db.html}, +} + +@Website{Carlson2020-package, + title = {AnnotationDbi}, + author = {Marc Carlson}, + year = {2020}, + url = {https://www.bioconductor.org/packages/release/bioc/html/AnnotationDbi.html}, +} + +@Website{Carlson2020-vignette, + title = {AnnotationDbi: Introduction To Bioconductor Annotation Packages}, + author = {Marc Carlson}, + year = {2020}, + url = {https://bioconductor.org/packages/release/bioc/vignettes/AnnotationDbi/inst/doc/IntroToAnnotationPackages.pdf}, +} + @Manual{CCDL2020, title = {scRNA-seq Dimension Reduction}, author = {CCDL}, @@ -35,6 +63,13 @@ @Website{Freytag2019 url = {https://rpubs.com/Saskia/520216}, } +@Manual{gene-id-annotation-rna-seq, + title = {Obtaining Annotation for Ensembl IDs - RNA-seq}, + author = {CCDL}, + year = {2020}, + url = {https://github.com/AlexsLemonade/refinebio-examples/blob/master/03-rnaseq/gene-id-convert_rnaseq_01_ensembl.html}, +} + @Manual{Gonzalez2014, title = {Statistical analysis of RNA-Seq data}, author = {Ignacio Gonzalez},