From adbdac564db9d9e3a125dceeadaa7a333e2e001d Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 10:28:58 +0200 Subject: [PATCH 01/18] mistake for mac commands --- _variables.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_variables.yml b/_variables.yml index c1c5981..5ef5ed0 100644 --- a/_variables.yml +++ b/_variables.yml @@ -1,7 +1,8 @@ +# Automatically created by `r3admin::copy_common_file('_variables.yml')` on 2023-06-07. keybind: palette: '{{< kbd linux=Ctrl-Shift-P mac=Cmd-Shift-P win=Ctrl-Shift-P >}}' git: '{{< kbd linux=Ctrl-Shift-M mac=Cmd-Shift-M win=Ctrl-Shift-M >}} or with the Palette ({{< var keybind.palette >}}, then type "commit")' - chunk: '{{< kbd linux=Ctrl-Shift-I mac=Cmd-Shift-I win=Ctrl-Shift-I >}} or with the Palette ({{< var keybind.palette >}}, then type "new chunk")' + chunk: '{{< kbd linux=Ctrl-Shift-I mac=Cmd-Option-I win=Ctrl-Shift-I >}} or with the Palette ({{< var keybind.palette >}}, then type "new chunk")' restart-r: '{{< kbd linux=Ctrl-Shift-F10 mac=Cmd-Shift-F10 win=Ctrl-Shift-F10 >}} or with the Palette ({{< var keybind.palette >}}, then type "restart")' source: '{{< kbd linux=Ctrl-Shift-S mac=Cmd-Shift-S win=Ctrl-Shift-S >}} or with the Palette ({{< var keybind.palette >}}, then type "source")' render: '{{< kbd linux=Ctrl-Shift-K mac=Cmd-Shift-K win=Ctrl-Shift-K >}} or with the Palette ({{< var keybind.palette >}}, then type "render")' From 97c0577ef106b01439fd44760f0e2239a9a5f6bf Mon Sep 17 00:00:00 2001 From: "Luke W. 
Johnston" Date: Wed, 7 Jun 2023 11:13:47 +0200 Subject: [PATCH 02/18] move text about deleting everything in qmd, plus not use library purrr --- sessions/functionals.qmd | 41 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 5507aac..1fc54d1 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -203,22 +203,30 @@ But what does functionals have to do with what we are doing now? Well, our `import_user_info()` function only takes in one data file. But we have 22 files that we could load all at once if we used functionals. -The first thing we have to do is add `library(purrr)` to the `setup` -code chunk in the `doc/learning.qmd` document. Then we need to add the -package dependency by going to the **Console** and running: +Before we continue, let's clean up the `doc/learning.qmd` file by +deleting **everything** below the `setup` code chunk that contains the +`library()` and `source()` code. Why do we delete everything? Because it +keeps things cleaner and makes it easier to look through the file (both +for you and for us as instructors). And because we use Git, nothing is +truly gone so you can always go back to the text later. Next, we restart +the R session with {{< var keybind.restart-r >}}. + +Next, we'll need to add `{purrr}` as a package dependency by going to +the **Console** and running: ``` r usethis::use_package("purrr") ``` -Then, the next step for using the `map()` functional is to get a vector -or list of all the dataset files available to us. We will return to -using the `{fs}` package, which has a function called `dir_ls()` that -finds files of a certain pattern. In our case, the pattern is -`user_info.csv`. So, let's add `library(fs)` to the `setup` code chunk. 
-Then, go to the bottom of the `doc/learning.qmd` document, create a new -header called `## Using map`, and create a code chunk below that with -{{< var keybind.chunk >}} +Since `{purrr}` is part of the `{tidyverse}`, we don't need to load it +with `library()`. The next step for using the `map()` functional is to +get a vector or list of all the dataset files available to us. We will +return to using the `{fs}` package, which has a function called +`dir_ls()` that finds files of a certain pattern. In our case, the +pattern is `user_info.csv`. So, let's add `library(fs)` to the `setup` +code chunk. Then, go to the bottom of the `doc/learning.qmd` document, +create a new header called `## Using map`, and create a code chunk below +that with {{< var keybind.chunk >}} The `dir_ls()` function takes the path that we want to search (`data-raw/mmash/`), uses the argument `regexp` (short for [regular @@ -441,13 +449,10 @@ Markdown file is only a sandbox to test code out and in the end we want a script that takes the raw data, processes it, and creates a working dataset we can use for analysis. -First thing we will do is delete **everything** below the `setup` code -chunk that contains the `library()` and `source()` code. Why do we -delete everything? Because it keeps things cleaner and makes it easier -to look through the file. And because we use Git, nothing is truly gone -so you can always go back to the text later. Next, we restart the R -session with {{< var keybind.restart-r >}}. Then we'll create a new code -chunk below the `setup` chunk where we will use the +Like we did before, delete **everything** below the `setup` code chunk +that contains the `library()` and `source()` code. Then, we will restart +the R session with {{< var keybind.restart-r >}} and we'll create a new +code chunk below the `setup` chunk where we will use the `import_multiple_files()` function to import the user info and saliva data. 
From deb2d1b0de7f04d95072a954fc4bdeb9dbd5868e Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 11:13:52 +0200 Subject: [PATCH 03/18] typo --- sessions/functionals.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 1fc54d1..6ef14f8 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -437,7 +437,7 @@ import_multiple_files <- function(file_pattern, import_function) { import_multiple_files("saliva.csv", import_saliva) ``` -## Adding to the processing script and clean up Quart / R Markdown document +## Adding to the processing script and clean up Quarto / R Markdown document We've now made a function that imports multiple data files based on the type of data file, we can start using this function directly, like we From 174c8bd650b222bd6baf535e086ff8f90d7ece16 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 11:15:01 +0200 Subject: [PATCH 04/18] move brainstorming exercise to end of session --- sessions/functionals.qmd | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 6ef14f8..b3b5b13 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -327,18 +327,6 @@ user_info_df %>% Now that we have this working, let's **add and commit** the changes to the Git history, by using {{< var keybind.git >}} -## Exercise: Brainstorm and discuss how you'd use functionals in your work - -> Time: 10 minutes. - -As a group, discuss if you've ever used for loops or functionals like -`map()` and your experiences with either. Discuss any advantages to -using for loops over functionals and vice versa. Then, brainstorm and -discuss as many ways as you can for how you might incorporate -functionals like `map()`, or replace for loops with them, into your own -work. 
Afterwards, groups will briefly share some of what they thought of -before we move on to the next exercise. - ## Exercise: Make a function for importing other datasets with functionals {#sec-ex-function-import-all-data} > Time: 25 minutes. @@ -985,6 +973,18 @@ Next, go to the R Markdown / Quarto document and again delete add and commit the changes to the Git history with {{< var keybind.git >}}. +## Exercise: Brainstorm and discuss how you'd use functionals in your work + +> Time: 10 minutes. + +As a group, discuss if you've ever used for loops or functionals like +`map()` and your experiences with either. Discuss any advantages to +using for loops over functionals and vice versa. Then, brainstorm and +discuss as many ways as you can for how you might incorporate +functionals like `map()`, or replace for loops with them, into your own +work. Afterwards, groups will briefly share some of what they thought of +before we move on to the next exercise. + ## Summary - R is a functional programming language: From af2e2fc604f3853083d1c9b057b2b497f33b09ef Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 11:16:14 +0200 Subject: [PATCH 05/18] move brainstorming exercise to end of session --- sessions/functions.qmd | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sessions/functions.qmd b/sessions/functions.qmd index dbc88e9..8b13f46 100644 --- a/sessions/functions.qmd +++ b/sessions/functions.qmd @@ -183,17 +183,6 @@ Once we've created that, let's open up the Git Interface with {{< var keybind.git >}} and **add and commit** these changes to our history. -## Exercise: Brainstorm and discuss why and what you could make as a function - -> Time: 15 minutes. - -You've learned the basics of making your own, custom function. Now, as a -group, brainstorm and discuss some ways that you might make functions in -your own work to help reduce repetition. What type of code might you -make as a function for your own project? 
Do you think others, maybe in -your research group, might use this function too? Afterwards, all the -groups will briefly share what they thought of. - ## Making a function for vroom Now that we have a basic understanding of what a function looks like, @@ -842,6 +831,17 @@ import_actigraph <- function(file_path) { } ``` +## Exercise: Brainstorm and discuss why and what you could make as a function + +> Time: 15 minutes. + +You've learned the basics of making your own, custom function. Now, as a +group, brainstorm and discuss some ways that you might make functions in +your own work to help reduce repetition. What type of code might you +make as a function for your own project? Do you think others, maybe in +your research group, might use this function too? Afterwards, all the +groups will briefly share what they thought of. + ## Summary ::: {.callout-note appearance="minimal" collapse="true"} From d0c390ceb2e80c83de76fc816ce184288d23d62b Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 11:21:34 +0200 Subject: [PATCH 06/18] trigger downlit in this file --- sessions/introduction.qmd | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sessions/introduction.qmd b/sessions/introduction.qmd index 297279f..248bd81 100644 --- a/sessions/introduction.qmd +++ b/sessions/introduction.qmd @@ -1,5 +1,10 @@ # Introduction to course {#sec-introduction} +```{r, include=FALSE} +# To trigger downlit usage. +library(styler) +``` + > [**Introduction slides**](../slides/introduction.html)
From 0fa9673eb48cd6629e96bb8a94b39e0232da3d18 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 11:21:45 +0200 Subject: [PATCH 07/18] don't need to create a setup code chunk, already done by r3 pkg --- sessions/importing.qmd | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sessions/importing.qmd b/sessions/importing.qmd index d31bada..12f49df 100644 --- a/sessions/importing.qmd +++ b/sessions/importing.qmd @@ -77,12 +77,11 @@ to eventually save a version of the raw data that is specific to your research questions. The first step to processing data is to import it into R so we can work on it. So for now, we'll open up the `doc/learning.qmd` file so we can start building and testing out the -code. At the bottom of the file, create a new header by typing out -`## Importing raw data`. Right below the header, make a new code chunk -with {{< var keybind.chunk >}} and call it `setup`. Inside the code -chunk, load the `{vroom}` package with `library(vroom)` as well as -`library(here)`. Since we'll also be using the `{tidyverse}` package, -let's also add `library(tidyverse)`. It should look like this: +code. There should already be a `setup` code chunk in the file, where +we will put the `library()` code for loading the `{vroom}` package with +`library(vroom)` as well as `library(here)`. Since we'll also be using +the `{tidyverse}` package, let's also add `library(tidyverse)`. It +should look like this: ```` ```{{r setup}} @@ -92,12 +91,13 @@ library(here) ``` ```` -This is a special, named code chunk that tells R to run this code chunk -first whenever you open this Quarto / R Markdown file and run code -inside of the file. It's in this `setup` code chunk that we will add -`library()` functions when we want to load other packages. After adding -this code chunk, create a new code chunk right below it using -{{< var keybind.chunk >}}. 
+This `setup` code chunk is a special, named code chunk that tells R to +run this code chunk first whenever you open this Quarto / R Markdown +file and run code inside of the file. It's in this `setup` code chunk +that we will add `library()` functions when we want to load other +packages. After adding this code chunk, create a new header by typing +out `## Importing raw data`, followed by creating a new code chunk right +below it using {{< var keybind.chunk >}}. ::: callout-note ## Reading task: \~5 minutes @@ -599,3 +599,4 @@ next session. #| include: false save.image(here::here("_temp/importing.RData")) ``` + From 304bda9ca56a81aabd489358a24702d4adde3d7e Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 13:35:18 +0200 Subject: [PATCH 08/18] really emphasize deleting --- sessions/functions.qmd | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sessions/functions.qmd b/sessions/functions.qmd index 8b13f46..917667f 100644 --- a/sessions/functions.qmd +++ b/sessions/functions.qmd @@ -434,9 +434,17 @@ import_saliva <- function(file_path) { ## Continuing the workflow +::: {.callout-note appearance="minimal" collapse="true"} +## Instructor note + +Really emphasize to **cut** and paste, so that the function in the +`doc/learning.qmd` file is **deleted** and no longer kept in the Quarto +document. +::: + We've created two functions. Now we need to move those functions from -the `doc/learning.qmd` file and into the `R/` folder. We do this for a -few reasons: +the `doc/learning.qmd` file and into the `R/` folder by **cutting and +pasting** (not just copying). We do this for a few reasons: 1. To prevent the Quarto / R Markdown document from becoming too long and having a large portion of R code over other text. @@ -869,3 +877,4 @@ next session. #| include: false save.image(here::here("_temp/functions.RData")) ``` + From 1b7c82fa6485808a01839aff1127439d495d4fd5 Mon Sep 17 00:00:00 2001 From: "Luke W. 
Johnston" Date: Wed, 7 Jun 2023 13:41:07 +0200 Subject: [PATCH 09/18] cut and paste, emphasize that --- sessions/dplyr-joins.qmd | 14 ++++++++------ sessions/functions.qmd | 2 +- sessions/pivots.qmd | 3 ++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 0a9b9fa..c5401c0 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -121,7 +121,8 @@ another number from 0 to 9. Now that we've identified a possible regex to use to extract the user ID, let's test it out on the `user_info_df` data. Once it works, we will -convert it into a function and move it into the `R/functions.R` file. +convert it into a function and move (**cut and paste**) it into the +`R/functions.R` file. Since we will create a new column for the user ID, we will use the `mutate()` function from the `{dplyr}` package. We'll use the @@ -194,11 +195,12 @@ and extracts the user ID from it. **First step**: While in the same process you've done previously. 1. Call the new function `extract_user_id` and add one argument called - `imported_data`. - Remember to output the code into an object and - `return()` it at the end of the function. - Include Roxygen - documentation. -2. After writing it and testing that the function works, move the - function into `R/functions.R`. + `imported_data`. + - Remember to output the code into an object and `return()` it at + the end of the function. + - Include Roxygen documentation. +2. After writing it and testing that the function works, move (**cut** + and paste) the function into `R/functions.R`. 3. Run `{styler}` while in the `R/functions.R` file with {{< var keybind.styler >}}. 4. 
Replace the code in the `doc/learning.qmd` file with the function diff --git a/sessions/functions.qmd b/sessions/functions.qmd index 917667f..c65e90b 100644 --- a/sessions/functions.qmd +++ b/sessions/functions.qmd @@ -722,7 +722,7 @@ code you worked on previously that imported the `RR.csv` and - Name the new functions `import_rr` and `import_actigraph`. -2. Move the function into `R/functions.R`. +2. Move (by **cutting** and pasting) the function into `R/functions.R`. 3. Restart R, `source()` the functions file, using {{< var keybind.source >}}, and test that the functions work by diff --git a/sessions/pivots.qmd b/sessions/pivots.qmd index faa0980..ffe23b6 100644 --- a/sessions/pivots.qmd +++ b/sessions/pivots.qmd @@ -388,7 +388,8 @@ the code we just wrote above into a function. explicit function calls with `packagename::`. - Don't forget, you can use `?functionname` to find out which package the function comes from. -5. Move the newly created function over into the `R/functions.R` file. +5. Move (**cut** and paste) the newly created function over into the + `R/functions.R` file. 6. Run `{styler}` while in the `R/functions.R` file with {{< var keybind.styler >}}. 7. Restart R with {{< var keybind.restart-r >}}, go into the From bc2376477e870287ab2aa3168111687bf3be5747 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 13:44:12 +0200 Subject: [PATCH 10/18] 30 minutes instead of 20 --- sessions/functionals.qmd | 2 +- sessions/functions.qmd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index b3b5b13..5e89784 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -329,7 +329,7 @@ the Git history, by using {{< var keybind.git >}} ## Exercise: Make a function for importing other datasets with functionals {#sec-ex-function-import-all-data} -> Time: 25 minutes. +> Time: \~30 minutes. 
We need to do basically the exact same thing for the `saliva.csv`, `RR.csv`, and `Actigraph.csv` datasets, following this format: diff --git a/sessions/functions.qmd b/sessions/functions.qmd index c65e90b..71ce7f2 100644 --- a/sessions/functions.qmd +++ b/sessions/functions.qmd @@ -710,7 +710,7 @@ Before moving on to the next exercise, discuss with your group about: ## Exercise: Move and update the rest of the functions -> Time: 20 minutes. +> Time: \~30 minutes. Repeat this process of making functions by doing this to the rest of the code you worked on previously that imported the `RR.csv` and From 80b4d6478dd8a44596cf23d0db35cba8f5b944de Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 13:44:40 +0200 Subject: [PATCH 11/18] small explanation before continuing --- sessions/dplyr-joins.qmd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index c5401c0..22c0673 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -441,6 +441,9 @@ columns and so can't be a vector itself), we need to combine the datasets together in a `list()` and reduce them with `full_join()`. ::: +Let's code this together, using `reduce()`, `full_join()`, and `list()` +while in the `doc/learning.qmd` file. + ```{r} combined_data <- reduce(list(user_info_df, saliva_df), full_join) combined_data From 6607a5a2efa98d58dacd95f29ffad24ce54bb332 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 13:51:42 +0200 Subject: [PATCH 12/18] remove pipe exercise --- sessions/functionals.qmd | 36 ++++++++++++------------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 5e89784..6b6545a 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -495,36 +495,18 @@ technique to each group through *vectorization*. 
This technique works really well for a range of tasks, including for our task of summarizing some of the MMASH data so we can merge it all into one dataset. -## Exercise: What is the pipe? - -> Time: 5 minutes. +## Summarising data through functionals {#sec-summarise-with-functionals} ::: {.callout-note appearance="minimal" collapse="true"} ## Instructor note -Before starting this exercise, ask how many have used the pipe before. -If everyone has, then move on to the next section. If some haven't, let -the others in the group explain, but **do not** use much time or even -demonstrate it. If they don't know what it is, they can look it up -after. We covered this in the introduction course, so we should not -cover it again here. +Before starting this section, ask how many have used the pipe before. If +everyone has, then move on. If some haven't, very briefly explain it, +but **do not** use much time on it since we will be using it shortly and +they will see how it works then. We covered this in the introduction +course, so we should not cover it again here. ::: -We haven't used the `%>%` pipe from the `{magrittr}` package yet, but it -is used extensively in many R packages and is the foundation of -tidyverse packages. The function fundamentally changed how people write -R code so much that in version 4.1 a similar function, `|>`, was added -to base R. To make sure everyone is aware of what the pipe is, in your -groups please do either task: - -- If one or more person in the group doesn't know what the pipe is, - take some time to talk about and explain it (if you know). -- If no one in the group knows, please read [the section on - it](https://r-cubed-intro.rostools.org/sessions/data-management.html#chaining-functions-with-the-pipe) - from the beginner course. 
- -## Summarising data through functionals {#sec-summarise-with-functionals} - Functionals and vectorization are an integral component of how R works and they appear throughout many of R's functions and packages. They are particularly used throughout the `{tidyverse}` packages like `{dplyr}`. @@ -554,6 +536,12 @@ user_info_df %>% select(age) ``` +::: aside +Don't know what the `%>%` pipe is? Check out [the section on +it](https://r-cubed-intro.rostools.org/sessions/data-management.html#chaining-functions-with-the-pipe) +from the beginner course. +::: + But many `{dplyr}` verbs can also take functions as input. When you combine `select()` with the `where()` function, you can select different variables. The `where()` function is a `tidyselect` helper, a set of From c59077ed170b343a20741eadcf2ac81bc73673bb Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Wed, 7 Jun 2023 14:15:06 +0200 Subject: [PATCH 13/18] map_dfr has been superseded by list_rbind --- sessions/dplyr-joins.qmd | 9 ++--- sessions/functionals.qmd | 84 ++++++++++++++++++---------------------- 2 files changed, 41 insertions(+), 52 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 22c0673..0705490 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -274,8 +274,8 @@ file path variable, we need to actually use it within our processing pipeline. Since we want this function to work on all the datasets that we will import, we need to add it to the `import_multiple_files()` function. We'll go to the `import_multiple_files()` function in -`R/functions.R` and use the `%>%` to add it after using the `map_dfr()` -function. The code should look something like: +`R/functions.R` and use the `%>%` to add it after using the +`list_rbind()` function. 
The code should look something like: ```{r add-extract-user-to-import} import_multiple_files <- function(file_pattern, import_function) { @@ -284,9 +284,8 @@ import_multiple_files <- function(file_pattern, import_function) { recurse = TRUE ) - combined_data <- purrr::map_dfr(data_files, import_function, - .id = "file_path_id" - ) %>% + combined_data <- purrr::map(data_files, import_function) %>% + purrr::list_rbind(names_to = "file_path_id") %>% extract_user_id() # Add the function here. return(combined_data) } diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 6b6545a..05f4038 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -190,6 +190,15 @@ as above, now using the `~` shortcut: map(1:5, ~ paste(.x, "seconds have passed")) ``` +`map()` will always output a list, but sometimes we might want to output +a different data type. If we look into the help documentation with +`?map`, it shows several other types of map that all start with `map_`: + +- `map_chr()` outputs a character vector. +- `map_int()` outputs an integer. +- `map_dbl()` outputs a numeric value, called a "double" in + programming. + This is the basics of using functionals. Functions, vectorization, and functionals provide expressive and powerful approaches to a simple task: *Doing an action on each item in a set of items*. And while technically @@ -273,42 +282,21 @@ This is great because with one line of code we imported all these datasets! But we're missing an important bit of information: The user ID. A powerful feature of the `{purrr}` package is that it has other functions to make working with functionals easier. We know `map()` -always outputs a list. What if you want to output a character vector -instead? If we check the help: - -``` r -?map -``` - -::: {.callout-note appearance="minimal" collapse="true"} -## Instructor note - -Go through this help documentation and talk a bit about it. 
-::: - -We see that there are other functions, including a function called -`map_chr()` that seems to output a character vector. There are several -others that give an output based on the ending of `map_`, such as: - -- `map_int()` outputs an integer. -- `map_dbl()` outputs a numeric value, called a "double" in - programming. -- `map_dfr()` outputs a data frame, combining the list items by row - (`r`). -- `map_dfc()` outputs a data frame, combining the list items by column - (`c`). - -The `map_dfr()` looks like the one we want, since we want all these -datasets together as one. If we look at the help for it, we see that it -has an argument `.id`, which we can use to create a new column that sets +always outputs a list. But what we want is a single data frame at the +end that also contains the user ID information. + +The function that will take a list and convert it into a data frame is +called `list_rbind()` to bind ("stack") by rows or `list_cbind()` to +bind ("stack") by columns. We want to bind by rows, so will use +`list_rbind()` and if we look at the help for it, we see that it has an +argument `names_to`. This argument lets us create a new column that sets the user ID, or in this case, the file path to the dataset, which has the user ID information in it. So, let's use it and create a new column called `file_path_id`. ```{r} -user_info_df <- map_dfr(user_info_files, import_user_info, - .id = "file_path_id" -) +user_info_df <- map(user_info_files, import_user_info) %>% + list_rbind(names_to = "file_path_id") ``` Your `file_path_id` variable will look different. 
Don't worry, we're @@ -334,12 +322,14 @@ the Git history, by using {{< var keybind.git >}} We need to do basically the exact same thing for the `saliva.csv`, `RR.csv`, and `Actigraph.csv` datasets, following this format: -``` r -user_info_files <- dir_ls(here("data-raw/mmash/"), - regexp = "user_info.csv", - recurse = TRUE) -user_info_df <- map_dfr(user_info_files, import_user_info, - .id = "file_path_id") +```{r} +#| eval: false +user_info_files <- dir_ls(here("data-raw/mmash/"), + regexp = "user_info.csv", + recurse = TRUE +) +user_info_df <- map(user_info_files, import_user_info) %>% + list_rbind(names_to = "file_path_id") ``` For importing the other datasets, we have to modify the code in two @@ -387,13 +377,14 @@ Use this code as a guide to help complete this exercise: ``` r ___ <- ___(___, ___) { - ___ <- ___dir_ls(___here("data-raw/mmash/"), - regexp = ___, - recurse = TRUE) - - ___ <- ___map_dfr(___, ___, - .id = "file_path_id") - ___(___) + ___ <- ___dir_ls(___here("data-raw/mmash/"), + regexp = ___, + recurse = TRUE + ) + + ___ <- ___map(___, ____) %>% + ___list_rbind(names_to = "file_path_id") + ___(___) } ``` @@ -415,9 +406,8 @@ import_multiple_files <- function(file_pattern, import_function) { recurse = TRUE ) - combined_data <- purrr::map_dfr(data_files, import_function, - .id = "file_path_id" - ) + combined_data <- purrr::map(data_files, import_function) %>% + purrr::list_rbind(names_to = "file_path_id") return(combined_data) } From 3bdbc49ecbe8a23eb734a3a729260af740fc3efd Mon Sep 17 00:00:00 2001 From: "Luke W. 
Johnston" Date: Thu, 8 Jun 2023 10:55:42 +0200 Subject: [PATCH 14/18] stringr to dep with use_package --- sessions/dplyr-joins.qmd | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 0705490..8a4c1ff 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -42,7 +42,7 @@ wonderful package to use for working with character data is called `{stringr}`, which we'll use to extract the user ID from the `file_path_id` column. -The main driver behind the functions in stringr are [regular +The main driver behind the functions in `{stringr}` are [regular expressions](https://en.wikipedia.org/wiki/Regular_expression) (or regex for short). These expressions are powerful, very concise ways of finding patterns in text. Because they are so concise, though, they are also @@ -128,11 +128,19 @@ Since we will create a new column for the user ID, we will use the `mutate()` function from the `{dplyr}` package. We'll use the `str_extract()` function from the `{stringr}` package to "extract a string" by using the regex `user_[1-9][0-9]?` that we discussed from the -exercise. We're also using an argument to `mutate()` you might not have -seen previously, called `.before`. This will insert the new `user_id` -column before the column we use and we do this entirely for visual -reasons, since it is easier to see the newly created column when we run -the code. In your `doc/learning.qmd` file, create a new header called +exercise. Since we're going to use `{stringr}`, let's add it as a +package dependency: + +```{r} +#| eval: false +usethis::use_package("stringr") +``` + +We're also using an argument to `mutate()` you might not have seen +previously, called `.before`. This will insert the new `user_id` column +before the column we use and we do this entirely for visual reasons, +since it is easier to see the newly created column when we run the code. 
+In your `doc/learning.qmd` file, create a new header called `## Using regex for user ID` at the bottom of the document, and create a new code chunk below that. @@ -140,7 +148,7 @@ new code chunk below that. ## Instructor note Walk through writing this code, briefly explain/remind how to use -mutate, and about the stringr function. +mutate, and about the `{stringr}` function. ::: ```{r extract-user-id} From 108fcb05dd574c7c07accea47c7208e318692f91 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Thu, 8 Jun 2023 10:56:05 +0200 Subject: [PATCH 15/18] use list first and pipe to reduce --- sessions/dplyr-joins.qmd | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 8a4c1ff..131e6ee 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -452,15 +452,23 @@ Let's code this together, using `reduce()`, `full_join()`, and `list()` while in the `doc/learning.qmd` file. ```{r} -combined_data <- reduce(list(user_info_df, saliva_df), full_join) -combined_data +list( + user_info_df, + saliva_df +) %>% + reduce(full_join) ``` We now have the data in a form that would make sense to join it with the other datasets. So lets try it: ```{r} -reduce(list(user_info_df, saliva_df, summarised_rr_df), full_join) +list( + user_info_df, + saliva_df, + summarised_rr_df +) %>% + reduce(full_join) ``` Hmm, but wait, we now have four rows of each user, when we should have @@ -580,7 +588,13 @@ saliva_with_day_df ...Now, let's use the `reduce()` with `full_join()` again: ```{r} -reduce(list(user_info_df, saliva_with_day_df, summarised_rr_df), full_join) +list( + user_info_df, + saliva_with_day_df, + summarised_rr_df, + summarised_actigraph_df +) %>% + reduce(full_join) ``` We now have two rows per participant! 
Let's add and commit the changes @@ -643,15 +657,13 @@ saliva_with_day_df <- saliva_df %>% TRUE ~ NA_real_ )) -mmash <- reduce( - list( - user_info_df, - saliva_with_day_df, - summarised_rr_df, - summarised_actigraph_df - ), - full_join -) +mmash <- list( + user_info_df, + saliva_with_day_df, + summarised_rr_df, + summarised_actigraph_df +) %>% + reduce(full_join) ``` Lastly, we have to save this final dataset into the `data/` folder. From 968e20a354dfd038ec585352b32b4216d0f21de4 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Thu, 8 Jun 2023 10:56:49 +0200 Subject: [PATCH 16/18] don't need to include vroom as a library call in mmash --- sessions/dplyr-joins.qmd | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 131e6ee..59005b1 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -607,14 +607,13 @@ to bring it all together and put it into the `data-raw/mmash.R` script so we can create a final working dataset. Open up the `data-raw/mmash.R` file and the top of the file, add the -`{vroom}` package to the end of the list of other packages. Move the -code `library(fs)` to go with the other packages as well. It should look -something like this now: +`{tidyverse}` package to the end of the list of other packages if it +isn't there already. Move the code `library(fs)` to go with the other +packages as well. It should look something like this now: ```{r} library(here) library(tidyverse) -library(vroom) library(fs) ``` From 8b187cb4555e19c4041f9106cf59e591de17447a Mon Sep 17 00:00:00 2001 From: "Luke W.
Johnston" Date: Thu, 8 Jun 2023 10:56:55 +0200 Subject: [PATCH 17/18] run styler --- sessions/dplyr-joins.qmd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sessions/dplyr-joins.qmd b/sessions/dplyr-joins.qmd index 59005b1..72556a8 100644 --- a/sessions/dplyr-joins.qmd +++ b/sessions/dplyr-joins.qmd @@ -292,8 +292,8 @@ import_multiple_files <- function(file_pattern, import_function) { recurse = TRUE ) - combined_data <- purrr::map(data_files, import_function) %>% - purrr::list_rbind(names_to = "file_path_id") %>% + combined_data <- purrr::map(data_files, import_function) %>% + purrr::list_rbind(names_to = "file_path_id") %>% extract_user_id() # Add the function here. return(combined_data) } @@ -319,18 +319,18 @@ to use `user_id` instead of `file_path_id`: summarised_rr_df <- rr_df %>% group_by(user_id, day) %>% summarise(across(ibi_s, list( - mean = ~ mean(.x, na.rm = TRUE), + mean = ~ mean(.x, na.rm = TRUE), sd = ~ sd(.x, na.rm = TRUE) - ))) %>% + ))) %>% ungroup() summarised_actigraph_df <- actigraph_df %>% group_by(user_id, day) %>% # These statistics will probably be different for you summarise(across(hr, list( - mean = ~ mean(.x, na.rm = TRUE), + mean = ~ mean(.x, na.rm = TRUE), sd = ~ sd(.x, na.rm = TRUE) - ))) %>% + ))) %>% ungroup() ``` From 02ed85987f997856d921bf40172cff3f15109077 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Thu, 8 Jun 2023 10:57:02 +0200 Subject: [PATCH 18/18] less time for this reading task --- sessions/functionals.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sessions/functionals.qmd b/sessions/functionals.qmd index 05f4038..fdaeb92 100644 --- a/sessions/functionals.qmd +++ b/sessions/functionals.qmd @@ -773,7 +773,7 @@ have): ``` ::: callout-note -## Reading task: \~5 minutes +## Reading task: \~2 minutes This message talks about regrouping, and overriding based on the `.groups` argument. If we look in the help `?summarise`, at the