From 26ae14333c5574c36125680f03cde9fa6a632afd Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Thu, 14 Mar 2024 18:26:25 +0000 Subject: [PATCH 1/2] Use .groups param in summarise() to keep established groupings --- scripts/process-data/fitbitsleeplogs.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/scripts/process-data/fitbitsleeplogs.R b/scripts/process-data/fitbitsleeplogs.R index 126140a..cf4716f 100644 --- a/scripts/process-data/fitbitsleeplogs.R +++ b/scripts/process-data/fitbitsleeplogs.R @@ -163,7 +163,7 @@ numepisodes_df_alltime <- NumEpisodes = ifelse(!is.na(LogId), 1, NA)) %>% group_by(ParticipantIdentifier, startdate2) %>% select(ParticipantIdentifier, startdate2, NumEpisodes) %>% - summarise(numeps = sum(NumEpisodes)) %>% + summarise(numeps = sum(NumEpisodes), .groups = "keep") %>% ungroup() %>% group_by(ParticipantIdentifier) %>% summarise(startdate = min(startdate2), @@ -172,13 +172,14 @@ numepisodes_df_alltime <- variance = var(numeps, na.rm = T), `5pct` = stats::quantile(as.numeric(numeps), 0.05, na.rm = T), `95pct` = stats::quantile(as.numeric(numeps), 0.95, na.rm = T), - numrecords = dplyr::n()) %>% + numrecords = dplyr::n(), + .groups = "keep") %>% ungroup() %>% left_join(y = df %>% select(ParticipantIdentifier, EndDate) %>% group_by(ParticipantIdentifier) %>% - summarise(enddate = max(lubridate::as_date(EndDate))) %>% + summarise(enddate = max(lubridate::as_date(EndDate)), .groups = "keep") %>% ungroup(), by = "ParticipantIdentifier") %>% select(ParticipantIdentifier, startdate, enddate, tidyselect::everything()) @@ -197,7 +198,8 @@ numepisodes_df_weekly <- variance = var(NumEpisodes, na.rm = T), `5pct` = stats::quantile(as.numeric(NumEpisodes), 0.05, na.rm = T), `95pct` = stats::quantile(as.numeric(NumEpisodes), 0.95, na.rm = T), - numrecords = dplyr::n()) %>% + numrecords = dplyr::n(), + .groups = "keep") %>% ungroup() %>% rename(startdate = startdate3, enddate = enddate3) %>% select(ParticipantIdentifier, startdate, enddate, tidyselect::everything()) @@ -219,8 +221,9 @@ numawakenings_logid_filtered <- filter(IsMainSleep==TRUE) %>% group_by(LogId) %>% summarise(NumAwakenings = sum(Value %in% c("wake", "awake") & - !(row_number() == 1 & Value %in% c("wake", "awake")) & - !(row_number() == n() & Value %in% c("wake", "awake")))) %>% + !(row_number() == 1 & Value %in% c("wake", "awake")) & + !(row_number() == n() & Value %in% c("wake", "awake"))), + .groups = "keep") %>% ungroup() # Merge the original df with the numawakenings df to create a united df From f7aa42a29630f32a736cd2c09aab6cf30767c37a Mon Sep 17 00:00:00 2001 From: Pranav Anbarasu Date: Thu, 14 Mar 2024 18:27:23 +0000 Subject: [PATCH 2/2] Order LogIds by StartDate before calculating NumAwakenings --- scripts/process-data/fitbitsleeplogs.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/process-data/fitbitsleeplogs.R b/scripts/process-data/fitbitsleeplogs.R index cf4716f..8db6a14 100644 --- a/scripts/process-data/fitbitsleeplogs.R +++ b/scripts/process-data/fitbitsleeplogs.R @@ -214,7 +214,10 @@ sleeplogsdetails_df <- arrow::open_dataset(file.path(downloadLocation, "dataset_fitbitsleeplogs_sleeplogdetails")) %>% select(all_of(sleeplogsdetails_vars)) %>% collect() %>% - left_join(y = (df %>% select(LogId, IsMainSleep)), by = "LogId") + left_join(y = (df %>% select(LogId, IsMainSleep)), by = "LogId") %>% + group_by(LogId) %>% + arrange(StartDate, .by_group = TRUE) %>% + ungroup() numawakenings_logid_filtered <- sleeplogsdetails_df %>%