From 26ae14333c5574c36125680f03cde9fa6a632afd Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu <pranavanba@gmail.com>
Date: Thu, 14 Mar 2024 18:26:25 +0000
Subject: [PATCH 1/2] Use .groups param in summarise() to keep established
 groupings

---
 scripts/process-data/fitbitsleeplogs.R | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/scripts/process-data/fitbitsleeplogs.R b/scripts/process-data/fitbitsleeplogs.R
index 126140a..cf4716f 100644
--- a/scripts/process-data/fitbitsleeplogs.R
+++ b/scripts/process-data/fitbitsleeplogs.R
@@ -163,7 +163,7 @@ numepisodes_df_alltime <-
          NumEpisodes = ifelse(!is.na(LogId), 1, NA)) %>% 
   group_by(ParticipantIdentifier, startdate2) %>% 
   select(ParticipantIdentifier, startdate2, NumEpisodes) %>% 
-  summarise(numeps = sum(NumEpisodes)) %>% 
+  summarise(numeps = sum(NumEpisodes), .groups = "keep") %>% 
   ungroup() %>% 
   group_by(ParticipantIdentifier) %>% 
   summarise(startdate = min(startdate2),
@@ -172,13 +172,14 @@ numepisodes_df_alltime <-
             variance = var(numeps, na.rm = T), 
             `5pct` = stats::quantile(as.numeric(numeps), 0.05, na.rm = T), 
             `95pct` = stats::quantile(as.numeric(numeps), 0.95, na.rm = T), 
-            numrecords = dplyr::n()) %>% 
+            numrecords = dplyr::n(),
+            .groups = "keep") %>% 
   ungroup() %>% 
   left_join(y = 
               df %>% 
               select(ParticipantIdentifier, EndDate) %>% 
               group_by(ParticipantIdentifier) %>% 
-              summarise(enddate = max(lubridate::as_date(EndDate))) %>% 
+              summarise(enddate = max(lubridate::as_date(EndDate)), .groups = "keep") %>% 
               ungroup(), 
             by = "ParticipantIdentifier") %>% 
   select(ParticipantIdentifier, startdate, enddate, tidyselect::everything())
@@ -197,7 +198,8 @@ numepisodes_df_weekly <-
             variance = var(NumEpisodes, na.rm = T), 
             `5pct` = stats::quantile(as.numeric(NumEpisodes), 0.05, na.rm = T), 
             `95pct` = stats::quantile(as.numeric(NumEpisodes), 0.95, na.rm = T), 
-            numrecords = dplyr::n()) %>% 
+            numrecords = dplyr::n(),
+            .groups = "keep") %>% 
   ungroup() %>% 
   rename(startdate = startdate3, enddate = enddate3) %>% 
   select(ParticipantIdentifier, startdate, enddate, tidyselect::everything())
@@ -219,8 +221,9 @@ numawakenings_logid_filtered <-
   filter(IsMainSleep==TRUE) %>% 
   group_by(LogId) %>% 
   summarise(NumAwakenings = sum(Value %in% c("wake", "awake") &
-                                     !(row_number() == 1 & Value %in% c("wake", "awake")) &
-                                     !(row_number() == n() & Value %in% c("wake", "awake")))) %>% 
+                                  !(row_number() == 1 & Value %in% c("wake", "awake")) &
+                                  !(row_number() == n() & Value %in% c("wake", "awake"))), 
+            .groups = "keep") %>% 
   ungroup()
 
 # Merge the original df with the numawakenings df to create a united df

From f7aa42a29630f32a736cd2c09aab6cf30767c37a Mon Sep 17 00:00:00 2001
From: Pranav Anbarasu <pranavanba@gmail.com>
Date: Thu, 14 Mar 2024 18:27:23 +0000
Subject: [PATCH 2/2] Order LogIds by StartDate before calculating
 NumAwakenings

---
 scripts/process-data/fitbitsleeplogs.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/process-data/fitbitsleeplogs.R b/scripts/process-data/fitbitsleeplogs.R
index cf4716f..8db6a14 100644
--- a/scripts/process-data/fitbitsleeplogs.R
+++ b/scripts/process-data/fitbitsleeplogs.R
@@ -214,7 +214,10 @@ sleeplogsdetails_df <-
   arrow::open_dataset(file.path(downloadLocation, "dataset_fitbitsleeplogs_sleeplogdetails")) %>% 
   select(all_of(sleeplogsdetails_vars)) %>% 
   collect() %>% 
-  left_join(y = (df %>% select(LogId, IsMainSleep)), by = "LogId")
+  left_join(y = (df %>% select(LogId, IsMainSleep)), by = "LogId") %>% 
+  group_by(LogId) %>% 
+  arrange(StartDate, .by_group = TRUE) %>% 
+  ungroup()
 
 numawakenings_logid_filtered <- 
   sleeplogsdetails_df %>%