Skip to content

Commit

Permalink
Merge pull request #28 from Sage-Bionetworks/RMHDR-258-new-i2b2-measu…
Browse files Browse the repository at this point in the history
…res-for-2024Q3

Merge changes from PR #27
  • Loading branch information
pranavanba authored Jul 10, 2024
2 parents 0efc5fe + 09896df commit 2dc8bc6
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 12 deletions.
4 changes: 2 additions & 2 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ default:

staging:
ontologyFileID: syn52050046
parquetDirID: syn59800646
parquetDirID: syn61250818
# dataset_name_filter: !expr c("fitbit","healthkit")
deleteExistingDir: FALSE
concept_replacements: !expr c("mins" = "minutes",
Expand All @@ -18,7 +18,7 @@ staging:
synFolderID: syn52504335
# method: sts
s3bucket: recover-main-project
s3basekey: main/archive/2024-05-21/
s3basekey: main/archive/2024-06-13/
downloadLocation: ./temp-parquet
selectedVarsFileID: syn53503994
outputConceptsDir: ./temp-output-concepts
Expand Down
2 changes: 1 addition & 1 deletion pipeline/run-pipeline.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Get config variables
list2env(x = config::get(file = "config/config.yml",
config = "prod"),
config = "staging"),
envir = .GlobalEnv)

# Fetch data
Expand Down
80 changes: 71 additions & 9 deletions scripts/process-data/fitbitsleeplogs.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,8 @@ numepisodes_df_weekly <-
rename(startdate = startdate3, enddate = enddate3) %>%
select(ParticipantIdentifier, startdate, enddate, tidyselect::everything())

# Use the sleeplogs_sleeplogdetails dataset to create the NumAwakenings derived variable
# Use the sleeplogs_sleeplogdetails dataset to create NumAwakenings,
# REM Onset Latency, and REM Fragmentation Index derived variables
sleeplogsdetails_vars <-
selected_vars %>%
filter(grepl("sleeplogdetails", Export, ignore.case = TRUE)) %>%
Expand All @@ -214,23 +215,78 @@ sleeplogsdetails_df <-
arrow::open_dataset(file.path(downloadLocation, "dataset_fitbitsleeplogs_sleeplogdetails")) %>%
select(all_of(sleeplogsdetails_vars)) %>%
collect() %>%
left_join(y = (df %>% select(LogId, IsMainSleep)), by = "LogId") %>%
group_by(LogId) %>%
distinct() %>%
left_join(y = (df %>% select(ParticipantIdentifier, LogId, IsMainSleep)),
by = join_by("LogId", "ParticipantIdentifier")) %>%
group_by(ParticipantIdentifier, LogId, id) %>%
arrange(StartDate, .by_group = TRUE) %>%
ungroup()

# Keep only the first occurrence of each fully identical row:
# duplicated() flags every later repeat of a row (compared across all columns)
# and the negated mask drops those repeats.
sleeplogsdetails_df_unique <- sleeplogsdetails_df[!duplicated(sleeplogsdetails_df),]

numawakenings_logid_filtered <-
sleeplogsdetails_df %>%
sleeplogsdetails_df_unique %>%
filter(IsMainSleep==TRUE) %>%
group_by(LogId) %>%
group_by(ParticipantIdentifier, LogId, id) %>%
summarise(NumAwakenings = sum(Value %in% c("wake", "awake") &
!(row_number() == 1 & Value %in% c("wake", "awake")) &
!(row_number() == n() & Value %in% c("wake", "awake"))),
.groups = "keep") %>%
ungroup()
ungroup() %>%
select(ParticipantIdentifier, LogId, NumAwakenings)

# Case-insensitive matchers for the sleep-stage labels held in `Value`.
# "wake" on its own already matches "awake" as a substring; both alternatives
# are kept exactly as written.
regex_wake <- stringr::regex(pattern = "wake|awake", ignore_case = TRUE)

regex_rem <- stringr::regex(pattern = "rem", ignore_case = TRUE)

# Merge the original df with the numawakenings df to create a united df
df_joined <- left_join(x = df, y = numawakenings_logid_filtered, by = "LogId")
# REM Onset Latency per main-sleep log: seconds between the start of the first
# non-wake stage and the start of the first REM stage.
rem_onset_latency <-
  sleeplogsdetails_df_unique %>%
  # Keep rows where at least one of the detail fields is not an empty string,
  # and only logs flagged as the main sleep
  filter(
    if_any(c(StartDate, EndDate, Value, Type), ~ . != ""),
    IsMainSleep == TRUE
  ) %>%
  group_by(ParticipantIdentifier, LogId, id) %>%
  # Order stages chronologically inside each log so the running counts below
  # identify the earliest matching stage
  arrange(StartDate, .by_group = TRUE) %>%
  mutate(
    is_non_wake = stringr::str_detect(Value, regex_wake, negate = TRUE),
    is_rem = stringr::str_detect(Value, regex_rem),
    # A row is the "first" match when it matches and the running count of
    # matches within the group has just reached 1
    firstNonWake = is_non_wake & cumsum(is_non_wake) == 1,
    firstREM = is_rem & cumsum(is_rem) == 1
  ) %>%
  summarise(
    remOnsetLatency = difftime(
      lubridate::ymd_hms(first(StartDate[firstREM])),
      lubridate::ymd_hms(first(StartDate[firstNonWake])),
      units = "secs"
    )
  ) %>%
  ungroup() %>%
  select(ParticipantIdentifier, LogId, remOnsetLatency)

# Count REM -> non-REM stage transitions within each main-sleep log.
# Stages are ordered by StartDate inside each (ParticipantIdentifier, LogId, id)
# group so that lag() pairs every stage with the one immediately before it; the
# first stage of a group has no predecessor (prevValue is NA) and is never
# counted.
# NOTE(review): a log with no REM -> non-REM transition produces no row here
# (filter() removes all of its rows before summarise()), so it ends up NA
# rather than 0 once joined back onto df -- confirm this is intended.
rem_transitions <-
  sleeplogsdetails_df_unique %>%
  # Keep rows where at least one of the detail fields is not an empty string
  filter(if_any(c(StartDate, EndDate, Value, Type), ~ . != "")) %>%
  filter(IsMainSleep == TRUE) %>%
  group_by(ParticipantIdentifier, LogId, id) %>%
  arrange(StartDate, .by_group = TRUE) %>%
  mutate(prevValue = dplyr::lag(Value, 1)) %>%
  filter(prevValue == "rem" & Value != "rem") %>%
  # Drop all grouping: summarise() would otherwise peel off only `id` and
  # leave the result grouped by ParticipantIdentifier and LogId, unlike the
  # other derived-variable pipelines, which end in ungroup()
  summarise(remTransitions = n(), .groups = "drop")

# REM Fragmentation Index per log: number of REM -> non-REM transitions divided
# by (SleepLevelRem / 60).
# NOTE(review): this reads as "transitions per hour of REM" if SleepLevelRem is
# recorded in minutes -- confirm the unit against the data dictionary.
rem_fragmentation_index <-
  rem_transitions %>%
  # rem_transitions may still carry grouping from its summarise(); clear it so
  # the steps below run on, and return, a plain tibble
  ungroup() %>%
  left_join(y = (df %>% select(ParticipantIdentifier, LogId, SleepLevelRem)),
            by = join_by("ParticipantIdentifier", "LogId")) %>%
  # Coerce BEFORE comparing: on a character column `SleepLevelRem > 0` is a
  # string comparison, which lets values such as "0.0" or "00" through and
  # then divides by zero below
  mutate(SleepLevelRem = as.numeric(SleepLevelRem)) %>%
  # Logs without a positive REM duration (including NA) get no index
  filter(SleepLevelRem > 0) %>%
  mutate(remFragmentationIndex = remTransitions / (SleepLevelRem / 60)) %>%
  select(ParticipantIdentifier, LogId, remFragmentationIndex)

# Attach the derived variables (NumAwakenings, remOnsetLatency,
# remFragmentationIndex) to the original df. Each step is a left join keyed on
# ParticipantIdentifier + LogId, so every row of df is retained.
df_joined <-
  df %>%
  left_join(
    y = numawakenings_logid_filtered,
    by = join_by("ParticipantIdentifier", "LogId")
  ) %>%
  left_join(
    y = rem_onset_latency,
    by = join_by("ParticipantIdentifier", "LogId")
  ) %>%
  left_join(
    y = rem_fragmentation_index,
    by = join_by("ParticipantIdentifier", "LogId")
  )

colnames(df_joined) <- tolower(colnames(df_joined))

Expand All @@ -252,7 +308,8 @@ approved_concepts_summarized <-
excluded_concepts
)

df_joined[approved_concepts_summarized] <- lapply(df_joined[approved_concepts_summarized], as.numeric)
df_joined[approved_concepts_summarized] <-
lapply(df_joined[approved_concepts_summarized], as.numeric)

# Get QA/QC ranges for variables and exclude values outside the ranges
bounds <-
Expand Down Expand Up @@ -389,6 +446,11 @@ rm(sleeplogs_stat_summarize,
sleeplogsdetails_vars,
sleeplogsdetails_df,
numawakenings_logid_filtered,
regex_wake,
regex_rem,
rem_onset_latency,
rem_transitions,
rem_fragmentation_index,
df_joined,
excluded_concepts,
approved_concepts_summarized,
Expand Down

0 comments on commit 2dc8bc6

Please sign in to comment.