update quality dashboard to include clean extracts for failed steps

rpodcast · May 9, 2024 · 55eb7e2 · 55eb7e2
1 parent 96efe20
commit 55eb7e2
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 23 deletions.
diff --git a/R/fct_tables.R b/R/fct_tables.R
@@ -61,14 +61,17 @@ pointblank_table <- function(pointblank_object, report_date, extracts = NULL) {
         TRUE ~ "darkgreen"
       )
     ) |>
-    select(label, n, all_passed, proc_duration_s, n_passed, n_failed, f_passed, f_failed, step_id, assertion_type, all_passed_colors, all_passed_icons, f_passed_colors, i)
+    select(label, brief, n, all_passed, proc_duration_s, n_passed, n_failed, f_passed, f_failed, step_id, assertion_type, all_passed_colors, all_passed_icons, f_passed_colors, i)
 
   tbl <- reactable::reactable(
     df,
     details = function(index) {
       all_passed <- dplyr::slice(df, index) |>
         dplyr::pull(all_passed)
 
+      preprocessing_note <- dplyr::slice(df, index) |>
+        dplyr::pull(brief)
+
       if (!all_passed) {
         step_index <- dplyr::slice(df, index) |>
           dplyr::pull(i) |>
@@ -81,31 +84,41 @@ pointblank_table <- function(pointblank_object, report_date, extracts = NULL) {
           dplyr::pull(assertion_type)
 
         if (!is.null(extracts)) {
-          extract_df <- extracts[[step_index]]
+          extract_df <- extracts[[step_id]]
         } else {
           extract_df <- get_data_extracts(pointblank_object, step_index)
         }
 
-        if (assertion_type == "rows_distinct") {
-          reactable(
-            dplyr::distinct(extract_df),
-            filterable = TRUE
-          )
-        } else {
-          record_detail_table(
-            extract_df,
-            preprocess = FALSE,
-            nrow = 1000
-            #clean_podcast_df(extract_df)
-          )
-        }
+        #reactable::reactable(extract_df)
+        record_detail_table(
+          extract_df,
+          preprocess = FALSE,
+          nrow = 1000,
+          preprocessing_note = preprocessing_note
+        )
+
+        # if (assertion_type == "rows_distinct") {
+        #   reactable(
+        #     dplyr::distinct(extract_df),
+        #     filterable = TRUE
+        #   )
+        # } else {
+        #   record_detail_table(
+        #     extract_df,
+        #     preprocess = FALSE,
+        #     nrow = 1000,
+        #     preprocessing_note = preprocessing_note
+        #     #clean_podcast_df(extract_df)
+        #   )
+        # }
       }
     },
     columns = list(
       label = colDef(
         name = "Assessment",
         width = 350
       ),
+      brief = colDef(show = FALSE),
       all_passed = colDef(
         name = "Passed",
         width = 150,
@@ -288,7 +301,7 @@ record_analysis_table <- function(df, podcast_dup_df, report_date) {
   return(tbl)
 }
 
-record_detail_table <- function(df, preprocess = TRUE, nrow = NULL) {
+record_detail_table <- function(df, preprocess = TRUE, nrow = NULL, preprocessing_note = NULL) {
   if (preprocess) {
     # obtain categories df
     cat_df <- gen_categories_df(df)
@@ -323,6 +336,7 @@ record_detail_table <- function(df, preprocess = TRUE, nrow = NULL) {
     df,
     defaultColDef = colDef(vAlign = "center", headerClass = "header"),
     columns = list(
+      #imageUrl = colDef(show = FALSE),
       imageUrl = colDef(
         name = NULL,
         maxWidth = 70,
@@ -498,5 +512,6 @@ record_detail_table <- function(df, preprocess = TRUE, nrow = NULL) {
     ),
     theme = podcast_db_theme()
     #elementId = paste0('detail-table-', record_group)
-  )
+  ) |>
+    reactablefmtr::add_title(title = preprocessing_note)
 }
diff --git a/R/utils.R b/R/utils.R
@@ -21,6 +21,35 @@ podcastdb_pointblank_object <- function(url, dev_mode = FALSE) {
   return(res)
 }
 
+#' Load the saved data extract for each failed validation step.
+#'
+#' @param pointblank_object Object whose `validation_set` has `all_passed` and `step_id`.
+#' @param exports_root_path Base URL; `<step_id>.rds` is downloaded from under it.
+#' @param dev_mode If `TRUE`, read `dev_files/<step_id>.rds` instead of downloading.
+#' @return List with one element per failed step (read from its `.rds`), named by `step_id`.
+get_pointblank_data_extracts <- function(pointblank_object, exports_root_path, dev_mode = FALSE) {
+  meta_df <- pointblank_object$validation_set
+
+  failed_step_id_values <- meta_df |>
+    dplyr::filter(!all_passed) |>
+    dplyr::pull(step_id)
+
+  extract_list <- purrr::map(failed_step_id_values, ~{
+    if (dev_mode) {
+      tmp_file <- glue::glue("dev_files/{.x}.rds")
+    } else {
+      tmp_file <- tempfile(pattern = .x)
+      # Strip trailing slashes so the joined URL has exactly one separator.
+      root_url <- sub("/+$", "", exports_root_path)
+      download.file(url = paste0(root_url, "/", .x, ".rds"), destfile = tmp_file)
+    }
+    readRDS(tmp_file)
+  })
+
+  names(extract_list) <- failed_step_id_values
+  return(extract_list)
+}
+
 podcastdb_dupdf_object <- function(url, dev_mode = FALSE) {
   if (dev_mode) {
     tmp_file <- "dev_files/podcast_dup_df.rds"

diff --git a/index.qmd b/index.qmd
@@ -17,13 +17,14 @@ format:
 execute: 
   echo: false
 params:
-  version: "1.0.0"
+  version: "1.1.0"
   dev_mode: false
   pointblank_object_path: "https://podcast20-projects.us-east-1.linodeobjects.com/exports/podcastdb_pointblank_object/podcastdb_pointblank_object"
   podcast_dup_df_path: "https://podcast20-projects.us-east-1.linodeobjects.com/exports/podcast_dup_df.rds"
   podcast_analysis_df_path: "https://podcast20-projects.us-east-1.linodeobjects.com/exports/analysis_metrics_df.rds"
   podcast_timestamp_path: "https://podcast20-projects.us-east-1.linodeobjects.com/exports/job_timestamp.txt"
   podcast_log_path: "https://podcast20-projects.us-east-1.linodeobjects.com/logs/"
+  exports_root_path: "https://podcast20-projects.us-east-1.linodeobjects.com/exports/"
 ---
 
 ```{css}
@@ -118,12 +119,17 @@ record_analysis_table(analysis_metrics_df, podcast_dup_df, report_date = podcast
 
 ```{r}
 #| label: process-pointblank-extracts
-pb_extracts <- get_data_extracts(pointblank_object)
-pb_extracts <- purrr::map_at(
-  pb_extracts,
-  c('1', '3', '4', '7'),
-  ~clean_podcast_df(.x, dev_mode = params$dev_mode)
+#pb_extracts <- get_data_extracts(pointblank_object)
+pb_extracts <- get_pointblank_data_extracts(
+  pointblank_object,
+  exports_root_path = params$exports_root_path,
+  dev_mode = params$dev_mode
 )
+# pb_extracts <- purrr::map_at(
+#   pb_extracts,
+#   c('1', '3', '4', '7'),
+#   ~clean_podcast_df(.x, dev_mode = params$dev_mode)
+# )
 ```
 
 ::: {.card title="Validation Summary"}