diff --git a/DESCRIPTION b/DESCRIPTION index 1a82db1..1a8c11a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,7 +50,8 @@ Imports: waiter, tibble, digest, - lifecycle + lifecycle, + methods Suggests: testthat (>= 3.0.0), knitr, @@ -62,7 +63,8 @@ Suggests: haven, lintr, xml2, - here + here, + readr VignetteBuilder: knitr Config/testthat/edition: 3 Depends: diff --git a/NAMESPACE b/NAMESPACE index f7fe2b1..de6cc1b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,6 +15,7 @@ importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,mutate) +importFrom(dplyr,rename_with) importFrom(dplyr,select) importFrom(dplyr,ungroup) importFrom(magrittr,"%>%") @@ -50,14 +51,20 @@ importFrom(shiny,uiOutput) importFrom(stats,aggregate) importFrom(stringi,stri_wrap) importFrom(stringr,str_c) +importFrom(stringr,str_count) +importFrom(stringr,str_detect) +importFrom(stringr,str_extract) +importFrom(stringr,str_remove) importFrom(stringr,str_remove_all) importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(stringr,str_starts) +importFrom(stringr,str_trim) importFrom(tibble,tibble) importFrom(tidyr,all_of) importFrom(tidyr,complete) importFrom(tidyr,pivot_wider) +importFrom(tidyr,separate) importFrom(utils,capture.output) importFrom(utils,getParseData) importFrom(utils,lsf.str) diff --git a/NEWS.md b/NEWS.md index a954efe..82c0b52 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,11 @@ + # logrx 0.3.0 - Moved website theme to bootstrap 5, enabled search (#179) - Add `show_repo_url` option in `axecute()` to capture repo URL(s) into log file (#167) - Moved website theme to Bootstrap 5, enabled search (#179) - Add `include_rds` argument to `axecute()` to export log as rds file +- Add `read_log_file()` to read logrx log file as optional function - Add `library_call_linter()` to ensure all library calls are at the top of the script (#163) - Remove argument for remove_log_object from `axecute()` still accessible via `log_write()` (#182) - 
# ---- Patch context kept verbatim from the original diff (not R code) --------
# (line breaks reconstructed; the source had them collapsed)
#
# NEWS.md, end of hunk:
#   Added functionality so `axecute()` works with `.Rmd` files (#140)
#
# diff --git a/R/axecute.R b/R/axecute.R
# index 62a18cd..07a6678 100644
# --- a/R/axecute.R
# +++ b/R/axecute.R
# @@ -49,7 +49,7 @@ axecute <- function(file, log_name = NA, show_repo_url = FALSE, ...){
#    # deprecations
# -  if (hasArg(remove_log_object)) {
# +  if (methods::hasArg(remove_log_object)) {
#      lifecycle::deprecate_stop("0.3.0", "axecute(remove_log_object = )", "axecute(include_rds = )")
#    }
#
# diff --git a/R/read_log_file.R b/R/read_log_file.R
# new file mode 100644
# index 0000000..ca6f5fe
# --- /dev/null
# +++ b/R/read_log_file.R
# @@ -0,0 +1,297 @@
# ------------------------------------------------------------------------------

#' Reformat subsections in log lines
#'
#' Every line that contains one of the markers `Errors:`, `Warnings:`,
#' `Messages:`, `Output:` or `Result:` is rewritten as
#' `- <line without its first colon> <dashes>`. The number of dashes is 54
#' minus the number of characters in the original line, and never less than
#' zero. All other lines are returned unchanged.
#'
#' @param log_txt String vector. Object with log text lines
#'
#' @importFrom stringr str_detect
#' @importFrom stringr str_count
#' @importFrom stringr str_remove
#'
#' @return Character vector with the same length and order as `log_txt`, in
#'   which the subsection marker lines have been reformatted
#'
#' @examples
#' \dontrun{
#' reformat_subsections(readLines(log_file_path))
#' }
#'
#' @noRd
#'
reformat_subsections <- function(log_txt) {
  if (length(log_txt) == 0) {
    return(log_txt)
  }

  # NOTE(review): the pattern is not anchored, so a marker appearing anywhere
  # in a line (e.g. inside captured script output) is treated as a subsection
  # header as well -- confirm against what the log writer emits.
  is_header <- stringr::str_detect(
    log_txt,
    "Errors:|Warnings:|Messages:|Output:|Result:"
  )
  # NA lines are passed through untouched instead of aborting the parse.
  is_header <- !is.na(is_header) & is_header
  if (!any(is_header)) {
    return(log_txt)
  }

  headers <- log_txt[is_header]
  # The width is measured before the colon is removed. Clamp at zero so a
  # long line that merely contains a marker cannot trigger
  # "invalid 'times' argument".
  n_fill <- pmax(0L, 54L - stringr::str_count(headers))
  log_txt[is_header] <- paste(
    "-",
    stringr::str_remove(headers, ":"),
    strrep("-", n_fill)
  )

  log_txt
}

#' Nest sections in log lines vector
#'
#' @param adj_log_txt String vector.
#'   Object with formatted log text lines
#'
#' @details A section header is any line found between two divider lines
#'   (a divider is exactly 80 `-` characters). Lines that follow a header
#'   block are collected under the most recent header. Lines that appear
#'   before the first header belong to no section and are ignored. Sections
#'   without any content lines are kept as `NULL` entries.
#'
#' @importFrom stringr str_remove_all
#'
#' @return list that includes nested log sections, named by section header
#'
#' @noRd
#'
nest_sections <- function(adj_log_txt) {
  divider <- strrep("-", 80)
  sect_headers <- character(0)
  sect_info <- list()
  in_header <- FALSE

  for (line in adj_log_txt) {
    if (line == divider) {
      # Dividers come in pairs around a header, so each one flips the state.
      in_header <- !in_header
    } else if (in_header) {
      sect_headers <- c(sect_headers, line)
    } else {
      cur_pos <- length(sect_headers)
      if (cur_pos == 0L) {
        # Content before the first header: `sect_info[[0]]` would error.
        next
      }
      if (length(sect_info) == cur_pos) {
        sect_info[[cur_pos]] <- c(sect_info[[cur_pos]], line)
      } else {
        sect_info[[cur_pos]] <- line
      }
    }
  }

  # Trailing headers without content would otherwise leave `sect_info`
  # shorter than `sect_headers` and make the `names<-` call fail.
  if (length(sect_info) < length(sect_headers)) {
    length(sect_info) <- length(sect_headers)
  }

  # Strip the "-   " / "   -" padding that frames each header title.
  names(sect_info) <-
    stringr::str_remove_all(sect_headers, "-?\\s{3,}-?")

  sect_info
}

#' Nest subsections in log lines vector
#'
#' @param adj_log_txt String vector. Object with formatted log text lines
#' @param sect_info List. Object with nested sections (as returned by
#'   `nest_sections()`), one character vector of lines per section
#'
#' @importFrom stringr str_extract
#' @importFrom stringr str_trim
#' @importFrom stringr str_remove_all
#'
#' @return list that includes nested log subsections: one list per section,
#'   holding unnamed entries for lines seen before any subsection header and
#'   one named character vector per subsection
#'
#' @noRd
#'
nest_subsections <- function(adj_log_txt, sect_info) {
  # Every "- <one or two words> ---..." fragment found in the log is a
  # candidate subsection header; a line counts as a header only when the
  # whole line equals one of these fragments.
  subsect_headers <- stats::na.omit(
    stringr::str_extract(adj_log_txt, "\\-\\s\\w+\\s(\\w+\\s)?\\-{3,70}")
  )

  subset_sections <- function(section) {
    seen_header <- FALSE
    subsect_info <- list()
    for (line in section) {
      if (line %in% subsect_headers) {
        # The subsection name is the header without dashes and padding.
        latest_subsect <- stringr::str_trim(
          stringr::str_remove_all(line, "\\-")
        )
        seen_header <- TRUE
      } else if (seen_header) {
        subsect_info[[latest_subsect]] <-
          c(subsect_info[[latest_subsect]], line)
      } else {
        subsect_info <- c(subsect_info, line)
      }
    }
    subsect_info
  }

  lapply(sect_info, subset_sections)
}

#' Nest sections and subsections in log lines vector
#'
#' @param adj_log_txt String vector.
#'   Object with formatted log text lines
#'
#' @return list that includes nested log sections and subsections
#'
#' @noRd
#'
nest_log <- function(adj_log_txt) {
  # Sections are nested first; their result is then split into subsections.
  nest_subsections(
    adj_log_txt,
    nest_sections(adj_log_txt)
  )
}

#' Parse nested log list to tibbles for object where appropriate
#'
#' Starts from a copy of `nested_log` and, for each section name it
#' recognises, replaces that section with a tibble. Sections it does not
#' recognise are returned as they were passed in.
#'
#' @param nested_log List.
#'   Object with nested log output (from `nest_log()`)
#'
#' @importFrom tibble tibble
#' @importFrom tidyr separate
#' @importFrom stringr str_replace_all
#' @importFrom dplyr rename_with
#' @importFrom dplyr mutate
#'
#' @return list with objects coerced as tibbles; an empty list (with a
#'   warning) when the readr package is not installed
#'
#' @noRd
#'
parse_log <- function(nested_log) {
  # readr is an optional dependency: warn and return an empty list without it.
  if (!requireNamespace("readr", quietly = TRUE)) {
    warning(strwrap("Install the readr package to use log parsing feature.",
                    prefix = " ", initial = ""))
    return(list())
  }

  parsed_log <- nested_log

  # In the blocks below the piped vector is wrapped with tibble::tibble() and
  # tidyr::separate() is then pointed at the column named ".".

  if ("logrx Metadata" %in% names(nested_log)) {
    parsed_log$`logrx Metadata` <-
      nested_log$`logrx Metadata` %>%
      unlist() %>%
      tibble::tibble() %>%
      # extra = "merge": only the first ": " splits, the rest stays in Value
      # (see tidyr::separate).
      tidyr::separate(".",
        sep = "\\: ",
        into = c("Variable", "Value"),
        extra = "merge"
      )
  }

  if ("User and File Information" %in% names(nested_log)) {
    parsed_log$`User and File Information` <-
      nested_log$`User and File Information` %>%
      unlist() %>%
      stringr::str_trim() %>%
      tibble::tibble() %>%
      # NOTE(review): unlike the metadata block, `extra` is left at its
      # default here -- confirm values never contain a further ": ".
      tidyr::separate(".",
        sep = "\\: ",
        into = c("Variable", "Value")
      )
  }

  # NOTE(review): the three subsections below are indexed without checking
  # that they exist in `Session Information` -- confirm they are always there.
  if ("Session Information" %in% names(nested_log)) {
    parsed_log$`Session Information`$`Session info` <-
      nested_log$`Session Information`$`Session info` %>%
      unlist() %>%
      stringr::str_trim() %>%
      tibble::tibble() %>%
      # Split on the first whitespace character only; the remainder is trimmed
      # by the mutate() that follows.
      # NOTE(review): the trailing comma after `extra = "merge"` passes an
      # empty argument -- confirm it is intentional.
      tidyr::separate(".",
        sep = "\\s",
        into = c("setting", "value"),
        extra = "merge",
      ) %>%
      dplyr::mutate(dplyr::across(dplyr::where(is.character), stringr::str_trim))

    parsed_log$`Session Information`$`Packages` <-
      nested_log$`Session Information`$`Packages` %>%
      # remove indicator whether the package is attached to the search path
      stringr::str_replace_all("\\*", " ") %>%
      # account for loaded packages due to load_all()
      stringr::str_replace_all(" P ", " ") %>%
      # skip = 1 drops the first line, presumably the table's own header row
      # -- TODO confirm.
      readr::read_table(skip = 1, col_names = FALSE) %>%
      # Assumes the table has exactly these seven columns -- TODO confirm.
      dplyr::rename_with(~ c(
        "package",
        "version",
        "date",
        "lib",
        "source",
        "lang",
        "r_version"
      )) %>%
      # Drop the opening "(" from lang and the closing ")" from r_version.
      dplyr::mutate(
        lang = stringr::str_remove(lang, "\\("),
        r_version = stringr::str_remove(r_version, "\\)")
      )

    parsed_log$`Session Information`$`External software` <-
      nested_log$`Session Information`$`External software` %>%
      stringr::str_trim() %>%
      tibble::tibble() %>%
      # Same setting/value split as for `Session info`, including the
      # trailing comma.
      tidyr::separate(".",
        sep = "\\s",
        into = c("setting", "value"),
        extra = "merge",
      ) %>%
      dplyr::mutate(dplyr::across(dplyr::where(is.character), stringr::str_trim))
  }

  if ("Masked Functions" %in% names(nested_log)) {
    # Kept as a single column named "Masked Functions".
    parsed_log$`Masked Functions` <-
      nested_log$`Masked Functions` %>%
      unlist() %>%
      tibble::tibble("Masked Functions" = .)
  }

  if ("Used Package and Functions" %in% names(nested_log)) {
    parsed_log$`Used Package and Functions` <-
      nested_log$`Used Package and Functions` %>%
      unlist() %>%
      tibble::tibble() %>%
      # Split at "} ", then strip the leading "{" from the library column.
      tidyr::separate(".",
        sep = "\\} ",
        into = c("library", "function_names")
      ) %>%
      dplyr::mutate(library = stringr::str_remove(library, "\\{"))
  }

  if ("Program Run Time Information" %in% names(nested_log)) {
    parsed_log$`Program Run Time Information` <-
      nested_log$`Program Run Time Information` %>%
      unlist() %>%
      tibble::tibble() %>%
      tidyr::separate(".",
        sep = "\\: ",
        into = c("Variable", "Value")
      )
  }

  if ("Log Output File" %in% names(nested_log)) {
    parsed_log$`Log Output File` <-
      nested_log$`Log Output File` %>%
      unlist() %>%
      tibble::tibble() %>%
      tidyr::separate(".",
        sep = "\\: ",
        into = c("Variable", "Value")
      )
  }

  return(parsed_log)
}

#' Read and parse logrx file
#'
#' @param file String. Path to a logrx log file
#'
#' @return Named list.
#'   Object that includes nested and parsed content. The object is a named
#'   list with one element per log section; sections that the parser
#'   recognises are converted to tibbles. If the readr package is not
#'   installed, a warning is raised and an empty list is returned.
#'
#' @examples
#' \dontrun{
#' read_log_file(previous_log_filepath)
#' }
#'
#' @export
#'
read_log_file <- function(file) {
  if (!file.exists(file)) {
    stop("Path does not exist: ", sQuote(file))
  }

  # readr is an optional dependency: warn and return an empty list without it.
  if (!requireNamespace("readr", quietly = TRUE)) {
    warning(strwrap("Install the readr package to use log parsing feature.",
      prefix = " ", initial = ""
    ))
    return(list())
  }

  # Given a path, readLines() opens the connection itself and closes it again
  # even when reading fails, so no connection is left open.
  flines <- readLines(file)

  flines %>%
    reformat_subsections() %>%
    nest_log() %>%
    parse_log()
}

# ---- Patch context kept verbatim from the original diff (not R code) --------
# (line breaks reconstructed; the source had them collapsed)
#
# diff --git a/_pkgdown.yml b/_pkgdown.yml
# index e9bc99b..3352e1f 100644
# --- a/_pkgdown.yml
# +++ b/_pkgdown.yml
# @@ -35,6 +35,10 @@ reference:
#    - write_log_header
#    - write_unapproved_functions
#    - write_used_functions
# +- title: Read Log
# +  desc: Functionality to Read Log Files
# +- contents:
# +  - read_log_file
#  - title: Utilities
#    desc: Utility functions
#  - contents:
#
# diff --git a/man/read_log_file.Rd b/man/read_log_file.Rd
# new file mode 100644
# index 0000000..6a2d2e6
# --- /dev/null
# +++ b/man/read_log_file.Rd
# @@ -0,0 +1,23 @@
# +% Generated by roxygen2: do not edit by hand
# +% Please edit documentation in R/read_log_file.R
# +\name{read_log_file}
# +\alias{read_log_file}
# +\title{Read and parse logrx file}
# +\usage{
# +read_log_file(file)
# +}
# +\arguments{
# +\item{file}{String. Path to a logrx log file}
# +}
# +\value{
# +Tibble.
# ---- Patch context kept verbatim from the original diff (not R code) --------
# (line breaks reconstructed; the source had them collapsed)
#
# man/read_log_file.Rd, continued:
# +Object that includes nested and parsed content
# +}
# +\description{
# +Read and parse logrx file
# +}
# +\examples{
# +\dontrun{
# +read_log_file(previous_log_filepath)
# +}
# +
# +}
#
# diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R
# new file mode 100644
# index 0000000..6a33b42
# --- /dev/null
# +++ b/tests/testthat/test-parse.R
# @@ -0,0 +1,53 @@
# ------------------------------------------------------------------------------

test_that("read_log_file will parse a logrx log file and create the necessary object", {
  skip_if_not_installed("readr")
  options("log.rx" = NULL)
  scriptPath <- tempfile()
  logDir <- tempdir()
  writeLines("print('hello logrx')", con = scriptPath)

  # check no log is currently written out
  filePath <- file.path(logDir, "log_out_parse")
  expect_warning(expect_error(file(filePath, "r"), "cannot open the connection"))

  axecute(scriptPath, log_name = "log_out_parse", log_path = logDir)

  # check that the log file can be parsed
  parsedFile <- read_log_file(filePath)

  expect_length(parsedFile, 9)
  expect_named(
    parsedFile,
    c(
      "logrx Metadata",
      "User and File Information",
      "Session Information",
      "Masked Functions",
      "Used Package and Functions",
      "Program Run Time Information",
      "Errors and Warnings",
      "Messages, Output, and Result",
      "Log Output File"
    )
  )

  # These three sections are excluded from the data-frame checks below.
  nested_sections <- c(
    "Session Information",
    "Messages, Output, and Result",
    "Errors and Warnings"
  )
  tabular_sections <- parsedFile[!names(parsedFile) %in% nested_sections]

  expect_true(all(vapply(tabular_sections, is.data.frame, logical(1))))
  expect_true(all(vapply(tabular_sections, nrow, integer(1)) > 0))

  # remove all the stuff we added, including the files written to the temp
  # dir, so the "no log yet" precondition also holds on a re-run
  unlink(c(scriptPath, filePath))
  rm(scriptPath, logDir, parsedFile)
  log_remove()
})