diff --git a/DESCRIPTION b/DESCRIPTION index 443dd75..0b4140f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: datasetjson Type: Package Title: Read and Write CDISC Dataset JSON Files -Version: 0.0.1 +Version: 0.1.0 Authors@R: c( person(given = "Mike", family = "Stackhouse", @@ -24,8 +24,8 @@ Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 Depends: R (>= 3.5) Imports: - jsonlite (>= 1.8.7), - jsonvalidate (>= 1.3.2) + jsonlite (>= 1.8.0), + jsonvalidate (>= 1.3.1) Suggests: testthat (>= 2.1.0), knitr, diff --git a/NEWS.md b/NEWS.md index 94cf0c6..a0c92c1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ -# datasetjson 0.0.0.9999 +# datasetjson 0.1.0 + +- Capability to read and validate Dataset JSON files from URLs has been added (#8) +- Remove autoset of fileOID using output path (#3) +- Don't auto-populate optional attributes with NA (#16) +- Push dependency versions back (#18) +- Default `pretty` parameter on `write_dataset_json()` to false (#20) + +# datasetjson 0.0.1 + +Initial development version of datasetjson, introducing core objects, readers and writers. -Intial development version of datasetjson, introducing core objects, readers and writers. diff --git a/R/data_metadata.R b/R/data_metadata.R index 14e62a2..0bf84d2 100644 --- a/R/data_metadata.R +++ b/R/data_metadata.R @@ -21,7 +21,7 @@ #' data_meta_updated <- set_metadata_version(data_meta_updated, "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7") #' data_meta_updated <- set_study_oid(data_meta_updated, "SOMESTUDY") #' -data_metadata <- function(study = "NA", metadata_version = "NA", metadata_ref = "NA") { +data_metadata <- function(study = NULL, metadata_version = NULL, metadata_ref = NULL) { x <- list( studyOID = study, diff --git a/R/file_metadata.R b/R/file_metadata.R index 12592be..10a2f1f 100644 --- a/R/file_metadata.R +++ b/R/file_metadata.R @@ -7,6 +7,8 @@ #' file." 
#' @param sys_version sourceSystemVersion, defined as "The version of the #' sourceSystem" +#' @param file_oid fileOID parameter, defined as "A unique identifier for this +#' file." #' @param version Dataset JSON schema version being used #' #' @return file_metadata object @@ -26,17 +28,17 @@ #' file_meta_updated <- set_file_oid(file_meta, "/some/path") #' file_meta_updated <- set_originator(file_meta_updated, "Some Org") #' file_meta_updated <- set_source_system(file_meta_updated, "source system", "1.0") -file_metadata <- function(originator="NA", sys = "NA", sys_version = "NA", version = "1.0.0") { +file_metadata <- function(originator=NULL, sys = NULL, sys_version = NULL, file_oid = NULL, version = "1.0.0") { if (!(version %in% c("1.0.0"))) { stop("Unsupported version specified - currently only version 1.0.0 is supported", call.=FALSE) } x <- list( - "creationDateTime"= get_datetime(), + "creationDateTime"= character(), "datasetJSONVersion"= version, - "fileOID" = character(), - "asOfDateTime" = character(), + "fileOID" = file_oid, + "asOfDateTime" = NULL, # Not sure we want this to exist? "originator" = originator, "sourceSystem" = sys, "sourceSystemVersion" = sys_version @@ -59,10 +61,20 @@ get_datetime <- function() { format(Sys.time(), "%Y-%m-%dT%H:%M:%S") } -#' Set source system information +#' File Metadata Setters +#' +#' Set information about the file and source system used to generate the Dataset +#' JSON object. +#' +#' @details #' -#' Set information about the source system used to generate the Dataset JSON -#' object. +#' The fileOID parameter should be structured following description outlined in +#' the ODM V2.0 specification. "FileOIDs should be universally unique if at all +#' possible. One way to ensure this is to prefix every FileOID with an internet +#' domain name owned by the creator of the ODM file or database (followed by a +#' forward slash, "/"). 
For example, +#' FileOID="BestPharmaceuticals.com/Study5894/1" might be a good way to denote +#' the first file in a series for study 5894 from Best Pharmaceuticals." #' #' @param x datasetjson object #' @param sys sourceSystem parameter, defined as "The computer system or diff --git a/R/read_dataset_json.R b/R/read_dataset_json.R index 8406899..5f5fa2b 100644 --- a/R/read_dataset_json.R +++ b/R/read_dataset_json.R @@ -1,9 +1,10 @@ #' Read a Dataset JSON to datasetjson object #' -#' This function validated a dataset JSON file on disk against the Dataset JSON schema, and if valid -#' returns a datasetjson object +#' This function validates a dataset JSON file against the Dataset JSON schema, +#' and if valid returns a datasetjson object. The Dataset JSON file can be +#' either a file path on disk or a URL which contains the Dataset JSON file. #' -#' @param file File path on disk, or a pre-loaded Dataset JSON file in a single element character string +#' @param file File path or URL of a Dataset JSON file #' #' @return datasetjson object #' @export @@ -12,6 +13,8 @@ #' # Read from disk #' \dontrun{ #' dat <- read_dataset_json("path/to/file.json") +#' # Read file from URL +#' dat <- read_dataset_json('https://www.somesite.com/file.json') #' } #' #' # Read from an already imported character vector @@ -19,8 +22,15 @@ #' js <- write_dataset_json(ds_json) #' dat <- read_dataset_json(js) read_dataset_json <- function(file) { + + if (path_is_url(file)) { + file_contents <- read_from_url(file) + } else { + file_contents <- readLines(file) + } + # Validate the input file against the schema - valid <- jsonvalidate::json_validate(file, schema_1_0_0, engine="ajv") + valid <- jsonvalidate::json_validate(file_contents, schema = schema_1_0_0, engine="ajv") if (!valid) { stop(paste0(c("Dataset JSON file is invalid per the JSON schema. 
", @@ -29,7 +39,7 @@ read_dataset_json <- function(file) { } # Read the file and convert to datasetjson object - ds_json <- jsonlite::fromJSON(file) + ds_json <- jsonlite::fromJSON(file_contents) # Pull the object out with a lot of assumptions because the format has already # been validated diff --git a/R/utils.R b/R/utils.R index 30d335e..650ed21 100644 --- a/R/utils.R +++ b/R/utils.R @@ -54,3 +54,64 @@ set_col_attr <- function(nm, d, attr, items) { attr(x, attr) <- items[items$name == nm,][[attr]] x } + +#' Get the index of nulls in a list +#' +#' @param x A list +#' +#' @return Integer vector of indices +#' @noRd +get_null_inds <- function(x) { + which(vapply(x, is.null, FUN.VALUE = TRUE)) +} + +#' Remove nulls from a Dataset JSON object +#' +#' Only targets the file and data metadata to pull off optional elements +#' +#' @param x A Dataset JSON object +#' +#' @return A Dataset JSON object +#' @noRd +remove_nulls <- function(x) { + + # Specifically target the data metadata + dm_nulls <- get_null_inds(x[[get_data_type(x)]]) + if (length(dm_nulls) > 0) { + x[[get_data_type(x)]] <- x[[get_data_type(x)]][-dm_nulls] + } + + # Top level + fm_nulls <- get_null_inds(x) + if (length(fm_nulls) > 0) { + x <- x[-fm_nulls] + } + + x +} + +#' Check if given path is a URL +#' +#' @param path character string +#' +#' @return Boolean +#' @noRd +path_is_url <- function(path) { + grepl("^((http|ftp)s?|sftp|file)://", path) +} + +#' Read data from a URL +#' +#' This function will let you pull data that's provided from a simple curl of a +#' URL +#' +#' @param path valid URL string +#' +#' @return Contents of URL +#' @noRd +read_from_url <- function(path) { + con <- url(path, method = "libcurl") + x <- readLines(con, warn=FALSE) # the EOL warning shouldn't be a problem for readers + close(con) + x +} diff --git a/R/validate_dataset_json.R b/R/validate_dataset_json.R index c511bfe..ae37d0c 100644 --- a/R/validate_dataset_json.R +++ b/R/validate_dataset_json.R @@ -1,10 +1,11 @@ #' 
Validate a Dataset JSON file #' -#' This function calls `jsonvalidate::json_validate()` directly, with the parameters -#' necessary to retrieve the error information of an invalid JSON file per the -#' Dataset JSON schema. +#' This function calls `jsonvalidate::json_validate()` directly, with the +#' parameters necessary to retrieve the error information of an invalid JSON +#' file per the Dataset JSON schema. #' -#' @param x Path to a Dataset JSON file or a character vector holding JSON text +#' @param x File path or URL of a Dataset JSON file, or a character vector +#' holding JSON text #' #' @return A data frame #' @export @@ -13,6 +14,7 @@ #' #' \dontrun{ #' validate_dataset_json('path/to/file.json') +#' validate_dataset_json('https://www.somesite.com/file.json') #' } #' #' ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) @@ -20,7 +22,14 @@ #' #' validate_dataset_json(js) validate_dataset_json <- function(x) { - v <- jsonvalidate::json_validate(x, schema_1_0_0, engine="ajv", verbose=TRUE) + # If contents are a URL then pull out the content + if (path_is_url(x)) { + js <- read_from_url(x) + } else { + js <- x + } + + v <- jsonvalidate::json_validate(js, schema_1_0_0, engine="ajv", verbose=TRUE) if (!v) { warning("File contains errors!") return(attr(v, 'errors')) diff --git a/R/write_dataset_json.R b/R/write_dataset_json.R index 9674751..95f3afb 100644 --- a/R/write_dataset_json.R +++ b/R/write_dataset_json.R @@ -16,22 +16,19 @@ #' \dontrun{ #' write_dataset_json(ds_json, "path/to/file.json") #' } -write_dataset_json <- function(x, file, pretty=TRUE) { +write_dataset_json <- function(x, file, pretty=FALSE) { stopifnot_datasetjson(x) - # Populate the as-of datetime - x[['asOfDateTime']] <- get_datetime() + # Populate the creation datetime + x[['creationDateTime']] <- get_datetime() + + x <- remove_nulls(x) if (!missing(file)) { # Make sure the output path exists if(!dir.exists(dirname(file))) { stop("Folder supplied to `file` does not exist", 
call.=FALSE) } - - # Attach the file OID - x <- set_file_oid(x, tools::file_path_sans_ext(file)) - } else{ - x <- set_file_oid(x, "NA") } # Create the JSON text diff --git a/README.Rmd b/README.Rmd index 1d93186..7a00480 100644 --- a/README.Rmd +++ b/README.Rmd @@ -7,11 +7,7 @@ knitr::opts_chunk$set(echo = TRUE) library(datasetjson) ``` -# datasetjson - - - - +# **datasetjson** [](https://app.codecov.io/gh/atorus-research/datasetjson) @@ -67,7 +63,7 @@ write_dataset_json(ds_updated, file = "./iris.json") Or if you don't provide a file path, the JSON text will return directly. ```{r write_print} -js_text <- write_dataset_json(ds_updated) +js_text <- write_dataset_json(ds_updated, pretty=TRUE) cat(js_text) ``` @@ -87,6 +83,8 @@ print(attr(dat$Sepal.Width, "type")) ``` Note that Dataset JSON is an early CDISC standard and is still subject to change, as as such this package will be updated. Backwards compatibility will be enforced once the standard itself is more stable. Until then, it is not recommended to use this package within production activities. +# [](https://www.cdisc.org/) + ## Acknowledgements Thank you to Ben Straub and Eric Simms (GSK) for help and input during the original CDISC Dataset JSON hackathon that motivated this work. diff --git a/README.md b/README.md index 73e368c..60ac649 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,6 @@ -# datasetjson +# **datasetjson** - - - [](https://app.codecov.io/gh/atorus-research/datasetjson) @@ -73,15 +70,14 @@ write_dataset_json(ds_updated, file = "./iris.json") Or if you don’t provide a file path, the JSON text will return directly. 
``` r -js_text <- write_dataset_json(ds_updated) +js_text <- write_dataset_json(ds_updated, pretty=TRUE) cat(js_text) ``` ## { - ## "creationDateTime": "2023-09-15T17:57:31", + ## "creationDateTime": "2023-09-25T12:20:23", ## "datasetJSONVersion": "1.0.0", - ## "fileOID": "NA", - ## "asOfDateTime": "2023-09-15T17:57:31", + ## "fileOID": "/some/path", ## "originator": "Some Org", ## "sourceSystem": "source system", ## "sourceSystemVersion": "1.0", @@ -174,7 +170,7 @@ attached as attributes on the data frame itself: print(attr(dat, "creationDateTime")) ``` - ## [1] "2023-09-15T17:57:31" + ## [1] "2023-09-25T12:20:23" ``` r print(attr(dat$Sepal.Length, "OID")) @@ -194,6 +190,8 @@ compatibility will be enforced once the standard itself is more stable. Until then, it is not recommended to use this package within production activities. +# [](https://www.cdisc.org/) + ## Acknowledgements Thank you to Ben Straub and Eric Simms (GSK) for help and input during diff --git a/man/data_metadata.Rd b/man/data_metadata.Rd index df34df7..9786158 100644 --- a/man/data_metadata.Rd +++ b/man/data_metadata.Rd @@ -4,7 +4,7 @@ \alias{data_metadata} \title{Create the data metadata container for a Dataset JSON object} \usage{ -data_metadata(study = "NA", metadata_version = "NA", metadata_ref = "NA") +data_metadata(study = NULL, metadata_version = NULL, metadata_ref = NULL) } \arguments{ \item{study}{Study OID value} diff --git a/man/figures/cdisc.png b/man/figures/cdisc.png new file mode 100644 index 0000000..e2e9805 Binary files /dev/null and b/man/figures/cdisc.png differ diff --git a/man/figures/logo.svg b/man/figures/logo.svg new file mode 100644 index 0000000..911aeca --- /dev/null +++ b/man/figures/logo.svg @@ -0,0 +1,164 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/man/file_metadata.Rd 
b/man/file_metadata.Rd index 92bc8bf..342285d 100644 --- a/man/file_metadata.Rd +++ b/man/file_metadata.Rd @@ -5,9 +5,10 @@ \title{Create a file metadata object} \usage{ file_metadata( - originator = "NA", - sys = "NA", - sys_version = "NA", + originator = NULL, + sys = NULL, + sys_version = NULL, + file_oid = NULL, version = "1.0.0" ) } @@ -22,6 +23,9 @@ file."} \item{sys_version}{sourceSystemVersion, defined as "The version of the sourceSystem"} +\item{file_oid}{fileOID parameter, defined as "A unique identifier for this +file."} + \item{version}{Dataset JSON schema version being used} } \value{ diff --git a/man/file_metadata_setters.Rd b/man/file_metadata_setters.Rd index 49ba5f3..4b5dd0f 100644 --- a/man/file_metadata_setters.Rd +++ b/man/file_metadata_setters.Rd @@ -5,7 +5,7 @@ \alias{set_originator} \alias{set_file_oid} \alias{set_data_type} -\title{Set source system information} +\title{File Metadata Setters} \usage{ set_source_system(x, sys, sys_version) @@ -39,8 +39,17 @@ Persons)} datasetjson or file_metadata object } \description{ -Set information about the source system used to generate the Dataset JSON -object. +Set information about the file and source system used to generate the Dataset +JSON object. +} +\details{ +The fileOID parameter should be structured following description outlined in +the ODM V2.0 specification. "FileOIDs should be universally unique if at all +possible. One way to ensure this is to prefix every FileOID with an internet +domain name owned by the creator of the ODM file or database (followed by a +forward slash, "/"). For example, +FileOID="BestPharmaceuticals.com/Study5894/1" might be a good way to denote +the first file in a series for study 5894 from Best Pharmaceuticals." 
} \examples{ file_meta <- file_metadata() diff --git a/man/read_dataset_json.Rd b/man/read_dataset_json.Rd index 27f2b6d..b5ce954 100644 --- a/man/read_dataset_json.Rd +++ b/man/read_dataset_json.Rd @@ -7,19 +7,21 @@ read_dataset_json(file) } \arguments{ -\item{file}{File path on disk, or a pre-loaded Dataset JSON file in a single element character string} +\item{file}{File path or URL of a Dataset JSON file} } \value{ datasetjson object } \description{ -This function validated a dataset JSON file on disk against the Dataset JSON schema, and if valid -returns a datasetjson object +This function validates a dataset JSON file against the Dataset JSON schema, +and if valid returns a datasetjson object. The Dataset JSON file can be +either a file path on disk or a URL which contains the Dataset JSON file. } \examples{ # Read from disk \dontrun{ dat <- read_dataset_json("path/to/file.json") + dat <- read_dataset_json('https://www.somesite.com/file.json') } # Read from an already imported character vector diff --git a/man/validate_dataset_json.Rd b/man/validate_dataset_json.Rd index b12e309..1feee46 100644 --- a/man/validate_dataset_json.Rd +++ b/man/validate_dataset_json.Rd @@ -7,20 +7,22 @@ validate_dataset_json(x) } \arguments{ -\item{x}{Path to a Dataset JSON file or a character vector holding JSON text} +\item{x}{File path or URL of a Dataset JSON file, or a character vector +holding JSON text} } \value{ A data frame } \description{ -This function calls \code{jsonvalidate::json_validate()} directly, with the parameters -necessary to retrieve the error information of an invalid JSON file per the -Dataset JSON schema. +This function calls \code{jsonvalidate::json_validate()} directly, with the +parameters necessary to retrieve the error information of an invalid JSON +file per the Dataset JSON schema. 
} \examples{ \dontrun{ validate_dataset_json('path/to/file.json') + validate_dataset_json('https://www.somesite.com/file.json') } ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items) diff --git a/man/write_dataset_json.Rd b/man/write_dataset_json.Rd index 2945381..addf89c 100644 --- a/man/write_dataset_json.Rd +++ b/man/write_dataset_json.Rd @@ -4,7 +4,7 @@ \alias{write_dataset_json} \title{Write out a Dataset JSON file} \usage{ -write_dataset_json(x, file, pretty = TRUE) +write_dataset_json(x, file, pretty = FALSE) } \arguments{ \item{x}{datasetjson object} diff --git a/pkgdown/favicon/apple-touch-icon-120x120.png b/pkgdown/favicon/apple-touch-icon-120x120.png index 3fa2c4b..c29481b 100644 Binary files a/pkgdown/favicon/apple-touch-icon-120x120.png and b/pkgdown/favicon/apple-touch-icon-120x120.png differ diff --git a/pkgdown/favicon/apple-touch-icon-152x152.png b/pkgdown/favicon/apple-touch-icon-152x152.png index be5629a..ae35db7 100644 Binary files a/pkgdown/favicon/apple-touch-icon-152x152.png and b/pkgdown/favicon/apple-touch-icon-152x152.png differ diff --git a/pkgdown/favicon/apple-touch-icon-180x180.png b/pkgdown/favicon/apple-touch-icon-180x180.png index c455510..dd077d3 100644 Binary files a/pkgdown/favicon/apple-touch-icon-180x180.png and b/pkgdown/favicon/apple-touch-icon-180x180.png differ diff --git a/pkgdown/favicon/apple-touch-icon-60x60.png b/pkgdown/favicon/apple-touch-icon-60x60.png index 2aa90ed..d2214f4 100644 Binary files a/pkgdown/favicon/apple-touch-icon-60x60.png and b/pkgdown/favicon/apple-touch-icon-60x60.png differ diff --git a/pkgdown/favicon/apple-touch-icon-76x76.png b/pkgdown/favicon/apple-touch-icon-76x76.png index e08ac0d..9605f8e 100644 Binary files a/pkgdown/favicon/apple-touch-icon-76x76.png and b/pkgdown/favicon/apple-touch-icon-76x76.png differ diff --git a/pkgdown/favicon/apple-touch-icon.png b/pkgdown/favicon/apple-touch-icon.png index ad35366..f4b26b0 100644 Binary files 
a/pkgdown/favicon/apple-touch-icon.png and b/pkgdown/favicon/apple-touch-icon.png differ diff --git a/pkgdown/favicon/favicon-16x16.png b/pkgdown/favicon/favicon-16x16.png index 7344250..447da5b 100644 Binary files a/pkgdown/favicon/favicon-16x16.png and b/pkgdown/favicon/favicon-16x16.png differ diff --git a/pkgdown/favicon/favicon-32x32.png b/pkgdown/favicon/favicon-32x32.png index 9fc7c00..32075ee 100644 Binary files a/pkgdown/favicon/favicon-32x32.png and b/pkgdown/favicon/favicon-32x32.png differ diff --git a/pkgdown/favicon/favicon.ico b/pkgdown/favicon/favicon.ico index 47d4a4b..0b66149 100644 Binary files a/pkgdown/favicon/favicon.ico and b/pkgdown/favicon/favicon.ico differ diff --git a/tests/testthat/test-data_metadata.R b/tests/testthat/test-data_metadata.R index f956a19..6a6cce9 100644 --- a/tests/testthat/test-data_metadata.R +++ b/tests/testthat/test-data_metadata.R @@ -1,9 +1,9 @@ data_meta <- data_metadata() test_that("Default data_metadata object produces correctly", { - expect_equal(data_meta$studyOID, "NA") - expect_equal(data_meta$metaDataVersionOID, "NA") - expect_equal(data_meta$metaDataRef, "NA") + expect_null(data_meta$studyOID) + expect_null(data_meta$metaDataVersionOID) + expect_null(data_meta$metaDataRef) }) test_that("data_metadata setters work properly", { diff --git a/tests/testthat/test-datasetjson.R b/tests/testthat/test-datasetjson.R index 6e060d0..131ff46 100644 --- a/tests/testthat/test-datasetjson.R +++ b/tests/testthat/test-datasetjson.R @@ -8,23 +8,23 @@ test_that("datasetjson object builds with minimal defaults", { # I just want to remove the potential for a corner case # where the call to system time splits across a second - expect_equal(grep("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", ds_json$creationDateTime), 1) + # expect_equal(grep("\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", ds_json$creationDateTime), 1) # File metadata expect_equal(ds_json$datasetJSONVersion, "1.0.0") - expect_equal(ds_json$fileOID, character()) - 
expect_equal(ds_json$asOfDateTime, character()) - expect_equal(ds_json$originator, "NA") - expect_equal(ds_json$sourceSystem, "NA") - expect_equal(ds_json$sourceSystemVersion, "NA") + expect_null(ds_json$fileOID) + expect_null(ds_json$asOfDateTime) + expect_null(ds_json$originator) + expect_null(ds_json$sourceSystem) + expect_null(ds_json$sourceSystemVersion) # Data type is correct expect_equal(tail(names(ds_json), 1), "clinicalData") # Data metadata - expect_equal(ds_json$clinicalData$studyOID, "NA") - expect_equal(ds_json$clinicalData$metaDataVersionOID, "NA") - expect_equal(ds_json$clinicalData$metaDataRef, "NA") + expect_null(ds_json$clinicalData$studyOID) + expect_null(ds_json$clinicalData$metaDataVersionOID) + expect_null(ds_json$clinicalData$metaDataRef) # item_id passes through expect_equal(names(ds_json$clinicalData$itemGroupData), "IG.IRIS") diff --git a/tests/testthat/test-file_metadata.R b/tests/testthat/test-file_metadata.R index 0d6d396..5da222a 100644 --- a/tests/testthat/test-file_metadata.R +++ b/tests/testthat/test-file_metadata.R @@ -2,11 +2,11 @@ test_that("Default file_metadata object produces correctly", { file_meta <- file_metadata() expect_equal(file_meta$datasetJSONVersion, "1.0.0") - expect_equal(file_meta$fileOID, character()) - expect_equal(file_meta$asOfDateTime, character()) - expect_equal(file_meta$originator, "NA") - expect_equal(file_meta$sourceSystem, "NA") - expect_equal(file_meta$sourceSystemVersion, "NA") + expect_null(file_meta$fileOID, character()) + expect_null(file_meta$asOfDateTime) + expect_null(file_meta$originator) + expect_null(file_meta$sourceSystem) + expect_null(file_meta$sourceSystemVersion) }) test_that("Parameters pass through on file_metadata call", { diff --git a/tests/testthat/test-read_dataset_json.R b/tests/testthat/test-read_dataset_json.R index 4856c90..0394fae 100644 --- a/tests/testthat/test-read_dataset_json.R +++ b/tests/testthat/test-read_dataset_json.R @@ -50,3 +50,13 @@ test_that("read_dataset_json 
matches xpt", { expect_equal(nrow(e), 87) }) + +test_that("Dataset JSON can be read from a URL", { + file_path <- test_path("testdata", "ta.json") + url_file_path <- paste0("file://", normalizePath(test_path("testdata", "ta.json"))) + + from_disk <- read_dataset_json(file_path) + from_url <- read_dataset_json(url_file_path) + + expect_equal(from_disk, from_url) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 738a5dd..562d370 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -11,3 +11,47 @@ test_that("Type checker functions throw proper errors", { expect_error(set_item_data(1, iris), "Input must be a datasetjson or dataset_metadata object") }) + +test_that("NULL removals process effectively", { + ds_json <- dataset_json(iris[1, ], "IG.IRIS", "IRIS", "Iris", iris_items) + + x <- remove_nulls(ds_json) + + non_null_names_fm <- c( + "creationDateTime", "datasetJSONVersion", "fileOID", "asOfDateTime", "originator", + "sourceSystem", "sourceSystemVersion", "clinicalData" + ) + + non_null_names_dm <- c( + "studyOID", "metaDataVersionOID", "metaDataRef", "itemGroupData" + ) + + expect_equal(names(ds_json), non_null_names_fm) + expect_equal(names(ds_json$clinicalData), non_null_names_dm) + + null_names_fm <- c( + "creationDateTime", "datasetJSONVersion", "clinicalData" + ) + + null_names_dm <- "itemGroupData" + + expect_equal(names(x), null_names_fm) + expect_equal(names(x$clinicalData), null_names_dm) +}) + +test_that("URL checker regex works as expected", { + url_list <- c( + "https://github.com/cdisc-org/DataExchange-DatasetJson/raw/master/examples/sdtm/ti.json", # true + "http://github.com/cdisc-org/DataExchange-DatasetJson/raw/master/examples/sdtm/ti.json", # true + test_path("testdata", "ta.json"), # false + normalizePath(test_path("testdata", "ta.json")), # false + paste0("file://", normalizePath(test_path("testdata", "ta.json"))), # true + paste0("ftp://", normalizePath(test_path("testdata", "ta.json"))), # 
true + paste0("ftps://", normalizePath(test_path("testdata", "ta.json"))), # true + paste0("sftp://", normalizePath(test_path("testdata", "ta.json"))) # true + ) + + bool_check <- c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE) + + expect_equal(path_is_url(url_list), bool_check) +}) diff --git a/tests/testthat/test-validate_dataset_json.R b/tests/testthat/test-validate_dataset_json.R index effad1f..94f821f 100644 --- a/tests/testthat/test-validate_dataset_json.R +++ b/tests/testthat/test-validate_dataset_json.R @@ -6,3 +6,14 @@ test_that("validate_dataset_json returns correct messages", { expect_message(validate_dataset_json(js), "File is valid per the Dataset JSON v1.0.0 schema") }) + +test_that("JSON can checked from URL", { + fpath <- paste0("file://", normalizePath(test_path("testdata", "ae.json"))) + expect_warning( + err <- validate_dataset_json(fpath), + "File contains errors!" + ) + + # Loose check of number of issues + expect_equal(dim(err), c(87, 9)) +}) diff --git a/tests/testthat/test-write_dataset_json.R b/tests/testthat/test-write_dataset_json.R index 58281ea..9828103 100644 --- a/tests/testthat/test-write_dataset_json.R +++ b/tests/testthat/test-write_dataset_json.R @@ -34,7 +34,7 @@ test_that("write_dataset_json matches the original json", { comp <- jsonlite::read_json(json_location) expected <- jsonlite::read_json(test_path("testdata/adsl.json")) - # remove fileOID and creationDateTime, this will alway differ + # remove fileOID and creationDateTime, this will always differ # remove asOfDateTime, this is not in adsl.json (to confirm if extensible) comp$fileOID <- NULL expected$fileOID <- NULL @@ -102,7 +102,7 @@ test_that("write_dataset_json matches the original json", { expect_equal(comp, expected) # Error check - ds_json$creationDateTime <- 1 + ds_json$asOfDateTime<- 1 expect_error(write_dataset_json(ds_json, json_location), "Dataset JSON file is invalid") }) diff --git a/vignettes/getting_started.Rmd b/vignettes/getting_started.Rmd index 
e2e182c..50404fb 100644 --- a/vignettes/getting_started.Rmd +++ b/vignettes/getting_started.Rmd @@ -148,7 +148,7 @@ write_dataset_json(ds_updated, file="iris.json") The `write_dataset_json()` also has the option to return the JSON output as a character string. ```{r write_char} -js <- write_dataset_json(ds_updated) +js <- write_dataset_json(ds_updated, pretty=TRUE) cat(js) ``` diff --git a/vignettes/odm_details.Rmd b/vignettes/odm_details.Rmd new file mode 100644 index 0000000..57b29e0 --- /dev/null +++ b/vignettes/odm_details.Rmd @@ -0,0 +1,33 @@ +--- +title: "Notes on ODM V2" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Notes on ODM V2} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Dataset JSON inherently pulls some standards from the [ODM v2.0](https://wiki.cdisc.org/display/ODM2/ODM+v2.0+Specification) (Operational Data Model). From [CDISC](https://www.cdisc.org/standards/data-exchange/odm-xml/odm-v2-0): + +> The Operational Data Model (ODM) is a vendor-neutral, platform-independent data exchange format, intended primarily for interchange and archival of clinical study data pertaining to individual subjects, aggregated collections of subjects, and integrated research studies. The model includes the clinical data along with its associated metadata, administrative data, reference data, and audit information. All information that needs to be shared among different software systems during the setup, operation, analysis, submission, and archiving of a study are included in the model. + +Without having a background in ODM, some fields within the Dataset JSON file may be hard to understand without the ODM references available. This vignette provides some quick definitions that may prove useful. + +## fileOID + +> FileOIDs should be universally unique if at all possible. 
One way to ensure this is to prefix every FileOID with an internet domain name owned by the creator of the ODM file or database (followed by a forward slash, "/"). For example, FileOID="BestPharmaceuticals.com/Study5894/1" might be a good way to denote the first file in a series for study 5894 from Best Pharmaceuticals. + +[*Reference link*](https://wiki.cdisc.org/display/ODM2/Single+Files+and+Collections) + +## asOfDateTime and creationDateTime + +> The creationDateTime attribute indicates when the ODM document was created. In contrast, the asOfDateTime attribute tells when the document content was accurate by specifying the date/time at which the source database was queried to create the ODM document. This is of particular importance when a series of files is used to give an evolving view of a changing database. + +[*Reference link*](https://wiki.cdisc.org/display/ODM2/Single+Files+and+Collections)