Skip to content

Commit

Permalink
write updates
Browse files Browse the repository at this point in the history
  • Loading branch information
nicholas-masel committed Sep 16, 2024
1 parent 9cbe86e commit 4a5ccad
Show file tree
Hide file tree
Showing 11 changed files with 338 additions and 67 deletions.
179 changes: 179 additions & 0 deletions R/variable_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#' Build the variable (column) metadata for a Dataset JSON file
#'
#' Validates the supplied variable metadata, prepends an `ITEMGROUPDATASEQ`
#' record identifier entry when one is not already present, and converts the
#' result into a list holding one named list per variable.
#'
#' @param items Variable metadata: a data.frame with one row per variable.
#'   Must contain the columns `itemOID`, `name`, `label` and `dataType`, and
#'   may contain `targetDataType`, `displayFormat`, `length` and `keySequence`.
#' @param .data Dataframe to be written to Dataset JSON file. Currently unused
#'   by this function.
#'
#' @return A `variable_metadata` object: a list with one named list of
#'   non-missing attributes per variable
#' @export
#' @examples
#' var_meta <- variable_metadata(
#'   items = iris_items
#' )
variable_metadata <- function(items, .data) {
  # Check items before moving any further
  validate_variable_items(items)

  # Prepend the ITEMGROUPDATASEQ entry unless the caller already supplied one
  if (!("ITEMGROUPDATASEQ" %in% items$itemOID)) {
    igds_row <- data.frame(
      itemOID = "ITEMGROUPDATASEQ",
      name = "ITEMGROUPDATASEQ",
      label = "Record Identifier",
      dataType = "integer"
    )

    # rbind() needs matching column sets, so pad each side with NA columns
    igds_row[setdiff(names(items), names(igds_row))] <- NA
    items[setdiff(names(igds_row), names(items))] <- NA

    items <- rbind(igds_row, items)
  }

  # One named list per variable, with NA attributes dropped
  structure(
    df_to_list_rows(items),
    class = c("variable_metadata", "list")
  )
}

#' #' Apply dataframe to itemData attribute
#' #'
#' #' This function will set the itemData attribute within a datasetjson or
#' #' dataset_metadata object. It additionally sets the records parameter with the
#' #' proper number of rows in .data.
#' #'
#' #' @param x Object to set itemData
#' #' @param .data Dataframe to be written to Dataset JSON file
#' #' @param ... Additional params
#' #'
#' #' @return Input object with itemData applied
#' #' @noRd
#' set_item_data <- function(x, .data, ...) {
#' stopifnot_dataset_metadata(x)
#'
#' if (!inherits(.data, "data.frame")) {
#' stop(".data must be a data.frame", call.=FALSE)
#' }
#'
#' records <- nrow(.data)
#' # Derive ITEMGROUPDATASEQ and insert it up front in the dataframe
#' item_data <- cbind(ITEMGROUPDATASEQ = 1:records, .data)
#'
#' # Convert data and date times
#' item_data <- convert_to_sas_datenum(item_data)
#' item_data <- convert_to_sas_datetimenum(item_data)
#'
#' # Insert into object in proper spots
#' x[[1]][['records']] <- records
#' x[[1]][['itemData']] <- unname(item_data)
#' x
#' }

#' Verify that the item metadata supplied is the appropriate format
#'
#' This function does the following checks and consolidates to a single error
#' message:
#' - Columns missing that must be present
#' - Columns present that are not permissible
#' - Columns with NAs that must be fully populated
#' - Columns that should be character or integer but aren't
#' - Within the dataType column, if the values are within the permissible list
#'   per the schema
#'
#' @param items Variable metadata to check (one row per variable)
#'
#' @return `NULL`, invisibly, when every check passes; otherwise an error
#'   listing every issue found is raised
#' @noRd
validate_variable_items <- function(items) {
  required_cols <- c("itemOID", "name", "label", "dataType")
  all_cols <- c(
    "itemOID", "name", "label", "dataType", "targetDataType",
    "displayFormat", "length", "keySequence"
  )
  valid_types <- c(
    "string", "integer", "float", "double", "decimal", "boolean",
    "datetime", "date", "time", "URI"
  )
  item_names <- names(items)

  # Check for missing or extraneous columns
  missing_cols <- setdiff(required_cols, item_names)
  err_missing_cols <- sprintf(
    "Column `%s` is missing and must be present", missing_cols
  )
  additional_cols <- setdiff(item_names, all_cols)
  err_additional_cols <- sprintf(
    "Column `%s` is not a permissible column", additional_cols
  )

  # Check for NAs in required columns
  any_nas <- vapply(
    items[intersect(required_cols, item_names)],
    function(col) any(is.na(col)),
    FUN.VALUE = logical(1)
  )
  has_nas <- names(any_nas)[any_nas]
  err_nas <- sprintf("Column `%s` must not have NA values", has_nas)

  # Check columns that should be character
  char_cols <- intersect(
    c("itemOID", "name", "label", "dataType", "targetDataType",
      "displayFormat"),
    item_names
  )
  are_char_cols <- vapply(
    items[char_cols], is.character, FUN.VALUE = logical(1)
  )
  not_char_cols <- names(are_char_cols)[!are_char_cols]
  err_char_cols <- sprintf(
    "Column `%s` must be of type character", not_char_cols
  )

  # Check columns that should be integers
  int_cols <- intersect(c("length", "keySequence"), item_names)
  are_int_cols <- vapply(items[int_cols], is.integer, FUN.VALUE = logical(1))
  not_int_cols <- names(are_int_cols)[!are_int_cols]
  err_int_cols <- sprintf("Column `%s` must be of type integer", not_int_cols)

  # Check that dataType values are within the permissible list
  err_type_vars <- character()
  if ("dataType" %in% item_names) {
    # `[[` rather than `$` so a column is only picked up on an exact name match
    data_types <- items[["dataType"]]
    bad_types <- !(data_types %in% valid_types)

    if (any(bad_types)) {
      # sprintf() returns character(0) when any argument is zero-length, so
      # when `name` is absent identify the variable some other way instead of
      # silently dropping these errors from the consolidated message
      var_labels <- if ("name" %in% item_names) {
        items[["name"]]
      } else if ("itemOID" %in% item_names) {
        items[["itemOID"]]
      } else {
        paste("in row", seq_along(data_types))
      }

      err_type_vars <- sprintf(
        paste(
          "Variable %s has an invalid dataType value of %s.",
          "Must be one of",
          paste(valid_types, collapse = ", ")
        ),
        var_labels[bad_types], data_types[bad_types]
      )
    }
  }

  all_errs <- c(
    err_missing_cols, err_additional_cols, err_nas,
    err_char_cols, err_int_cols, err_type_vars
  )

  if (length(all_errs) > 0) {
    # stop() concatenates its arguments, giving one issue per indented line
    stop(
      "Error: Issues found in items data:",
      paste0("\n\t", all_errs),
      call. = FALSE
    )
  }

  invisible(NULL)
}

#' Convert a dataframe into a list of rows without NAs
#'
#' The variable attributes are stored as named lists within the output
#' JSON file, so to write them out the dataframe needs to be a list with
#' one named list per row
#'
#' @param x A data.frame
#'
#' @return Unnamed list with one entry per row of `x`; each entry is a named
#'   list of that row's non-NA values. A zero-row `x` gives an empty list.
#' @noRd
df_to_list_rows <- function(x) {
  # Split the dataframe into one single-row dataframe per row. seq_len() is
  # used rather than seq() because seq(0) is c(1, 0), which would turn a
  # zero-row dataframe into two bogus empty rows.
  rows <- unname(split(x, seq_len(nrow(x))))

  # Convert each row into a named list while removing NAs
  lapply(rows, function(row) {
    fields <- as.list(row)
    fields[!is.na(fields)]
  })
}
46 changes: 40 additions & 6 deletions R/write_dataset_json.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,60 @@
#' @param x datasetjson object
#' @param file File path to save Dataset JSON file
#' @param pretty If TRUE, write with readable formatting
#' @param items Variable metadata
#' @param type JSON or NDJSON, if NDJSON variable names are left on rows for streaming
#'
#' @return NULL when file written to disk, otherwise character string
#' @export
#'
#' @examples
#' # Write to character object
#' ds_json <- dataset_json(iris, "IG.IRIS", "IRIS", "Iris", iris_items)
#' js <- write_dataset_json(ds_json)
#' js <- write_dataset_json(ds_json, items = iris_items)
#'
#' # Write to disk
#' \dontrun{
#' write_dataset_json(ds_json, "path/to/file.json", items = iris_items)
#' }
write_dataset_json <- function(x, file, pretty=FALSE) {
write_dataset_json <- function(x, file, pretty=FALSE, items, type="JSON") {
stopifnot_datasetjson(x)

# Populate the creation datetime
x[['creationDateTime']] <- get_datetime()
attr(x, 'datasetJSONCreationDateTime') <- get_datetime()

x <- remove_nulls(x)
# Store number of records
attr(x, 'records') <- nrow(x)

# Pull attributes into a list and order
temp <- attributes(x)[c(
"datasetJSONCreationDateTime",
"datasetJSONVersion",
"fileOID",
"dbLastModifiedDateTime",
"originator",
"sourceSystem",
"studyOID",
"metaDataVersionOID",
"metaDataRef",
"itemGroupOID",
"isReferenceData",
"records",
"name",
"label")
]

# add ITEMGROUPDATASEQ to data
records <- nrow(x)
x <- cbind(ITEMGROUPDATASEQ = 1:records, x)

# add variable metadata and data
temp$columns <- variable_metadata(items)
temp$rows <- x

# Leave the names for NDJSON, remove for JSON
if (type == "JSON") {
names(temp$rows) <- NULL
}

if (!missing(file)) {
# Make sure the output path exists
Expand All @@ -40,14 +74,14 @@ write_dataset_json <- function(x, file, pretty=FALSE) {
if (!missing(file)) {
# Write file to disk
yyjsonr::write_json_file(
x,
temp,
filename = file,
opts = json_opts
)
} else {
# Print to console
yyjsonr::write_json_str(
x,
temp,
opts = json_opts
)
}
Expand Down
Binary file modified data/iris_items.rda
Binary file not shown.
1 change: 1 addition & 0 deletions dm.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"datasetJSONCreationDateTime":"2024-09-16T17:14:40","datasetJSONVersion":"1.1.0","fileOID":"www.cdisc.org/StudyMSGv2/1/Define-XML_2.1.0/2024-08-05/dm","dbLastModifiedDateTime":"2020-08-21T09:14:29","originator":"CDISC SDTM MSG Team","sourceSystem":{"name":"SAS on X64_10PRO","version":"9.0401M7"},"studyOID":"cdisc.com/CDISCPILOT01","metaDataVersionOID":"MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7","metaDataRef":"define.xml","itemGroupOID":"IG.DM","isReferenceData":false,"records":18,"name":"DM","label":"Demographics","columns":[{"itemOID":"ITEMGROUPDATASEQ","name":"ITEMGROUPDATASEQ","label":"Record Identifier","dataType":"integer"},{"itemOID":"IT.DM.STUDYID","name":"STUDYID","label":"Study Identifier","dataType":"string","length":12,"keySequence":1},{"itemOID":"IT.DM.DOMAIN","name":"DOMAIN","label":"Domain Abbreviation","dataType":"string","length":2},{"itemOID":"IT.DM.USUBJID","name":"USUBJID","label":"Unique Subject Identifier","dataType":"string","length":8,"keySequence":2},{"itemOID":"IT.DM.SUBJID","name":"SUBJID","label":"Subject Identifier for the Study","dataType":"string","length":4},{"itemOID":"IT.DM.RFSTDTC","name":"RFSTDTC","label":"Subject Reference Start Date/Time","dataType":"date"},{"itemOID":"IT.DM.RFENDTC","name":"RFENDTC","label":"Subject Reference End Date/Time","dataType":"date"},{"itemOID":"IT.DM.RFXSTDTC","name":"RFXSTDTC","label":"Date/Time of First Study Treatment","dataType":"date"},{"itemOID":"IT.DM.RFXENDTC","name":"RFXENDTC","label":"Date/Time of Last Study Treatment","dataType":"date"},{"itemOID":"IT.DM.RFICDTC","name":"RFICDTC","label":"Date/Time of Informed Consent","dataType":"date"},{"itemOID":"IT.DM.RFPENDTC","name":"RFPENDTC","label":"Date/Time of End of Participation","dataType":"date"},{"itemOID":"IT.DM.DTHDTC","name":"DTHDTC","label":"Date/Time of Death","dataType":"date"},{"itemOID":"IT.DM.DTHFL","name":"DTHFL","label":"Subject Death Flag","dataType":"string","length":1},{"itemOID":"IT.DM.SITEID","name":"SITEID","label":"Study Site 
Identifier","dataType":"string","length":3},{"itemOID":"IT.DM.BRTHDTC","name":"BRTHDTC","label":"Date/Time of Birth","dataType":"date"},{"itemOID":"IT.DM.AGE","name":"AGE","label":"Age","dataType":"integer"},{"itemOID":"IT.DM.AGEU","name":"AGEU","label":"Age Units","dataType":"string","length":5},{"itemOID":"IT.DM.SEX","name":"SEX","label":"Sex","dataType":"string","length":1},{"itemOID":"IT.DM.RACE","name":"RACE","label":"Race","dataType":"string","length":41},{"itemOID":"IT.DM.ETHNIC","name":"ETHNIC","label":"Ethnicity","dataType":"string","length":22},{"itemOID":"IT.DM.ARMCD","name":"ARMCD","label":"Planned Arm Code","dataType":"string","length":8},{"itemOID":"IT.DM.ARM","name":"ARM","label":"Description of Planned Arm","dataType":"string","length":28},{"itemOID":"IT.DM.ACTARMCD","name":"ACTARMCD","label":"Actual Arm Code","dataType":"string","length":8},{"itemOID":"IT.DM.ACTARM","name":"ACTARM","label":"Description of Actual Arm","dataType":"string","length":28},{"itemOID":"IT.DM.ARMNRS","name":"ARMNRS","label":"Reason Arm and/or Actual Arm is Null","dataType":"string","length":14},{"itemOID":"IT.DM.ACTARMUD","name":"ACTARMUD","label":"Description of Unplanned Actual Arm","dataType":"string","length":200},{"itemOID":"IT.DM.COUNTRY","name":"COUNTRY","label":"Country","dataType":"string","length":3}],"rows":[[1,"CDISCPILOT01","DM","CDISC001","1115","2012-11-30","2013-01-23","2012-11-30","2013-01-23","2012-11-23","2013-05-20","","","701","1928",84.0,"YEARS","M","WHITE","NOT HISPANIC OR LATINO","ZAN_LOW","Zanomaline Low Dose (54 mg)","ZAN_LOW","Zanomaline Low Dose (54 mg)","","","USA"],[2,"CDISCPILOT01","DM","CDISC002","1211","2012-11-15","2013-01-14","2012-11-15","2013-01-12","2012-10-30","2013-01-14","2013-01-14","Y","701","1936",76.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_LOW","Zanomaline Low Dose (54 mg)","ZAN_LOW","Zanomaline Low Dose (54 
mg)","","","USA"],[3,"CDISCPILOT01","DM","CDISC003","1302","2013-08-29","2013-11-05","2013-08-29","2013-11-05","2013-08-20","2014-02-13","","","701","1951",61.0,"YEARS","M","WHITE","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[4,"CDISCPILOT01","DM","CDISC004","1345","2013-10-08","2014-03-18","2013-10-08","2014-03-18","2013-10-01","2014-03-18","","","701","1950",63.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","PLACEBO","Placebo","PLACEBO","Placebo","","","USA"],[5,"CDISCPILOT01","DM","CDISC005","1383","2013-02-04","2013-08-06","2013-02-04","2013-08-06","2013-01-22","2013-08-06","","","701","1941",72.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[6,"CDISCPILOT01","DM","CDISC006","1429","2013-03-19","2013-04-30","2013-03-19","2013-04-30","2013-02-25","2013-04-30","","","701","1929",84.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_LOW","Zanomaline Low Dose (54 mg)","ZAN_LOW","Zanomaline Low Dose (54 mg)","","","USA"],[7,"CDISCPILOT01","DM","CDISC007","1444","2013-01-05","2013-02-13","2013-01-05","2013-02-12","2012-12-31","2013-06-20","","","701","1949",63.0,"YEARS","M","WHITE","HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[8,"CDISCPILOT01","DM","CDISC008","1445","2014-05-11","2014-11-01","2014-05-11","2014-11-01","2014-05-01","2014-11-01","2014-11-01","Y","704","1939",75.0,"YEARS","M","MULTIPLE","NOT HISPANIC OR LATINO","PLACEBO","Placebo","PLACEBO","Placebo","","","USA"],[9,"CDISCPILOT01","DM","CDISC009","1087","2012-10-22","2013-04-28","2012-10-22","2013-04-28","2012-10-06","2013-04-28","","","708","1938",74.0,"YEARS","F","WHITE","NOT HISPANIC OR 
LATINO","PLACEBO","Placebo","PLACEBO","Placebo","","","USA"],[10,"CDISCPILOT01","DM","CDISC010","1236","2013-09-21","2013-09-26","2013-09-21","2013-09-21","2013-09-08","2013-09-26","","","708","1927",86.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[11,"CDISCPILOT01","DM","CDISC011","1336","2012-12-07","2013-06-05","2012-12-07","2013-06-05","2012-11-21","2013-07-05","","","708","1939",73.0,"YEARS","M","WHITE","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[12,"CDISCPILOT01","DM","CDISC012","1378","2013-09-03","2014-01-28","2013-09-03","2014-01-28","2013-08-24","2014-01-28","","","708","1946",67.0,"YEARS","M","BLACK OR AFRICAN AMERICAN","NOT HISPANIC OR LATINO","PLACEBO","Placebo","PLACEBO","Placebo","","","USA"],[13,"CDISCPILOT01","DM","CDISC013","1083","2013-07-22","2013-08-03","2013-07-22","2013-08-01","2013-07-09","2013-08-03","2013-08-03","Y","710","1924",89.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","PLACEBO","Placebo","PLACEBO","Placebo","","","USA"],[14,"CDISCPILOT01","DM","CDISC014","1012","2013-04-03","2013-05-02","2013-04-03","2013-04-29","2013-03-20","2013-09-18","","","711","1945",67.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"],[15,"CDISCPILOT01","DM","CDISC015","1022","","","","","2014-03-17","2014-03-17","","","711","1928",86.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","","","","","SCREEN FAILURE","","USA"],[16,"CDISCPILOT01","DM","CDISC016","1143","2013-04-03","2013-06-01","2013-04-03","2013-05-30","2013-03-30","2013-09-22","","","711","1936",76.0,"YEARS","F","WHITE","NOT HISPANIC OR LATINO","ZAN_LOW","Zanomaline Low Dose (54 mg)","ZAN_LOW","Zanomaline Low Dose (54 
mg)","","","USA"],[17,"CDISCPILOT01","DM","CDISC017","1250","2013-09-21","2014-02-08","2013-09-21","2014-01-31","2013-08-21","2014-03-08","","","718","1931",82.0,"YEARS","F","WHITE","HISPANIC OR LATINO","ZAN_LOW","Zanomaline Low Dose (54 mg)","ZAN_LOW","Zanomaline Low Dose (54 mg)","","","USA"],[18,"CDISCPILOT01","DM","CDISC018","1427","2012-12-17","2013-02-18","2012-12-17","2013-02-11","2012-12-13","2013-06-03","","","718","1938",74.0,"YEARS","F","BLACK OR AFRICAN AMERICAN","NOT HISPANIC OR LATINO","ZAN_HIGH","Zanomaline High Dose (81 mg)","ZAN_HIGH","Zanomaline High Dose (81 mg)","","","USA"]]}
1 change: 1 addition & 0 deletions ta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"datasetJSONCreationDateTime":"2024-09-16T17:17:50","datasetJSONVersion":"1.1.0","fileOID":"www.cdisc.org/StudyMSGv2/1/Define-XML_2.1.0/2024-08-05/ta","dbLastModifiedDateTime":"2020-08-21T09:14:26","originator":"CDISC SDTM MSG Team","sourceSystem":{"name":"SAS on X64_10PRO","version":"9.0401M7"},"studyOID":"cdisc.com/CDISCPILOT01","metaDataVersionOID":"MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7","metaDataRef":"define.xml","itemGroupOID":"IG.TA","isReferenceData":true,"records":8,"name":"TA","label":"Trial Arms","columns":[{"itemOID":"ITEMGROUPDATASEQ","name":"ITEMGROUPDATASEQ","label":"Record Identifier","dataType":"integer"},{"itemOID":"IT.TA.STUDYID","name":"STUDYID","label":"Study Identifier","dataType":"string","length":12,"keySequence":1},{"itemOID":"IT.TA.DOMAIN","name":"DOMAIN","label":"Domain Abbreviation","dataType":"string","length":2},{"itemOID":"IT.TA.ARMCD","name":"ARMCD","label":"Planned Arm Code","dataType":"string","length":8,"keySequence":2},{"itemOID":"IT.TA.ARM","name":"ARM","label":"Description of Planned Arm","dataType":"string","length":28},{"itemOID":"IT.TA.TAETORD","name":"TAETORD","label":"Planned Order of Element within Arm","dataType":"integer","keySequence":3},{"itemOID":"IT.TA.ETCD","name":"ETCD","label":"Element Code","dataType":"string","length":7},{"itemOID":"IT.TA.ELEMENT","name":"ELEMENT","label":"Description of Element","dataType":"string","length":26},{"itemOID":"IT.TA.TABRANCH","name":"TABRANCH","label":"Branch","dataType":"string","length":200},{"itemOID":"IT.TA.TATRANS","name":"TATRANS","label":"Transition Rule","dataType":"string","length":200},{"itemOID":"IT.TA.EPOCH","name":"EPOCH","label":"Epoch","dataType":"string","length":9}],"rows":[[1,"CDISCPILOT01","TA","PLACEBO","Placebo",1.0,"SCREEN","Screening","Randomized to Placebo","","SCREENING"],[2,"CDISCPILOT01","TA","PLACEBO","Placebo",2.0,"PLACEBO","Placebo","","","TREATMENT"],[3,"CDISCPILOT01","TA","ZAN_LOW","Zanomaline Low Dose (54 mg)",1.0,"SCREEN","Screening","Randomized to 
Zanomaline Low Dose","","SCREENING"],[4,"CDISCPILOT01","TA","ZAN_LOW","Zanomaline Low Dose (54 mg)",2.0,"LOW","Zanomaline 54 mg","","","TREATMENT"],[5,"CDISCPILOT01","TA","ZAN_HIGH","Zanomaline High Dose (81 mg)",1.0,"SCREEN","Screening","Randomized to Zanomaline High Dose","","SCREENING"],[6,"CDISCPILOT01","TA","ZAN_HIGH","Zanomaline High Dose (81 mg)",2.0,"TITRATE","Zanomaline 54 mg Titration","","","TREATMENT"],[7,"CDISCPILOT01","TA","ZAN_HIGH","Zanomaline High Dose (81 mg)",3.0,"HIGH","Zanomaline 81 mg","","","TREATMENT"],[8,"CDISCPILOT01","TA","ZAN_HIGH","Zanomaline High Dose (81 mg)",4.0,"TITRATE","Zanomaline 54 mg Titration","","","TREATMENT"]]}
19 changes: 19 additions & 0 deletions test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"datasetJSONCreationDateTime": "2024-09-16T15:58:26",
"datasetJSONVersion": "1.1.0",
"fileOID": "/some/path",
"dbLastModifiedDateTime": "2023-02-15T10:23:15",
"originator": "Some Org",
"sourceSystem": {
"name": "source system",
"version": "1.0"
},
"studyOID": "SOMESTUDY",
"metaDataVersionOID": "MDV.MSGv2.0.SDTMIG.3.3.SDTM.1.7",
"metaDataRef": "some/define.xml",
"itemGroupOID": "IG.IRIS",
"isReferenceData": false,
"records": 5,
"name": "IRIS",
"label": "Iris"
}
Loading

0 comments on commit 4a5ccad

Please sign in to comment.