Merge pull request #49 from philipdelff/anyDelim

Any delim
NMautoverse · Aug 13, 2024 · 33dead9 · 33dead9
2 parents 46f1499 + f957586
commit 33dead9
Show file tree

Hide file tree

Showing 128 changed files with 25,889 additions and 4,580 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -42,6 +42,7 @@ input.grd
 input.shk
 INTER
 LINK.LNK
+README.html
 tests/testthat/testOutput/simulations/nonmem
 tests/testthat/testOutput/simulations/NMsimData_xgxr.+\.rds
 tests/testthat/testData/simulations/xgxr..._subprobs/NMsim_xgxr..._subprobs_dir.+

diff --git a/.gitignore b/.gitignore
@@ -55,7 +55,6 @@ vignettes/*\.R
 inst/examples/nonmem/modelfit_dir*
 inst/examples/nonmem/trashed_*
 inst/examples/nonmem/backup_*
-inst/examples/nonmem/*\.ext
 inst/examples/nonmem/*\.phi
 inst/examples/nonmem/xgxr001dir/backup_*
 tests_manual/testthat/testOutput

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,32 +1,37 @@
 Package: NMdata
 Type: Package
 Title: Preparation, Checking and Post-Processing Data for PK/PD Modeling
-Version: 0.1.6.901
-Authors@R: c(person("Philip", "Delff", email = "philip@delff.dk", role = c("aut", "cre")),
-  person("Eric", "Anderson", email = "andersone@metrumrg.com",role = c("ctb")),
-  person("Matthew","Fidler", role = c("ctb"), email = "matt.fidler@novartis.com")
-)
+Version: 0.1.6.915
+Authors@R:
+  c(person(given="Philip", family="Delff",
+     email = "philip@delff.dk",
+     role = c("aut", "cre")),
+    person(given="Eric", family="Anderson",
+    email = "andersone@metrumrg.com",
+    role = c("ctb")),
+  person(given="Matthew",family="Fidler", role = c("ctb"), email = "matt.fidler@novartis.com"))
 Maintainer: Philip Delff <philip@delff.dk>
 Description: Efficient tools for preparation, checking and post-processing of data in PK/PD (pharmacokinetics/pharmacodynamics) modeling, with focus on use of Nonmem. Attention is paid to ensure consistency, traceability, and Nonmem compatibility of Data. Rigorously checks final Nonmem datasets. Implemented in 'data.table', but easily integrated with 'base' and 'tidyverse'.
 License: MIT + file LICENSE
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Roxygen: list(roclets=c("collate", "namespace", "rd", "vignette"))
 Depends: R (>= 3.1.0)
 Imports:
   data.table,
   fst
 Suggests:
-    testthat,
-    knitr,
-    NMsim,
-    formatR,
-    mime,
-    rmarkdown,
-    ggplot2,
-    tibble,
-    covr,
-    htmltools,
-    spelling
+  testthat,
+  knitr,
+  NMsim,
+  NMcalc,
+  formatR,
+  mime,
+  rmarkdown,
+  ggplot2,
+  tibble,
+  covr,
+  htmltools,
+  spelling
 Encoding: UTF-8
 URL: https://philipdelff.github.io/NMdata/
 BugReports: https://github.com/philipdelff/NMdata/issues

diff --git a/NAMESPACE b/NAMESPACE
@@ -23,7 +23,9 @@ export(NMreadExt)
 export(NMreadParsText)
 export(NMreadPhi)
 export(NMreadSection)
+export(NMreadShk)
 export(NMreadTab)
+export(NMrelate)
 export(NMreplaceDataFile)
 export(NMscanData)
 export(NMscanInput)

diff --git a/NEWS.md b/NEWS.md
@@ -1,11 +1,43 @@
 
 # 0.1.7
+## New features
+* `NMreadPartab()` has been generalized to support comment formats very
+  generally. `NMreadPartab()` reads the comments in `$THETA`, `$OMEGA`
+  and `$SIGMA` sections, splits them into variables, and organizes
+  those variables in a parameter table. With this upgrade, pretty much
+  any structure should be supported as long as delimitors are not
+  alphabetic or numeric (so any special characters should
+  work). Notice, delimitors can change between fields . Example:
+  "$THETA 1.4 ; 3 - CL (Clearance) [L/h]" would be matched by
+  `NMreadPartab(...,format="%init ;%idx-%symbol(%label)[%unit]")`
+  which would then return a table including columns init, idx, symbol,
+  label, and unit. The comments must be systematic within say `$THETA`
+  but the format can be different for `$OMEGA` and `$SIGMA`. See
+  examples in `?NMreadParTab`.
+
+* `NMrelate()` is a new automated approach to label parameters. It
+  interprets Nonmem code and provides labels used in the control
+  stream. If the line `TVCL=THETA(1)` is the only line in the code
+  that references THETA(1), `NMrelate()` will return a label
+  `TVCL`.
+
+* `mergeCheck()` has additional features available in the common.cols
+  argument.
+
 ## Bugfixes
 * `NMscanInput()` and `NMreadCsv()` could fail if file names had no
   extensions. Fixed.
 
 * `NMreplaceDataFile()` now works on directories and regular
   expressions to find models to update.
+
+* Some internal functions would make some functions including
+  `NMscanData()` fail if used within `lapply()`. Fixed.
+
+* `NMexpandDoses()` would give a warning if `length(cols.id)>1`. Fixed.
+
+* `NMreadExt()` would mess up iterations and parameter estimates if
+  `as.fun` was set to returning something else than `data.table`s. Fixed.
 
 # 0.1.6
 

diff --git a/R/NMcheckData.R b/R/NMcheckData.R
@@ -384,6 +384,7 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
     }
     NMisMissing <- function(x) is.na(x) | (is.character(x) & x %in% na.strings)
 
+
     ## listEvents is for row-level findings
     ## @param col is the actual column to be used for the condition
     ## @param name The name of the check. Will end up in the check column in the resulting table.
@@ -430,6 +431,8 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
         rbind(events,res,fill=TRUE)
     }
 
+
+
     reportFindings <- function(findings,data,col.id,col.row,c.row,col.row.orig,col.id.orig,quiet,as.fun,return.summary){
 
 ### Add ID's to row-level findings
@@ -609,8 +612,10 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
 
 ######## Default numeric columns. Will be checked for presence, NA, non-numeric (col-level)
 ### Others that: If column present, must be numeric, and values must be non-NA. Remember eg DV, CMT and AMT can be NA.
-
-    cols.num.all <- c( col.time,"EVID",col.id,col.mdv,
+
+    cols.num.all <- c( col.time,col.evid,col.id)
+    if(type.data=="est") cols.num.all <- c(cols.num.all,col.mdv)
+    cols.num.all <- c(cols.num.all,
                       covs,names(covs.occ),as.character(unlist(covs.occ))
                       )
     if(!is.null(col.flagn.orig)) cols.num.all <- c(cols.num.all,col.flagn)
@@ -675,7 +680,7 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
     findings <- listEvents(col.time,"Negative time",fun=function(x)x>=0,events=findings,debug=F)
 
 ### EVID must be in c(0,1,2,3,4)
-    findings <- listEvents("EVID","EVID not in 0:4",function(x) x%in%c(0:4),events=findings)
+    findings <- listEvents(col.evid,"EVID not in 0:4",function(x) x%in%c(0:4),events=findings)
 
 ### ID must be a positive integer
     findings <- listEvents(col.id,paste(col.id,"not a positive integer"),
@@ -685,16 +690,26 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
 
 ### MDV should perfectly reflect is.na(DV)
     if(col.mdv%in%colnames(data)){
-
-        data[,MDVDV:=!is.na(get(col.mdv))&get(col.mdv)==as.numeric(is.na(get(col.dv)))]
-        findings <- listEvents("MDVDV","MDV does not match DV",colname=col.mdv,fun=function(x)x==TRUE,dat=data[get(col.evid)==0],events=findings)
+        if(col.dv%in%colnames(data)){
+            data[,MDVDV:=!is.na(get(col.mdv))&get(col.mdv)==as.numeric(is.na(get(col.dv)))]
+            findings <- listEvents("MDVDV","MDV does not match DV",colname=col.mdv,fun=function(x)x==TRUE,dat=data[get(col.evid)==0],events=findings)
+        } else {
+            findings <- listEvents("MDVDV","MDV found, DV not"
+                                  ,colname=col.mdv
+                                  ,fun=function(x)x==TRUE
+                                  ,col.required=FALSE
+                                  ,dat=data[get(col.evid)==0]
+                                  ,events=findings
+                                   )
+        }
     }
 
 ###  columns that are required for all rows done
 
 #### all other required columns (NA elements OK). Run NMisNumeric for each element, then translate using NMasNumeric
 
-    cols.req <- c(col.cmt,col.dv,col.amt)
+    cols.req <- c(col.cmt,col.amt)
+    if(type.data=="est") cols.req <- c(cols.req,col.dv)
     for(col in cols.req){
 
         findings <- listEvents(col,name="Not numeric",fun=function(x)NMisNumeric(x,na.strings=na.strings,each=TRUE),
@@ -729,13 +744,22 @@ NMcheckData <- function(data,file,covs,covs.occ,cols.num,col.id="ID",
 ### DV must be present
 ### DV must be numeric for EVID==0
     if(col.mdv%in%colnames(data)){
-        findings <- listEvents(col.dv,"DV not numeric",fun=is.na,events=findings,invert=TRUE,dat=data[EVID%in%c(0)&get(col.mdv)==0])
+        findings <- listEvents(col.dv,"DV not numeric",fun=is.na,events=findings,invert=TRUE
+                              ,col.required=type.data=="est"
+                              ,dat=data[EVID%in%c(0)&get(col.mdv)==0])
     } else {
-        findings <- listEvents(col.dv,"DV not numeric",fun=is.na,events=findings,invert=TRUE,dat=data[EVID%in%c(0)])
+        findings <- listEvents(col.dv,"DV not numeric",
+                               fun=is.na,events=findings,
+                               invert=TRUE
+                              ,col.required=type.data=="est"
+                               ,dat=data[EVID%in%c(0)]
+                               )
     }
 
 ### DV should be NA or 0 for dosing records
-    findings <- listEvents(col.dv,"DV not NA or 0 in dosing recs",fun=function(x)is.na(x)|as.numeric(x)==0,events=findings,dat=data[EVID%in%c(1,4)])
+    if(type.data=="est"){
+        findings <- listEvents(col.dv,"DV not NA or 0 in dosing recs",fun=function(x)is.na(x)|as.numeric(x)==0,events=findings,dat=data[EVID%in%c(1,4)])
+    }
 
 
 #### AMT

diff --git a/R/NMcheckDataFile.R b/R/NMcheckDataFile.R
@@ -50,7 +50,8 @@ NMcheckDataFile <- function(file,col.row,col.id="ID",formats.read="csv",quiet=FA
         messageWrap("use.input=FALSE not allowed",track.msg = TRUE,fun.msg=stop)
     }
 
-    use.rds <- deprecatedArg(oldarg="use.rds",msg="Use `formats.read` instead. Overwriting `formats.read`.")
+    args <- getArgs(sys.call(),parent.frame())
+    use.rds <- deprecatedArg(oldarg="use.rds",msg="Use `formats.read` instead. Overwriting `formats.read`.",args=args)
     if(!is.null(use.rds)&&use.rds){
         formats.read <- c("rds","csv")
     }

diff --git a/R/NMdataConf.R b/R/NMdataConf.R
@@ -232,7 +232,8 @@ NMdataConf <- function(...,allow.unknown=FALSE){
             val <- NMdataDecideOption("use.rds",val,allow.unknown=allow.unknown)
             if(!is.null(val)){
 
-                args1 <- getArgs()
+                ##args1 <- getArgs()
+                args1 <- getArgs(sys.call(),parent.frame())
                 deprecatedArg(oldarg="use.rds",args=args1)
                 message("overwriting `formats.read`")
                 dots[["formats.read"]] <- c("csv")
@@ -466,6 +467,19 @@ NMdataConfOptions <- function(name,allow.unknown=TRUE){
                x
            }
         )
+       ,
+        file.shk=list(
+            default=function(file) {
+                fnExtension(file,ext=".shk")
+            }
+            ## has to be length 1 character or function
+           ,is.allowed=function(x) is.function(x) || (length(x)==1 && is.character(x))
+           ,msg.not.allowed="file.shk must be a function or a character of length 1"
+           ,process=function(x) {
+               if(is.character(x)) return(function(file) x)
+               x
+           }
+        )
        ,
         file.mod=list(
             ## default=function(file) fnExtension(file,ext=".mod")

diff --git a/R/NMexpandDoses.R b/R/NMexpandDoses.R
@@ -79,8 +79,8 @@ NMexpandDoses <- function(data,col.time="TIME",col.id="ID",col.evid="EVID",track
         return(data)
     }
 
-    if(!col.id%in%colnames(data)){
-        stop("col.id must refer to a name of an existing column in data.")
+    if(!all(col.id%in%colnames(data))){
+        stop("col.id must refer to names of existing columns in data (most often just one column).")
     }
     if(!col.time%in%colnames(data)){
         stop("col.time must refer to a name of an existing column in data.")

diff --git a/R/NMextractDataFile.R b/R/NMextractDataFile.R
@@ -12,7 +12,7 @@
 ##'     and the existence of the file in that directory is not
 ##'     checked.
 ##' @param file.mod The input control stream. Default is to look for
-##'     \"file\" with extension changed to .mod (PSN style). You can
+##'     \"file\" with extension changed to `.mod` (PSN style). You can
 ##'     also supply the path to the file, or you can provide a
 ##'     function that translates the output file path to the input
 ##'     file path. The default behavior can be configured using

diff --git a/R/NMextractText.R b/R/NMextractText.R
@@ -55,7 +55,6 @@
 ##' @param keepEmpty Deprecated. See keep.empty.
 ##' @param keepName Deprecated. See keep.name.
 ##' @param asOne Deprecated. See as.one.
-##' @param cleanSpaces Deprecated. See clean.spaces.
 ##' @return character vector with extracted lines.
 ##' @details This function is planned to get a more general name and
 ##'     then be called by NMreadSection.
@@ -73,26 +72,26 @@ NMextractText <- function(file, lines, text, section, char.section,
                           type="mod", linesep="\n",
                           ## deprecated arguments
                           keepEmpty, keepName,
-                          keepComments, asOne,
-                          cleanSpaces
+                          keepComments, asOne
                           ){
 
     nsection <- NULL
     idx <- NULL
 
 #### Section start: Pre-process arguments ####
 
-    args <- getArgs()
+    ## args <- getArgs()
+    args <- getArgs(sys.call(),parent.frame())
 
 ### deprecated since 2023-06-14: keepEmpty, keepName, keepComments, asOne, cleanSpaces
     keep.empty <- deprecatedArg("keepEmpty","keep.empty",args=args)
     keep.name <- deprecatedArg("keepName","keep.name",args=args)
     keep.comments <- deprecatedArg("keepComments","keep.comments",args=args)
     as.one <- deprecatedArg("asOne","as.one",args=args)
-    clean.spaces <- deprecatedArg("cleanSpaces","clean.spaces",args=args)
+    ## clean.spaces <- deprecatedArg("cleanSpaces","clean.spaces",args=args)
 
     if(!return%in%c("idx","text")) stop("text must be one of text or idx.")
-
+    
     if(sum(c(!missing(file)&&!is.null(file),
              !missing(lines)&&!is.null(lines),
              !missing(text)&&!is.null(text)
@@ -194,13 +193,7 @@ NMextractText <- function(file, lines, text, section, char.section,
 
     if(clean.spaces){
         if(!return=="text") {
-            stop("cleanSpaces can only be TRUE if return=='text'")
-        }
-        cleanSpaces <- function(x,double=TRUE,lead=TRUE,trail=TRUE){
-            if(double) x <- gsub(paste0(" +")," ",x)
-            if(lead) x <- sub(paste0("^ +"),"",x)
-            if(trail) x <- sub(paste0(" +$"),"",x)
-            x
+            stop("clean.spaces can only be TRUE if return=='text'")
         }
         ## result <- lapply(result,cleanSpaces)
 

diff --git a/R/NMreadCov.R b/R/NMreadCov.R
@@ -1,6 +1,10 @@
 ##' Read in data file
 ##'
-##' @param file The .cov covariance Nonmem matrix file to read 
+##' @param file The .cov covariance Nonmem matrix file to read
+##' @param auto.ext If `TRUE` (default) the extension will automatically
+##'     be modified using `NMdataConf()$file.cov`. This means `file`
+##'     can be the path to an input or output control stream, and
+##'     `NMreadCov()` will still read the `.cov` file.
 ##' @param ... Passed to fread
 ##'
 ##' @details This function is taken from nonmem2rx::nmcov which was
@@ -14,7 +18,16 @@
 
 
 
-NMreadCov <- function (file, ...) {
+NMreadCov <- function (file,auto.ext, ...) {
+
+
+    if(missing(auto.ext) || is.null(auto.ext)) auto.ext <- TRUE
+    fun.file.cov <- NMdataDecideOption("file.cov")
+    if(auto.ext){
+        file <- fun.file.cov(file)
+    }
+
+
     if(!file.exists(file)){stop("file does not exist.")}
     TABLE <- NULL
     NMREP <- NULL