From 36d82d60534d9f2f4c0973e1a1625fc1963ab4f9 Mon Sep 17 00:00:00 2001
From: Philip Delff <philip@delff.dk>
Date: Mon, 4 Dec 2023 14:14:48 -0500
Subject: [PATCH] dt-based NMreplaceText, bugfix in NMwriteData messages

---
 DESCRIPTION                                   |   2 +-
 R/NMexpandDoses.R                             |   9 ++
 R/NMextractDataFile.R                         |   2 +-
 R/NMextractText.R                             |  99 ++++++++++++------
 R/NMtransInp.R                                |   2 +-
 R/NMwriteData.R                               |   2 +-
 .../testReference/NMextractText_2.rds         | Bin 37 -> 105 bytes
 tests/testthat/test_NMextractText.R           |   8 +-
 tests/testthat/test_NMwriteSection.R          |   5 +-
 9 files changed, 91 insertions(+), 38 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 98d36e83..ef2cfbe9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: NMdata
 Type: Package
 Title: Preparation, Checking and Post-Processing Data for PK/PD Modeling
-Version: 0.1.3.901
+Version: 0.1.3.902
 Authors@R: c(person("Philip", "Delff", email = "philip@delff.dk", role = c("aut", "cre")))
 Maintainer: Philip Delff <philip@delff.dk>
 Description: Efficient tools for preparation, checking and post-processing of data in PK/PD (pharmacokinetics/pharmacodynamics) modeling, with focus on use of Nonmem. Attention is paid to ensure consistency, traceability, and Nonmem compatibility of Data. Rigorously checks final Nonmem datasets. Implemented in 'data.table', but easily integrated with 'base' and 'tidyverse'.
diff --git a/R/NMexpandDoses.R b/R/NMexpandDoses.R
index cbd88b89..25ee23ae 100644
--- a/R/NMexpandDoses.R
+++ b/R/NMexpandDoses.R
@@ -107,6 +107,15 @@ NMexpandDoses <- function(data,col.time="TIME",col.id="ID",col.evid="EVID",track
     if(any(recs.folded[II==0|is.na(II)])) {
         warning("II values of zero found in events to be expanded. Is this an error?")
     }
+    if(any(recs.folded[II%%1]!=0)) {
+        warning("II seem to contain non-integers. Is this an error?")
+    }
+    if(any(recs.folded[ADDL%%1]!=0)) {
+        warning("II seem to contain non-integers. Is this an error?")
+    }
+    
+
+    
     
     newtimes <- data[get(rec.tmp)%in%recs.folded,
                      .(TIME=seq(get(col.time),by=II,length.out=ADDL+1)
diff --git a/R/NMextractDataFile.R b/R/NMextractDataFile.R
index 1dcd2095..9a953940 100644
--- a/R/NMextractDataFile.R
+++ b/R/NMextractDataFile.R
@@ -54,7 +54,7 @@ NMextractDataFile <- function(file,dir.data=NULL,file.mod,file.data=NULL){
             lines.data <- NMreadSection(file,section="INFILE",keep.name=FALSE,keep.comments=FALSE,keep.empty=FALSE)
         }
         if(is.null(lines.data)) stop("Could not find $DATA or $INFILE section in nonmem model. Please check the lst file.")
-
+        
         ## pick $DATA and the next string
         lines.data2 <- paste(lines.data,collapse=" ")
 ### remove leading blanks, then only use string until first blank
diff --git a/R/NMextractText.R b/R/NMextractText.R
index aca806f3..db155216 100644
--- a/R/NMextractText.R
+++ b/R/NMextractText.R
@@ -81,21 +81,35 @@ NMextractText <- function(file, lines, text, section, char.section,
     
     args <- getArgs()
     
-    ### deprecated since 2023-06-14: keepEmpty, keepName, keepComments, asOne, cleanSpaces
+### deprecated since 2023-06-14: keepEmpty, keepName, keepComments, asOne, cleanSpaces
     keep.empty <- deprecatedArg("keepEmpty","keep.empty",args=args)
     keep.name <- deprecatedArg("keepName","keep.name",args=args)
     keep.comments <- deprecatedArg("keepComments","keep.comments",args=args)
     as.one <- deprecatedArg("asOne","as.one",args=args)
     clean.spaces <- deprecatedArg("cleanSpaces","clean.spaces",args=args)
 
-### Section end: Pre-process arguments
-
-
+    if(!return%in%c("idx","text")) stop("text must be one of text or idx.")
 
     if(sum(c(!missing(file)&&!is.null(file),
              !missing(lines)&&!is.null(lines),
              !missing(text)&&!is.null(text)
              ))!=1) stop("Exactly one of file, lines, or text must be supplied")
+
+
+    ## works with both .mod and .lst
+    if(length(type)>1) stop("type must be a single-element character.")
+    if(is.null(type)||is.na(type)||grepl("^ *$",type)){
+        type <- "all"
+    }
+    if(type=="lst") type <- "res"
+
+    if(!match.exactly){
+        section <- substring(section,1,3)
+    }
+
+    
+### Section end: Pre-process arguments
+
     if(!missing(file)&&!is.null(file)) {
         if(!file.exists(file)) stop("When using the file argument, file has to point to an existing file.")
         lines <- readLines(file,warn=FALSE)
@@ -103,18 +117,7 @@ NMextractText <- function(file, lines, text, section, char.section,
     if(!missing(text)&&!is.null(text)) {
         lines <- strsplit(text,split=linesep)[[1]]
     }
-
-    if(!match.exactly){
-        section <- substring(section,1,3)
-    }
     
-    if(!return%in%c("idx","text")) stop("text must be one of text or idx.")
-    
-    ## works with both .mod and .lst
-    if(length(type)>1) stop("type must be a single-element character.")
-    if(is.null(type)||is.na(type)||grepl("^ *$",type)){
-        type <- "all"
-    }
 
     ## This line can give problems because of possible special characters in company names or the registerred trademark character. We are not using it anyway.
     lines <- lines[!grepl("^ *License Registered to:",lines,useBytes=TRUE)]
@@ -144,49 +147,83 @@ NMextractText <- function(file, lines, text, section, char.section,
         }
         idx.section <- idx.st:idx.end
     })
-    result <- idx.sections
+    ## result <- idx.sections
+    
+    if(length(idx.sections)==0) return(NULL)
+    
+    secs.list <- lapply(1:length(idx.sections),
+                        function(I){data.table(nsection=I,idx=idx.sections[[I]],text=lines[idx.sections[[I]]])
+                        })
+    dt.res <- rbindlist(secs.list)
     
     
     if(!keep.comments){
+#### dont drop a line from idx if there is a comment in it
         ## result <- lapply(result,function(x)
-        ##     x[!grepl(" *;",lines[x])]
+        ##     x[!grepl(" *;.*$",lines[x])]
         ##     )
         lines <- sub(pattern=" *;.*$",replacement="",x=lines)
+        dt.res[,text:=sub(pattern=" *;.*$",replacement="",x=text)]
     }
 
     if(!keep.empty){
-        result <- lapply(result,function(x)
-            x[!grepl("^ *$",lines[x])]
-            )
+        ## result <- lapply(result,function(x)
+        ##     x[!grepl("^ *$",lines[x])]
+        ##     )
+        dt.res <- dt.res[!grepl("^ *$",text)]
     }
 
     
-    if(return=="text"){
-        result <- lapply(result,function(x)lines[x])
-    }
+    ## if(return=="text"){
+    ##     result <- lapply(result,function(x)lines[x])
+    ## }
     
     if(!keep.name){
         if(!return=="text") {
             stop("keepName can only be FALSE if return=='text'")
         }
-### todo test the addition of "[a-zA-Z]*"
-        result <- lapply(result, function(x)sub(paste0("^ *\\$",section,"[a-zA-Z]*"),"",x))
+
+        ## result <- lapply(result, function(x)sub(paste0("^ *\\$",section,"[a-zA-Z]*"),"",x))
+        ##  "[a-zA-Z]*" is needed for abbrev section names. Like for SIMULATION in case of SIM.
+        dt.res[,text:=sub(paste0("^ *\\$",section,"[a-zA-Z]*"),"",text)]
     }
 
+
     if(clean.spaces){
         if(!return=="text") {
             stop("cleanSpaces can only be TRUE if return=='text'")
         }
-        result <- lapply(result, function(x)sub(paste0("^ +"),"",x))
-        result <- lapply(result, function(x)sub(paste0(" +$"),"",x))
-        result <- lapply(result, function(x)sub(paste0(" +")," ",x))
+        cleanSpaces <- function(x,double=TRUE,lead=TRUE,trail=TRUE){
+            if(double) x <- gsub(paste0(" +")," ",x)
+            if(lead) x <- sub(paste0("^ +"),"",x)
+            if(trail) x <- sub(paste0(" +$"),"",x)
+            x
+        }
+        ## result <- lapply(result,cleanSpaces)
+        
+        dt.res[,text:=cleanSpaces(text)]
+    }
+
+    if(!keep.empty){
+        dt.res <- dt.res[!grepl("^ *$",text)]
     }
+
+    
     
-    if(as.one) {result <- do.call(c,result)}
+    if(as.one) {
+        ## result <- do.call(c,result)
+        dt.res[,nsection:=1]
+    }
+
+    ## if(simplify && length(result)==1) result <- result[[1]]
+
+    if(return=="idx") res.from.dt <- lapply(split(dt.res,by="nsection"),function(x)x[,idx])
+    if(return=="text") res.from.dt <- lapply(split(dt.res,by="nsection"),function(x)x[,text])
 
-    if(simplify && length(result)==1) result <- result[[1]]
+    if(simplify && dt.res[,uniqueN(nsection)==1]) res.from.dt <- res.from.dt[[1]]
     
 
-    return (result)
+    ## return (result)
+    return (res.from.dt)
     
 }
diff --git a/R/NMtransInp.R b/R/NMtransInp.R
index db846e2c..da5a3627 100644
--- a/R/NMtransInp.R
+++ b/R/NMtransInp.R
@@ -25,7 +25,7 @@ NMtransInp <- function(data,file,translate=TRUE,recover.cols=TRUE){
     INPUT <- NULL
 
 ### Section end: Dummy variables, only not to get NOTE's in package checks ####
-
+    
     stopifnot(is.data.table(data))
     
 #### this should be supported now
diff --git a/R/NMwriteData.R b/R/NMwriteData.R
index a2cbb6d2..6583886f 100644
--- a/R/NMwriteData.R
+++ b/R/NMwriteData.R
@@ -376,7 +376,7 @@ NMwriteData <- function(data,file,formats.write=c("csv","rds"),
         file.fst <- fnExtension(file,".fst")
         ##     if(doStamp) data <- do.call(NMstamp,append(list(data=data,writtenTo=file.fst),args.stamp))
         do.call(write_fst,append(list(x=data,path=file.fst),args.write_fst))
-        ##     files.written <- c(files.written,file.rds)
+        files.written <- c(files.written,file.fst)
     }
 
     ## write meta data for csv and fst
diff --git a/tests/testthat/testReference/NMextractText_2.rds b/tests/testthat/testReference/NMextractText_2.rds
index b7bfa67499033094d346c54194f118a16a177286..00166ef40a7e51ff019e5f5e98b9ba4857a95b64 100644
GIT binary patch
literal 105
zcmb2|=3oE==I#ec2?+^F35h8w2}x{5k}M4~rZ%=V3VU>~ZmHPOW}&5#A@M@uMP!y%
zgf`0$_6qByiDnm#o4l-lCrmWEX58%6F6p>bb;1(guR1)5pN>Urojt?hDKn#kL|e}$
GpbY?ip(ft|

literal 37
ocmb2|=3oE=w(bW>2?+^F35h8w2}x{5lK&VPJf>G|6#~iv0IvE8!vFvP

diff --git a/tests/testthat/test_NMextractText.R b/tests/testthat/test_NMextractText.R
index 28322265..956b52cf 100644
--- a/tests/testthat/test_NMextractText.R
+++ b/tests/testthat/test_NMextractText.R
@@ -16,8 +16,12 @@ test_that("basic",{
 test_that("basic - lst mode",{
 
     fileRef <- "testReference/NMextractText_2.rds"
-    file.lst <- "testData/nonmem/xgxr004.lst"
-    res1 <- NMextractText(file.lst,section="THETA",char.section="\\$",type="lst")
 
+    ## readRDS(fileRef)
+    file.lst <- "testData/nonmem/xgxr004.lst"
+    res1 <- NMextractText(file.lst,section="THETA",char.section="\\$",type="mod")
     expect_equal_to_reference(res1,fileRef,version=2)
+
+    res1 <- NMextractText(file.lst,section="THETA",char.section="\\$",type="lst")
+    expect_null(res1)
 })
diff --git a/tests/testthat/test_NMwriteSection.R b/tests/testthat/test_NMwriteSection.R
index 0ec08420..2d9fbab5 100644
--- a/tests/testthat/test_NMwriteSection.R
+++ b/tests/testthat/test_NMwriteSection.R
@@ -128,13 +128,15 @@ test_that("update INPUT based on NMgenText",{
 
     text.nm <- NMgenText(NMreadCsv("testData/data/xgxr2.csv"),capitalize = T,width=95)
     res <- NMwriteSection("testData/nonmem/xgxr011.mod",
-                          list.section=text.nm["INPUT"],newfile=NULL
+                          list.section=text.nm["INPUT"],
+                          newfile=NULL
                           )
 
     ##input.new
     input.new <- NMreadSection(lines=res,section="input")
     
     expect_equal_to_reference(input.new,fileRef)
+    
 })
 
 
@@ -186,3 +188,4 @@ test_that("No newfile supplied",{
     expect_equal_to_reference(res,fileRef,version=2)
 
 })
+