Supporting #9

dipterix · Jul 16, 2024 · d0fcebe · d0fcebe
1 parent e82a751
commit d0fcebe
Show file tree

Hide file tree

Showing 5 changed files with 78 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: filearray
 Type: Package
 Title: File-Backed Array for Out-of-Memory Computation
-Version: 0.1.6.9000
+Version: 0.1.6.9001
 Language: en-US
 Encoding: UTF-8
 License: LGPL-3

diff --git a/R/methods-subsetAssign.R b/R/methods-subsetAssign.R
@@ -24,8 +24,8 @@ fa_subsetAssign1 <- function(x, ..., value){
                 stop("SubsetAssign FileArray only allows x[] <- v or x[i,j,...] <- v (single index not allowed)")
             }
         }
-        if(length(value) != prod(dim)){
-            stop("SubsetAssign FileArray `value` length mismatch.")
+        if(!length(value) %in% c(1, prod(dim))){
+            stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
         }
         target_dim <- dim
         x$initialize_partition(x$.partition_info[, 1])
@@ -62,8 +62,8 @@ fa_subsetAssign1 <- function(x, ..., value){
         }
 
         target_dim <- sapply(locs, length)
-        if(prod(target_dim) != length(value)){
-            stop("SubsetAssign FileArray `value` length mismatch.")
+        if(!length(value) %in% c(1, prod(target_dim))){
+            stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
         }
 
         # make sure partitions exist

diff --git a/src/save.cpp b/src/save.cpp
@@ -226,7 +226,8 @@ void subset_assign_partition(
         char* conn0, T* value, const R_xlen_t block_size, 
         int64_t* idx1ptr0, R_xlen_t idx1len, 
         int64_t idx1_start, int64_t idx2_start, 
-        int64_t* idx2ptr0, R_xlen_t idx2len ) {
+        int64_t* idx2ptr0, R_xlen_t idx2len,
+        const int &value_inc = 1) {
     // TODO: swap_endian
     int elem_size = sizeof(T);
 
@@ -261,7 +262,7 @@ void subset_assign_partition(
         // block_size: 861584, idx1len: 9001, idx1_start: 216024, idx2_start: 0, idx2_len: 1
         // ### idx2ii:0, start_loc: 0, buf pos: 0, idx1_start: 216024
 
-        for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2++){
+        for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2+=value_inc){
             // calculate pointer location in the file
             // no check here, but tmp_loc should be >=0
 
@@ -287,6 +288,10 @@ struct FARRAssigner : public TinyParallel::Worker {
     const List& sch;
     T* value_ptr;
 
+    // value_ptr increment size, can either be 0 (length(value) == 1)
+    // or 1 (length(value) is the same as subset size)
+    int value_ptr_inc;
+
     SEXP idx1;
     SEXP idx1range;
     List idx2s;
@@ -306,9 +311,16 @@ struct FARRAssigner : public TinyParallel::Worker {
 
     FARRAssigner(
         const std::string& filebase,
-        const List& sch, T* value_ptr
+        const List& sch, 
+        const int64_t& value_len,
+        T* value_ptr
     ): filebase(filebase), sch(sch) {
         this->value_ptr = value_ptr;
+        if( value_len - 1 == 0 ) {
+            this->value_ptr_inc = 0;
+        } else {
+            this->value_ptr_inc = 1;
+        }
         this->idx1 = sch["idx1"];
         this->idx1range = sch["idx1range"];
         this->idx2s = sch["idx2s"];
@@ -433,7 +445,7 @@ struct FARRAssigner : public TinyParallel::Worker {
 
                 int64_t* idx2_ptr = INTEGER64(idx2);
                 R_xlen_t idx2_len = Rf_xlength(idx2);
-                T* value_ptr2 = value_ptr + (idx1len * skips);
+                T* value_ptr2 = value_ptr + (idx1len * skips) * this->value_ptr_inc;
                 int64_t* idx1ptr = idx1ptr0;
 
                 // Rcout << "block_size: " << block_size << ", idx1len: " << idx1len << ", idx1_start: " << idx1_start << 
@@ -443,7 +455,8 @@ struct FARRAssigner : public TinyParallel::Worker {
                     begin, value_ptr2,
                     block_size, idx1ptr, idx1len, 
                     idx1_start, idx2_start, 
-                    idx2_ptr, idx2_len );
+                    idx2_ptr, idx2_len,
+                    this->value_ptr_inc );
 
 
                 // region.flush();
@@ -472,8 +485,11 @@ struct FARRAssigner : public TinyParallel::Worker {
 template <typename T>
 SEXP FARR_subset_assign_template(
         const std::string& filebase, 
-        const List& sch, T* value_ptr){
-    FARRAssigner<T> assigner(filebase, sch, value_ptr);
+        const List& sch, 
+        const R_xlen_t &value_len,
+        T* value_ptr
+){
+    FARRAssigner<T> assigner(filebase, sch, value_len, value_ptr);
     assigner.save();
     return( R_NilValue );
 }
@@ -520,43 +536,44 @@ SEXP FARR_subset_assign2(
 
     // coerce vector to desired SEXP type
     SEXP value_ = PROTECT(convert_as(value, sexp_type));
-    SEXPTYPE valtype = TYPEOF(value_);
-
-    // allocate buffers
-    int ncores = getThreads();
-    std::vector<SEXP> buff_pool(ncores);
-    for(int i = 0; i < ncores; i++){
-        buff_pool[i] = PROTECT(Rf_allocVector(
-            valtype, idx1_end - idx1_start + 1));
-    }
+    // SEXPTYPE valtype = TYPEOF(value_);
+
+    // // allocate buffers
+    // int ncores = getThreads();
+    // std::vector<SEXP> buff_pool(ncores);
+    // for(int i = 0; i < ncores; i++){
+    //     buff_pool[i] = PROTECT(Rf_allocVector(
+    //         valtype, idx1_end - idx1_start + 1));
+    // }
 
 
     switch(sexp_type) {
     case INTSXP: {
-        FARR_subset_assign_template(fbase, sch, INTEGER(value_));
+        FARR_subset_assign_template(fbase, sch, XLENGTH(value_), INTEGER(value_));
         break;
     }
     case CPLXSXP:
     case REALSXP: {
-        FARR_subset_assign_template(fbase, sch, REAL(value_));
+        FARR_subset_assign_template(fbase, sch, XLENGTH(value_), REAL(value_));
         break;
     }
     case FLTSXP: {
-        FARR_subset_assign_template(fbase, sch, FLOAT(value_));
+        FARR_subset_assign_template(fbase, sch, XLENGTH(value_), FLOAT(value_));
         break;
     }
     case LGLSXP:
     case RAWSXP: {
-        FARR_subset_assign_template(fbase, sch, RAW(value_));
+        FARR_subset_assign_template(fbase, sch, XLENGTH(value_), RAW(value_));
         break;
     }
     default: {
-        UNPROTECT( 1 + ncores );
+        UNPROTECT( 1 );
         stop("SEXP type not supported.");
+        return(R_NilValue); // wall
     }
     }
 
-    UNPROTECT( 1 + ncores );
+    UNPROTECT( 1 );
     return(R_NilValue);
 
 }

diff --git a/tests/testthat/test-cpp.R b/tests/testthat/test-cpp.R
@@ -106,9 +106,13 @@ test_that("C++: IO - subset/assign", {
         lapply(dim, function(d) {
             sample(c(1:d), size = d, replace = FALSE)
         })
-    expect_error({
+    expect_no_error({
         x[locs[[1]], locs[[2]], locs[[3]]] <- 1
     })
+    expect_equal(
+        unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
+        1
+    )
 
     expect_true({
         x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))
@@ -202,10 +206,15 @@ test_that("C++: IO - subset/assign - complex", {
         lapply(dim, function(d) {
             sample(c(1:d), size = d, replace = FALSE)
         })
-    expect_error({
+    expect_no_error({
         x[locs[[1]], locs[[2]], locs[[3]]] <- 1
     })
 
+    expect_equal(
+        unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
+        1 + 0i
+    )
+
     expect_true({
         x[locs[[1]], locs[[2]], locs[[3]]] <- tmp[1:prod(sapply(locs, length))]
         TRUE
@@ -303,9 +312,14 @@ test_that("C++: IO - subset/assign - float", {
         lapply(dim, function(d) {
             sample(c(1:d), size = d, replace = FALSE)
         })
-    expect_error({
+    expect_no_error({
         x[locs[[1]], locs[[2]], locs[[3]]] <- 1
     })
+    expect_equal(
+        unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
+        1
+    )
+
 
     expect_true({
         x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))

diff --git a/vignettes/performance.Rmd b/vignettes/performance.Rmd
@@ -27,7 +27,7 @@ The simulation was performed on `MacBook Air 2020 (M1 Chip, ARM, 8GB RAM)`, with
 
 We mainly test the performance of `double` and `float` data type. The dimensions for both arrays are `100x100x100x100`. Both arrays are around `800MB` in native R. This is because R does not have float precision. However, while `double` array occupies `800MB` space on the hard disk, `float` array only uses half size (`400MB`).
 
-```{r setup}
+```{r setup, eval = FALSE}
 library(filearray)
 
 options(digits = 3)
@@ -69,7 +69,7 @@ Writing along margins refer to something like `x[,,,i] <- v` (along the last mar
 
 1.  partition margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:100){
@@ -91,7 +91,7 @@ microbenchmark::microbenchmark(
 
 2. Write along fast margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:100){
@@ -113,7 +113,7 @@ microbenchmark::microbenchmark(
 
 3. Writing along slow margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:100){
@@ -140,7 +140,7 @@ Instead of writing one slice at a time along each margin, we write `100x100x100x
 
 1.  Write blocks of data along the partition margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:10){
@@ -164,7 +164,7 @@ microbenchmark::microbenchmark(
 
 2. Write blocks of data along the fast margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:10){
@@ -188,7 +188,7 @@ microbenchmark::microbenchmark(
 
 3. Write blocks of data along slow margin
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = {
     for(i in 1:10){
@@ -214,7 +214,7 @@ microbenchmark::microbenchmark(
 
 ### 1. Read the whole array
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   double = { x_dbl[] },
   float = { x_flt[] },
@@ -229,7 +229,7 @@ microbenchmark::microbenchmark(
 
 ### 2. Read along margins
 
-```{r}
+```{r, eval = FALSE}
 microbenchmark::microbenchmark(
   farr_double_partition_margin = { x_dbl[,,,1] },
   farr_double_fast_margin = { x_dbl[,,1,] },
@@ -260,7 +260,7 @@ The file array indexing is close to handling in-memory arrays in R!
 
 ### 3. Random access
 
-```{r}
+```{r, eval = FALSE}
 # access 50 x 50 x 50 x 50 sub-array, with random indices
 idx1 <- sample(1:100, 50)
 idx2 <- sample(1:100, 50)
@@ -287,26 +287,26 @@ Random access could be faster than base R (also much less memory!)
 
 Collapse calculates the margin sum/mean. Collapse function in `filearray` uses single thread. This is because the bottle-neck often comes from hard-disk accessing speed. However, it is still faster than native R, and is more memory-efficient.
 
-```{r}
+```{r, eval = FALSE}
 keep <- c(2, 4)
 output <- filearray_create(tempfile(), dim(x_dbl)[keep])
 output$initialize_partition()
 microbenchmark::microbenchmark(
   farr_double = { x_dbl$collapse(keep = keep, method = "sum") },
   farr_float = { x_flt$collapse(keep = keep, method = "sum") },
   native = { apply(y, keep, sum) },
-  dipsaus = { dipsaus::collapse(y, keep, average = FALSE) },
+  ravetools = { ravetools::collapse(y, keep, average = FALSE) },
   unit = "s", times = 5
 )
 
 #> Unit: seconds
 #>         expr   min    lq  mean median    uq   max neval
-#>  farr_double 0.782 0.790 1.009  0.799 0.832 1.840     5
-#>   farr_float 0.765 0.779 0.929  0.930 1.043 1.127     5
-#>       native 0.964 1.174 1.222  1.213 1.370 1.390     5
-#>      dipsaus 0.185 0.190 0.202  0.199 0.203 0.233     5
+#>  farr_double 0.651 0.666 0.867  0.716 0.718 1.583     5
+#>   farr_float 0.628 0.637 0.737  0.642 0.652 1.124     5
+#>       native 1.011 1.029 1.128  1.078 1.207 1.316     5
+#>    ravetools 0.109 0.110 0.126  0.131 0.138 0.139     5
 ```
 
-The `dipsaus` package uses multiple threads to collapse arrays in-memory. It is `7~8x` as fast as base R. File array is `1~2x` as fast as base R. Both `dipsaus` and `filearray` have little memory over-heads.
+The `ravetools` package uses multiple threads to collapse arrays in-memory. It is `7~8x` as fast as base R. File array is `1.5~2x` as fast as base R. Both `ravetools` and `filearray` have little memory over-heads.