Skip to content

Commit

Permalink
Supporting #9
Browse files Browse the repository at this point in the history
  • Loading branch information
dipterix committed Jul 16, 2024
1 parent e82a751 commit d0fcebe
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 47 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: filearray
Type: Package
Title: File-Backed Array for Out-of-Memory Computation
Version: 0.1.6.9000
Version: 0.1.6.9001
Language: en-US
Encoding: UTF-8
License: LGPL-3
Expand Down
8 changes: 4 additions & 4 deletions R/methods-subsetAssign.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ fa_subsetAssign1 <- function(x, ..., value){
stop("SubsetAssign FileArray only allows x[] <- v or x[i,j,...] <- v (single index not allowed)")
}
}
if(length(value) != prod(dim)){
stop("SubsetAssign FileArray `value` length mismatch.")
if(!length(value) %in% c(1, prod(dim))){
stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
}
target_dim <- dim
x$initialize_partition(x$.partition_info[, 1])
Expand Down Expand Up @@ -62,8 +62,8 @@ fa_subsetAssign1 <- function(x, ..., value){
}

target_dim <- sapply(locs, length)
if(prod(target_dim) != length(value)){
stop("SubsetAssign FileArray `value` length mismatch.")
if(!length(value) %in% c(1, prod(target_dim))){
stop("SubsetAssign FileArray `value` length mismatch: `value` length must be either 1 or the same length of the subset.")
}

# make sure partitions exist
Expand Down
61 changes: 39 additions & 22 deletions src/save.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ void subset_assign_partition(
char* conn0, T* value, const R_xlen_t block_size,
int64_t* idx1ptr0, R_xlen_t idx1len,
int64_t idx1_start, int64_t idx2_start,
int64_t* idx2ptr0, R_xlen_t idx2len ) {
int64_t* idx2ptr0, R_xlen_t idx2len,
const int &value_inc = 1) {
// TODO: swap_endian
int elem_size = sizeof(T);

Expand Down Expand Up @@ -261,7 +262,7 @@ void subset_assign_partition(
// block_size: 861584, idx1len: 9001, idx1_start: 216024, idx2_start: 0, idx2_len: 1
// ### idx2ii:0, start_loc: 0, buf pos: 0, idx1_start: 216024

for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2++){
for(idx1ii = 0; idx1ii < idx1len; idx1ii++, idx1ptr++, valptr2+=value_inc){
// calculate pointer location in the file
// no check here, but tmp_loc should be >=0

Expand All @@ -287,6 +288,10 @@ struct FARRAssigner : public TinyParallel::Worker {
const List& sch;
T* value_ptr;

// value_ptr increment size, can either be 0 (length(value) == 1)
// or 1 (length(value) is the same as subset size)
int value_ptr_inc;

SEXP idx1;
SEXP idx1range;
List idx2s;
Expand All @@ -306,9 +311,16 @@ struct FARRAssigner : public TinyParallel::Worker {

FARRAssigner(
const std::string& filebase,
const List& sch, T* value_ptr
const List& sch,
const int64_t& value_len,
T* value_ptr
): filebase(filebase), sch(sch) {
this->value_ptr = value_ptr;
if( value_len - 1 == 0 ) {
this->value_ptr_inc = 0;
} else {
this->value_ptr_inc = 1;
}
this->idx1 = sch["idx1"];
this->idx1range = sch["idx1range"];
this->idx2s = sch["idx2s"];
Expand Down Expand Up @@ -433,7 +445,7 @@ struct FARRAssigner : public TinyParallel::Worker {

int64_t* idx2_ptr = INTEGER64(idx2);
R_xlen_t idx2_len = Rf_xlength(idx2);
T* value_ptr2 = value_ptr + (idx1len * skips);
T* value_ptr2 = value_ptr + (idx1len * skips) * this->value_ptr_inc;
int64_t* idx1ptr = idx1ptr0;

// Rcout << "block_size: " << block_size << ", idx1len: " << idx1len << ", idx1_start: " << idx1_start <<
Expand All @@ -443,7 +455,8 @@ struct FARRAssigner : public TinyParallel::Worker {
begin, value_ptr2,
block_size, idx1ptr, idx1len,
idx1_start, idx2_start,
idx2_ptr, idx2_len );
idx2_ptr, idx2_len,
this->value_ptr_inc );


// region.flush();
Expand Down Expand Up @@ -472,8 +485,11 @@ struct FARRAssigner : public TinyParallel::Worker {
template <typename T>
SEXP FARR_subset_assign_template(
const std::string& filebase,
const List& sch, T* value_ptr){
FARRAssigner<T> assigner(filebase, sch, value_ptr);
const List& sch,
const R_xlen_t &value_len,
T* value_ptr
){
FARRAssigner<T> assigner(filebase, sch, value_len, value_ptr);
assigner.save();
return( R_NilValue );
}
Expand Down Expand Up @@ -520,43 +536,44 @@ SEXP FARR_subset_assign2(

// coerce vector to desired SEXP type
SEXP value_ = PROTECT(convert_as(value, sexp_type));
SEXPTYPE valtype = TYPEOF(value_);

// allocate buffers
int ncores = getThreads();
std::vector<SEXP> buff_pool(ncores);
for(int i = 0; i < ncores; i++){
buff_pool[i] = PROTECT(Rf_allocVector(
valtype, idx1_end - idx1_start + 1));
}
// SEXPTYPE valtype = TYPEOF(value_);

// // allocate buffers
// int ncores = getThreads();
// std::vector<SEXP> buff_pool(ncores);
// for(int i = 0; i < ncores; i++){
// buff_pool[i] = PROTECT(Rf_allocVector(
// valtype, idx1_end - idx1_start + 1));
// }


switch(sexp_type) {
case INTSXP: {
FARR_subset_assign_template(fbase, sch, INTEGER(value_));
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), INTEGER(value_));
break;
}
case CPLXSXP:
case REALSXP: {
FARR_subset_assign_template(fbase, sch, REAL(value_));
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), REAL(value_));
break;
}
case FLTSXP: {
FARR_subset_assign_template(fbase, sch, FLOAT(value_));
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), FLOAT(value_));
break;
}
case LGLSXP:
case RAWSXP: {
FARR_subset_assign_template(fbase, sch, RAW(value_));
FARR_subset_assign_template(fbase, sch, XLENGTH(value_), RAW(value_));
break;
}
default: {
UNPROTECT( 1 + ncores );
UNPROTECT( 1 );
stop("SEXP type not supported.");
return(R_NilValue); // wall
}
}

UNPROTECT( 1 + ncores );
UNPROTECT( 1 );
return(R_NilValue);

}
Expand Down
20 changes: 17 additions & 3 deletions tests/testthat/test-cpp.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,13 @@ test_that("C++: IO - subset/assign", {
lapply(dim, function(d) {
sample(c(1:d), size = d, replace = FALSE)
})
expect_error({
expect_no_error({
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
})
expect_equal(
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
1
)

expect_true({
x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))
Expand Down Expand Up @@ -202,10 +206,15 @@ test_that("C++: IO - subset/assign - complex", {
lapply(dim, function(d) {
sample(c(1:d), size = d, replace = FALSE)
})
expect_error({
expect_no_error({
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
})

expect_equal(
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
1 + 0i
)

expect_true({
x[locs[[1]], locs[[2]], locs[[3]]] <- tmp[1:prod(sapply(locs, length))]
TRUE
Expand Down Expand Up @@ -303,9 +312,14 @@ test_that("C++: IO - subset/assign - float", {
lapply(dim, function(d) {
sample(c(1:d), size = d, replace = FALSE)
})
expect_error({
expect_no_error({
x[locs[[1]], locs[[2]], locs[[3]]] <- 1
})
expect_equal(
unique(as.vector(x[locs[[1]], locs[[2]], locs[[3]]])),
1
)


expect_true({
x[locs[[1]], locs[[2]], locs[[3]]] <- 1:prod(sapply(locs, length))
Expand Down
34 changes: 17 additions & 17 deletions vignettes/performance.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The simulation was performed on `MacBook Air 2020 (M1 Chip, ARM, 8GB RAM)`, with

We mainly test the performance of `double` and `float` data type. The dimensions for both arrays are `100x100x100x100`. Both arrays are around `800MB` in native R. This is because R does not have float precision. However, while `double` array occupies `800MB` space on the hard disk, `float` array only uses half size (`400MB`).

```{r setup}
```{r setup, eval = FALSE}
library(filearray)
options(digits = 3)
Expand Down Expand Up @@ -69,7 +69,7 @@ Writing along margins refer to something like `x[,,,i] <- v` (along the last mar

1. partition margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:100){
Expand All @@ -91,7 +91,7 @@ microbenchmark::microbenchmark(

2. Write along fast margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:100){
Expand All @@ -113,7 +113,7 @@ microbenchmark::microbenchmark(

3. Writing along slow margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:100){
Expand All @@ -140,7 +140,7 @@ Instead of writing one slice at a time along each margin, we write `100x100x100x

1. Write blocks of data along the partition margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:10){
Expand All @@ -164,7 +164,7 @@ microbenchmark::microbenchmark(

2. Write blocks of data along the fast margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:10){
Expand All @@ -188,7 +188,7 @@ microbenchmark::microbenchmark(

3. Write blocks of data along slow margin

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = {
for(i in 1:10){
Expand All @@ -214,7 +214,7 @@ microbenchmark::microbenchmark(

### 1. Read the whole array

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
double = { x_dbl[] },
float = { x_flt[] },
Expand All @@ -229,7 +229,7 @@ microbenchmark::microbenchmark(

### 2. Read along margins

```{r}
```{r, eval = FALSE}
microbenchmark::microbenchmark(
farr_double_partition_margin = { x_dbl[,,,1] },
farr_double_fast_margin = { x_dbl[,,1,] },
Expand Down Expand Up @@ -260,7 +260,7 @@ The file array indexing is close to handling in-memory arrays in R!

### 3. Random access

```{r}
```{r, eval = FALSE}
# access 50 x 50 x 50 x 50 sub-array, with random indices
idx1 <- sample(1:100, 50)
idx2 <- sample(1:100, 50)
Expand All @@ -287,26 +287,26 @@ Random access could be faster than base R (also much less memory!)

Collapse calculates the margin sum/mean. Collapse function in `filearray` uses single thread. This is because the bottle-neck often comes from hard-disk accessing speed. However, it is still faster than native R, and is more memory-efficient.

```{r}
```{r, eval = FALSE}
keep <- c(2, 4)
output <- filearray_create(tempfile(), dim(x_dbl)[keep])
output$initialize_partition()
microbenchmark::microbenchmark(
farr_double = { x_dbl$collapse(keep = keep, method = "sum") },
farr_float = { x_flt$collapse(keep = keep, method = "sum") },
native = { apply(y, keep, sum) },
dipsaus = { dipsaus::collapse(y, keep, average = FALSE) },
ravetools = { ravetools::collapse(y, keep, average = FALSE) },
unit = "s", times = 5
)
#> Unit: seconds
#> expr min lq mean median uq max neval
#> farr_double 0.782 0.790 1.009 0.799 0.832 1.840 5
#> farr_float 0.765 0.779 0.929 0.930 1.043 1.127 5
#> native 0.964 1.174 1.222 1.213 1.370 1.390 5
#> dipsaus 0.185 0.190 0.202 0.199 0.203 0.233 5
#> farr_double 0.651 0.666 0.867 0.716 0.718 1.583 5
#> farr_float 0.628 0.637 0.737 0.642 0.652 1.124 5
#> native 1.011 1.029 1.128 1.078 1.207 1.316 5
#> ravetools 0.109 0.110 0.126 0.131 0.138 0.139 5
```

The `dipsaus` package uses multiple threads to collapse arrays in-memory. It is `7~8x` as fast as base R. File array is `1~2x` as fast as base R. Both `dipsaus` and `filearray` have little memory over-heads.
The `ravetools` package uses multiple threads to collapse arrays in-memory. It is `7~8x` as fast as base R. File array is `1.5~2x` as fast as base R. Both `ravetools` and `filearray` have little memory over-heads.


0 comments on commit d0fcebe

Please sign in to comment.