From c09b0385d288848d72cb35770bc4fd52cf9c18cb Mon Sep 17 00:00:00 2001 From: Alex Peters <80283343+alexpeters1208@users.noreply.github.com> Date: Tue, 15 Aug 2023 15:31:46 -0500 Subject: [PATCH] R Client 2.0 - Table Ops (#4240) * Delete outdated testing files * Continue api expansion * More api stuff * Continue API expansion * Document new methods, start unit testing file. Roxygen not called yet * Rearrange methods, change gitignore, generate roxygen * Continue API expansion, do not expose all package functions to user * Get rid of experimenting that should not have been committed * Clean up R wrapping, implement agg_by, configure NAMESPACE to only expose relevant functions via roxygen tags * Refactor type conversion logic, R-level type checking, actually add NAMESPACE this time * Fix helper functions, add Aggregation to NAMESPACE * Fix error handling, move unit tests to install automatically * Update README to simplify install command in light of automatic test installation * Add Sorter class to make tableHandle.sort() work, change functions to not conflict with S3, more error handling, more unit tests * More unit tests * Overhaul table ops API for Dplyr feel * More unit tests * Simplify sorting API, more unit tests * Reorder table operations logically, connect ClientWrapper to newly exposed methods in C++ * Fix new table methods, add tests, add conversion functions to be used in queries * Update generics * Reorder agg_by ops, start agg_by testing * enable empty _by and agg ops * agg_by testing * Finish agg_by testing * Add table ops documentation, not hooked into R yet * Fix unit tests after renames * More unit tests * Even more unit tests * Naming consistency in table operations * Group_by and ungroup tests * empty_table and time_table tests * Join unit tests * Add close method * Apply Tidyverse style guide * Fix rbind implementation * Update DESCRIPTION w/ version * Fix problems from merge conflict * Refactor helper functions * Correction to helper function refactor * Add new clientOptionsWrapper in S4. Does not support piping * More rewrite * Working unit tests, refactor complete * Apply code review comments * Implement merge * Code review change * Cpp formatting * Test base pipe * Code review suggestions * Refactor import_table to as_dh_table with generics * Applying more code review * Applying even more code review * Update cpp to conform to new styleguide, need to uncomment pct when cpp fixed * Uncomment pct, all tests pass * Refactor auth API to conform to Python * Support multiple int/string options and extra headers * Code review to helper functions * Rename connect to dhConnect * Update NAMESPACE, add warnings, apply styleguide * Enable size 0 empty_table * Assert options inputs * Starting client_wrapper docs * Support ncol() and dim() * Get rid of mistakenly committed docs, add list type verification in merge * Code review suggestions * Verified that exact_join is an analog of left_join * Expand sort API, constrain merge args, add username and password to dhConnect * Apply code review * Kill merge, make merge_tables * Update version * Change to abs_sort * Support merge_tables(NULL) * Change push_to_table to import_table --- R/rdeephaven/.gitignore | 4 +- R/rdeephaven/DESCRIPTION | 4 +- R/rdeephaven/NAMESPACE | 82 +- R/rdeephaven/R/aggregate_wrapper.R | 89 ++ R/rdeephaven/R/client_options_wrapper.R | 117 --- R/rdeephaven/R/client_wrapper.R | 371 +++++--- R/rdeephaven/R/exports.R | 4 + R/rdeephaven/R/helper_functions.R | 98 +- R/rdeephaven/R/table_handle_wrapper.R | 247 ++--- R/rdeephaven/R/table_ops.R | 546 +++++++++++ R/rdeephaven/README.md | 2 +- R/rdeephaven/{ => inst}/tests/testthat.R | 0 .../inst/tests/testthat/test_agg_by.R | 797 ++++++++++++++++ .../tests/testthat/test_aggregate_wrapper.R | 185 ++++ .../inst/tests/testthat/test_client_wrapper.R | 309 +++++++ .../testthat/test_table_handle_wrapper.R | 233 +++++ .../inst/tests/testthat/test_table_ops.R | 863 ++++++++++++++++++ R/rdeephaven/man/Client.Rd | 133 --- R/rdeephaven/man/ClientOptions.Rd | 208 ----- R/rdeephaven/man/TableHandle.Rd | 167 ---- R/rdeephaven/src/client.cpp | 375 +++++++- .../testthat/test_client_options_wrapper.R | 163 ---- .../tests/testthat/test_client_wrapper.R | 215 ----- .../testthat/test_table_handle_wrapper.R | 187 ---- 24 files changed, 3951 insertions(+), 1448 deletions(-) create mode 100644 R/rdeephaven/R/aggregate_wrapper.R delete mode 100644 R/rdeephaven/R/client_options_wrapper.R create mode 100644 R/rdeephaven/R/table_ops.R rename R/rdeephaven/{ => inst}/tests/testthat.R (100%) create mode 100644 R/rdeephaven/inst/tests/testthat/test_agg_by.R create mode 100644 R/rdeephaven/inst/tests/testthat/test_aggregate_wrapper.R create mode 100644 R/rdeephaven/inst/tests/testthat/test_client_wrapper.R create mode 100644 R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R create mode 100644 R/rdeephaven/inst/tests/testthat/test_table_ops.R delete mode 100644 R/rdeephaven/man/Client.Rd delete mode 100644 R/rdeephaven/man/ClientOptions.Rd delete mode 100644 R/rdeephaven/man/TableHandle.Rd delete mode 100644 R/rdeephaven/tests/testthat/test_client_options_wrapper.R delete mode 100644 R/rdeephaven/tests/testthat/test_client_wrapper.R delete mode 100644 R/rdeephaven/tests/testthat/test_table_handle_wrapper.R diff --git a/R/rdeephaven/.gitignore b/R/rdeephaven/.gitignore index 9f651736392..ad4856e82ab 100644 --- a/R/rdeephaven/.gitignore +++ b/R/rdeephaven/.gitignore @@ -2,4 +2,6 @@ lib/cpp-client/ lib/cpp-examples/ lib/cpp-dependencies/local lib/cpp-dependencies/src -lib/cpp-dependencies/env.sh \ No newline at end of file +lib/cpp-dependencies/env.sh +*.o +*.so diff --git a/R/rdeephaven/DESCRIPTION b/R/rdeephaven/DESCRIPTION index cadc81db8a5..c255c760014 100644 --- a/R/rdeephaven/DESCRIPTION +++ b/R/rdeephaven/DESCRIPTION @@ -1,7 +1,7 @@ Package: rdeephaven Type: Package Title: R Client for Deephaven Core -Version: 1.0 +Version: 0.27.1 Date: 2023-05-12 Author: Deephaven Data Labs Maintainer: Alex Peters @@ -13,7 +13,7 @@ Description: The `rdeephaven` package provides an R API for communicating with t and bind it to a server-side variable so you can access it from any Deephaven client. Finally, you can run Python or Groovy scripts on the Deephaven server, so long as your server is equipped with that capability. License: Apache License (== 2.0) -Depends: R (> 4.1.2), Rcpp (>= 1.0.10), arrow (>= 12.0.0), R6 (>= 2.5.0), dplyr (>= 1.1.0) +Depends: R (> 4.1.2), Rcpp (>= 1.0.10), arrow (>= 12.0.0), dplyr (>= 1.1.0) Imports: Rcpp (>= 1.0.10), arrow (>= 12.0.0), R6 (>= 2.5.0), dplyr (>= 1.1.0) LinkingTo: Rcpp Suggests: testthat (>= 3.0.0) diff --git a/R/rdeephaven/NAMESPACE b/R/rdeephaven/NAMESPACE index 0a33cb56cd5..40eb78d6810 100644 --- a/R/rdeephaven/NAMESPACE +++ b/R/rdeephaven/NAMESPACE @@ -1,3 +1,79 @@ -useDynLib(rdeephaven, .registration=TRUE) -importFrom(Rcpp, evalCpp) -exportPattern("^[[:alpha:]]+") \ No newline at end of file +# Generated by roxygen2: do not edit by hand + +export(agg_abs_sum) +export(agg_avg) +export(agg_count) +export(agg_first) +export(agg_last) +export(agg_max) +export(agg_median) +export(agg_min) +export(agg_percentile) +export(agg_std) +export(agg_sum) +export(agg_var) +export(agg_w_avg) +export(merge_tables) +export(sort) +exportClasses(Aggregation) +exportClasses(Client) +exportClasses(TableHandle) +exportMethods(abs_sum_by) +exportMethods(agg_by) +exportMethods(as.data.frame) +exportMethods(as_arrow_table) +exportMethods(as_data_frame) +exportMethods(as_record_batch_reader) +exportMethods(as_tibble) +exportMethods(avg_by) +exportMethods(bind_to_variable) +exportMethods(close) +exportMethods(count_by) +exportMethods(cross_join) +exportMethods(dhConnect) +exportMethods(dim) +exportMethods(drop_columns) +exportMethods(empty_table) +exportMethods(exact_join) +exportMethods(first_by) +exportMethods(group_by) +exportMethods(head) +exportMethods(head_by) +exportMethods(is_static) +exportMethods(last_by) +exportMethods(max_by) +exportMethods(median_by) +exportMethods(min_by) +exportMethods(natural_join) +exportMethods(ncol) +exportMethods(nrow) +exportMethods(open_table) +exportMethods(percentile_by) +exportMethods(import_table) +exportMethods(run_script) +exportMethods(select) +exportMethods(sort) +exportMethods(std_by) +exportMethods(sum_by) +exportMethods(tail) +exportMethods(tail_by) +exportMethods(time_table) +exportMethods(ungroup) +exportMethods(update) +exportMethods(update_view) +exportMethods(var_by) +exportMethods(view) +exportMethods(w_avg_by) +exportMethods(where) +import(Rcpp) +importFrom(Rcpp,evalCpp) +importFrom(arrow,RecordBatchReader) +importFrom(arrow,RecordBatchStreamReader) +importFrom(arrow,Table) +importFrom(arrow,arrow_table) +importFrom(arrow,as_arrow_table) +importFrom(arrow,as_record_batch_reader) +importFrom(dplyr,as_data_frame) +importFrom(dplyr,as_tibble) +importFrom(magrittr,"%>%") +useDynLib(rdeephaven, .registration = TRUE) diff --git a/R/rdeephaven/R/aggregate_wrapper.R b/R/rdeephaven/R/aggregate_wrapper.R new file mode 100644 index 00000000000..600e278c49d --- /dev/null +++ b/R/rdeephaven/R/aggregate_wrapper.R @@ -0,0 +1,89 @@ +#' @export +setClass( + "Aggregation", + representation( + .internal_rcpp_object = "Rcpp_INTERNAL_Aggregate" + ) +) + +### All of the functions below return an instance of the above class + +#' @export +agg_first <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_first(cols))) +} + +#' @export +agg_last <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_last(cols))) +} + +#' @export +agg_min <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_min(cols))) +} + +#' @export +agg_max <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_max(cols))) +} + +#' @export +agg_sum <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_sum(cols))) +} + +#' @export +agg_abs_sum <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_abs_sum(cols))) +} + +#' @export +agg_avg <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_avg(cols))) +} + +#' @export +agg_w_avg <- function(wcol, cols = character()) { + verify_string("wcol", wcol, TRUE) + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_w_avg(wcol, cols))) +} + +#' @export +agg_median <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_median(cols))) +} + +#' @export +agg_var <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_var(cols))) +} + +#' @export +agg_std <- function(cols = character()) { + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_std(cols))) +} + +#' @export +agg_percentile <- function(percentile, cols = character()) { + verify_in_unit_interval("percentile", percentile, TRUE) + verify_string("cols", cols, FALSE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_percentile(percentile, cols))) +} + +#' @export +agg_count <- function(col) { + verify_string("col", col, TRUE) + return(new("Aggregation", .internal_rcpp_object = INTERNAL_agg_count(col))) +} diff --git a/R/rdeephaven/R/client_options_wrapper.R b/R/rdeephaven/R/client_options_wrapper.R deleted file mode 100644 index 8093f10aeec..00000000000 --- a/R/rdeephaven/R/client_options_wrapper.R +++ /dev/null @@ -1,117 +0,0 @@ -#' @title Deephaven ClientOptions -#' @description Client options provide a simple interface to the Deephaven server's authentication protocols. -#' This makes it easy to connect to a Deephaven server with any flavor of authentication, and shields the API from -#' any future changes to the underlying implementation. -#' -#' Currently, three different kinds of authentication that a Deephaven server might be using are suported: -#' -#' - "default": Default (or anonymous) authentication does not require any username or password. If -#' running the Deephaven server locally, this is probably the kind of authentication needed. -#' -#' - "basic": Basic authentication requires a standard username and password pair. -#' -#' - "custom": Custom authentication requires general key-value pairs. -#' -#' In addition to setting the authentication parameters when connecting to a client, a console can be -#' started in one of our supported server languages. Python and Groovy are currently supported, and the -#' user must ensure that the server being connected to was started with support for the desired console language. -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @examples -#' -#' # connect to a Deephaven server with a Python console running on "localhost:10000" using anonymous 'default' authentication -#' client_options <- ClientOptions$new() -#' client <- Client$new(target = "localhost:10000", client_options = client_options) -#' -#' # connect to a secure Deephaven server with a Groovy console using username/password authentication -#' client_options <- ClientOptions$new() -#' client_options$set_basic_authentication(username = "user", password = "p@ssw0rd123") -#' client_options$set_session_type("groovy") -#' client <- Client$new(target = "url/to/secure/server", client_options = client_options) -ClientOptions <- R6Class("ClientOptions", - public = list( - - #' @description - #' Create a ClientOptions instance. This will default to using default (anonymous) authentication and a Python console. - initialize = function() { - self$internal_client_options <- new(INTERNAL_ClientOptions) - }, - - #' @description - #' Use default (anonymous) authentication. If running a Deephaven server locally, this is likely the kind of authentication needed. - set_default_authentication = function() { - self$internal_client_options$set_default_authentication() - }, - - #' @description - #' Use basic (username/password based) authentication. - #' @param username Username of the account to use for authentication, supplied as a string. - #' @param password Password of the account, supplied as a string. - set_basic_authentication = function(username, password) { - .verify_string("username", username) - .verify_string("password", password) - self$internal_client_options$set_basic_authentication(username, password) - }, - - #' @description - #' Use custom (general key/value based) authentication. - #' @param auth_key Key to use for authentication, supplied as a string. - #' @param auth_value Value to use for authentication, supplied as a string. - set_custom_authentication = function(auth_key, auth_value) { - .verify_string("auth_key", auth_key) - .verify_string("auth_value", auth_value) - self$internal_client_options$set_custom_authentication(auth_key, auth_value) - }, - - #' @description - #' Set the session type of the console (e.g., "python", "groovy", etc.). The session type must be supported on the server. - #' @param session_type Desired language of the console. "python", "groovy", etc. - set_session_type = function(session_type) { - .verify_string("session_type", session_type) - self$internal_client_options$set_session_type(session_type) - }, - - #' @description - #' Use the TLS protocol in authentication and subsequent communication. - #' @param root_certs Optional PEM-encoded certificate root for server connections. Defaults to system defaults. - use_tls = function(root_certs = "") { - .verify_string("root_certs", root_certs) - self$internal_client_options$set_use_tls(TRUE) - self$internal_client_options$set_tls_root_certs(root_certs) - }, - - - #' Adds an int-valued option for the configuration of the underlying gRPC channels. - #' @param opt The option key. - #' @param val The option value. - add_int_option = function(opt, val) { - .verify_string("opt", opt) - .verify_int("val", val) - self$internal_client_options$add_int_option(opt, val) - }, - - #' @description - #' Adds a string-valued option for the configuration of the underlying gRPC channels. - #' @param opt The option key. - #' @param val The option valiue. - add_string_option = function(opt, val) { - .verify_string("opt", opt) - .verify_string("val", val) - self$internal_client_options$add_string_option(opt, val) - }, - - #' @description - #' Adds an extra header with a constant name and value to be sent with every outgoing server request. - #' @param header_name The header name - #' @param header_value The header value - add_extra_header = function(header_name, header_value) { - .verify_string("header_name", header_name) - .verify_string("header_value", header_value) - self$internal_client_options$add_extra_header(header_name, header_value) - }, - internal_client_options = NULL - ) -) diff --git a/R/rdeephaven/R/client_wrapper.R b/R/rdeephaven/R/client_wrapper.R index f6633ccb1ad..6b4be652c07 100644 --- a/R/rdeephaven/R/client_wrapper.R +++ b/R/rdeephaven/R/client_wrapper.R @@ -1,110 +1,275 @@ -#' @title The Deephaven Client -#' @description The Deephaven Client class is responsible for establishing and maintaining -#' a connection to a running Deephaven server and facilitating basic server requests. -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @examples -#' -#' # connect to the Deephaven server running on "localhost:10000" using anonymous 'default' authentication -#' client_options <- ClientOptions$new() -#' client <- Client$new(target = "localhost:10000", client_options = client_options) -#' -#' # open a table that already exists on the server -#' new_table_handle1 <- client$open_table("table_on_the_server") -#' -#' # create a new dataframe, import onto the server, and retrieve a reference -#' new_data_frame <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) -#' new_table_handle2 <- client$import_table(new_data_frame) -#' -#' # run a python script on the server (default client options specify a Python console) -#' client$run_script("print([i for i in range(10)])") -Client <- R6Class("Client", - public = list( - - #' @description - #' Connect to a running Deephaven server. - #' @param target The address of the Deephaven server. - #' @param client_options ClientOptions instance with the parameters needed to connect to the server. - #' See ?ClientOptions for more information. - initialize = function(target, client_options) { - .verify_string("target", target) - if (class(client_options)[[1]] != "ClientOptions") { - stop(paste("'client_options' should be a Deephaven ClientOptions object. Got an object of type", class(client_options)[[1]], "instead.")) +#' @export +setClass( + "Client", + representation( + .internal_rcpp_object = "Rcpp_INTERNAL_Client" + ) +) + +setGeneric( + "dhConnect", + function(target, ...) { + return(standardGeneric("dhConnect")) + }, + signature = c("target") +) + +#' @export +setMethod( + "dhConnect", + signature = c(target = "character"), + function(target, + auth_type = "anonymous", + username = "", + password = "", + auth_token = "", + session_type = "python", + use_tls = FALSE, + tls_root_certs = "", + int_options = list(), + string_options = list(), + extra_headers = list()) { + options <- new(INTERNAL_ClientOptions) + + verify_string("target", target, TRUE) + verify_string("auth_type", auth_type, TRUE) + if (auth_type == "") { + stop("'auth_type' should be a non-empty string.") + } + verify_bool("use_tls", use_tls, TRUE) + + # check if auth_type needs to be changed and set credentials accordingly + if (auth_type == "anonymous") { + options$set_default_authentication() + } else if (auth_type == "basic") { + if (((username != "") && (password != "")) && (auth_token == "")) { + verify_string("username", username, TRUE) + verify_string("password", password, TRUE) + user_pass_token = paste(username, ":", password, sep = "") + options$set_basic_authentication(user_pass_token) + } else if (((username == "") && (password == "")) && (auth_token != "")) { + verify_string("auth_token", auth_token, TRUE) + options$set_basic_authentication(auth_token) + } else if (((username != "") || (password != "")) && (auth_token != "")) { + stop("Basic authentication was requested, but 'auth_token' was provided, as well as least one of 'username' and 'password'. Please provide either 'username' and 'password', or 'auth_token'.") + } else { + stop("Basic authentication was requested, but 'auth_token' was not provided, and at most one of 'username' or 'password' was provided. Please provide either 'username' and 'password', or 'auth_token'.") } - private$internal_client <- new(INTERNAL_Client, - target = target, - client_options = client_options$internal_client_options - ) - }, - - #' @description - #' Opens a table named 'name' from the server if it exists. - #' @param name Name of the table to open from the server, passed as a string. - #' @return TableHandle reference to the requested table. - open_table = function(name) { - .verify_string("name", name) - if (!private$check_for_table(name)) { - stop(paste0("The table '", name, "' you're trying to pull does not exist on the server.")) + } else { + if (auth_token != "") { + verify_string("auth_token", auth_token, TRUE) + options$set_custom_authentication(auth_type, auth_token) + } else { + stop("Custom authentication was requested, but no 'auth_token' was provided.") } - return(TableHandle$new(private$internal_client$open_table(name))) - }, - - #' @description - #' Imports a new table to the Deephaven server. Note that this new table is not automatically bound to - #' a variable name on the server. See `?TableHandle` for more information. - #' @param table_object An R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow RecordBatchReader - #' containing the data to import to the server. - #' @return TableHandle reference to the new table. - import_table = function(table_object) { - table_object_class <- class(table_object) - if (table_object_class[[1]] == "data.frame") { - return(TableHandle$new(private$df_to_dh_table(table_object))) + } + + # set session type if a valid session type is provided + if ((session_type == "python") || (session_type == "groovy")) { + options$set_session_type(session_type) + } else { + stop(paste0("'session_type' must be 'python' or 'groovy', but got ", session_type, ".")) + } + + # if tls is requested, set it and set the root_certs if provided + if (use_tls == TRUE) { + options$set_use_tls() + if (tls_root_certs != "") { + verify_string("tls_root_certs", tls_root_certs, TRUE) + options$set_tls_root_certs(tls_root_certs) } - if (table_object_class[[1]] == "tbl_df") { - return(TableHandle$new(private$tibble_to_dh_table(table_object))) - } else if (table_object_class[[1]] == "RecordBatchReader") { - return(TableHandle$new(private$rbr_to_dh_table(table_object))) - } else if ((length(table_object_class) == 4 && - table_object_class[[1]] == "Table" && - table_object_class[[3]] == "ArrowObject")) { - return(TableHandle$new(private$arrow_to_dh_table(table_object))) - } else { - stop(paste0("'table_object' must be either an R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow Record Batch Reader. Got an object of class ", table_object_class[[1]], " instead.")) + } + + # set extra header options if they are provided + if (length(int_options) != 0) { + verify_list("int_options", int_options, TRUE) + for (key in names(int_options)) { + verify_string("key", key, TRUE) + verify_int("value", int_options[[key]], TRUE) + options$add_int_options(key, int_options[[key]]) + } + } + + if (length(string_options) != 0) { + verify_list("string_options", string_options, TRUE) + for (key in names(string_options)) { + verify_string("key", key, TRUE) + verify_string("value", string_options[[key]], TRUE) + options$add_string_options(key, string_options[[key]]) } - }, - - #' @description - #' Runs a script on the server. The script must be in the language that the server console was started with. - #' @param script Code to be executed on the server, passed as a string. - run_script = function(script) { - .verify_string("script", script) - private$internal_client$run_script(script) } - ), - private = list( - internal_client = NULL, - check_for_table = function(name) { - return(private$internal_client$check_for_table(name)) - }, - rbr_to_dh_table = function(rbr) { - ptr <- private$internal_client$new_arrow_array_stream_ptr() - rbr$export_to_c(ptr) - return(private$internal_client$new_table_from_arrow_array_stream_ptr(ptr)) - }, - arrow_to_dh_table = function(arrow_tbl) { - rbr <- as_record_batch_reader(arrow_tbl) - return(private$rbr_to_dh_table(rbr)) - }, - tibble_to_dh_table = function(tibbl) { - arrow_tbl <- arrow_table(tibbl) - return(private$arrow_to_dh_table(arrow_tbl)) - }, - df_to_dh_table = function(data_frame) { - arrow_tbl <- arrow_table(data_frame) - return(private$arrow_to_dh_table(arrow_tbl)) + + if (length(extra_headers) != 0) { + verify_list("extra_headers", extra_headers, TRUE) + for (key in names(extra_headers)) { + verify_string("key", key, TRUE) + verify_string("value", extra_headers[[key]], TRUE) + options$add_extra_headers(key, extra_headers[[key]]) + } } - ) + + if ((auth_token != "") && (auth_type == "anonymous")) { + warning("'auth_token' was set but it will not be used, as 'auth_type' is 'anonymous'.") + } + + if (((username != "") || (password != "")) && auth_type != "basic") { + warning("At least one of 'username' and 'password' were set but they will not be used, as 'auth_type' is not 'basic'.") + } + + if ((tls_root_certs != "") && (use_tls == FALSE)) { + warning("'tls_root_certs' was set but it will not be used, as 'use_tls' is FALSE.") + } + + internal_client <- new(INTERNAL_Client, + target = target, + client_options = options + ) + return(new("Client", .internal_rcpp_object = internal_client)) + } +) + +### HELPER FUNCTIONS ### + +check_for_table <- function(client, name) { + return(client@.internal_rcpp_object$check_for_table(name)) +} + +### USER-FACING METHODS ### + +setGeneric( + "open_table", + function(client_instance, name) { + return(standardGeneric("open_table")) + }, + signature = c("client_instance", "name") +) + +#' @export +setMethod( + "open_table", + signature = c(client_instance = "Client", name = "character"), + function(client_instance, name) { + verify_string("name", name, TRUE) + if (!check_for_table(client_instance, name)) { + stop(paste0("The table '", name, "' does not exist on the server.")) + } + return(new("TableHandle", .internal_rcpp_object = client_instance@.internal_rcpp_object$open_table(name))) + } +) + +setGeneric( + "empty_table", + function(client_instance, size) { + return(standardGeneric("empty_table")) + }, + signature = c("client_instance", "size") +) + +#' @export +setMethod( + "empty_table", + signature = c(client_instance = "Client", size = "numeric"), + function(client_instance, size) { + verify_nonnegative_int("size", size, TRUE) + return(new("TableHandle", .internal_rcpp_object = client_instance@.internal_rcpp_object$empty_table(size))) + } +) + +setGeneric( + "time_table", + function(client_instance, period, ...) { + return(standardGeneric("time_table")) + }, + signature = c("client_instance", "period") +) + +#' @export +setMethod( + "time_table", + signature = c(client_instance = "Client", period = "numeric"), + function(client_instance, period, start_time = 0) { + verify_any_int("period", period, TRUE) + verify_any_int("start_time", start_time, TRUE) + return(new("TableHandle", .internal_rcpp_object = client_instance@.internal_rcpp_object$time_table(start_time, period))) + } +) + +setGeneric( + "import_table", + function(client_instance, table_object) { + return(standardGeneric("import_table")) + }, + signature = c("client_instance", "table_object") +) + +#' @export +setMethod( + "import_table", + signature = c(client_instance = "Client", table_object = "RecordBatchReader"), + function(client_instance, table_object) { + ptr <- client_instance@.internal_rcpp_object$new_arrow_array_stream_ptr() + table_object$export_to_c(ptr) + return( + new("TableHandle", + .internal_rcpp_object = client_instance@.internal_rcpp_object$new_table_from_arrow_array_stream_ptr(ptr) + ) + ) + } +) + +#' @export +setMethod( + "import_table", + signature = c(client_instance = "Client", table_object = "Table"), + function(client_instance, table_object) { + return(import_table(client_instance, as_record_batch_reader(table_object))) + } +) + +#' @export +setMethod( + "import_table", + signature = c(client_instance = "Client", table_object = "tbl_df"), + function(client_instance, table_object) { + return(import_table(client_instance, arrow_table(table_object))) + } +) + +#' @export +setMethod( + "import_table", + signature = c(client_instance = "Client", table_object = "data.frame"), + function(client_instance, table_object) { + return(import_table(client_instance, arrow_table(table_object))) + } +) + +setGeneric( + "run_script", + function(client_instance, script) { + return(standardGeneric("run_script")) + }, + signature = c("client_instance", "script") +) + +#' @export +setMethod( + "run_script", + signature = c(client_instance = "Client", script = "character"), + function(client_instance, script) { + verify_string("script", script, TRUE) + client_instance@.internal_rcpp_object$run_script(script) + return(NULL) + } +) + +# do not need to set generic for 'close', as it already exists as a generic +#' @export +setMethod( + "close", + signature = c(con = "Client"), + function(con) { + con@.internal_rcpp_object$close() + return(NULL) + } ) diff --git a/R/rdeephaven/R/exports.R b/R/rdeephaven/R/exports.R index 0c34ac82bc4..e786e262108 100644 --- a/R/rdeephaven/R/exports.R +++ b/R/rdeephaven/R/exports.R @@ -1,5 +1,9 @@ #' @import Rcpp #' @useDynLib rdeephaven, .registration = TRUE #' @importFrom Rcpp evalCpp +#' +#' @importFrom magrittr %>% +#' @importFrom arrow arrow_table as_arrow_table as_record_batch_reader Table RecordBatchReader RecordBatchStreamReader +#' @importFrom dplyr as_tibble as_data_frame loadModule("DeephavenInternalModule", TRUE) diff --git a/R/rdeephaven/R/helper_functions.R b/R/rdeephaven/R/helper_functions.R index ffc75302ae5..9c2391dbeab 100644 --- a/R/rdeephaven/R/helper_functions.R +++ b/R/rdeephaven/R/helper_functions.R @@ -1,18 +1,90 @@ -.verify_string <- function(arg_name, string_candidate) { - if (class(string_candidate)[[1]] != "character") { - stop(paste0("'", arg_name, "' must be passed as a single string. Got an object of class ", class(string_candidate)[[1]], " instead.")) - } else if (length(string_candidate) != 1) { - stop(paste0("'", arg_name, "' must be passed as a single string. Got a character vector of length ", length(string_candidate), " instead.")) +first_class <- function(arg) { + return(class(arg)[[1]]) +} + +verify_type <- function(arg_name, candidate, required_type, message_type_name, is_scalar) { + if (!is_scalar && (first_class(candidate) == "list")) { + if (any(lapply(candidate, first_class) != required_type)) { + stop(paste0("'", arg_name, "' must be a ", message_type_name, ", or a vector of ", message_type_name, "s. Got a vector with at least one element that is not a ", message_type_name, ".")) + } + } else if (is_scalar && (first_class(candidate) == "list")) { + if (first_class(candidate[[1]]) != required_type) { + stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got an object of class ", first_class(candidate), ".")) + } else if (length(candidate) != 1) { + stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got a vector of length ", length(candidate), ".")) + } + } else if (first_class(candidate) != required_type) { + if (!is_scalar) { + stop(paste0("'", arg_name, "' must be a ", message_type_name, " or a vector of ", message_type_name, "s. Got an object of class ", first_class(candidate), ".")) + } else { + stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got an object of class ", first_class(candidate), ".")) + } + } else if (is_scalar && (length(candidate) != 1)) { + stop(paste0("'", arg_name, "' must be a single ", message_type_name, ". Got a vector of length ", length(candidate), ".")) + } +} + +# does not attempt to verify that candidate is numeric +verify_in_range <- function(arg_name, candidate, message, lb, ub, lb_open, ub_open) { + if (((!is.null(lb)) && ((any(candidate <= lb) && (lb_open)) || (any(candidate < lb) && (!lb_open)))) || + ((!is.null(ub)) && ((any(candidate >= ub) && (ub_open)) || (any(candidate > ub) && (!ub_open))))) { + if (length(candidate) == 1) { + stop(paste0("'", arg_name, "' must be ", message, ". Got '", arg_name, "' = ", candidate, ".")) + } else { + stop(paste0("Every element of '", arg_name, "' must be ", message, ". Got at least one element that is not ", message, ".")) + } } } -.verify_int <- function(arg_name, int_candidate) { - if (class(int_candidate)[[1]] != "numeric") { - stop(paste0("'", arg_name, "' must be an integer. Got an object of class ", class(int_candidate)[[1]], " instead.")) - } else if (all.equal(int_candidate, as.integer(int_candidate)) != TRUE) { - # must use != TRUE as the result of all.equal() is not strictly boolean - stop(paste0("'", arg_name, "' must be an integer. Got a non-integer numeric type instead.")) - } else if (length(int_candidate) != 1) { - stop(paste0("'", arg_name, "' must be an integer. Got a numeric vector of length ", length(int_candidate), " instead.")) +# does not attempt to verify that candidate is numeric +verify_int <- function(arg_name, candidate) { + if (candidate != as.integer(candidate)) { + if (length(candidate == 1)) { + stop(paste0("'", arg_name, "' must be an integer. Got '", arg_name, "' = ", candidate, ".")) + } else { + stop(paste0("Every element of '", arg_name, "' must be an integer. Got at least one non-integer element.")) + } } } + +verify_string <- function(arg_name, candidate, is_scalar) { + verify_type(arg_name, candidate, "character", "string", is_scalar) +} + +verify_bool <- function(arg_name, candidate, is_scalar) { + verify_type(arg_name, candidate, "logical", "boolean", is_scalar) +} + +verify_numeric <- function(arg_name, candidate, is_scalar) { + verify_type(arg_name, candidate, "numeric", "numeric", is_scalar) +} + +verify_list <- function(arg_name, candidate, is_scalar) { + verify_type(arg_name, candidate, "list", "list", is_scalar) +} + +verify_in_unit_interval <- function(arg_name, candidate, is_scalar) { + verify_numeric(arg_name, candidate, is_scalar) + verify_in_range(arg_name, candidate, message = "between 0 and 1 inclusive", lb = 0, ub = 1, lb_open = FALSE, ub_open = FALSE) +} + +verify_any_int <- function(arg_name, candidate, is_scalar) { + verify_numeric(arg_name, candidate, is_scalar) + verify_int(arg_name, candidate) +} + +verify_nonnegative_int <- function(arg_name, candidate, is_scalar) { + verify_numeric(arg_name, candidate, is_scalar) + verify_int(arg_name, candidate) + verify_in_range(arg_name, candidate, message = "a nonnegative integer", lb = 0, ub = NULL, lb_open = FALSE, ub_open = TRUE) +} + +verify_positive_int <- function(arg_name, candidate, is_scalar) { + verify_numeric(arg_name, candidate, is_scalar) + verify_int(arg_name, candidate) + verify_in_range(arg_name, candidate, message = "a positive integer", lb = 0, ub = NULL, lb_open = TRUE, ub_open = TRUE) +} + +strip_s4_wrapping <- function(s4_object) { + return(s4_object@.internal_rcpp_object) +} diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index ad958454403..349b8aba7cb 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -1,104 +1,147 @@ -#' @title Deephaven TableHandles -#' @description Deephaven TableHandles are references to tables living on a Deephaven server. They provide an -#' interface for interacting with tables on the server. -#' -#' @usage NULL -#' @format NULL -#' @docType class -#' -#' @examples -#' -#' # connect to the Deephaven server running on "localhost:10000" using anonymous 'default' authentication -#' client_options <- ClientOptions$new() -#' client <- Client$new(target = "localhost:10000", client_options = client_options) -#' -#' # open a table that already exists on the server -#' new_table_handle1 <- client$open_table("table_on_the_server") -#' -#' # convert the Deephaven table to an R data frame -#' new_data_frame <- new_table_handle1$to_data_frame() -#' -#' # modify new data frame in R -#' new_data_frame$New_Int_Col <- c(1, 2, 3, 4, 5) -#' new_data_frame$New_String_Col <- c("I", "am", "a", "string", "column") -#' -#' # push new data frame to the server and name it "new_table" -#' new_table_handle2 <- client$import_table(new_data_frame) -#' new_table_handle2$bind_to_variable("new_table") -TableHandle <- R6Class("TableHandle", - public = list( - initialize = function(table_handle) { - if (class(table_handle)[[1]] != "Rcpp_INTERNAL_TableHandle") { - stop("'table_handle' should be an internal Deephaven TableHandle. If you're seeing this, - you are trying to call the constructor of TableHandle directly, which is not advised.") - } - private$internal_table_handle <- table_handle - private$is_static_field <- private$internal_table_handle$is_static() - }, - - #' @description - #' Whether the table referenced by this TableHandle is static or not. - #' @return TRUE if the table is static, or FALSE if the table is ticking. - is_static = function() { - return(private$is_static_field) - }, - - #' @description - #' Number of rows in the table referenced by this TableHandle, currently only implemented for static tables. - #' @return The number of rows in the table. - nrow = function() { - if (!private$is_static_field) { - stop("The number of rows is not yet supported for dynamic tables.") - } - return(private$internal_table_handle$num_rows()) - }, - - #' @description - #' Binds the table referenced by this TableHandle to a variable on the server, - #' enabling it to be accessed by that name from any Deephaven API. - #' @param name Name for this table on the server. - bind_to_variable = function(name) { - .verify_string("name", name) - private$internal_table_handle$bind_to_variable(name) - }, - - #' @description - #' Imports the table referenced by this TableHandle into an Arrow RecordBatchStreamReader. - #' @return A RecordBatchStreamReader containing the data from the table referenced by this TableHandle. - to_arrow_record_batch_stream_reader = function() { - ptr <- private$internal_table_handle$get_arrow_array_stream_ptr() - rbsr <- RecordBatchStreamReader$import_from_c(ptr) - return(rbsr) - }, - - #' @description - #' Imports the table referenced by this TableHandle into an Arrow Table. - #' @return A Table containing the data from the table referenced by this TableHandle. - to_arrow_table = function() { - rbsr <- self$to_arrow_record_batch_stream_reader() - arrow_tbl <- rbsr$read_table() - return(arrow_tbl) - }, - - #' @description - #' Imports the table referenced by this TableHandle into a dplyr Tibble. - #' @return A Tibble containing the data from the table referenced by this TableHandle. - to_tibble = function() { - rbsr <- self$to_arrow_record_batch_stream_reader() - arrow_tbl <- rbsr$read_table() - return(as_tibble(arrow_tbl)) - }, - - #' @description - #' Imports the table referenced by this TableHandle into an R Data Frame. - #' @return A Data Frame containing the data from the table referenced by this TableHandle. - to_data_frame = function() { - arrow_tbl <- self$to_arrow_table() - return(as.data.frame(as.data.frame(arrow_tbl))) # TODO: for some reason as.data.frame on arrow table returns a tibble, not a data frame - } - ), - private = list( - internal_table_handle = NULL, - is_static_field = NULL +#' @export +setClass( + "TableHandle", + representation( + .internal_rcpp_object = "Rcpp_INTERNAL_TableHandle" ) ) + +### TABLEHANDLE PROPERTIES + +setGeneric( + "is_static", + function(table_handle_instance) { + return(standardGeneric("is_static")) + }, + signature = c("table_handle_instance") +) + +#' @export +setMethod( + "is_static", + signature = c(table_handle_instance = "TableHandle"), + function(table_handle_instance) { + return(table_handle_instance@.internal_rcpp_object$is_static()) + } +) + +#' @export +setMethod( + "head", + signature = c(x = "TableHandle"), + function(x, n, ...) { + verify_positive_int("n", n, TRUE) + return(new("TableHandle", .internal_rcpp_object = x@.internal_rcpp_object$head(n))) + } +) + +#' @export +setMethod( + "tail", + signature = c(x = "TableHandle"), + function(x, n, ...) { + verify_positive_int("n", n, TRUE) + return(new("TableHandle", .internal_rcpp_object = x@.internal_rcpp_object$tail(n))) + } +) + +#' @export +setMethod( + "nrow", + signature = c(x = "TableHandle"), + function(x) { + return(x@.internal_rcpp_object$num_rows()) + } +) + +#' @export +setMethod( + "ncol", + signature = c(x = "TableHandle"), + function(x) { + return(x@.internal_rcpp_object$num_cols()) + } +) + +#' @export +setMethod( + "dim", + signature = c(x = "TableHandle"), + function(x) { + return(c(x@.internal_rcpp_object$num_rows(), x@.internal_rcpp_object$num_cols())) + } +) + +setGeneric( + "bind_to_variable", + function(table_handle_instance, name) { + return(standardGeneric("bind_to_variable")) + }, + signature = c("table_handle_instance", "name") +) + +#' @export +setMethod( + "bind_to_variable", + signature = c(table_handle_instance = "TableHandle", name = "character"), + function(table_handle_instance, name) { + verify_string("name", name, TRUE) + table_handle_instance@.internal_rcpp_object$bind_to_variable(name) + return(NULL) + } +) + + +### TABLEHANDLE CONVERSIONS ### + +#' @export +setMethod( + "as_record_batch_reader", + signature = c(x = "TableHandle"), + function(x, ...) { + ptr <- x@.internal_rcpp_object$get_arrow_array_stream_ptr() + rbsr <- RecordBatchStreamReader$import_from_c(ptr) + return(rbsr) + } +) + +#' @export +setMethod( + "as_arrow_table", + signature = c(x = "TableHandle"), + function(x, ...) { + rbsr <- as_record_batch_reader(x) + arrow_tbl <- rbsr$read_table() + return(arrow_tbl) + } +) + +#' @export +setMethod( + "as_tibble", + signature = c(x = "TableHandle"), + function(x, ...) { + rbsr <- as_record_batch_reader(x) + arrow_tbl <- rbsr$read_table() + return(as_tibble(arrow_tbl)) + } +) + +#' @export +setMethod( + "as.data.frame", + signature = c(x = "TableHandle"), + function(x, ...) { + arrow_tbl <- as_arrow_table(x) + return(as.data.frame(as.data.frame(arrow_tbl))) + } +) + +#' @export +setMethod( + "as_data_frame", + signature = c(x = "TableHandle"), + function(x, ...) { + return(as.data.frame(x)) + } +) diff --git a/R/rdeephaven/R/table_ops.R b/R/rdeephaven/R/table_ops.R new file mode 100644 index 00000000000..40bea013348 --- /dev/null +++ b/R/rdeephaven/R/table_ops.R @@ -0,0 +1,546 @@ +#' @export +merge_tables <- function(...) { + table_list <- unlist(c(...)) + if (length(table_list) == 0) { + return(NULL) + } + verify_type("table_list", table_list, "TableHandle", "Deephaven TableHandle", FALSE) + if (length(table_list) == 1) { + return(table_list[[1]]) + } + unwrapped_table_list <- lapply(table_list, strip_s4_wrapping) + return(new("TableHandle", .internal_rcpp_object = unwrapped_table_list[[1]]$merge(unwrapped_table_list[2:length(unwrapped_table_list)]))) +} + +setGeneric( + "select", + function(table_handle, by = character(), ...) { + return(standardGeneric("select")) + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "select", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$select(by))) + } +) + +setGeneric( + "view", + function(table_handle, by = character(), ...) { + return(standardGeneric("view")) + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "view", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$view(by))) + } +) + +setGeneric( + "update", + function(table_handle, by = character(), ...) { + standardGeneric("update") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "update", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$update(by))) + } +) + +setGeneric( + "update_view", + function(table_handle, by = character(), ...) { + standardGeneric("update_view") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "update_view", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$update_view(by))) + } +) + +setGeneric( + "drop_columns", + function(table_handle, by = character(), ...) { + standardGeneric("drop_columns") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "drop_columns", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$drop_columns(by))) + } +) + +setGeneric( + "where", + function(table_handle, filter, ...) { + standardGeneric("where") + }, + signature = c("table_handle", "filter") +) + +#' @export +setMethod( + "where", + signature = c(table_handle = "TableHandle"), + function(table_handle, filter) { + verify_string("filter", filter, TRUE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$where(filter))) + } +) + +setGeneric( + "group_by", + function(table_handle, by = character(), ...) { + standardGeneric("group_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "group_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$group_by(by))) + } +) + +setGeneric( + "ungroup", + function(table_handle, by = character(), ...) { + standardGeneric("ungroup") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "ungroup", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$ungroup(by))) + } +) + +setGeneric( + "agg_by", + function(table_handle, aggs, by = character(), ...) { + standardGeneric("agg_by") + }, + signature = c("table_handle", "aggs", "by") +) + +#' @export +setMethod( + "agg_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, aggs, by = character()) { + verify_type("aggs", aggs, "Aggregation", "Deephaven Aggregation", FALSE) + verify_string("by", by, FALSE) + aggs <- c(aggs) + unwrapped_aggs <- lapply(aggs, strip_s4_wrapping) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$agg_by(unwrapped_aggs, by))) + } +) + +setGeneric( + "first_by", + function(table_handle, by = character(), ...) { + standardGeneric("first_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "first_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$first_by(by))) + } +) + +setGeneric( + "last_by", + function(table_handle, by = character(), ...) { + standardGeneric("last_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "last_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$last_by(by))) + } +) + +setGeneric( + "head_by", + function(table_handle, num_rows, by = character(), ...) { + standardGeneric("head_by") + }, + signature = c("table_handle", "num_rows", "by") +) + +#' @export +setMethod( + "head_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, num_rows, by = character()) { + verify_positive_int("num_rows", num_rows, TRUE) + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$head_by(num_rows, by))) + } +) + +setGeneric( + "tail_by", + function(table_handle, num_rows, by = character(), ...) { + standardGeneric("tail_by") + }, + signature = c("table_handle", "num_rows", "by") +) + +#' @export +setMethod( + "tail_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, num_rows, by = character()) { + verify_positive_int("num_rows", num_rows, TRUE) + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$tail_by(num_rows, by))) + } +) + +setGeneric( + "min_by", + function(table_handle, by = character(), ...) { + standardGeneric("min_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "min_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$min_by(by))) + } +) + +setGeneric( + "max_by", + function(table_handle, by = character(), ...) { + standardGeneric("max_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "max_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$max_by(by))) + } +) + +setGeneric( + "sum_by", + function(table_handle, by = character(), ...) { + standardGeneric("sum_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "sum_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$sum_by(by))) + } +) + +setGeneric( + "abs_sum_by", + function(table_handle, by = character(), ...) { + standardGeneric("abs_sum_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "abs_sum_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$abs_sum_by(by))) + } +) + +setGeneric( + "avg_by", + function(table_handle, by = character(), ...) { + standardGeneric("avg_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "avg_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$avg_by(by))) + } +) + +setGeneric( + "w_avg_by", + function(table_handle, wcol, by = character(), ...) { + standardGeneric("w_avg_by") + }, + signature = c("table_handle", "wcol", "by") +) + +#' @export +setMethod( + "w_avg_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, wcol, by = character()) { + verify_string("wcol", wcol, TRUE) + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$w_avg_by(wcol, by))) + } +) + +setGeneric( + "median_by", + function(table_handle, by = character(), ...) { + standardGeneric("median_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "median_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$median_by(by))) + } +) + +setGeneric( + "var_by", + function(table_handle, by = character(), ...) { + standardGeneric("var_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "var_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$var_by(by))) + } +) + +setGeneric( + "std_by", + function(table_handle, by = character(), ...) { + standardGeneric("std_by") + }, + signature = c("table_handle", "by") +) + +#' @export +setMethod( + "std_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, by = character()) { + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$std_by(by))) + } +) + +setGeneric( + "percentile_by", + function(table_handle, percentile, by = character(), ...) { + standardGeneric("percentile_by") + }, + signature = c("table_handle", "percentile", "by") +) + +#' @export +setMethod( + "percentile_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, percentile, by = character()) { + verify_in_unit_interval("percentile", percentile, TRUE) + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$percentile_by(percentile, by))) + } +) + +setGeneric( + "count_by", + function(table_handle, col, by = character(), ...) { + standardGeneric("count_by") + }, + signature = c("table_handle", "col", "by") +) + +#' @export +setMethod( + "count_by", + signature = c(table_handle = "TableHandle"), + function(table_handle, col = "n", by = character()) { + verify_string("col", col, TRUE) + verify_string("by", by, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$count_by(col, by))) + } +) + +setGeneric( + "cross_join", + function(table_handle, right_side, columns_to_match, columns_to_add, ...) { + standardGeneric("cross_join") + }, + signature = c("table_handle", "right_side", "columns_to_match", "columns_to_add") +) + +#' @export +setMethod( + "cross_join", + signature = c(table_handle = "TableHandle", right_side = "TableHandle"), + function(table_handle, right_side, columns_to_match = character(), columns_to_add = character()) { + verify_string("columns_to_match", columns_to_match, FALSE) + verify_string("columns_to_add", columns_to_add, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$cross_join( + right_side@.internal_rcpp_object, + columns_to_match, columns_to_add + ))) + } +) + +setGeneric( + "natural_join", + function(table_handle, right_side, columns_to_match, columns_to_add, ...) { + standardGeneric("natural_join") + }, + signature = c("table_handle", "right_side", "columns_to_match", "columns_to_add") +) + +#' @export +setMethod( + "natural_join", + signature = c(table_handle = "TableHandle", right_side = "TableHandle"), + function(table_handle, right_side, columns_to_match = character(), columns_to_add = character()) { + verify_string("columns_to_match", columns_to_match, FALSE) + verify_string("columns_to_add", columns_to_add, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$natural_join( + right_side@.internal_rcpp_object, + columns_to_match, columns_to_add + ))) + } +) + +setGeneric( + "exact_join", + function(table_handle, right_side, columns_to_match, columns_to_add, ...) { + standardGeneric("exact_join") + }, + signature = c("table_handle", "right_side", "columns_to_match", "columns_to_add") +) + +#' @export +setMethod( + "exact_join", + signature = c(table_handle = "TableHandle", right_side = "TableHandle"), + function(table_handle, right_side, columns_to_match = character(), columns_to_add = character()) { + verify_string("columns_to_match", columns_to_match, FALSE) + verify_string("columns_to_add", columns_to_add, FALSE) + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$exact_join( + right_side@.internal_rcpp_object, + columns_to_match, columns_to_add + ))) + } +) + +#' @export +setGeneric( + "sort", + function(table_handle, by = character(), descending = FALSE, abs_sort = FALSE, ...) { + standardGeneric("sort") + }, + signature = c("table_handle", "by", "descending") +) + +#' @export +setMethod( + "sort", + signature = c(table_handle = "TableHandle"), + function(table_handle, by, descending = FALSE, abs_sort = FALSE) { + verify_string("by", by, FALSE) + verify_bool("descending", descending, FALSE) + verify_bool("abs_sort", abs_sort, FALSE) + if ((length(descending) > 1) && length(descending) != length(by)) { + stop(paste0("'descending' must be the same length as 'by' if more than one entry is supplied. Got 'by' with length ", length(by), " and 'descending' with length ", length(descending), ".")) + } + if ((length(abs_sort) > 1) && length(abs_sort) != length(by)) { + stop(paste0("'abs_sort' must be the same length as 'by' if more than one entry is supplied. Got 'by' with length ", length(by), " and 'abs_sort' with length ", length(abs_sort), ".")) + } + return(new("TableHandle", .internal_rcpp_object = table_handle@.internal_rcpp_object$sort(by, descending, abs_sort))) + } +) diff --git a/R/rdeephaven/README.md b/R/rdeephaven/README.md index b84d010f09f..e755758184d 100644 --- a/R/rdeephaven/README.md +++ b/R/rdeephaven/README.md @@ -86,7 +86,7 @@ Currently, the R client is only supported on Ubuntu 20.04 or 22.04 and must be b ``` then install the deephaven client itself: ```r - install.packages("/path/to/rdeephaven", INSTALL_opts="--install-tests", repos=NULL, type="source") + install.packages("/path/to/rdeephaven", repos=NULL, type="source") ``` This last command can also be executed from RStudio without the need for explicitly starting an R console. diff --git a/R/rdeephaven/tests/testthat.R b/R/rdeephaven/inst/tests/testthat.R similarity index 100% rename from R/rdeephaven/tests/testthat.R rename to R/rdeephaven/inst/tests/testthat.R diff --git a/R/rdeephaven/inst/tests/testthat/test_agg_by.R b/R/rdeephaven/inst/tests/testthat/test_agg_by.R new file mode 100644 index 00000000000..2480fffd74b --- /dev/null +++ b/R/rdeephaven/inst/tests/testthat/test_agg_by.R @@ -0,0 +1,797 @@ +library(testthat) +library(dplyr) +library(rdeephaven) + +setup <- function() { + df1 <- data.frame( + string_col = c("I", "am", "a", "string", "column"), + int_col = c(0, 1, 2, 3, 4), + dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) + ) + + df2 <- data.frame( + col1 = rep(3.14, 100), + col2 = rep("hello!", 100), + col3 = rnorm(100) + ) + + df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) + + df4 <- data.frame( + time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], + bool_col = sample(c(TRUE, FALSE), 250000, TRUE), + int_col = sample(0:10000, 250000, TRUE) + ) + + df5 <- data.frame( + X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), + Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), + Number1 = c(100, -44, 49, 11, -66, 50, 29, 18, -70), + Number2 = c(-55, 76, 20, 130, 230, -50, 73, 137, 214) + ) + + df6 <- data.frame( + X = c("B", "C", "B", "A", "A", "C", "B", "C", "B", "A"), + Y = c("N", "N", "M", "P", "O", "P", "O", "N", "O", "O"), + Number1 = c(55, 72, 86, -45, 1, 0, 345, -65, 99, -5), + Number2 = c(76, 4, -6, 34, 12, -76, 45, -5, 34, 6) + ) + + # set up client + client <- dhConnect(target = "localhost:10000") + + # move dataframes to server and get TableHandles for testing + th1 <- import_table(client, df1) + th2 <- import_table(client, df2) + th3 <- import_table(client, df3) + th4 <- import_table(client, df4) + th5 <- import_table(client, df5) + th6 <- import_table(client, df6) + + return(list( + "client" = client, + "df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4, "df5" = df5, "df6" = df6, + "th1" = th1, "th2" = th2, "th3" = th3, "th4" = th4, "th5" = th5, "th6" = th6 + )) +} + +test_that("agg_first behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = first(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_first("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = first(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_first("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = first(X5), X6 = first(X6), X7 = first(X7), X8 = first(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_first(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(time_col = first(time_col), int_col = first(int_col)) + new_th4 <- data$th4 %>% + agg_by(c(agg_first("time_col"), agg_first("int_col")), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = first(Number1), Number2 = first(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_first(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = first(Number1), Number2 = first(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_first(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_first with built-in pipe behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 |> + dplyr::group_by(string_col) |> + summarise(int_col = first(int_col)) + new_th1 <- data$th1 |> + agg_by(agg_first("int_col"), "string_col") |> + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 |> + dplyr::group_by(col1, col2) |> + summarise(col3 = first(col3)) + new_th2 <- data$th2 |> + agg_by(agg_first("col3"), c("col1", "col2")) |> + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 |> + dplyr::group_by(X1, X2, X3, X4) |> + summarise(X5 = first(X5), X6 = first(X6), X7 = first(X7), X8 = first(X8)) + new_th3 <- data$th3 |> + agg_by(agg_first(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) |> + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 |> + dplyr::group_by(bool_col) |> + summarise(time_col = first(time_col), int_col = first(int_col)) + new_th4 <- data$th4 |> + agg_by(c(agg_first("time_col"), agg_first("int_col")), "bool_col") |> + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 |> + dplyr::group_by(X) |> + summarise(Number1 = first(Number1), Number2 = first(Number2)) + new_th5 <- data$th5 |> + agg_by(agg_first(c("Number1", "Number2")), "X") |> + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 |> + dplyr::group_by(X, Y) |> + summarise(Number1 = first(Number1), Number2 = first(Number2)) + new_th6 <- data$th6 |> + agg_by(agg_first(c("Number1", "Number2")), c("X", "Y")) |> + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_last behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = last(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_last("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = last(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_last("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = last(X5), X6 = last(X6), X7 = last(X7), X8 = last(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_last(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(time_col = last(time_col), int_col = last(int_col)) + new_th4 <- data$th4 %>% + agg_by(c(agg_last("time_col"), agg_last("int_col")), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = last(Number1), Number2 = last(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_last(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = last(Number1), Number2 = last(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_last(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_min behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = min(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_min("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = min(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_min("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = min(X5), X6 = min(X6), X7 = min(X7), X8 = min(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_min(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(time_col = min(time_col), int_col = min(int_col)) + new_th4 <- data$th4 %>% + agg_by(c(agg_min("time_col"), agg_min("int_col")), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = min(Number1), Number2 = min(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_min(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = min(Number1), Number2 = min(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_min(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_max behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = max(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_max("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = max(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_max("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = max(X5), X6 = max(X6), X7 = max(X7), X8 = max(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_max(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(time_col = max(time_col), int_col = max(int_col)) + new_th4 <- data$th4 %>% + agg_by(c(agg_max("time_col"), agg_max("int_col")), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = max(Number1), Number2 = max(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_max(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = max(Number1), Number2 = max(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_max(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_sum behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = sum(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_sum("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = sum(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_sum("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = sum(X5), X6 = sum(X6), X7 = sum(X7), X8 = sum(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_sum(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = sum(int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_sum("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = sum(Number1), Number2 = sum(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_sum(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = sum(Number1), Number2 = sum(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_sum(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_abs_sum behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = sum(abs(int_col))) + new_th1 <- data$th1 %>% + agg_by(agg_abs_sum("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = sum(abs(col3))) + new_th2 <- data$th2 %>% + agg_by(agg_abs_sum("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = sum(abs(X5)), X6 = sum(abs(X6)), X7 = sum(abs(X7)), X8 = sum(abs(X8))) + new_th3 <- data$th3 %>% + agg_by(agg_abs_sum(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = sum(abs(int_col))) + new_th4 <- data$th4 %>% + agg_by(agg_abs_sum("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = sum(abs(Number1)), Number2 = sum(abs(Number2))) + new_th5 <- data$th5 %>% + agg_by(agg_abs_sum(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = sum(abs(Number1)), Number2 = sum(abs(Number2))) + new_th6 <- data$th6 %>% + agg_by(agg_abs_sum(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_avg behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = mean(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_avg("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = mean(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_avg("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = mean(X5), X6 = mean(X6), X7 = mean(X7), X8 = mean(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_avg(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = mean(int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_avg("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = mean(Number1), Number2 = mean(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_avg(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = mean(Number1), Number2 = mean(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_avg(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_w_avg behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = weighted.mean(int_col, dbl_col)) + new_th1 <- data$th1 %>% + agg_by(agg_w_avg("dbl_col", "int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = weighted.mean(col3, col1)) + new_th2 <- data$th2 %>% + agg_by(agg_w_avg("col1", "col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise( + X5 = weighted.mean(X5, X9), X6 = weighted.mean(X6, X9), + X7 = weighted.mean(X7, X9), X8 = weighted.mean(X8, X9) + ) + new_th3 <- data$th3 %>% + agg_by(agg_w_avg("X9", c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = weighted.mean(int_col, int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_w_avg("int_col", "int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + mutate(weights = Number1 * Number2) %>% + summarise( + Number1 = weighted.mean(Number1, weights), + Number2 = weighted.mean(Number2, weights) + ) + new_th5 <- data$th5 %>% + update("weights = Number1 * Number2") %>% + agg_by(agg_w_avg("weights", c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + mutate(weights = Number1 * Number2) %>% + summarise(Number1 = weighted.mean(Number1, weights), Number2 = weighted.mean(Number2, weights)) + new_th6 <- data$th6 %>% + update("weights = Number1 * Number2") %>% + agg_by(agg_w_avg("weights", c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_median behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = median(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_median("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = median(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_median("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = median(X5), X6 = median(X6), X7 = median(X7), X8 = median(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_median(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = median(int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_median("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = median(Number1), Number2 = median(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_median(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = median(Number1), Number2 = median(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_median(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_var behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = var(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_var("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = var(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_var("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = var(X5), X6 = var(X6), X7 = var(X7), X8 = var(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_var(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = var(int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_var("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = var(Number1), Number2 = var(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_var(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = var(Number1), Number2 = var(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_var(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_std behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(string_col) %>% + summarise(int_col = sd(int_col)) + new_th1 <- data$th1 %>% + agg_by(agg_std("int_col"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col1, col2) %>% + summarise(col3 = sd(col3)) + new_th2 <- data$th2 %>% + agg_by(agg_std("col3"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::group_by(X1, X2, X3, X4) %>% + summarise(X5 = sd(X5), X6 = sd(X6), X7 = sd(X7), X8 = sd(X8)) + new_th3 <- data$th3 %>% + agg_by(agg_std(c("X5", "X6", "X7", "X8")), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(int_col = sd(int_col)) + new_th4 <- data$th4 %>% + agg_by(agg_std("int_col"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + dplyr::group_by(X) %>% + summarise(Number1 = sd(Number1), Number2 = sd(Number2)) + new_th5 <- data$th5 %>% + agg_by(agg_std(c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + dplyr::group_by(X, Y) %>% + summarise(Number1 = sd(Number1), Number2 = sd(Number2)) + new_th6 <- data$th6 %>% + agg_by(agg_std(c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) + +test_that("agg_percentile behaves as expected", { + # There is not a clean analog to agg_percentile in dplyr, so we create the + # dataframes directly, and only make comparisons on deterministic data frames. + + data <- setup() + + new_df1 <- data.frame(int_col = 2) + new_th1 <- data$th1 %>% + agg_by(agg_percentile(0.4, "int_col")) + expect_equal(as.data.frame(new_th1), new_df1) + + new_df2 <- data.frame( + X = c("A", "B", "C"), + Number1 = c(50, 18, 11), + Number2 = c(-50, 137, 214) + ) + new_th2 <- data$th5 %>% + agg_by(agg_percentile(0.6, c("Number1", "Number2")), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th2), new_df2) + + new_df3 <- data.frame( + X = c("A", "A", "B", "B", "B", "C", "C"), + Y = c("O", "P", "M", "N", "O", "N", "P"), + Number1 = c(-5, -45, 86, 55, 99, -65, 0), + Number2 = c(6, 34, -6, 76, 34, -5, -76) + ) + new_th3 <- data$th6 %>% + agg_by(agg_percentile(0.3, c("Number1", "Number2")), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th3), new_df3) + + close(data$client) +}) + +test_that("agg_count behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + count(string_col) + new_th1 <- data$th1 %>% + agg_by(agg_count("n"), "string_col") %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + count(col1, col2) + new_th2 <- data$th2 %>% + agg_by(agg_count("n"), c("col1", "col2")) %>% + sort(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + count(X1, X2, X3, X4) + new_th3 <- data$th3 %>% + agg_by(agg_count("n"), c("X1", "X2", "X3", "X4")) %>% + sort(c("X1", "X2", "X3", "X4")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + count(bool_col) + new_th4 <- data$th4 %>% + agg_by(agg_count("n"), "bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + count(X) + new_th5 <- data$th5 %>% + agg_by(agg_count("n"), "X") %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + new_tb6 <- data$df6 %>% + count(X, Y) + new_th6 <- data$th6 %>% + agg_by(agg_count("n"), c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(new_tb6)) + + close(data$client) +}) diff --git a/R/rdeephaven/inst/tests/testthat/test_aggregate_wrapper.R b/R/rdeephaven/inst/tests/testthat/test_aggregate_wrapper.R new file mode 100644 index 00000000000..d68e36f414b --- /dev/null +++ b/R/rdeephaven/inst/tests/testthat/test_aggregate_wrapper.R @@ -0,0 +1,185 @@ +library(testthat) +library(rdeephaven) + +##### TESTING BAD INPUTS ##### + +test_that("agg_min fails nicely when 'cols' is a bad type", { + expect_error( + agg_min(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_min(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_max fails nicely when 'cols' is a bad type", { + expect_error( + agg_max(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_max(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_sum fails nicely when 'cols' is a bad type", { + expect_error( + agg_sum(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_sum(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_abs_sum fails nicely when 'cols' is a bad type", { + expect_error( + agg_abs_sum(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_abs_sum(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_avg fails nicely when 'cols' is a bad type", { + expect_error( + agg_avg(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_avg(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_w_avg fails nicely when 'wcol' is a bad type", { + expect_error( + agg_w_avg(5, "string"), + "'wcol' must be a single string. Got an object of class numeric." + ) + expect_error( + agg_w_avg(TRUE, "string"), + "'wcol' must be a single string. Got an object of class logical." + ) + expect_error( + agg_w_avg(c("Multiple", "strings", "bad"), "string"), + "'wcol' must be a single string. Got a vector of length 3." + ) +}) + +test_that("agg_w_avg fails nicely when 'cols' is a bad type", { + expect_error( + agg_w_avg("string", 5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_w_avg("string", TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_var fails nicely when 'cols' is a bad type", { + expect_error( + agg_var(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_var(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_std fails nicely when 'cols' is a bad type", { + expect_error( + agg_std(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_std(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_first fails nicely when 'cols' is a bad type", { + expect_error( + agg_first(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_first(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_last fails nicely when 'cols' is a bad type", { + expect_error( + agg_last(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_last(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_median fails nicely when 'cols' is a bad type", { + expect_error( + agg_median(5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_median(TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_percentile fails nicely when 'percentile' is bad", { + expect_error( + agg_percentile("string", "string"), + "'percentile' must be a single numeric. Got an object of class character." + ) + expect_error( + agg_percentile(TRUE, "string"), + "'percentile' must be a single numeric. Got an object of class logical." + ) + expect_error( + agg_percentile(5, "string"), + "'percentile' must be between 0 and 1 inclusive. Got 'percentile' = 5." + ) + expect_error( + agg_percentile(c(5, 6, 7, 8), "string"), + "'percentile' must be a single numeric. Got a vector of length 4." + ) +}) + +test_that("agg_percentile fails nicely when 'cols' is a bad type", { + expect_error( + agg_percentile(0.5, 5), + "'cols' must be a string or a vector of strings. Got an object of class numeric." + ) + expect_error( + agg_percentile(0.5, TRUE), + "'cols' must be a string or a vector of strings. Got an object of class logical." + ) +}) + +test_that("agg_count fails nicely when 'col' is a bad type", { + expect_error( + agg_count(5), + "'col' must be a single string. Got an object of class numeric." + ) + expect_error( + agg_count(TRUE), + "'col' must be a single string. Got an object of class logical." + ) + expect_error( + agg_count(c("Many", "strings")), + "'col' must be a single string. Got a vector of length 2." + ) +}) diff --git a/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R b/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R new file mode 100644 index 00000000000..9e779e2a7d7 --- /dev/null +++ b/R/rdeephaven/inst/tests/testthat/test_client_wrapper.R @@ -0,0 +1,309 @@ +library(testthat) +library(rdeephaven) + +setup <- function() { + df1 <- data.frame( + string_col = c("I", "am", "a", "string", "column"), + int_col = c(0, 1, 2, 3, 4), + dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) + ) + + df2 <- data.frame( + col1 = rep(3.14, 100), + col2 = rep("hello!", 100), + col3 = rnorm(100) + ) + + df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) + + df4 <- data.frame( + time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], + bool_col = sample(c(TRUE, FALSE), 250000, TRUE), + int_col = sample(0:10000, 250000, TRUE) + ) + + return(list("df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4)) +} + +##### TESTING GOOD INPUTS ##### + +test_that("client dhConnection works in the simple case of anonymous authentication", { + + # TODO: assumes server is actually running on localhost:10000, this is probably bad for CI + expect_no_error(client <- dhConnect(target = "localhost:10000")) + +}) + +test_that("import_table does not fail with data frame inputs of simple column types", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + expect_no_error(import_table(client, data$df1)) + expect_no_error(import_table(client, data$df2)) + expect_no_error(import_table(client, data$df3)) + expect_no_error(import_table(client, data$df4)) + + close(client) +}) + +test_that("import_table does not fail with tibble inputs of simple column types", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + expect_no_error(import_table(client, as_tibble(data$df1))) + expect_no_error(import_table(client, as_tibble(data$df2))) + expect_no_error(import_table(client, as_tibble(data$df3))) + expect_no_error(import_table(client, as_tibble(data$df4))) + + close(client) +}) + +test_that("import_table does not fail with arrow table inputs of simple column types", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + expect_no_error(import_table(client, as_arrow_table(data$df1))) + expect_no_error(import_table(client, as_arrow_table(data$df2))) + expect_no_error(import_table(client, as_arrow_table(data$df3))) + expect_no_error(import_table(client, as_arrow_table(data$df4))) + + close(client) +}) + +test_that("import_table does not fail with record batch reader inputs of simple column types", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + expect_no_error(import_table(client, as_record_batch_reader(data$df1))) + expect_no_error(import_table(client, as_record_batch_reader(data$df2))) + expect_no_error(import_table(client, as_record_batch_reader(data$df3))) + expect_no_error(import_table(client, as_record_batch_reader(data$df4))) + + close(client) +}) + +# The following tests assume the correctness of import_table(...) AND bind_to_variable(), +# as we have to create data, push it to the server, and name it in order to test open_table(). +# Additionally, we assume the correctness of as.data.frame() to make concrete comparisons. + +test_that("open_table opens the correct table from the server using %>%", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + th1 <- import_table(client, data$df1) + th1 %>% bind_to_variable("table1") + expect_equal(as.data.frame(open_table(client, "table1")), as.data.frame(th1)) + + th2 <- import_table(client, data$df2) + th2 %>% bind_to_variable("table2") + expect_equal(as.data.frame(open_table(client, "table2")), as.data.frame(th2)) + + th3 <- import_table(client, data$df3) + th3 %>% bind_to_variable("table3") + expect_equal(as.data.frame(open_table(client, "table3")), as.data.frame(th3)) + + th4 <- import_table(client, data$df4) + th4 %>% bind_to_variable("table4") + expect_equal(as.data.frame(open_table(client, "table4")), as.data.frame(th4)) + + close(client) +}) + +test_that("open_table opens the correct table from the server using |>", { + data <- setup() + + client <- dhConnect(target = "localhost:10000") + + th1 <- import_table(client, data$df1) + th1 |> bind_to_variable("table1") + expect_equal(as.data.frame(open_table(client, "table1")), as.data.frame(th1)) + + th2 <- import_table(client, data$df2) + th2 |> bind_to_variable("table2") + expect_equal(as.data.frame(open_table(client, "table2")), as.data.frame(th2)) + + th3 <- import_table(client, data$df3) + th3 |> bind_to_variable("table3") + expect_equal(as.data.frame(open_table(client, "table3")), as.data.frame(th3)) + + th4 <- import_table(client, data$df4) + th4 |> bind_to_variable("table4") + expect_equal(as.data.frame(open_table(client, "table4")), as.data.frame(th4)) + + close(client) +}) + +test_that("empty_table correctly creates tables on the server using %>%", { + client <- dhConnect(target = "localhost:10000") + + th1 <- empty_table(client, 10) %>% update("X = i") + df1 <- data.frame(X = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) + expect_equal(as.data.frame(th1), df1) + + close(client) +}) + +test_that("empty_table correctly creates tables on the server using |>", { + client <- dhConnect(target = "localhost:10000") + + th1 <- empty_table(client, 10) |> update("X = i") + df1 <- data.frame(X = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) + expect_equal(as.data.frame(th1), df1) + + close(client) +}) + +# TODO: Test time_table good inputs + +test_that("run_script correctly runs a python script", { + client <- dhConnect(target = "localhost:10000") + + expect_no_error(run_script(client, + ' +from deephaven import new_table +from deephaven.column import string_col, int_col + +static_table_from_python_script = new_table([ +string_col("Name_String_Col", ["Data String 1", "Data String 2", "Data String 3"]), +int_col("Name_Int_Col", [44, 55, 66]) +]) +' + )) + + expect_no_error(open_table(client, "static_table_from_python_script")) + + close(client) +}) + +##### TESTING BAD INPUTS ##### + +test_that("dhConnect fails nicely with bad inputs", { + + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic"), + "Basic authentication was requested, but 'auth_token' was not provided, and at most one of 'username' or 'password' was provided. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", username = "user"), + "Basic authentication was requested, but 'auth_token' was not provided, and at most one of 'username' or 'password' was provided. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", password = "pass"), + "Basic authentication was requested, but 'auth_token' was not provided, and at most one of 'username' or 'password' was provided. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", username = "user", auth_token = "token"), + "Basic authentication was requested, but 'auth_token' was provided, as well as least one of 'username' and 'password'. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", password = "pass", auth_token = "token"), + "Basic authentication was requested, but 'auth_token' was provided, as well as least one of 'username' and 'password'. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", username = "user", password = "pass", auth_token = "token"), + "Basic authentication was requested, but 'auth_token' was provided, as well as least one of 'username' and 'password'. Please provide either 'username' and 'password', or 'auth_token'." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "custom"), + "Custom authentication was requested, but no 'auth_token' was provided." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = ""), + "'auth_type' should be a non-empty string." + ) + expect_error( + dhConnect(target = "localhost:10000", auth_type = "basic", auth_token = 1234), + "'auth_token' must be a single string. Got an object of class numeric." + ) + expect_error( + dhConnect(target = "localhost:10000", session_type = "blahblah"), + "'session_type' must be 'python' or 'groovy', but got blahblah." + ) + expect_error( + dhConnect(target = "localhost:10000", session_type = 1234), + "'session_type' must be 'python' or 'groovy', but got 1234." + ) + expect_error( + dhConnect(target = "localhost:10000", use_tls = "banana"), + "'use_tls' must be a single boolean. Got an object of class character." + ) + expect_error( + dhConnect(target = "localhost:10000", int_options = 1234), + "'int_options' must be a single list. Got an object of class numeric." + ) + expect_error( + dhConnect(target = "localhost:10000", string_options = 1234), + "'string_options' must be a single list. Got an object of class numeric." + ) + expect_error( + dhConnect(target = "localhost:10000", extra_headers = 1234), + "'extra_headers' must be a single list. Got an object of class numeric." + ) + +}) + +test_that("import_table fails nicely with bad inputs", { + library(datasets) + + client <- dhConnect(target = "localhost:10000") + + expect_error(import_table(client, 12345), cat("unable to find an inherited method for function ‘import_table’ for signature ‘\"Client\", \"numeric\"’")) + expect_error(import_table(client, "hello!"), cat("unable to find an inherited method for function ‘import_table’ for signature ‘\"Client\", \"character\"’")) + + # TODO: this needs better error handling, but it is unclear whether that happens on the server side or the R side. + data(iris) + expect_error(import_table(client, iris)) + + data(HairEyeColor) + expect_error(import_table(client, HairEyeColor), cat("unable to find an inherited method for function ‘import_table’ for signature ‘\"Client\", \"table\"’")) + + close(client) +}) + +test_that("open_table fails nicely with bad inputs", { + client <- dhConnect(target = "localhost:10000") + + expect_error(open_table(client, ""), "The table '' does not exist on the server.") + expect_error(open_table(client, 12345), cat("unable to find an inherited method for function ‘open_table’ for signature ‘\"Client\", \"numeric\"’")) + expect_error(open_table(client, c("I", "am", "string")), "'name' must be a single string. Got a vector of length 3.") + + close(client) +}) + +test_that("empty_table fails nicely with bad inputs", { + client <- dhConnect(target = "localhost:10000") + + expect_error(empty_table(client, -3), "'size' must be a nonnegative integer. Got 'size' = -3.") + expect_error(empty_table(client, 1.2345), "'size' must be an integer. Got 'size' = 1.2345.") + expect_error(empty_table(client, "hello!"), cat("unable to find an inherited method for function ‘empty_table’ for signature ‘\"Client\", \"character\"’")) + expect_error(empty_table(client, c(1, 2, 3, 4)), "'size' must be a single numeric. Got a vector of length 4.") + + close(client) +}) + +test_that("time_table fails nicely with bad inputs", { + client <- dhConnect(target = "localhost:10000") + + expect_error(time_table(client, 1.23, 1000), "'period' must be an integer. Got 'period' = 1.23.") + expect_error(time_table(client, 1000, 1.23), "'start_time' must be an integer. Got 'start_time' = 1.23.") + expect_error(time_table(client, c(1, 2, 3), 1000), "'period' must be a single numeric. Got a vector of length 3.") + expect_error(time_table(client, 1000, c(1, 2, 3)), "'start_time' must be a single numeric. Got a vector of length 3.") + expect_error(time_table(client, "hello!", 1000), cat("unable to find an inherited method for function ‘time_table’ for signature ‘\"Client\", \"character\", \"numeric\"’")) + expect_error(time_table(client, 1000, "hello!"), cat("unable to find an inherited method for function ‘time_table’ for signature ‘\"Client\", \"numeric\", \"character\"’")) + + close(client) +}) + +test_that("run_script fails nicely with bad input types", { + client <- dhConnect(target = "localhost:10000") + + expect_error(run_script(client, 12345), cat("unable to find an inherited method for function ‘run_script’ for signature ‘\"Client\", \"numeric\"’")) + expect_error(run_script(client, c("I", "am", "a", "string")), "'script' must be a single string. Got a vector of length 4.") + + close(client) +}) diff --git a/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R b/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R new file mode 100644 index 00000000000..49fa4ab12ba --- /dev/null +++ b/R/rdeephaven/inst/tests/testthat/test_table_handle_wrapper.R @@ -0,0 +1,233 @@ +library(testthat) +library(rdeephaven) + +setup <- function() { + df1 <- data.frame( + string_col = c("I", "am", "a", "string", "column"), + int_col = c(0, 1, 2, 3, 4), + dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) + ) + + df2 <- data.frame( + col1 = rep(3.14, 100), + col2 = rep("hello!", 100), + col3 = rnorm(100) + ) + + df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) + + df4 <- data.frame( + time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], + bool_col = sample(c(TRUE, FALSE), 250000, TRUE), + int_col = sample(0:10000, 250000, TRUE) + ) + + # set up client + client <- dhConnect(target = "localhost:10000") + + # move dataframes to server and get TableHandles for testing + th1 <- import_table(client, df1) + th2 <- import_table(client, df2) + th3 <- import_table(client, df3) + th4 <- import_table(client, df4) + + # time table to test is_static() + th5 <- time_table(client, 1000000000) %>% update("X = ii") + + return(list( + "client" = client, + "df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4, + "th1" = th1, "th2" = th2, "th3" = th3, "th4" = th4, "th5" = th5 + )) +} + +##### TESTING GOOD INPUTS ##### + +test_that("is_static returns the correct value", { + data <- setup() + + expect_true(is_static(data$th1)) + expect_true(is_static(data$th2)) + expect_true(is_static(data$th3)) + expect_true(is_static(data$th4)) + expect_false(is_static(data$th5)) + + close(data$client) +}) + +test_that("nrow returns the correct number of rows", { + data <- setup() + + expect_equal(nrow(data$th1), nrow(data$df1)) + expect_equal(nrow(data$th2), nrow(data$df2)) + expect_equal(nrow(data$th3), nrow(data$df3)) + expect_equal(nrow(data$th4), nrow(data$df4)) + + close(data$client) +}) + +test_that("ncol returns the correct number of columns", { + data <- setup() + + expect_equal(ncol(data$th1), ncol(data$df1)) + expect_equal(ncol(data$th2), ncol(data$df2)) + expect_equal(ncol(data$th3), ncol(data$df3)) + expect_equal(ncol(data$th4), ncol(data$df4)) + + close(data$client) +}) + +test_that("dim returns the correct dimension", { + data <- setup() + + expect_equal(dim(data$th1), dim(data$df1)) + expect_equal(dim(data$th2), dim(data$df2)) + expect_equal(dim(data$th3), dim(data$df3)) + expect_equal(dim(data$th4), dim(data$df4)) + + close(data$client) +}) + +test_that("bind_to_variable binds the table to a variable", { + data <- setup() + + data$th1 %>% bind_to_variable("table1") + expect_no_error(open_table(data$client, "table1")) + + data$th2 %>% bind_to_variable("table2") + expect_no_error(open_table(data$client, "table2")) + + data$th3 %>% bind_to_variable("table3") + expect_no_error(open_table(data$client, "table3")) + + data$th4 %>% bind_to_variable("table4") + expect_no_error(open_table(data$client, "table4")) + + close(data$client) +}) + +test_that("as_record_batch_reader returns an identical stream reader", { + data <- setup() + + # actual equality of RecordBatchStreamReaders is not expected, as they contain underlying pointers to relevant data, + # as well as metadata from the Deephaven server, which we do not expect rbr created fully in R to have. + # We care about equality in the sense that coercing to dataframes should yield identical dataframes, so we cast to dataframes and compare. + # Additionally, as.data.frame() does not convert arrow tables to data frames, but to Tibbles. Need another as.data.frame to get a data frame. + + rbr1 <- as_record_batch_reader(data$th1) + expect_equal(as.data.frame(as.data.frame(rbr1$read_table())), data$df1) + + rbr2 <- as_record_batch_reader(data$th2) + expect_equal(as.data.frame(as.data.frame(rbr2$read_table())), data$df2) + + rbr3 <- as_record_batch_reader(data$th3) + expect_equal(as.data.frame(as.data.frame(rbr3$read_table())), data$df3) + + rbr4 <- as_record_batch_reader(data$th4) + expect_equal(as.data.frame(as.data.frame(rbr4$read_table())), data$df4) + + close(data$client) +}) + +test_that("as_arrow_table returns the correct Arrow table", { + data <- setup() + + # The rationale for casting RecordBatchStreamReaders to dataframes for comparison also applies to Arrow Tables. + # Additionally, as.data.frame() does not convert arrow tables to data frames, but to Tibbles. Need another as.data.frame to get a data frame. + + arrow_tbl1 <- as_arrow_table(data$th1) + expect_equal(as.data.frame(as.data.frame(arrow_tbl1)), data$df1) + + arrow_tbl2 <- as_arrow_table(data$th2) + expect_equal(as.data.frame(as.data.frame(arrow_tbl2)), data$df2) + + arrow_tbl3 <- as_arrow_table(data$th3) + expect_equal(as.data.frame(as.data.frame(arrow_tbl3)), data$df3) + + arrow_tbl4 <- as_arrow_table(data$th4) + expect_equal(as.data.frame(as.data.frame(arrow_tbl4)), data$df4) + + close(data$client) +}) + +test_that("as_tibble returns the correct Tibble", { + data <- setup() + + tibble1 <- as_tibble(data$th1) + expect_equal(tibble1, as_tibble(data$df1)) + + tibble2 <- as_tibble(data$th2) + expect_equal(tibble2, as_tibble(data$df2)) + + tibble3 <- as_tibble(data$th3) + expect_equal(tibble3, as_tibble(data$df3)) + + tibble4 <- as_tibble(data$th4) + expect_equal(tibble4, as_tibble(data$df4)) + + close(data$client) +}) + +test_that("as.data.frame returns the correct data frame", { + data <- setup() + + data_frame1 <- as.data.frame(data$th1) + expect_equal(data_frame1, data$df1) + + data_frame2 <- as.data.frame(data$th2) + expect_equal(data_frame2, data$df2) + + data_frame3 <- as.data.frame(data$th3) + expect_equal(data_frame3, data$df3) + + data_frame4 <- as.data.frame(data$th4) + expect_equal(data_frame4, data$df4) + + close(data$client) +}) + +test_that("as_data_frame returns the correct data frame", { + data <- setup() + + data_frame1 <- as_data_frame(data$th1) + expect_equal(data_frame1, data$df1) + + data_frame2 <- as_data_frame(data$th2) + expect_equal(data_frame2, data$df2) + + data_frame3 <- as_data_frame(data$th3) + expect_equal(data_frame3, data$df3) + + data_frame4 <- as_data_frame(data$th4) + expect_equal(data_frame4, data$df4) + + close(data$client) +}) + +##### TESTING BAD INPUTS ##### + +test_that("bind_to_variable fails nicely on bad inputs", { + data <- setup() + + expect_error( + data$th1 %>% bind_to_variable(12345), + cat("unable to find an inherited method for function ‘bind_to_variable’ for signature ‘\"TableHandle\", \"numeric\"’") + ) + + expect_error( + data$th1 %>% bind_to_variable(c("multiple", "strings")), + "'name' must be a single string. Got a vector of length 2." + ) + + expect_error( + data$th1 %>% bind_to_variable(data$df1), + cat("unable to find an inherited method for function 'bind_to_variable' for signature '\"TableHandle\", \"data.frame\"'") + ) + + expect_error( + data$th1 %>% bind_to_variable(list("list", "of", "strings")), + cat("unable to find an inherited method for function ‘bind_to_variable’ for signature ‘\"TableHandle\", \"list\"’") + ) + + close(data$client) +}) diff --git a/R/rdeephaven/inst/tests/testthat/test_table_ops.R b/R/rdeephaven/inst/tests/testthat/test_table_ops.R new file mode 100644 index 00000000000..7cbfda33d93 --- /dev/null +++ b/R/rdeephaven/inst/tests/testthat/test_table_ops.R @@ -0,0 +1,863 @@ +library(testthat) +library(dplyr) +library(rdeephaven) + +setup <- function() { + df1 <- data.frame( + string_col = c("I", "am", "a", "string", "column"), + int_col = c(0, 1, 2, 3, 4), + dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) + ) + + df2 <- data.frame( + col1 = rep(3.14, 100), + col2 = rep("hello!", 100), + col3 = rnorm(100) + ) + + df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) + + df4 <- data.frame( + time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], + bool_col = sample(c(TRUE, FALSE), 250000, TRUE), + int_col = sample(0:10000, 250000, TRUE) + ) + + df5 <- data.frame( + X = c("A", "B", "A", "C", "B", "A", "B", "B", "C"), + Y = c("M", "N", "O", "N", "P", "M", "O", "P", "M"), + Number1 = c(100, -44, 49, 11, -66, 50, 29, 18, -70), + Number2 = c(-55, 76, 20, 130, 230, -50, 73, 137, 214) + ) + + df6 <- data.frame( + X = c("B", "C", "B", "A", "A", "C", "B", "C", "B", "A"), + Y = c("N", "N", "M", "P", "O", "P", "O", "N", "O", "O"), + Number1 = c(55, 72, 86, -45, 1, 0, 345, -65, 99, -5), + Number2 = c(76, 4, -6, 34, 12, -76, 45, -5, 34, 6) + ) + + # set up client + client <- dhConnect(target = "localhost:10000") + + # move dataframes to server and get TableHandles for testing + th1 <- import_table(client, df1) + th2 <- import_table(client, df2) + th3 <- import_table(client, df3) + th4 <- import_table(client, df4) + th5 <- import_table(client, df5) + th6 <- import_table(client, df6) + + return(list( + "client" = client, + "df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4, "df5" = df5, "df6" = df6, + "th1" = th1, "th2" = th2, "th3" = th3, "th4" = th4, "th5" = th5, "th6" = th6 + )) +} + +##### TESTING GOOD INPUTS ##### + +test_that("merge_tables behaves as expected", { + data <- setup() + + expect_equal(NULL, merge_tables(NULL)) + + new_df1 <- rbind(data$df5) + new_th1 <- merge_tables(data$th5) + expect_equal(as.data.frame(new_th1), new_df1) + + new_df2 <- rbind(data$df5, data$df6) + new_th2 <- merge_tables(data$th5, data$th6) + expect_equal(as.data.frame(new_th2), new_df2) + + new_df3 <- rbind(data$df5, data$df6, data$df6, data$df5) + new_th3 <- merge_tables(data$th5, data$th6, data$th6, data$th5) + expect_equal(as.data.frame(new_th3), new_df3) + + new_th4 <- merge_tables(c(data$th5, data$th6)) + expect_equal(as.data.frame(new_th4), new_df2) + + new_th5 <- merge_tables(c(data$th5, data$th6, NULL, data$th6, data$th5)) + expect_equal(as.data.frame(new_th5), new_df3) + + new_th6 <- merge_tables(data$th5, c(data$th6, data$th6, data$th5)) + expect_equal(as.data.frame(new_th6), new_df3) + + close(data$client) +}) + +test_that("select behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::select(string_col) + new_th1 <- data$th1 %>% + select("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::select(col2, col3) + new_th2 <- data$th2 %>% + select(c("col2", "col3")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::select(X1, X2) %>% + rename(first_col = X1) + new_th3 <- data$th3 %>% + select(c("first_col = X1", "X2")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::select(int_col) %>% + mutate(new_col = int_col + 1, .keep = "none") + new_th4 <- data$th4 %>% + select("new_col = int_col + 1") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + mutate(Number3 = Number1 * Number2) %>% + dplyr::select(X, Number3) + new_th5 <- data$th5 %>% + select(c("X", "Number3 = Number1 * Number2")) + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + close(data$client) +}) + +test_that("select with base pipe behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 |> + dplyr::select(string_col) + new_th1 <- data$th1 |> + select("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 |> + dplyr::select(col2, col3) + new_th2 <- data$th2 |> + select(c("col2", "col3")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 |> + dplyr::select(X1, X2) |> + rename(first_col = X1) + new_th3 <- data$th3 |> + select(c("first_col = X1", "X2")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 |> + dplyr::select(int_col) |> + mutate(new_col = int_col + 1, .keep = "none") + new_th4 <- data$th4 |> + select("new_col = int_col + 1") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 |> + mutate(Number3 = Number1 * Number2) |> + dplyr::select(X, Number3) + new_th5 <- data$th5 |> + select(c("X", "Number3 = Number1 * Number2")) + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + close(data$client) +}) + +test_that("view behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::select(string_col) + new_th1 <- data$th1 %>% + view("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::select(col2, col3) + new_th2 <- data$th2 %>% + view(c("col2", "col3")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::select(X1, X2) %>% + rename(first_col = X1) + new_th3 <- data$th3 %>% + view(c("first_col = X1", "X2")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::select(int_col) %>% + mutate(new_col = int_col + 1, .keep = "none") + new_th4 <- data$th4 %>% + view("new_col = int_col + 1") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + mutate(Number3 = Number1 * Number2) %>% + dplyr::select(X, Number3) + new_th5 <- data$th5 %>% + view(c("X", "Number3 = Number1 * Number2")) + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + close(data$client) +}) + +test_that("update behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + mutate(dbl_col_again = dbl_col) + new_th1 <- data$th1 %>% + update("dbl_col_again = dbl_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + mutate(col4 = col3 * 2) + new_th2 <- data$th2 %>% + update("col4 = col3 * 2") + expect_equal(as.data.frame(new_tb2), as.data.frame(new_th2)) + + new_tb3 <- data$df3 %>% + mutate(X1001 = X1000, X1002 = X1001) + new_th3 <- data$th3 %>% + update(c("X1001 = X1000", "X1002 = X1001")) + expect_equal(as.data.frame(new_tb3), as.data.frame(new_th3)) + + new_tb4 <- data$df4 %>% + mutate(new_col = sqrt(3 * int_col)) + new_th4 <- data$th4 %>% + update("new_col = sqrt(3 * int_col)") + expect_equal(as.data.frame(new_tb4), as.data.frame(new_th4)) + + new_tb5 <- data$df5 %>% + mutate(Number3 = Number1 + Number2) + new_th5 <- data$th5 %>% + update("Number3 = Number1 + Number2") + expect_equal(as.data.frame(new_tb5), as.data.frame(new_th5)) + + close(data$client) +}) + +test_that("update_view behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + mutate(dbl_col_again = dbl_col) + new_th1 <- data$th1 %>% + update_view("dbl_col_again = dbl_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + mutate(col4 = col3 * 2) + new_th2 <- data$th2 %>% + update_view("col4 = col3 * 2") + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + mutate(X1001 = X1000, X1002 = X1001) + new_th3 <- data$th3 %>% + update_view(c("X1001 = X1000", "X1002 = X1001")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + mutate(new_col = sqrt(3 * int_col)) + new_th4 <- data$th4 %>% + update_view("new_col = sqrt(3 * int_col)") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + mutate(Number3 = Number1 + Number2) + new_th5 <- data$th5 %>% + update_view("Number3 = Number1 + Number2") + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + close(data$client) +}) + +test_that("drop_columns behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::select(-string_col) + new_th1 <- data$th1 %>% + drop_columns("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::select(-c(col1, col2)) + new_th2 <- data$th2 %>% + drop_columns(c("col1", "col2")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + dplyr::select(-paste0("X", seq(2, 1000))) + new_th3 <- data$th3 %>% + drop_columns(paste0("X", seq(2, 1000))) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + close(data$client) +}) + +test_that("where behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + filter(int_col < 3) + new_th1 <- data$th1 %>% + where("int_col < 3") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + filter(col2 == "hello!") + new_th2 <- data$th2 %>% + where("col2 == `hello!`") + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + filter(X1 - X4 + X8 + X32 - 2 * X5 >= 0) + new_th3 <- data$th3 %>% + where("X1 - X4 + X8 + X32 - 2*X5 >= 0") + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + close(data$client) +}) + +test_that("group_by and ungroup behave as expected", { + data <- setup() + + # There is not a clean analog to group_by() in dplyr, so we evaluate + # correctness by evaluating that these functions behave as inverses. + # Easiest when grouping columns are first, otherwise we must also reorder. + + new_th1 <- data$th1 %>% + group_by("string_col") %>% + ungroup() %>% + sort("string_col") + expect_equal(as.data.frame(new_th1), as.data.frame(data$th1 %>% sort("string_col"))) + + new_th3 <- data$th3 %>% + group_by(c("X1", "X2", "X3", "X4", "X5")) %>% + ungroup() %>% + sort(c("X1", "X2", "X3", "X4", "X5")) + expect_equal(as.data.frame(new_th3), as.data.frame(data$th3 %>% sort(c("X1", "X2", "X3", "X4", "X5")))) + + new_th5 <- data$th5 %>% + group_by("X") %>% + ungroup() %>% + sort("X") + expect_equal(as.data.frame(new_th5), as.data.frame(data$th5 %>% sort("X"))) + + new_th6 <- data$th6 %>% + group_by(c("X", "Y")) %>% + ungroup() %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th6), as.data.frame(data$th6 %>% sort(c("X", "Y")))) + + close(data$client) +}) + +test_that("first_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), first)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + first_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), first)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + first_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("last_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), last)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + last_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), last)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + last_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("head_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::group_by(X) %>% + slice_head(n = 2) + new_th1 <- data$th5 %>% + head_by(2, "X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + slice_head(n = 2) + new_th2 <- data$th5 %>% + head_by(2, c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("tail_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::group_by(X) %>% + slice_tail(n = 2) + new_th1 <- data$th5 %>% + tail_by(2, "X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + slice_tail(n = 2) + new_th2 <- data$th5 %>% + tail_by(2, c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("min_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(int_col) %>% + summarise(across(everything(), min)) + new_th1 <- data$th1 %>% + min_by("int_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col2) %>% + summarise(across(everything(), min)) + new_th2 <- data$th2 %>% + min_by("col2") + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + mutate(bool_col1 = X1 >= 0, bool_col2 = X2 >= 0) %>% + dplyr::group_by(bool_col1, bool_col2) %>% + summarise(across(everything(), min)) %>% + arrange(bool_col1, bool_col2) # need to sort because resulting row orders are not the same + new_th3 <- data$th3 %>% + update(c("bool_col1 = X1 >= 0", "bool_col2 = X2 >= 0")) %>% + min_by(c("bool_col1", "bool_col2")) %>% + sort(c("bool_col1", "bool_col2")) # need to sort because resulting row orders are not the same + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(across(everything(), min)) %>% + arrange(bool_col) + new_th4 <- data$th4 %>% + min_by("bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + close(data$client) +}) + +test_that("max_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + dplyr::group_by(int_col) %>% + summarise(across(everything(), max)) + new_th1 <- data$th1 %>% + max_by("int_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + dplyr::group_by(col2) %>% + summarise(across(everything(), max)) + new_th2 <- data$th2 %>% + max_by("col2") + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + mutate(bool_col1 = X1 >= 0, bool_col2 = X2 >= 0) %>% + dplyr::group_by(bool_col1, bool_col2) %>% + summarise(across(everything(), max)) %>% + arrange(bool_col1, bool_col2) # need to sort because resulting row orders are not the same + new_th3 <- data$th3 %>% + update(c("bool_col1 = X1 >= 0", "bool_col2 = X2 >= 0")) %>% + max_by(c("bool_col1", "bool_col2")) %>% + sort(c("bool_col1", "bool_col2")) # need to sort because resulting row orders are not the same + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + dplyr::group_by(bool_col) %>% + summarise(across(everything(), max)) %>% + arrange(bool_col) + new_th4 <- data$th4 %>% + max_by("bool_col") %>% + sort("bool_col") + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + close(data$client) +}) + +test_that("sum_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), sum)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + sum_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), sum)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + sum_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("abs_sum_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + mutate(Number1 = abs(Number1), Number2 = abs(Number2)) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), sum)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + abs_sum_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + mutate(Number1 = abs(Number1), Number2 = abs(Number2)) %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), sum)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + abs_sum_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("avg_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), mean)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + avg_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), mean)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + avg_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("w_avg_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + mutate(weights = Number1 * Number2) %>% + dplyr::group_by(X) %>% + summarise( + Number1 = weighted.mean(Number1, weights), + Number2 = weighted.mean(Number2, weights) + ) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + update("weights = Number1 * Number2") %>% + w_avg_by("weights", "X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + mutate(weights = Number1 * Number2) %>% + dplyr::group_by(X, Y) %>% + summarise( + Number1 = weighted.mean(Number1, weights), + Number2 = weighted.mean(Number2, weights) + ) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + update("weights = Number1 * Number2") %>% + w_avg_by("weights", c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("median_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), median)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + median_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), median)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + median_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("var_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), var)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + var_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), var)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + var_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("std_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), sd)) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + std_by("X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + dplyr::group_by(X, Y) %>% + summarise(across(everything(), sd)) %>% + arrange(X, Y) + new_th2 <- data$th5 %>% + std_by(c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("percentile_by behaves as expected", { + data <- setup() + + # There is not a clean analog to `percentile_by` in dplyr, + # so we construct these data frames directly. + + new_df1 <- data.frame( + X = c("A", "B", "C"), + Number1 = c(50, -44, -70), + Number2 = c(-50, 76, 130) + ) + new_th1 <- data$th5 %>% + drop_columns("Y") %>% + percentile_by(0.4, "X") + expect_equal(as.data.frame(new_th1), new_df1) + + new_df2 <- data.frame( + X = c("A", "B", "A", "C", "B", "B", "C"), + Y = c("M", "N", "O", "N", "P", "O", "M"), + Number1 = c(50, -44, 49, 11, -66, 29, -70), + Number2 = c(-55, 76, 20, 130, 137, 73, 214) + ) + new_th2 <- data$th5 %>% + percentile_by(0.4, c("X", "Y")) + expect_equal(as.data.frame(new_th2), new_df2) + + + close(data$client) +}) + +test_that("count_by behaves as expected", { + data <- setup() + + new_tb1 <- data$df5 %>% + count(X) + new_th1 <- data$th5 %>% + count_by("n", "X") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df5 %>% + count(X, Y) + new_th2 <- data$th5 %>% + count_by("n", c("X", "Y")) %>% + sort(c("X", "Y")) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + close(data$client) +}) + +test_that("sort behaves as expected", { + data <- setup() + + new_tb1 <- data$df1 %>% + arrange(dbl_col) + new_th1 <- data$th1 %>% + sort("dbl_col") + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + new_tb2 <- data$df2 %>% + arrange(desc(col3)) + new_th2 <- data$th2 %>% + sort("col3", descending = TRUE) + expect_equal(as.data.frame(new_th2), as.data.frame(new_tb2)) + + new_tb3 <- data$df3 %>% + arrange(X1, X2, X3, X4, X5) + new_th3 <- data$th3 %>% + sort(c("X1", "X2", "X3", "X4", "X5")) + expect_equal(as.data.frame(new_th3), as.data.frame(new_tb3)) + + new_tb4 <- data$df4 %>% + arrange(desc(bool_col), desc(int_col)) + new_th4 <- data$th4 %>% + sort(c("bool_col", "int_col"), descending = TRUE) + expect_equal(as.data.frame(new_th4), as.data.frame(new_tb4)) + + new_tb5 <- data$df5 %>% + arrange(X, desc(Y), Number1) + new_th5 <- data$th5 %>% + sort(c("X", "Y", "Number1"), descending = c(FALSE, TRUE, FALSE)) + expect_equal(as.data.frame(new_th5), as.data.frame(new_tb5)) + + close(data$client) +}) + +test_that("cross_join behaves as expected", { + data <- setup() + + new_th1 <- data$th5 %>% + cross_join(data$th6, + columns_to_match = character(), + columns_to_add = c("X_y = X", "Y_y = Y", "Number1_y = Number1", "Number2_y = Number2") + ) + new_tb1 <- data$df5 %>% + dplyr::cross_join(data$df6) %>% + rename( + X = X.x, Y = Y.x, Number1 = Number1.x, Number2 = Number2.x, + X_y = X.y, Y_y = Y.y, Number1_y = Number1.y, Number2_y = Number2.y + ) + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + close(data$client) +}) + +test_that("natural_join behaves as expected", { + data <- setup() + + new_th2 <- data$th6 %>% + drop_columns("Y") %>% + avg_by("X") + new_th1 <- data$th5 %>% + natural_join(new_th2, + columns_to_match = "X", + columns_to_add = c("Number3 = Number1", "Number4 = Number2") + ) + + new_tb2 <- data$df6 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), mean)) + new_tb1 <- data$df5 %>% + left_join(new_tb2, by = "X") %>% + rename( + Number1 = Number1.x, Number2 = Number2.x, + Number3 = Number1.y, Number4 = Number2.y + ) + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + close(data$client) +}) + +test_that("exact_join behaves as expected", { + data <- setup() + + new_th2 <- data$th6 %>% + drop_columns("Y") %>% + avg_by("X") + new_th1 <- data$th5 %>% + exact_join(new_th2, + columns_to_match = "X", + columns_to_add = c("Number3 = Number1", "Number4 = Number2") + ) + + new_tb2 <- data$df6 %>% + dplyr::select(-Y) %>% + dplyr::group_by(X) %>% + summarise(across(everything(), mean)) + new_tb1 <- data$df5 %>% + left_join(new_tb2, by = "X") %>% + rename(Number1 = Number1.x, Number2 = Number2.x, Number3 = Number1.y, Number4 = Number2.y) + expect_equal(as.data.frame(new_th1), as.data.frame(new_tb1)) + + close(data$client) +}) diff --git a/R/rdeephaven/man/Client.Rd b/R/rdeephaven/man/Client.Rd deleted file mode 100644 index 33b67a6a68a..00000000000 --- a/R/rdeephaven/man/Client.Rd +++ /dev/null @@ -1,133 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/client_wrapper.R -\docType{class} -\name{Client} -\alias{Client} -\title{The Deephaven Client} -\description{ -The Deephaven Client class is responsible for establishing and maintaining -a connection to a running Deephaven server and facilitating basic server requests. -} -\examples{ - -# connect to the Deephaven server running on "localhost:10000" using anonymous 'default' authentication -client_options <- ClientOptions$new() -client <- Client$new(target="localhost:10000", client_options=client_options) - -# open a table that already exists on the server -new_table_handle1 <- client$open_table("table_on_the_server") - -# create a new dataframe, import onto the server, and retrieve a reference -new_data_frame <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) -new_table_handle2 <- client$import_table(new_data_frame) - -# run a python script on the server (default client options specify a Python console) -client$run_script("print([i for i in range(10)])") -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-Client-new}{\code{Client$new()}} -\item \href{#method-Client-open_table}{\code{Client$open_table()}} -\item \href{#method-Client-import_table}{\code{Client$import_table()}} -\item \href{#method-Client-run_script}{\code{Client$run_script()}} -\item \href{#method-Client-clone}{\code{Client$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Client-new}{}}} -\subsection{Method \code{new()}}{ -Connect to a running Deephaven server. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Client$new(target, client_options)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{target}}{The address of the Deephaven server.} - -\item{\code{client_options}}{ClientOptions instance with the parameters needed to connect to the server. -See ?ClientOptions for more information.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Client-open_table}{}}} -\subsection{Method \code{open_table()}}{ -Opens a table named 'name' from the server if it exists. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Client$open_table(name)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{name}}{Name of the table to open from the server, passed as a string.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -TableHandle reference to the requested table. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Client-import_table}{}}} -\subsection{Method \code{import_table()}}{ -Imports a new table to the Deephaven server. Note that this new table is not automatically bound to -a variable name on the server. See `?TableHandle` for more information. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Client$import_table(table_object)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{table_object}}{An R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow RecordBatchReader -containing the data to import to the server.} -} -\if{html}{\out{
}} -} -\subsection{Returns}{ -TableHandle reference to the new table. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Client-run_script}{}}} -\subsection{Method \code{run_script()}}{ -Runs a script on the server. The script must be in the language that the server console was started with. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Client$run_script(script)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{script}}{Code to be executed on the server, passed as a string.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-Client-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{Client$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/R/rdeephaven/man/ClientOptions.Rd b/R/rdeephaven/man/ClientOptions.Rd deleted file mode 100644 index 06d85d6cefc..00000000000 --- a/R/rdeephaven/man/ClientOptions.Rd +++ /dev/null @@ -1,208 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/client_options_wrapper.R -\docType{class} -\name{ClientOptions} -\alias{ClientOptions} -\title{Deephaven ClientOptions} -\description{ -Client options provide a simple interface to the Deephaven server's authentication protocols. -This makes it easy to connect to a Deephaven server with any flavor of authentication, and shields the API from -any future changes to the underlying implementation. - -Currently, three different kinds of authentication that a Deephaven server might be using are suported: - -- "default": Default (or anonymous) authentication does not require any username or password. If - running the Deephaven server locally, this is probably the kind of authentication needed. - -- "basic": Basic authentication requires a standard username and password pair. - -- "custom": Custom authentication requires general key-value pairs. - -In addition to setting the authentication parameters when connecting to a client, a console can be -started in one of our supported server languages. Python and Groovy are currently supported, and the -user must ensure that the server being connected to was started with support for the desired console language. -} -\examples{ - -# connect to a Deephaven server with a Python console running on "localhost:10000" using anonymous 'default' authentication -client_options <- ClientOptions$new() -client <- Client$new(target="localhost:10000", client_options=client_options) - -# connect to a secure Deephaven server with a Groovy console using username/password authentication -client_options <- ClientOptions$new() -client_options$set_basic_authentication(username="user", password="p@ssw0rd123") -client_options$set_session_type("groovy") -client <- Client$new(target="url/to/secure/server", client_options=client_options) -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-ClientOptions-new}{\code{ClientOptions$new()}} -\item \href{#method-ClientOptions-set_default_authentication}{\code{ClientOptions$set_default_authentication()}} -\item \href{#method-ClientOptions-set_basic_authentication}{\code{ClientOptions$set_basic_authentication()}} -\item \href{#method-ClientOptions-set_custom_authentication}{\code{ClientOptions$set_custom_authentication()}} -\item \href{#method-ClientOptions-set_session_type}{\code{ClientOptions$set_session_type()}} -\item \href{#method-ClientOptions-use_tls}{\code{ClientOptions$use_tls()}} -\item \href{#method-ClientOptions-add_int_option}{\code{ClientOptions$add_int_option()}} -\item \href{#method-ClientOptions-add_string_option}{\code{ClientOptions$add_string_option()}} -\item \href{#method-ClientOptions-add_extra_header}{\code{ClientOptions$add_extra_header()}} -\item \href{#method-ClientOptions-clone}{\code{ClientOptions$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-new}{}}} -\subsection{Method \code{new()}}{ -Create a ClientOptions instance. This will default to using default (anonymous) authentication and a Python console. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$new()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-set_default_authentication}{}}} -\subsection{Method \code{set_default_authentication()}}{ -Use default (anonymous) authentication. If running a Deephaven server locally, this is likely the kind of authentication needed. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$set_default_authentication()}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-set_basic_authentication}{}}} -\subsection{Method \code{set_basic_authentication()}}{ -Use basic (username/password based) authentication. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$set_basic_authentication(username, password)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{username}}{Username of the account to use for authentication, supplied as a string.} - -\item{\code{password}}{Password of the account, supplied as a string.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-set_custom_authentication}{}}} -\subsection{Method \code{set_custom_authentication()}}{ -Use custom (general key/value based) authentication. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$set_custom_authentication(auth_key, auth_value)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{auth_key}}{Key to use for authentication, supplied as a string.} - -\item{\code{auth_value}}{Value to use for authentication, supplied as a string.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-set_session_type}{}}} -\subsection{Method \code{set_session_type()}}{ -Set the session type of the console (e.g., "python", "groovy", etc.). The session type must be supported on the server. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$set_session_type(session_type)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{session_type}}{Desired language of the console. "python", "groovy", etc.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-use_tls}{}}} -\subsection{Method \code{use_tls()}}{ -Use the TLS protocol in authentication and subsequent communication. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$use_tls(root_certs = "")}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{root_certs}}{Optional PEM-encoded certificate root for server connections. Defaults to system defaults.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-add_int_option}{}}} -\subsection{Method \code{add_int_option()}}{ -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$add_int_option(opt, val)}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-add_string_option}{}}} -\subsection{Method \code{add_string_option()}}{ -Adds a string-valued option for the configuration of the underlying gRPC channels. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$add_string_option(opt, val)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{opt}}{The option key.} - -\item{\code{val}}{The option valiue.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-add_extra_header}{}}} -\subsection{Method \code{add_extra_header()}}{ -Adds an extra header with a constant name and value to be sent with every outgoing server request. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$add_extra_header(header_name, header_value)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{header_name}}{The header name} - -\item{\code{header_value}}{The header value} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-ClientOptions-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{ClientOptions$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/R/rdeephaven/man/TableHandle.Rd b/R/rdeephaven/man/TableHandle.Rd deleted file mode 100644 index c2bc107af0c..00000000000 --- a/R/rdeephaven/man/TableHandle.Rd +++ /dev/null @@ -1,167 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/table_handle_wrapper.R -\docType{class} -\name{TableHandle} -\alias{TableHandle} -\title{Deephaven TableHandles} -\description{ -Deephaven TableHandles are references to tables living on a Deephaven server. They provide an -interface for interacting with tables on the server. -} -\examples{ - -# connect to the Deephaven server running on "localhost:10000" using anonymous 'default' authentication -client_options <- ClientOptions$new() -client <- Client$new(target="localhost:10000", client_options=client_options) - -# open a table that already exists on the server -new_table_handle1 <- client$open_table("table_on_the_server") - -# convert the Deephaven table to an R data frame -new_data_frame <- new_table_handle1$to_data_frame() - -# modify new data frame in R -new_data_frame$New_Int_Col <- c(1, 2, 3, 4, 5) -new_data_frame$New_String_Col <- c("I", "am", "a", "string", "column") - -# push new data frame to the server and name it "new_table" -new_table_handle2 <- client$import_table(new_data_frame) -new_table_handle2$bind_to_variable("new_table") -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-TableHandle-new}{\code{TableHandle$new()}} -\item \href{#method-TableHandle-is_static}{\code{TableHandle$is_static()}} -\item \href{#method-TableHandle-nrow}{\code{TableHandle$nrow()}} -\item \href{#method-TableHandle-bind_to_variable}{\code{TableHandle$bind_to_variable()}} -\item \href{#method-TableHandle-to_arrow_record_batch_stream_reader}{\code{TableHandle$to_arrow_record_batch_stream_reader()}} -\item \href{#method-TableHandle-to_arrow_table}{\code{TableHandle$to_arrow_table()}} -\item \href{#method-TableHandle-to_tibble}{\code{TableHandle$to_tibble()}} -\item \href{#method-TableHandle-to_data_frame}{\code{TableHandle$to_data_frame()}} -\item \href{#method-TableHandle-clone}{\code{TableHandle$clone()}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-new}{}}} -\subsection{Method \code{new()}}{ -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$new(table_handle)}\if{html}{\out{
}} -} - -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-is_static}{}}} -\subsection{Method \code{is_static()}}{ -Whether the table referenced by this TableHandle is static or not. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$is_static()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -TRUE if the table is static, or FALSE if the table is ticking. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-nrow}{}}} -\subsection{Method \code{nrow()}}{ -Number of rows in the table referenced by this TableHandle, currently only implemented for static tables. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$nrow()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -The number of rows in the table. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-bind_to_variable}{}}} -\subsection{Method \code{bind_to_variable()}}{ -Binds the table referenced by this TableHandle to a variable on the server, -enabling it to be accessed by that name from any Deephaven API. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$bind_to_variable(name)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{name}}{Name for this table on the server.} -} -\if{html}{\out{
}} -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-to_arrow_record_batch_stream_reader}{}}} -\subsection{Method \code{to_arrow_record_batch_stream_reader()}}{ -Imports the table referenced by this TableHandle into an Arrow RecordBatchStreamReader. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$to_arrow_record_batch_stream_reader()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -A RecordBatchStreamReader containing the data from the table referenced by this TableHandle. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-to_arrow_table}{}}} -\subsection{Method \code{to_arrow_table()}}{ -Imports the table referenced by this TableHandle into an Arrow Table. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$to_arrow_table()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -A Table containing the data from the table referenced by this TableHandle. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-to_tibble}{}}} -\subsection{Method \code{to_tibble()}}{ -Imports the table referenced by this TableHandle into a dplyr Tibble. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$to_tibble()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -A Tibble containing the data from the table referenced by this TableHandle. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-to_data_frame}{}}} -\subsection{Method \code{to_data_frame()}}{ -Imports the table referenced by this TableHandle into an R Data Frame. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$to_data_frame()}\if{html}{\out{
}} -} - -\subsection{Returns}{ -A Data Frame containing the data from the table referenced by this TableHandle. -} -} -\if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-TableHandle-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{
}}\preformatted{TableHandle$clone(deep = FALSE)}\if{html}{\out{
}} -} - -\subsection{Arguments}{ -\if{html}{\out{
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{
}} -} -} -} diff --git a/R/rdeephaven/src/client.cpp b/R/rdeephaven/src/client.cpp index c86440bdf46..bd8b386ce3e 100644 --- a/R/rdeephaven/src/client.cpp +++ b/R/rdeephaven/src/client.cpp @@ -1,3 +1,9 @@ +/* + * Most of the methods here wrap methods defined in client.h and client_options.h to expose them to R via Rcpp. + * Thus, the only methods that are documented here are the ones that are unique to these classes, and not already + * documented in one of the header files mentioned above. + */ + #include #include #include @@ -5,6 +11,7 @@ #include #include "deephaven/client/client.h" +#include "deephaven/client/columns.h" #include "deephaven/client/flight.h" #include "deephaven/client/utility/arrow_util.h" @@ -13,38 +20,262 @@ #include +using deephaven::dhcore::utility::Base64Encode; + // forward declaration of classes +class AggregateWrapper; +class TableHandleWrapper; class ClientOptionsWrapper; class ClientWrapper; +// forward declaration of conversion functions +std::vector convertRcppListToVectorOfTypeAggregate(Rcpp::List rcpp_list); +std::vector convertRcppListToVectorOfTypeTableHandle(Rcpp::List rcpp_list); + + // ######################### DH WRAPPERS ######################### +class AggregateWrapper { +public: + AggregateWrapper(); + AggregateWrapper(deephaven::client::Aggregate aggregate) : + internal_aggregation(std::move(aggregate)) {} +private: + deephaven::client::Aggregate internal_aggregation; + friend TableHandleWrapper; + friend std::vector convertRcppListToVectorOfTypeAggregate(Rcpp::List rcpp_list); +}; + +std::vector convertRcppListToVectorOfTypeAggregate(Rcpp::List rcpp_list) { + std::vector converted_list; + converted_list.reserve(rcpp_list.size()); + + for(int i = 0; i < rcpp_list.size(); i++) { + Rcpp::Environment rcpp_list_element = rcpp_list[i]; + Rcpp::XPtr xptr(rcpp_list_element.get(".pointer")); + deephaven::client::Aggregate internal_aggregation = xptr->internal_aggregation; + converted_list.push_back(internal_aggregation); + } + + return converted_list; +} + +AggregateWrapper* INTERNAL_agg_min(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Min(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_max(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Max(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_first(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::First(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_last(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Last(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_sum(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Sum(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_absSum(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::AbsSum(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_avg(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Avg(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_wAvg(std::string weightColumn, std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::WAvg(weightColumn, columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_median(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Med(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_var(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Var(columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_std(std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::Std(columnSpecs)); +} + +// TODO: capitalize the pct method when a fix is merged +AggregateWrapper* INTERNAL_agg_percentile(double percentile, std::vector columnSpecs) { + return new AggregateWrapper(deephaven::client::Aggregate::pct(percentile, false, columnSpecs)); +} + +AggregateWrapper* INTERNAL_agg_count(std::string columnSpec) { + return new AggregateWrapper(deephaven::client::Aggregate::Count(columnSpec)); +} + + class TableHandleWrapper { public: TableHandleWrapper(deephaven::client::TableHandle ref_table) : - internal_tbl_hdl(std::move(ref_table)) {}; + internal_tbl_hdl(std::move(ref_table)) {}; - // TODO: DEEPHAVEN QUERY METHODS WILL GO HERE + TableHandleWrapper* Select(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.Select(columnSpecs)); + }; + + TableHandleWrapper* View(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.View(columnSpecs)); + }; + + TableHandleWrapper* Update(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.Update(columnSpecs)); + }; + + TableHandleWrapper* UpdateView(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.UpdateView(columnSpecs)); + }; + + TableHandleWrapper* DropColumns(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.DropColumns(columnSpecs)); + }; + + TableHandleWrapper* Where(std::string condition) { + return new TableHandleWrapper(internal_tbl_hdl.Where(condition)); + }; + + TableHandleWrapper* GroupBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.By(columnSpecs)); + }; + + TableHandleWrapper* Ungroup(std::vector groupByColumns) { + return new TableHandleWrapper(internal_tbl_hdl.Ungroup(false, groupByColumns)); + }; + + TableHandleWrapper* AggBy(Rcpp::List aggregations, std::vector groupByColumns) { + std::vector converted_aggregations = convertRcppListToVectorOfTypeAggregate(aggregations); + return new TableHandleWrapper(internal_tbl_hdl.By(deephaven::client::AggregateCombo::Create(converted_aggregations), groupByColumns)); + } + + TableHandleWrapper* FirstBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.FirstBy(columnSpecs)); + }; + + TableHandleWrapper* LastBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.LastBy(columnSpecs)); + }; + + TableHandleWrapper* HeadBy(int64_t n, std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.HeadBy(n, columnSpecs)); + }; + + TableHandleWrapper* TailBy(int64_t n, std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.TailBy(n, columnSpecs)); + }; + + TableHandleWrapper* MinBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.MinBy(columnSpecs)); + }; + + TableHandleWrapper* MaxBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.MaxBy(columnSpecs)); + }; + + TableHandleWrapper* SumBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.SumBy(columnSpecs)); + }; + + TableHandleWrapper* AbsSumBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.AbsSumBy(columnSpecs)); + }; + + TableHandleWrapper* AvgBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.AvgBy(columnSpecs)); + }; + + TableHandleWrapper* WAvgBy(std::string weightColumn, std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.WAvgBy(weightColumn, columnSpecs)); + }; + + TableHandleWrapper* MedianBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.MedianBy(columnSpecs)); + }; + + TableHandleWrapper* VarBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.VarBy(columnSpecs)); + }; + + TableHandleWrapper* StdBy(std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.StdBy(columnSpecs)); + }; + + TableHandleWrapper* PercentileBy(double percentile, std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.PercentileBy(percentile, columnSpecs)); + }; + + TableHandleWrapper* CountBy(std::string countByColumn, std::vector columnSpecs) { + return new TableHandleWrapper(internal_tbl_hdl.CountBy(countByColumn, columnSpecs)); + }; + + TableHandleWrapper* CrossJoin(const TableHandleWrapper &rightSide, std::vector columnsToMatch, std::vector columnsToAdd) { + return new TableHandleWrapper(internal_tbl_hdl.CrossJoin(rightSide.internal_tbl_hdl, columnsToMatch, columnsToAdd)); + }; + + TableHandleWrapper* NaturalJoin(const TableHandleWrapper &rightSide, std::vector columnsToMatch, std::vector columnsToAdd) { + return new TableHandleWrapper(internal_tbl_hdl.NaturalJoin(rightSide.internal_tbl_hdl, columnsToMatch, columnsToAdd)); + }; + + TableHandleWrapper* ExactJoin(const TableHandleWrapper &rightSide, std::vector columnsToMatch, std::vector columnsToAdd) { + return new TableHandleWrapper(internal_tbl_hdl.ExactJoin(rightSide.internal_tbl_hdl, columnsToMatch, columnsToAdd)); + }; + + TableHandleWrapper* Head(int64_t n) { + return new TableHandleWrapper(internal_tbl_hdl.Head(n)); + }; + + TableHandleWrapper* Tail(int64_t n) { + return new TableHandleWrapper(internal_tbl_hdl.Tail(n)); + }; + + TableHandleWrapper* Merge(Rcpp::List sources) { + std::vector converted_sources = convertRcppListToVectorOfTypeTableHandle(sources); + return new TableHandleWrapper(internal_tbl_hdl.Merge(converted_sources)); + }; + + TableHandleWrapper* Sort(std::vector columnSpecs, std::vector descending, std::vector absSort) { + std::vector sort_pairs; + sort_pairs.reserve(columnSpecs.size()); + + if (descending.size() == 1) { + descending = std::vector(columnSpecs.size(), descending[0]); + } + + if (absSort.size() == 1) { + absSort = std::vector(columnSpecs.size(), absSort[0]); + } + + for(int i = 0; i < columnSpecs.size(); i++) { + if (!descending[i]) { + sort_pairs.push_back(deephaven::client::SortPair::Ascending(columnSpecs[i], absSort[i])); + } else { + sort_pairs.push_back(deephaven::client::SortPair::Descending(columnSpecs[i], absSort[i])); + } + } + + return new TableHandleWrapper(internal_tbl_hdl.Sort(sort_pairs)); + }; - /** - * Whether the table was static at the time internal_tbl_hdl was created. - */ bool IsStatic() { return internal_tbl_hdl.IsStatic(); } - /** - * Number of rows in the table at the time internal_tbl_hdl was created. - */ int64_t NumRows() { return internal_tbl_hdl.NumRows(); } - /** - * Binds the table referenced by this table handle to a variable on the server called tableName. - * Without this call, new tables are not accessible from the client. - * @param tableName Name for the new table on the server. - */ + int64_t NumCols() { + return internal_tbl_hdl.Schema()->NumCols(); + } + void BindToVariable(std::string tableName) { internal_tbl_hdl.BindToVariable(tableName); } @@ -70,26 +301,41 @@ class TableHandleWrapper { private: deephaven::client::TableHandle internal_tbl_hdl; + friend std::vector convertRcppListToVectorOfTypeTableHandle(Rcpp::List rcpp_list); }; +std::vector convertRcppListToVectorOfTypeTableHandle(Rcpp::List rcpp_list) { + std::vector converted_list; + converted_list.reserve(rcpp_list.size()); + + for(int i = 0; i < rcpp_list.size(); i++) { + Rcpp::Environment rcpp_list_element = rcpp_list[i]; + Rcpp::XPtr xptr(rcpp_list_element.get(".pointer")); + deephaven::client::TableHandle internal_tbl_hdl = xptr->internal_tbl_hdl; + converted_list.push_back(internal_tbl_hdl); + } + + return converted_list; +} + -// TODO: Document this guy class ClientOptionsWrapper { public: ClientOptionsWrapper() : - internal_options(std::make_shared()) {} + internal_options(std::make_shared()) {} void SetDefaultAuthentication() { internal_options->SetDefaultAuthentication(); } - void SetBasicAuthentication(const std::string &username, const std::string &password) { - internal_options->SetBasicAuthentication(username, password); + void SetBasicAuthentication(const std::string &authentication_token) { + const std::string authentication_token_base64 = Base64Encode(authentication_token); + internal_options->SetCustomAuthentication("Basic", authentication_token_base64); } - void SetCustomAuthentication(const std::string &authenticationKey, const std::string &authenticationValue) { - internal_options->SetCustomAuthentication(authenticationKey, authenticationValue); + void SetCustomAuthentication(const std::string &authentication_type, const std::string &authentication_token) { + internal_options->SetCustomAuthentication(authentication_type, authentication_token); } void SetSessionType(const std::string &sessionType) { @@ -122,26 +368,24 @@ class ClientOptionsWrapper { }; - class ClientWrapper { public: ClientWrapper(std::string target, const ClientOptionsWrapper &client_options) : - internal_client(deephaven::client::Client::Connect(target, *client_options.internal_options)) {} + internal_client(deephaven::client::Client::Connect(target, *client_options.internal_options)) {} - /** - * Fetches a reference to a table named tableName on the server if it exists. - * @param tableName Name of the table to search for. - * @return TableHandle reference to the fetched table. - */ TableHandleWrapper* OpenTable(std::string tableName) { return new TableHandleWrapper(internal_tbl_hdl_mngr.FetchTable(tableName)); } - /** - * Runs a script on the server in the console language if a console was created. - * @param code String of the code to be executed on the server. - */ + TableHandleWrapper* EmptyTable(int64_t size) { + return new TableHandleWrapper(internal_tbl_hdl_mngr.EmptyTable(size)); + } + + TableHandleWrapper* TimeTable(int64_t startTimeNanos, int64_t periodNanos) { + return new TableHandleWrapper(internal_tbl_hdl_mngr.TimeTable(startTimeNanos, periodNanos)); + }; + void RunScript(std::string code) { internal_tbl_hdl_mngr.RunScript(code); } @@ -191,7 +435,7 @@ class ClientWrapper { auto ticket = internal_tbl_hdl_mngr.NewTicket(); auto fd = deephaven::client::utility::ConvertTicketToFlightDescriptor(ticket); - + deephaven::client::utility::OkOrThrow(DEEPHAVEN_EXPR_MSG(wrapper.FlightClient()->DoPut(options, fd, schema, &fsw, &fmr))); while(true) { std::shared_ptr this_batch; @@ -208,8 +452,12 @@ class ClientWrapper { return new TableHandleWrapper(new_tbl_hdl); } + void Close() { + internal_client.Close(); + } + private: - const deephaven::client::Client internal_client; + deephaven::client::Client internal_client; const deephaven::client::TableHandleManager internal_tbl_hdl_mngr = internal_client.GetManager(); }; @@ -220,17 +468,73 @@ using namespace Rcpp; RCPP_EXPOSED_CLASS(ClientOptionsWrapper) RCPP_EXPOSED_CLASS(TableHandleWrapper) +RCPP_EXPOSED_CLASS(AggregateWrapper) +RCPP_EXPOSED_CLASS(SortPairWrapper) RCPP_EXPOSED_CLASS(ArrowArrayStream) RCPP_MODULE(DeephavenInternalModule) { + class_("INTERNAL_Aggregate") + ; + function("INTERNAL_agg_first", &INTERNAL_agg_first); + function("INTERNAL_agg_last", &INTERNAL_agg_last); + function("INTERNAL_agg_min", &INTERNAL_agg_min); + function("INTERNAL_agg_max", &INTERNAL_agg_max); + function("INTERNAL_agg_sum", &INTERNAL_agg_sum); + function("INTERNAL_agg_abs_sum", &INTERNAL_agg_absSum); + function("INTERNAL_agg_avg", &INTERNAL_agg_avg); + function("INTERNAL_agg_w_avg", &INTERNAL_agg_wAvg); + function("INTERNAL_agg_median", &INTERNAL_agg_median); + function("INTERNAL_agg_var", &INTERNAL_agg_var); + function("INTERNAL_agg_std", &INTERNAL_agg_std); + function("INTERNAL_agg_percentile", &INTERNAL_agg_percentile); + function("INTERNAL_agg_count", &INTERNAL_agg_count); + class_("INTERNAL_TableHandle") + .method("select", &TableHandleWrapper::Select) + .method("view", &TableHandleWrapper::View) + .method("update", &TableHandleWrapper::Update) + .method("update_view", &TableHandleWrapper::UpdateView) + .method("drop_columns", &TableHandleWrapper::DropColumns) + .method("where", &TableHandleWrapper::Where) + + .method("group_by", &TableHandleWrapper::GroupBy) + .method("ungroup", &TableHandleWrapper::Ungroup) + + .method("agg_by", &TableHandleWrapper::AggBy) + .method("first_by", &TableHandleWrapper::FirstBy) + .method("last_by", &TableHandleWrapper::LastBy) + .method("head_by", &TableHandleWrapper::HeadBy) + .method("tail_by", &TableHandleWrapper::TailBy) + .method("min_by", &TableHandleWrapper::MinBy) + .method("max_by", &TableHandleWrapper::MaxBy) + .method("sum_by", &TableHandleWrapper::SumBy) + .method("abs_sum_by", &TableHandleWrapper::AbsSumBy) + .method("avg_by", &TableHandleWrapper::AvgBy) + .method("w_avg_by", &TableHandleWrapper::WAvgBy) + .method("median_by", &TableHandleWrapper::MedianBy) + .method("var_by", &TableHandleWrapper::VarBy) + .method("std_by", &TableHandleWrapper::StdBy) + .method("percentile_by", &TableHandleWrapper::PercentileBy) + .method("count_by", &TableHandleWrapper::CountBy) + + .method("cross_join", &TableHandleWrapper::CrossJoin) + .method("natural_join", &TableHandleWrapper::NaturalJoin) + .method("exact_join", &TableHandleWrapper::ExactJoin) + + .method("head", &TableHandleWrapper::Head) + .method("tail", &TableHandleWrapper::Tail) + .method("merge", &TableHandleWrapper::Merge) + .method("sort", &TableHandleWrapper::Sort) + .method("is_static", &TableHandleWrapper::IsStatic) .method("num_rows", &TableHandleWrapper::NumRows) + .method("num_cols", &TableHandleWrapper::NumCols) .method("bind_to_variable", &TableHandleWrapper::BindToVariable) .method("get_arrow_array_stream_ptr", &TableHandleWrapper::GetArrowArrayStreamPtr) ; + class_("INTERNAL_ClientOptions") .constructor() .method("set_default_authentication", &ClientOptionsWrapper::SetDefaultAuthentication) @@ -244,12 +548,17 @@ RCPP_MODULE(DeephavenInternalModule) { .method("add_extra_header", &ClientOptionsWrapper::AddExtraHeader) ; + class_("INTERNAL_Client") .constructor() .method("open_table", &ClientWrapper::OpenTable) + .method("empty_table", &ClientWrapper::EmptyTable) + .method("time_table", &ClientWrapper::TimeTable) .method("check_for_table", &ClientWrapper::CheckForTable) .method("run_script", &ClientWrapper::RunScript) .method("new_arrow_array_stream_ptr", &ClientWrapper::NewArrowArrayStreamPtr) .method("new_table_from_arrow_array_stream_ptr", &ClientWrapper::NewTableFromArrowArrayStreamPtr) + .method("close", &ClientWrapper::Close) ; -} + +} \ No newline at end of file diff --git a/R/rdeephaven/tests/testthat/test_client_options_wrapper.R b/R/rdeephaven/tests/testthat/test_client_options_wrapper.R deleted file mode 100644 index 075f550d05b..00000000000 --- a/R/rdeephaven/tests/testthat/test_client_options_wrapper.R +++ /dev/null @@ -1,163 +0,0 @@ -library(testthat) -library(rdeephaven) - -##### TESTING GOOD INPUTS ##### - -test_that("initializing ClientOptions does not throw an error", { - expect_no_error(client_options <- ClientOptions$new()) -}) - -test_that("setting default authentication does not throw an error", { - client_options <- ClientOptions$new() - expect_no_error(client_options$set_default_authentication()) -}) - -test_that("setting basic authentication with string inputs does not throw an error", { - client_options <- ClientOptions$new() - expect_no_error(client_options$set_basic_authentication("my_username", "my_password")) -}) - -test_that("setting custom authentication with string inputs does not throw an error", { - client_options <- ClientOptions$new() - expect_no_error(client_options$set_custom_authentication("my_key", "my_value")) -}) - -test_that("setting session type to python does not throw an error", { - client_options <- ClientOptions$new() - expect_no_error(client_options$set_session_type("python")) -}) - -test_that("setting session type to groovy does not throw an error", { - client_options <- ClientOptions$new() - expect_no_error(client_options$set_session_type("groovy")) -}) - -##### TESTING BAD INPUTS ##### - -test_that("setting basic authentication with bad input types fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$set_basic_authentication(12345, "my_password"), - "'username' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$set_basic_authentication("my_username", 12345), - "'password' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$set_basic_authentication(c("I", "am", "a", "string"), "my_password"), - "'username' must be passed as a single string. Got a character vector of length 4 instead." - ) - expect_error( - client_options$set_basic_authentication("my_username", c("I", "am", "a", "string")), - "'password' must be passed as a single string. Got a character vector of length 4 instead." - ) -}) - -test_that("setting custom authentication with bad input types fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$set_custom_authentication(12345, "my_auth_value"), - "'auth_key' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$set_custom_authentication("my_auth_key", 12345), - "'auth_value' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$set_custom_authentication(c("I", "am", "a", "string"), "my_auth_value"), - "'auth_key' must be passed as a single string. Got a character vector of length 4 instead." - ) - expect_error( - client_options$set_custom_authentication("my_auth_key", c("I", "am", "a", "string")), - "'auth_value' must be passed as a single string. Got a character vector of length 4 instead." - ) -}) - -test_that("setting bad session type fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$set_session_type(12345), - "'session_type' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$set_session_type(c("I", "am", "string")), - "'session_type' must be passed as a single string. Got a character vector of length 3 instead." - ) -}) - -test_that("using tls with bad input type fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$use_tls(12345), - "'root_certs' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$use_tls(c("double", "string")), - "'root_certs' must be passed as a single string. Got a character vector of length 2 instead." - ) -}) - -test_that("add_int_option with bad types fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$add_int_option(12345, 12345), - "'opt' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$add_int_option(c("several", "strings"), 12345), - "'opt' must be passed as a single string. Got a character vector of length 2 instead." - ) - expect_error( - client_options$add_int_option("option_key", "blah blah"), - "'val' must be an integer. Got an object of class character instead." - ) - expect_error( - client_options$add_int_option("option_key", 12345.6789), - "'val' must be an integer. Got a non-integer numeric type instead." - ) - expect_error( - client_options$add_int_option("option_key", c(1, 2, 3, 4, 5)), - "'val' must be an integer. Got a numeric vector of length 5 instead." - ) -}) - -test_that("add_string_option with bad types fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$add_string_option(12345, "option_val"), - "'opt' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$add_string_option(c("several", "strings"), "option_val"), - "'opt' must be passed as a single string. Got a character vector of length 2 instead." - ) - expect_error( - client_options$add_string_option("option_key", 12345), - "'val' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$add_string_option("option_key", c("several", "many", "strings")), - "'val' must be passed as a single string. Got a character vector of length 3 instead." - ) -}) - -test_that("add_extra_header with bad types fails nicely", { - client_options <- ClientOptions$new() - expect_error( - client_options$add_extra_header(12345, "header_value"), - "'header_name' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$add_extra_header(c("several", "strings"), "header_value"), - "'header_name' must be passed as a single string. Got a character vector of length 2 instead." - ) - expect_error( - client_options$add_extra_header("header_name", 12345), - "'header_value' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client_options$add_extra_header("header_name", c("several", "many", "strings")), - "'header_value' must be passed as a single string. Got a character vector of length 3 instead." - ) -}) diff --git a/R/rdeephaven/tests/testthat/test_client_wrapper.R b/R/rdeephaven/tests/testthat/test_client_wrapper.R deleted file mode 100644 index 71060498790..00000000000 --- a/R/rdeephaven/tests/testthat/test_client_wrapper.R +++ /dev/null @@ -1,215 +0,0 @@ -library(testthat) -library(rdeephaven) - -setup <- function() { - df1 <- data.frame( - string_col = c("I", "am", "a", "string", "column"), - int_col = c(0, 1, 2, 3, 4), - dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) - ) - - df2 <- data.frame( - col1 = rep(3.14, 100), - col2 = rep("hello!", 100), - col3 = rnorm(100) - ) - - df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) - - df4 <- data.frame( - time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], - bool_col = sample(c(TRUE, FALSE), 250000, TRUE), - int_col = sample(0:10000, 250000, TRUE) - ) - - return(list("df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4)) -} - -##### TESTING GOOD INPUTS ##### - -test_that("client connection works in the simple case of anonymous authentication", { - # assumes correctness of client options - client_options <- ClientOptions$new() - - # TODO: assumes server is actually running on localhost:10000, this is probably bad for CI - expect_no_error(client <- Client$new(target = "localhost:10000", client_options = client_options)) -}) - -# All of the following tests assume the correctness of Client$new(...) to make the connection. - -test_that("import_table does not fail with data frame inputs of simple column types", { - data <- setup() - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_no_error(client$import_table(data$df1)) - expect_no_error(client$import_table(data$df2)) - expect_no_error(client$import_table(data$df3)) - expect_no_error(client$import_table(data$df4)) -}) - -test_that("import_table does not fail with tibble inputs of simple column types", { - data <- setup() - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_no_error(client$import_table(as_tibble(data$df1))) - expect_no_error(client$import_table(as_tibble(data$df2))) - expect_no_error(client$import_table(as_tibble(data$df3))) - expect_no_error(client$import_table(as_tibble(data$df4))) -}) - -test_that("import_table does not fail with arrow table inputs of simple column types", { - data <- setup() - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_no_error(client$import_table(arrow_table(data$df1))) - expect_no_error(client$import_table(arrow_table(data$df2))) - expect_no_error(client$import_table(arrow_table(data$df3))) - expect_no_error(client$import_table(arrow_table(data$df4))) -}) - -test_that("import_table does not fail with record batch reader inputs of simple column types", { - data <- setup() - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_no_error(client$import_table(as_record_batch_reader(arrow_table(data$df1)))) - expect_no_error(client$import_table(as_record_batch_reader(arrow_table(data$df2)))) - expect_no_error(client$import_table(as_record_batch_reader(arrow_table(data$df3)))) - expect_no_error(client$import_table(as_record_batch_reader(arrow_table(data$df4)))) -}) - -# The following tests additionally assume the correctness of client$import_table(...) AND table_handle$bind_to_variable(), -# as we have to create data, push it to the server, and name it in order to test client$open_table(). -# Additionally, we assume the correctness of table_handle$to_data_frame() to make concrete comparisons. - -test_that("open_table opens the correct table from the server", { - data <- setup() - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - th1 <- client$import_table(data$df1) - th1$bind_to_variable("table1") - expect_equal(client$open_table("table1")$to_data_frame(), th1$to_data_frame()) - - th2 <- client$import_table(data$df2) - th2$bind_to_variable("table2") - expect_equal(client$open_table("table2")$to_data_frame(), th2$to_data_frame()) - - th3 <- client$import_table(data$df3) - th3$bind_to_variable("table3") - expect_equal(client$open_table("table3")$to_data_frame(), th3$to_data_frame()) - - th4 <- client$import_table(data$df4) - th4$bind_to_variable("table4") - expect_equal(client$open_table("table4")$to_data_frame(), th4$to_data_frame()) -}) - -test_that("run_script correctly runs a python script", { - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_no_error(client$run_script( - ' -from deephaven import new_table -from deephaven.column import string_col, int_col - -static_table_from_python_script = new_table([ -string_col("Name_String_Col", ["Data String 1", "Data String 2", "Data String 3"]), -int_col("Name_Int_Col", [44, 55, 66]) -]) -' - )) - - expect_no_error(client$open_table("static_table_from_python_script")) -}) - -##### TESTING BAD INPUTS ##### - -test_that("client connection fails nicely with bad target but good client_options", { - # assumes correctness of client options - client_options <- ClientOptions$new() - - # TODO: Bad address needs better error handling from the R side - expect_error(client <- Client$new(target = "bad address", client_options = client_options)) - expect_error( - client <- Client$new(target = 12345, client_options = client_options), - "'target' must be passed as a single string. Got an object of class numeric instead." - ) - expect_error( - client <- Client$new(target = c("hello", "my", "name", "is"), client_options = client_options), - "'target' must be passed as a single string. Got a character vector of length 4 instead." - ) -}) - -test_that("client connection fails nicely with good target but bad client_options", { - # TODO: these all assume that the server is actually running on localhost:10000, probably bad for CI - expect_error( - client <- Client$new(target = "localhost:10000", client_options = "bad"), - "'client_options' should be a Deephaven ClientOptions object. Got an object of type character instead." - ) - expect_error( - client <- Client$new(target = "localhost:10000", client_options = 12345), - "'client_options' should be a Deephaven ClientOptions object. Got an object of type numeric instead." - ) - - # TODO: Invalid auth details needs better error handling from the R side - bad_client_options1 <- ClientOptions$new() - bad_client_options1$set_basic_authentication("my_username", "my_password") - expect_error(client <- Client$new(target = "localhost:10000", client_options = bad_client_options1)) - - bad_client_options2 <- ClientOptions$new() - bad_client_options2$set_session_type("groovy") - expect_error(client <- Client$new(target = "localhost:10000", client_options = bad_client_options2)) -}) - -test_that("import_table fails nicely with bad inputs", { - library(datasets) - - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_error( - client$import_table(12345), - "'table_object' must be either an R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow Record Batch Reader. Got an object of class numeric instead." - ) - expect_error( - client$import_table("hello!"), - "'table_object' must be either an R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow Record Batch Reader. Got an object of class character instead." - ) - - # TODO: this needs better error handling, but it is unclear whether that happens on the server side or the R side. - data(iris) - expect_error(client$import_table(iris)) - - data(HairEyeColor) - expect_error( - client$import_table(HairEyeColor), - "'table_object' must be either an R Data Frame, a dplyr Tibble, an Arrow Table, or an Arrow Record Batch Reader. Got an object of class table instead." - ) -}) - -test_that("open_table fails nicely with bad inputs", { - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_error(client$open_table(""), "The table '' you're trying to pull does not exist on the server.") - expect_error(client$open_table(12345), "'name' must be passed as a single string. Got an object of class numeric instead.") - expect_error(client$open_table(client_options), "'name' must be passed as a single string. Got an object of class ClientOptions instead.") - expect_error(client$open_table(c("I", "am", "string")), "'name' must be passed as a single string. Got a character vector of length 3 instead.") -}) - -test_that("run_script fails nicely with bad input types", { - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - expect_error(client$run_script(12345), "'script' must be passed as a single string. Got an object of class numeric instead.") - expect_error(client$run_script(c("I", "am", "a", "string")), "'script' must be passed as a single string. Got a character vector of length 4 instead.") -}) diff --git a/R/rdeephaven/tests/testthat/test_table_handle_wrapper.R b/R/rdeephaven/tests/testthat/test_table_handle_wrapper.R deleted file mode 100644 index 45e9fbaa2f2..00000000000 --- a/R/rdeephaven/tests/testthat/test_table_handle_wrapper.R +++ /dev/null @@ -1,187 +0,0 @@ -library(testthat) -library(rdeephaven) - -setup <- function() { - df1 <- data.frame( - string_col = c("I", "am", "a", "string", "column"), - int_col = c(0, 1, 2, 3, 4), - dbl_col = c(1.65, 3.1234, 100000.5, 543.234567, 0.00) - ) - - df2 <- data.frame( - col1 = rep(3.14, 100), - col2 = rep("hello!", 100), - col3 = rnorm(100) - ) - - df3 <- data.frame(matrix(rnorm(10 * 1000), nrow = 10)) - - df4 <- data.frame( - time_col = seq.POSIXt(as.POSIXct(Sys.Date()), as.POSIXct(Sys.Date() + 30), by = "1 sec")[250000], - bool_col = sample(c(TRUE, FALSE), 250000, TRUE), - int_col = sample(0:10000, 250000, TRUE) - ) - - # in order to test TableHandle, we need to have tables on the server that we know everything about. - # thus, we have to push these created tables to the server and get TableHandles to each of them. - # thus, we depend on the correctness of client$import_table(), ClientOptions$new(), and Client$new() - - # set up client - client_options <- ClientOptions$new() - client <- Client$new(target = "localhost:10000", client_options = client_options) - - # move dataframes to server and get TableHandles for testing - th1 <- client$import_table(df1) - th2 <- client$import_table(df2) - th3 <- client$import_table(df3) - th4 <- client$import_table(df4) - - return(list( - "client" = client, - "df1" = df1, "df2" = df2, "df3" = df3, "df4" = df4, - "th1" = th1, "th2" = th2, "th3" = th3, "th4" = th4 - )) -} - -##### TESTING GOOD INPUTS ##### - -test_that("is_static returns the correct value", { - data <- setup() - - expect_true(data$th1$is_static()) - expect_true(data$th2$is_static()) - expect_true(data$th3$is_static()) - expect_true(data$th4$is_static()) - - # TODO: test ticking tables when they can be created from R -}) - -test_that("nrow returns the correct number of rows", { - data <- setup() - - expect_equal(data$th1$nrow(), nrow(data$df1)) - expect_equal(data$th2$nrow(), nrow(data$df2)) - expect_equal(data$th3$nrow(), nrow(data$df3)) - expect_equal(data$th4$nrow(), nrow(data$df4)) - - # TODO: test nrow(data$th) when it is implemented -}) - -test_that("bind_to_variable binds the table to a variable", { - data <- setup() - - data$th1$bind_to_variable("table1") - expect_no_error(data$client$open_table("table1")) - - data$th2$bind_to_variable("table2") - expect_no_error(data$client$open_table("table2")) - - data$th3$bind_to_variable("table3") - expect_no_error(data$client$open_table("table3")) - - data$th4$bind_to_variable("table4") - expect_no_error(data$client$open_table("table4")) -}) - -test_that("to_arrow_record_batch_stream_reader returns an identical stream reader", { - data <- setup() - - # actual equality of RecordBatchStreamReaders is not expected, as they contain underlying pointers to relevant data, - # as well as metadata from the Deephaven server, which we do not expect rbr created fully in R to have. - # We care about equality in the sense that coercing to dataframes should yield identical dataframes, so we cast to dataframes and compare. - # Additionally, as.data.frame() does not convert arrow tables to data frames, but to Tibbles. Need another as.data.frame to get a data frame. - - rbr1 <- data$th1$to_arrow_record_batch_stream_reader() - expect_equal(as.data.frame(as.data.frame(rbr1$read_table())), data$df1) - - rbr2 <- data$th2$to_arrow_record_batch_stream_reader() - expect_equal(as.data.frame(as.data.frame(rbr2$read_table())), data$df2) - - rbr3 <- data$th3$to_arrow_record_batch_stream_reader() - expect_equal(as.data.frame(as.data.frame(rbr3$read_table())), data$df3) - - rbr4 <- data$th4$to_arrow_record_batch_stream_reader() - expect_equal(as.data.frame(as.data.frame(rbr4$read_table())), data$df4) -}) - -test_that("to_arrow_table returns a valid Arrow table", { - data <- setup() - - # The rationale for casting RecordBatchStreamReaders to dataframes for comparison also applies to Arrow Tables. - # Additionally, as.data.frame() does not convert arrow tables to data frames, but to Tibbles. Need another as.data.frame to get a data frame. - - arrow_tbl1 <- data$th1$to_arrow_table() - expect_equal(as.data.frame(as.data.frame(arrow_tbl1)), data$df1) - - arrow_tbl2 <- data$th2$to_arrow_table() - expect_equal(as.data.frame(as.data.frame(arrow_tbl2)), data$df2) - - arrow_tbl3 <- data$th3$to_arrow_table() - expect_equal(as.data.frame(as.data.frame(arrow_tbl3)), data$df3) - - arrow_tbl4 <- data$th4$to_arrow_table() - expect_equal(as.data.frame(as.data.frame(arrow_tbl4)), data$df4) -}) - -test_that("to_tibble returns a valid Tibble", { - data <- setup() - - tibble1 <- data$th1$to_tibble() - expect_equal(tibble1, as_tibble(data$df1)) - - tibble2 <- data$th2$to_tibble() - expect_equal(tibble2, as_tibble(data$df2)) - - tibble3 <- data$th3$to_tibble() - expect_equal(tibble3, as_tibble(data$df3)) - - tibble4 <- data$th4$to_tibble() - expect_equal(tibble4, as_tibble(data$df4)) -}) - -test_that("to_data_frame returns a valid data frame", { - data <- setup() - - data_frame1 <- data$th1$to_data_frame() - expect_equal(data_frame1, data$df1) - - data_frame2 <- data$th2$to_data_frame() - expect_equal(data_frame2, data$df2) - - data_frame3 <- data$th3$to_data_frame() - expect_equal(data_frame3, data$df3) - - data_frame4 <- data$th4$to_data_frame() - expect_equal(data_frame4, data$df4) -}) - -##### TESTING BAD INPUTS ##### - -test_that("trying to instantiate a TableHandle directly fails nicely", { - expect_error(TableHandle$new("hello!"), "'table_handle' should be an internal Deephaven TableHandle. If you're seeing this, - you are trying to call the constructor of TableHandle directly, which is not advised.") -}) - -test_that("bind_to_variable fails nicely on bad inputs", { - data <- setup() - - expect_error( - data$th1$bind_to_variable(12345), - "'name' must be passed as a single string. Got an object of class numeric instead." - ) - - expect_error( - data$th1$bind_to_variable(c("multiple", "strings")), - "'name' must be passed as a single string. Got a character vector of length 2 instead." - ) - - expect_error( - data$th1$bind_to_variable(data$df1), - "'name' must be passed as a single string. Got an object of class data.frame instead." - ) - - expect_error( - data$th1$bind_to_variable(list("list", "of", "strings")), - "'name' must be passed as a single string. Got an object of class list instead." - ) -})