diff --git a/R/rdeephaven/NAMESPACE b/R/rdeephaven/NAMESPACE index b5c79e0e035..634a18faaf7 100644 --- a/R/rdeephaven/NAMESPACE +++ b/R/rdeephaven/NAMESPACE @@ -7,7 +7,6 @@ S3method(as_tibble,TableHandle) S3method(dim,TableHandle) S3method(head,TableHandle) S3method(tail,TableHandle) -export(Aggregation) export(Client) export(TableHandle) export(agg_abs_sum) @@ -33,4 +32,4 @@ importFrom(arrow,as_arrow_table) importFrom(arrow,as_record_batch_reader) importFrom(dplyr,as_data_frame) importFrom(dplyr,as_tibble) -useDynLib(rdeephaven, .registration = TRUE) +useDynLib(rdeephaven, .registration = TRUE) \ No newline at end of file diff --git a/R/rdeephaven/R/aggregate_wrapper.R b/R/rdeephaven/R/aggregate_wrapper.R index daafae48361..b5b3bbeadc3 100644 --- a/R/rdeephaven/R/aggregate_wrapper.R +++ b/R/rdeephaven/R/aggregate_wrapper.R @@ -1,3 +1,6 @@ +#' @description +#' An Aggregation represents an aggregation operation that can be passed to `agg_by()` or `agg_all_by()`. +#' Note that Aggregations should not be instantiated directly by user code, but rather by provided agg_* functions. Aggregation <- R6Class("Aggregation", cloneable = FALSE, public = list( @@ -17,68 +20,148 @@ Aggregation <- R6Class("Aggregation", ### All of the functions below return an instance of the above class +#' @description +#' Creates a First aggregation that computes the first value of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_first <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_first, "agg_first", cols=cols)) } +#' @description +#' Creates a Last aggregation that computes the last value of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_last <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_last, "agg_last", cols=cols)) } +#' @description +#' Creates a Minimum aggregation that computes the minimum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_min <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_min, "agg_min", cols=cols)) } +#' @description +#' Creates a Maximum aggregation that computes the maximum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_max <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_max, "agg_max", cols=cols)) } +#' @description +#' Creates a Sum aggregation that computes the sum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_sum, "agg_sum", cols=cols)) } +#' @description +#' Creates an Absolute Sum aggregation that computes the absolute sum of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_abs_sum <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_abs_sum, "agg_abs_sum", cols=cols)) } +#' @description +#' Creates an Average aggregation that computes the average of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_avg <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_avg, "agg_avg", cols=cols)) } +#' @description +#' Creates a Weighted Average aggregation that computes the weighted average of each column in `cols` for each aggregation group. +#' @param wcol String denoting the column to use for weights. This must be a numeric column. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_w_avg <- function(wcol, cols = character()) { verify_string("wcol", wcol, TRUE) verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_w_avg, "agg_w_avg", wcol=wcol, cols=cols)) } +#' @description +#' Creates a Median aggregation that computes the median of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_median <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_median, "agg_median", cols=cols)) } +#' @description +#' Creates a Variance aggregation that computes the variance of each column in `cols` for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. 
+#' @export agg_var <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_var, "agg_var", cols=cols)) } +#' @description +#' Creates a Standard Deviation aggregation that computes the standard deviation of each column in `cols`, for each aggregation group. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_std <- function(cols = character()) { verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_std, "agg_std", cols=cols)) } +#' @description +#' Creates a Percentile aggregation that computes the given percentile of each column in `cols` for each aggregation group. +#' @param percentile Numeric scalar between 0 and 1 denoting the percentile to compute. +#' @param cols String or list of strings denoting the column(s) to aggregate. Can be renaming expressions, i.e. “new_col = col”. +#' Default is to aggregate all non-grouping columns, which is only valid in the `agg_all_by()` operation. +#' @return Aggregation function to be used in `agg_by()` or `agg_all_by()`. +#' @export agg_percentile <- function(percentile, cols = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("cols", cols, FALSE) return(Aggregation$new(INTERNAL_agg_percentile, "agg_percentile", percentile=percentile, cols=cols)) } +#' @description +#' Creates a Count aggregation that counts the number of rows in each aggregation group. +#' Note that this operation is not supported in `agg_all_by()`. +#' @param col String denoting the name of the new column to hold the counts of each aggregation group. +#' @return Aggregation function to be used in `agg_by()`. 
+#' @export agg_count <- function(col) { verify_string("col", col, TRUE) return(Aggregation$new(INTERNAL_agg_count, "agg_count", col=col)) diff --git a/R/rdeephaven/R/client_wrapper.R b/R/rdeephaven/R/client_wrapper.R index acd6e2a145c..6ce3dee5346 100644 --- a/R/rdeephaven/R/client_wrapper.R +++ b/R/rdeephaven/R/client_wrapper.R @@ -1,8 +1,17 @@ +#' @description +#' A Client is the entry point for interacting with the Deephaven server. It is used to create new tables, +#' import data to and export data from the server, and run queries on the server. #' @export Client <- R6Class("Client", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, + + #' @description + #' Calls `initialize_for_xptr` if the first argument is an external pointer, and `initialize_for_target` if the + #' first argument is a string. In the latter case, the remaining keyword arguments are passed to `initialize_for_target`. + #' @param ... Either an external pointer to an existing client connection, or a string denoting the address + #' of a running Deephaven server followed by keyword arguments to `initialize_for_target`. initialize = function(...) { args <- list(...) if (length(args) == 1) { @@ -19,10 +28,41 @@ Client <- R6Class("Client", } return(do.call(self$initialize_for_target, args)) }, + + #' @description + #' Initializes a Client object using a pointer to an existing client connection. + #' @param xptr External pointer to an existing client connection. initialize_for_xptr = function(xptr) { verify_type("xptr", xptr, "externalptr", "XPtr", TRUE) self$.internal_rcpp_object = new(INTERNAL_Client, xptr) }, + + #' @description + #' Initializes a Client object and connects to a Deephaven server. + #' @param target String denoting the address of a Deephaven server, formatted as `"ip:port"`. + #' @param auth_type String denoting the authentication type. 
Can be `"anonymous"`, `"basic"`, + #' or any custom-built authenticator supported by the server, such as `"io.deephaven.authentication.psk.PskAuthenticationHandler"`. + #' Default is `anonymous`. + #' @param username String denoting the username, which only applies if `auth_type` is `basic`. + #' Username and password should not be used in conjunction with `auth_token`. Defaults to an empty string. + #' @param password String denoting the password, which only applies if `auth_type` is `basic`. + #' Username and password should not be used in conjunction with `auth_token`. Defaults to an empty string. + #' @param auth_token String denoting the authentication token. When `auth_type` + #' is `anonymous`, it will be ignored; when `auth_type` is `basic`, it must be + #' `"user:password"` or left blank; when `auth_type` is a custom-built authenticator, it must + #' conform to the specific requirement of that authenticator. This should not be used + #' in conjunction with `username` and `password`. Defaults to an empty string. + #' @param session_type String denoting the session type supported on the server. + #' Currently, `python` and `groovy` are supported. Defaults to `python`. + #' @param use_tls Whether or not to use a TLS connection. Defaults to `FALSE`. + #' @param tls_root_certs String denoting PEM encoded root certificates to use for TLS connection, + #' or `""` to use system defaults. Only used if `use_tls == TRUE`. Defaults to system defaults. + #' @param int_options List of name-value pairs for int-valued options to the underlying + #' grpc channel creation. Defaults to an empty list, which implies not using any channel options. + #' @param string_options List of name-value pairs for string-valued options to the underlying + #' grpc channel creation. Defaults to an empty list, which implies not using any channel options. + #' @param extra_headers List of name-value pairs for additional headers and values + #' to add to server requests. 
Defaults to an empty list, which implies not using any extra headers. initialize_for_target = function( target, auth_type = "anonymous", @@ -138,15 +178,32 @@ Client <- R6Class("Client", client_options = options ) }, + + #' @description + #' Creates an empty table on the server with 'size' rows and no columns. + #' @param size Non-negative integer specifying the number of rows for the new table. + #' @return TableHandle reference to the new table. empty_table = function(size) { verify_nonnegative_int("size", size, TRUE) return(TableHandle$new(self$.internal_rcpp_object$empty_table(size))) }, + + #' @description + #' Creates a ticking table on the server. + #' @param period ISO-8601-formatted string specifying the update frequency of the new table. + #' @param start_time Optional ISO-8601-formatted string specifying the start time of the table. + #' Defaults to now. + #' @return TableHandle reference to the new table. time_table = function(period, start_time = "now") { verify_string("period", period, TRUE) verify_string("start_time", start_time, TRUE) return(TableHandle$new(self$.internal_rcpp_object$time_table(period, start_time))) }, + + #' @description + #' Retrieves a reference to a named table on the server using its name. + #' @param name String denoting the name of the table to retrieve. + #' @return TableHandle reference to the named table. open_table = function(name) { verify_string("name", name, TRUE) if (!private$check_for_table(name)) { @@ -154,6 +211,13 @@ Client <- R6Class("Client", } return(TableHandle$new(self$.internal_rcpp_object$open_table(name))) }, + + #' @description + #' Imports a new table to the Deephaven server. Note that this new table is not automatically bound to + #' a variable name on the server. See `?TableHandle` for more information. + #' @param table_object R Data Frame, dplyr Tibble, Arrow Table, Arrow RecordBatchReader, or other supported table + #' containing the data to import to the server. 
+ #' @return TableHandle reference to the new table. import_table = function(table_object) { table_object_class <- class(table_object) if (table_object_class[[1]] == "data.frame") { @@ -170,14 +234,27 @@ Client <- R6Class("Client", stop(paste0("'table_object' must be a single data frame, tibble, arrow table, or record batch reader. Got an object of class ", table_object_class[[1]], ".")) } }, + + #' @description + #' Retrieves a reference to a named table in the server using its Arrow Flight ticket. + #' @param ticket String denoting the Arrow Flight ticket. + #' @return TableHandle reference to the table. ticket_to_table = function(ticket) { verify_string("ticket", ticket, TRUE) return(TableHandle$new(self$.internal_rcpp_object$make_table_handle_from_ticket(ticket))) }, + + #' @description + #' Runs a script on the server. The script must be in the language that the server console was started with. + #' @param script String containing the code to be executed on the server. run_script = function(script) { verify_string("script", script, TRUE) self$.internal_rcpp_object$run_script(script) }, + + #' @description + #' Closes the client connection. After this method is called, any further server calls will + #' be undefined and will likely result in an error. close = function() { self$.internal_rcpp_object$close() } diff --git a/R/rdeephaven/R/table_handle_wrapper.R b/R/rdeephaven/R/table_handle_wrapper.R index 63c2b8071d9..d538665bfbc 100644 --- a/R/rdeephaven/R/table_handle_wrapper.R +++ b/R/rdeephaven/R/table_handle_wrapper.R @@ -1,8 +1,15 @@ -#' @export +#' @description +#' A TableHandle holds a reference to a Deephaven Table on the server, and provides methods for operating on that table. +#' Note that TableHandles should not be instantiated directly by user code, but rather by server calls accessible from +#' the `Client` class. See `?Client` for more information. 
TableHandle <- R6Class("TableHandle", cloneable = FALSE, public = list( .internal_rcpp_object = NULL, + + #' @description + #' Initializes a new TableHandle from an internal Deephaven TableHandle. + #' @param table_handle Internal Deephaven TableHandle. initialize = function(table_handle) { if (class(table_handle)[[1]] != "Rcpp_INTERNAL_TableHandle") { stop("'table_handle' should be an internal Deephaven TableHandle. If you're seeing this, @@ -10,9 +17,17 @@ TableHandle <- R6Class("TableHandle", } self$.internal_rcpp_object <- table_handle }, + + #' @description + #' Determines whether the table referenced by this TableHandle is static or not. + #' @return TRUE if the table is static, or FALSE if the table is ticking. is_static = function() { return(self$.internal_rcpp_object$is_static()) }, + + #' @description + #' Binds the table referenced by this TableHandle to a variable on the server, so that it can be referenced by that name. + #' @param name Name for this table on the server. bind_to_variable = function(name) { verify_string("name", name, TRUE) self$.internal_rcpp_object$bind_to_variable(name) @@ -20,23 +35,51 @@ TableHandle <- R6Class("TableHandle", ### BASE R METHODS, ALSO IMPLEMENTED FUNCTIONALLY + #' @description + #' Creates a new table containing the first `n` rows of this table. + #' @param n Positive integer specifying the number of rows to return. + #' @return A TableHandle referencing the new table. head = function(n) { verify_positive_int("n", n, TRUE) return(TableHandle$new(self$.internal_rcpp_object$head(n))) }, + + #' @description + #' Creates a new table containing the last `n` rows of this table. + #' @param n Positive integer specifying the number of rows to return. + #' @return A TableHandle referencing the new table consisting of the last n rows of the parent table. 
tail = function(n) { verify_positive_int("n", n, TRUE) return(TableHandle$new(self$.internal_rcpp_object$tail(n))) }, + + #' @description + #' Gets the number of rows in the table referenced by this TableHandle. + #' @return The number of rows in the table. nrow = function() { return(self$.internal_rcpp_object$num_rows()) }, + + #' @description + #' Gets the number of columns in the table referenced by this TableHandle. + #' @return The number of columns in the table. ncol = function() { return(self$.internal_rcpp_object$num_cols()) }, + + #' @description + #' Gets the dimensions of the table referenced by this TableHandle. Equivalent to c(nrow, ncol). + #' @return A vector of length 2, where the first element is the number of rows in the table and the second + #' element is the number of columns in the table. dim = function() { return(c(self$nrow(), self$ncol())) }, + + #' @description + #' Merges several tables into one table on the server. The tables must have the same schema as this table, and can + #' be supplied as a list of TableHandles, any number of TableHandles, or a mix of both. + #' @param ... Arbitrary number of TableHandles or vectors of TableHandles with a schema matching this table. + #' @return A TableHandle referencing the new table. merge = function(...) { table_list <- unlist(c(...)) if (length(table_list) == 0) { @@ -49,21 +92,36 @@ TableHandle <- R6Class("TableHandle", ### CONVERSION METHODS, ALSO IMPLEMENTED FUNCTIONALLY + #' @description + #' Converts the table referenced by this TableHandle to an Arrow RecordBatchStreamReader. + #' @return An Arrow RecordBatchStreamReader constructed from the data of this TableHandle. as_record_batch_reader = function() { ptr <- self$.internal_rcpp_object$get_arrow_array_stream_ptr() rbsr <- arrow::RecordBatchStreamReader$import_from_c(ptr) return(rbsr) }, + + #' @description + #' Converts the table referenced by this TableHandle to an Arrow Table. 
+ #' @return An Arrow Table constructed from the data of this TableHandle. as_arrow_table = function() { rbsr <- self$as_record_batch_reader() arrow_tbl <- rbsr$read_table() return(arrow_tbl) }, + + #' @description + #' Converts the table referenced by this TableHandle to a dplyr tibble. + #' @return A dplyr tibble constructed from the data of this TableHandle. as_tibble = function() { rbsr <- self$as_record_batch_reader() arrow_tbl <- rbsr$read_table() return(as_tibble(arrow_tbl)) }, + + #' @description + #' Converts the table referenced by this TableHandle to an R data frame. + #' @return An R data frame constructed from the data of this TableHandle. as_data_frame = function() { arrow_tbl <- self$as_arrow_table() return(as.data.frame(as.data.frame(arrow_tbl))) # TODO: for some reason as.data.frame on arrow table returns a tibble, not a data frame @@ -71,38 +129,88 @@ TableHandle <- R6Class("TableHandle", ### DEEPHAVEN TABLE OPERATIONS + #' @description + #' Creates a new in-memory table that includes one column for each formula. + #' If no formula is specified, all columns will be included. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. select = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$select(formulas))) }, + + #' @description + #' Creates a new formula table that includes one column for each formula. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. view = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$view(formulas))) }, + + #' @description + #' Creates a new table containing a new, in-memory column for each formula. + #' @param formulas String or list of strings denoting the column formulas. 
+ #' @return A TableHandle referencing the new table. update = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$update(formulas))) }, + + #' @description + #' Creates a new table containing a new formula column for each formula. + #' @param formulas String or list of strings denoting the column formulas. + #' @return A TableHandle referencing the new table. update_view = function(formulas = character()) { verify_string("formulas", formulas, FALSE) return(TableHandle$new(self$.internal_rcpp_object$update_view(formulas))) }, + + #' @description + #' Creates a new table that has the same number of rows as this table, + #' but omits the columns specified in `cols`. + #' @param cols String or list of strings denoting the names of the columns to drop. + #' @return A TableHandle referencing the new table. drop_columns = function(cols = character()) { verify_string("cols", cols, FALSE) return(TableHandle$new(self$.internal_rcpp_object$drop_columns(cols))) }, + + #' @description + #' Creates a new table containing only the rows meeting the filter condition. + #' @param filter String denoting the filter condition. + #' @return A TableHandle referencing the new table. where = function(filter) { verify_string("filter", filter, TRUE) return(TableHandle$new(self$.internal_rcpp_object$where(filter))) }, + + #' @description + #' Creates a new table containing grouping columns and grouped data, with column content grouped into arrays. + #' If no group-by column is given, the content of each column is grouped into its own array. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
group_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$group_by(by))) }, + + #' @description + #' Creates a new table in which array columns from the source table are unwrapped into separate rows. + #' The ungroup columns should be of array types. + #' @param by String or list of strings denoting the names of the columns to ungroup. + #' @return A TableHandle referencing the new table. ungroup = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$ungroup(by))) }, + + #' @description + #' Creates a new table containing grouping columns and grouped data. The resulting grouped data is defined by the + #' aggregation(s) specified. See `?Aggregations` for more information. + #' @param aggs Aggregation or list of Aggregations to perform on non-grouping columns. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. agg_by = function(aggs, by = character()) { verify_type("aggs", aggs, "Aggregation", "Deephaven Aggregation", FALSE) verify_string("by", by, FALSE) @@ -115,76 +223,166 @@ TableHandle <- R6Class("TableHandle", unwrapped_aggs <- lapply(aggs, strip_r6_wrapping) return(TableHandle$new(self$.internal_rcpp_object$agg_by(unwrapped_aggs, by))) }, + + #' @description + #' Creates a new table containing grouping columns and grouped data. The resulting grouped data is defined by the + #' aggregation(s) specified. See `?Aggregations` for more information. + #' This method applies the aggregation to all columns of the table, so it can only + #' accept one aggregation at a time. + #' @param agg Aggregation to perform on non-grouping columns. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
agg_all_by = function(agg, by = character()) { verify_type("agg", agg, "Aggregation", "Deephaven Aggregation", TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$agg_all_by(agg$.internal_rcpp_object, by))) }, + + #' @description + #' Creates a new table containing the first row of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. first_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$first_by(by))) }, + + #' @description + #' Creates a new table containing the last row of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. last_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$last_by(by))) }, + + #' @description + #' Creates a new table containing the first `num_rows` rows of each group. + #' @param num_rows Positive integer specifying the number of rows to return. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. head_by = function(num_rows, by = character()) { verify_positive_int("num_rows", num_rows, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$head_by(num_rows, by))) }, + + #' @description + #' Creates a new table containing the last `num_rows` rows of each group. + #' @param num_rows Positive integer specifying the number of rows to return. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
tail_by = function(num_rows, by = character()) { verify_positive_int("num_rows", num_rows, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$tail_by(num_rows, by))) }, + + #' @description + #' Creates a new table containing the column-wise minimum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. min_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$min_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise maximum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. max_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$max_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise sum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. sum_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$sum_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise absolute sum of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. abs_sum_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$abs_sum_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise average of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. 
avg_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$avg_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise weighted average of each group. + #' @param wcol String denoting the name of the column to use as weights. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. w_avg_by = function(wcol, by = character()) { verify_string("wcol", wcol, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$w_avg_by(wcol, by))) }, + + #' @description + #' Creates a new table containing the column-wise median of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. median_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$median_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise variance of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. var_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$var_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise standard deviation of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. std_by = function(by = character()) { verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$std_by(by))) }, + + #' @description + #' Creates a new table containing the column-wise percentile of each group. + #' @param percentile Numeric scalar between 0 and 1 denoting the percentile to compute. 
+ #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. percentile_by = function(percentile, by = character()) { verify_in_unit_interval("percentile", percentile, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$percentile_by(percentile, by))) }, + + #' @description + #' Creates a new table containing the number of rows in each group. + #' @param col String denoting the name of the new column to hold the counts of each group. + #' @param by String or list of strings denoting the names of the columns to group by. + #' @return A TableHandle referencing the new table. count_by = function(col, by = character()) { verify_string("col", col, TRUE) verify_string("by", by, FALSE) return(TableHandle$new(self$.internal_rcpp_object$count_by(col, by))) }, + + #' @export cross_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -193,6 +391,15 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, plus additional columns containing data + #' from the right table. For columns appended to the left table (joins), row values equal the row values from the + #' right table where the key values in the left and right tables are equal. + #' If there is no matching key in the right table, appended row values are NULL. + #' @param table TableHandle referencing the table to join with. + #' @param on String or list of strings denoting the names of the columns to join on. + #' @param joins String or list of strings denoting the names of the columns to add from `table`. 
natural_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -201,6 +408,14 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, plus additional columns containing data + #' from the right table. For columns appended to the left table (joins), row values equal the row values from the + #' right table where the key values in the left and right tables are equal. + #' @param table TableHandle referencing the table to join with. + #' @param on String or list of strings denoting the names of the columns to join on. + #' @param joins String or list of strings denoting the names of the columns to add from `table`. exact_join = function(table, on = character(), joins = character()) { verify_string("on", on, FALSE) verify_string("joins", joins, FALSE) @@ -209,6 +424,15 @@ TableHandle <- R6Class("TableHandle", on, joins ))) }, + + #' @description + #' Creates a new table containing all the rows and columns of this table, sorted by the specified columns. + #' @param order_by String or list of strings denoting the names of the columns to sort by. + #' @param descending Boolean or list of booleans denoting whether to sort in descending order. + #' If a list is supplied, it must be the same length as `order_by`. + #' @param abs_sort Boolean or list of booleans denoting whether to sort by absolute value. + #' If a list is supplied, it must be the same length as `order_by`. + #' @return A TableHandle referencing the new table. sort = function(order_by, descending = FALSE, abs_sort = FALSE) { verify_string("order_by", order_by, FALSE) verify_bool("descending", descending, FALSE) @@ -249,6 +473,11 @@ dim.TableHandle <- function(x) { return(x$dim()) } +#' @description +#' Merges several tables into one table on the server. 
The tables must have the same schema, and can +#' be supplied as a list of TableHandles, any number of TableHandles, or a mix of both. +#' @param ... Arbitrary number of TableHandles or lists of TableHandles with a uniform schema. +#' @return A TableHandle referencing the new table. #' @export merge_tables <- function(...) { table_list <- unlist(c(...))