From 5be00e79c5b4b04289a8b113bb182c27cf118b57 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Tue, 9 Apr 2024 14:44:27 +0200 Subject: [PATCH 01/12] add basic aggregate/filter/group functionality --- R/aggregate_alignment_loanbook_exposure.R | 41 +++++-- man/aggregate_alignment_loanbook_exposure.Rd | 14 ++- ...st-aggregate_alignment_loanbook_exposure.R | 107 ++++++++++++++++++ 3 files changed, 154 insertions(+), 8 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 9f0625e..ae87a12 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -5,13 +5,21 @@ #' @param level Character. Vector that indicates if the aggregate alignment #' metric should be returned based on the net technology deviations (`net`) or #' disaggregated into buildout and phaseout technologies (`bo_po`). +#' @param group_var Character. Optional vector of length >= 1 that indicates the +#' name of one or more variables the results should be aggregated by, instead +#' of using group_id as the main dimension of aggregation. All names indicated +#' must be available variables in the `matched` data set. The intended use +#' case is to allow analyzing the loan books by additional traits of interest, +#' such as types of financial institutions. #' #' @return NULL #' @export aggregate_alignment_loanbook_exposure <- function(data, matched, - level = c("net", "bo_po")) { + level = c("net", "bo_po"), + group_var = NULL) { group_vars <- c("group_id", "scenario", "region", "sector", "year", "direction") + group_matched <- "group_id" level <- rlang::arg_match(level) # validate input data sets @@ -21,30 +29,49 @@ aggregate_alignment_loanbook_exposure <- function(data, group_vars = group_vars ) + # optionally extend by `group_var`. this currently only allows aggregation of supersets for group_var + if (!is.null(group_var)) { + if (!inherits(group_var, "character")) { + stop(glue::glue("`group_var` must be a character vector. Your input is {class(group_var)}.")) + } + group_vars <- c(group_vars[!group_vars == "group_id"], group_var) + group_matched <- group_var + + group_id_by_group_var <- matched %>% + dplyr::distinct(.data$group_id, !!!rlang::syms(group_var)) + + data <- data %>% + dplyr::inner_join( + group_id_by_group_var, + by = c("group_id") + ) + } + matched <- matched %>% dplyr::select( dplyr::all_of( - c("group_id", "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector") + c(group_matched, "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector") ) ) %>% dplyr::summarise( loan_size_outstanding = sum(.data$loan_size_outstanding, na.rm = TRUE), - .by = c("group_id", "loan_size_outstanding_currency", "name_abcd", "sector") + .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency", "name_abcd", "sector")) ) %>% dplyr::mutate( exposure_weight = .data$loan_size_outstanding / sum(.data$loan_size_outstanding, na.rm = TRUE), - .by = c("group_id", "loan_size_outstanding_currency") + .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency")) ) aggregate_exposure_company <- data %>% dplyr::inner_join( matched, - by = c("group_id", "name_abcd", "sector") + by = c(group_matched, "name_abcd", "sector") ) sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>% dplyr::mutate( - n_companies = dplyr::n(), + # TODO: must be n_distinct + n_companies = dplyr::n_distinct(.data$name_abcd), .by = dplyr::all_of(group_vars) ) %>% dplyr::mutate( @@ -140,7 +167,7 @@ aggregate_alignment_loanbook_exposure <- function(data, "share_companies_aligned", "exposure_weighted_net_alignment" ) ) %>% - dplyr::arrange(.data$group_id, .data$scenario, .data$region, .data$sector, .data$year) + dplyr::arrange(!!!rlang::syms(group_matched), .data$scenario, .data$region, .data$sector, .data$year) return(out) } diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd index 425d54e..9e6ff73 100644 --- a/man/aggregate_alignment_loanbook_exposure.Rd +++ b/man/aggregate_alignment_loanbook_exposure.Rd @@ -4,7 +4,12 @@ \alias{aggregate_alignment_loanbook_exposure} \title{Return loan book level aggregation of company alignment metrics by exposure} \usage{ -aggregate_alignment_loanbook_exposure(data, matched, level = c("net", "bo_po")) +aggregate_alignment_loanbook_exposure( + data, + matched, + level = c("net", "bo_po"), + group_var = NULL +) } \arguments{ \item{data}{data.frame. Holds output pf company indicators} @@ -14,6 +19,13 @@ aggregate_alignment_loanbook_exposure(data, matched, level = c("net", "bo_po")) \item{level}{Character. Vector that indicates if the aggregate alignment metric should be returned based on the net technology deviations (\code{net}) or disaggregated into buildout and phaseout technologies (\code{bo_po}).} + +\item{group_var}{Character. Optional vector of length >= 1 that indicates the +name of one or more variables the results should be aggregated by, instead +of using group_id as the main dimension of aggregation. All names indicated +must be available variables in the \code{matched} data set. The intended use +case is to allow analyzing the loan books by additional traits of interest, +such as types of financial institutions.} } \description{ Return loan book level aggregation of company alignment metrics by exposure diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index 81f1657..45146e1 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -111,4 +111,111 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr names(test_output_aggregate_alignment_loanbook_exposure_net) ) }) + +# When an additional variable is passed via group_var, add group_var to +# variables considered in aggregation (GH: 33) + +# styler: off +test_data_company_net <- tibble::tribble( + ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3, + "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, + "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, + "test_lbk_2", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, + "test_lbk_3", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, + "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1, + "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1, + "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3 +) + +test_matched_group_var <- tibble::tribble( + ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency, ~name_abcd, ~sector, ~foo, ~bar, + "test_lbk_1", "L1", 300000, "USD", "test_company_1", "power", "Yes", "Yes", + "test_lbk_1", "L2", 700000, "USD", "test_company_2", "power", "Yes", "Yes", + "test_lbk_2", "L2", 700000, "USD", "test_company_2", "power", "No", "Yes", + "test_lbk_2", "L3", 1000000, "USD", "test_company_3", "power", "No", "Yes", + "test_lbk_3", "L3", 1000000, "USD", "test_company_3", "power", "No", "Yes", + "test_lbk_3", "L4", 500000, "USD", "test_company_4", "power", "No", "Yes", + "test_lbk_4", "L1", 300000, "USD", "test_company_1", "power", "Yes", "No", + "test_lbk_4", "L4", 500000, "USD", "test_company_4", "power", "Yes", "No" +) +# styler: on + +test_that("net aggregate results with a group_var returns results for each group", { + n_groups <- length(unique(test_matched_group_var$foo)) + + test_output_with_group_var <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net, + group_var = "foo" + ) + + expect_equal( + nrow(test_output_with_group_var), + n_groups + ) +}) + +test_that("net aggregate results with a group_var returns results for each group for multiple variables", { + n_groups_2 <- nrow(dplyr::distinct(test_matched_group_var, .data$foo, .data$bar)) + + test_output_with_group_var_2 <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net, + group_var = c("foo", "bar") + ) + + expect_equal( + nrow(test_output_with_group_var_2), + n_groups_2 + ) +}) + +# styler: off +test_data_company_bopo <- tibble::tribble( + ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, + "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25, + "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, + "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, + "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, + "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, + "test_lbk_2", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, + "test_lbk_3", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, + "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, + "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05, + "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, + "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05, + "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, + "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25 +) +# styler: on + +test_that("bopo aggregate results with a group_var returns results for each available combination of buildout/phaseout and group", { + group_var_by_group_id <- dplyr::distinct(test_matched_group_var, .data$group_id, .data$foo) + direction_by_group_id <- dplyr::distinct(test_data_company_bopo, .data$group_id, .data$direction) + + n_groups <- group_var_by_group_id %>% + dplyr::inner_join( + direction_by_group_id, + by = "group_id" + ) %>% + dplyr::distinct(.data$direction, .data$foo) %>% + nrow() + + test_output_with_group_var <- test_data_company_bopo %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_bopo, + group_var = "foo" + ) + + expect_equal( + nrow(test_output_with_group_var), + n_groups + ) +}) + # nolint end From 1c7f4ea63fad1b76dbd9139c7dfdc9672af18a49 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Tue, 9 Apr 2024 14:47:26 +0200 Subject: [PATCH 02/12] address tidyselect issue --- R/aggregate_alignment_loanbook_exposure.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index ae87a12..0521330 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -163,7 +163,7 @@ aggregate_alignment_loanbook_exposure <- function(data, ) %>% dplyr::relocate( c( - group_vars, "n_companies", "n_companies_aligned", + dplyr::all_of(group_vars), "n_companies", "n_companies_aligned", "share_companies_aligned", "exposure_weighted_net_alignment" ) ) %>% From c5d5d125878c495e0267515f8d7c28811c7c8d46 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Tue, 9 Apr 2024 15:05:27 +0200 Subject: [PATCH 03/12] bump dev version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index bda22eb..3a810ac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: pacta.multi.loanbook.analysis Title: Tools to Calculate Climate Metrics for Multiple Loanbooks -Version: 0.0.0.9002 +Version: 0.0.0.9003 Authors@R: c(person(given = "Jacob", family = "Kastl", From a013f259dab2798a36a9e5b14bd145798a204eea Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Tue, 9 Apr 2024 15:17:46 +0200 Subject: [PATCH 04/12] prettify --- R/aggregate_alignment_loanbook_exposure.R | 25 ++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 0521330..3215c0c 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -70,7 +70,6 @@ aggregate_alignment_loanbook_exposure <- function(data, sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>% dplyr::mutate( - # TODO: must be n_distinct n_companies = dplyr::n_distinct(.data$name_abcd), .by = dplyr::all_of(group_vars) ) %>% @@ -146,13 +145,17 @@ aggregate_alignment_loanbook_exposure <- function(data, ) aggregate_exposure_company <- aggregate_exposure_company %>% - bind_rows(opposite_direction) %>% + dplyr::bind_rows(opposite_direction) %>% dplyr::select(-"n_directions") } sector_aggregate_exposure_loanbook_alignment <- aggregate_exposure_company %>% dplyr::summarise( - exposure_weighted_net_alignment = stats::weighted.mean(.data$alignment_metric, w = .data$exposure_weight, na.rm = TRUE), + exposure_weighted_net_alignment = stats::weighted.mean( + x = .data$alignment_metric, + w = .data$exposure_weight, + na.rm = TRUE + ), .by = dplyr::all_of(group_vars) ) @@ -162,12 +165,20 @@ aggregate_alignment_loanbook_exposure <- function(data, by = group_vars ) %>% dplyr::relocate( - c( - dplyr::all_of(group_vars), "n_companies", "n_companies_aligned", - "share_companies_aligned", "exposure_weighted_net_alignment" + dplyr::all_of( + c( + group_vars, "n_companies", "n_companies_aligned", + "share_companies_aligned", "exposure_weighted_net_alignment" + ) ) ) %>% - dplyr::arrange(!!!rlang::syms(group_matched), .data$scenario, .data$region, .data$sector, .data$year) + dplyr::arrange( + !!!rlang::syms(group_matched), + .data$scenario, + .data$region, + .data$sector, + .data$year + ) return(out) } From f932e886ceebffda3ba75747eb7ade820467a4ce Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Wed, 10 Apr 2024 14:56:02 +0200 Subject: [PATCH 05/12] add tests and update handling --- R/aggregate_alignment_loanbook_exposure.R | 5 +- ...st-aggregate_alignment_loanbook_exposure.R | 85 +++++++++++++++++++ 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 3215c0c..3799db6 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -44,6 +44,9 @@ aggregate_alignment_loanbook_exposure <- function(data, dplyr::inner_join( group_id_by_group_var, by = c("group_id") + ) %>% + dplyr::distinct( + dplyr::across(-"group_id") ) } @@ -117,7 +120,7 @@ aggregate_alignment_loanbook_exposure <- function(data, if (level == "bo_po") { aggregate_exposure_company <- aggregate_exposure_company %>% dplyr::mutate( - n_directions = dplyr::n(), + n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE), .by = dplyr::all_of( c( group_vars[!group_vars == "direction"], "name_abcd", "sector", diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index 45146e1..77f9bb1 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -218,4 +218,89 @@ test_that("bopo aggregate results with a group_var returns results for each avai ) }) +test_that("aggregated net alignment by group_var foo equals sum of aggregated buildout and phaseout alignments by group_var foo", { + test_output_with_group_var_bopo <- test_data_company_bopo %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_bopo, + group_var = "foo" + ) + + test_output_with_group_var_net <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net, + group_var = "foo" + ) + + expect_equal( + sum(test_output_with_group_var_bopo$exposure_weighted_net_alignment, na.rm = TRUE), + sum(test_output_with_group_var_net$exposure_weighted_net_alignment, na.rm = TRUE) + ) +}) + +test_that("net aggregated loan size by group_var foo equals sum of matched loan size by group_var foo", { + test_output_with_group_var_net <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net, + group_var = "foo" + ) + + expect_equal( + sum(test_output_with_group_var_net$sum_loan_size_outstanding, na.rm = TRUE), + sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE) + ) +}) + +test_that("aggregated net alignment by group_var that mirrors group_id is equivalent to aggregated net alignment", { + test_output_net <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net + ) + + test_matched_group_var_same_as_group_id <- test_matched_group_var %>% + dplyr::mutate(baz = .data$group_id) + + test_output_with_group_var_net <- test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var_same_as_group_id, + level = test_level_net, + group_var = "baz" + ) %>% + dplyr::rename(group_id = "baz") %>% + dplyr::select(names(test_output_net)) + + expect_equal( + test_output_with_group_var_net, + test_output_net + ) +}) + +test_that("aggregated bopo alignment by group_var that mirrors group_id is equivalent to aggregated bopo alignment", { + test_output_bopo <- test_data_company_bopo %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_bopo + ) + + test_matched_group_var_same_as_group_id <- test_matched_group_var %>% + dplyr::mutate(baz = .data$group_id) + + test_output_with_group_var_bopo <- test_data_company_bopo %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var_same_as_group_id, + level = test_level_bopo, + group_var = "baz" + ) %>% + dplyr::rename(group_id = "baz") %>% + dplyr::select(names(test_output_bopo)) + + expect_equal( + test_output_with_group_var_bopo, + test_output_bopo + ) +}) + # nolint end From 03e2a4e271c8e94724cc154a501d8ea6fa5891dc Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Thu, 11 Apr 2024 15:29:24 +0200 Subject: [PATCH 06/12] rm group_id expectation and change group argument name to .by --- R/aggregate_alignment_loanbook_exposure.R | 103 +++++++++------ R/calculate_company_alignment_metric.R | 80 +++++++++--- man/aggregate_alignment_loanbook_exposure.Rd | 13 +- ...st-aggregate_alignment_loanbook_exposure.R | 117 ++++-------------- .../test-calculate_company_alignment_metric.R | 23 ++-- 5 files changed, 168 insertions(+), 168 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 3799db6..161bc09 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -5,70 +5,76 @@ #' @param level Character. Vector that indicates if the aggregate alignment #' metric should be returned based on the net technology deviations (`net`) or #' disaggregated into buildout and phaseout technologies (`bo_po`). -#' @param group_var Character. Optional vector of length >= 1 that indicates the -#' name of one or more variables the results should be aggregated by, instead -#' of using group_id as the main dimension of aggregation. All names indicated -#' must be available variables in the `matched` data set. The intended use -#' case is to allow analyzing the loan books by additional traits of interest, -#' such as types of financial institutions. +#' @param .by Character. Optionally, a selection of columns to +#' group by, instead. All columns indicated must be available variables in the +#' `matched` data set. The intended use case is to allow analyzing the loan +#' books by additional traits of interest, such as types of financial +#' institutions. #' #' @return NULL #' @export aggregate_alignment_loanbook_exposure <- function(data, matched, level = c("net", "bo_po"), - group_var = NULL) { - group_vars <- c("group_id", "scenario", "region", "sector", "year", "direction") - group_matched <- "group_id" + .by = NULL) { + group_vars <- c( + "scenario", + "region", + "sector", + "year", + "direction" + ) level <- rlang::arg_match(level) # validate input data sets validate_input_data_aggregate_alignment_loanbook_exposure( data = data, matched = matched, - group_vars = group_vars + group_vars = group_vars, + .by = .by ) - # optionally extend by `group_var`. this currently only allows aggregation of supersets for group_var - if (!is.null(group_var)) { - if (!inherits(group_var, "character")) { - stop(glue::glue("`group_var` must be a character vector. Your input is {class(group_var)}.")) + if (!is.null(.by)) { + if (!inherits(.by, "character")) { + stop(glue::glue("`.by` must be a character vector. Your input is {class(.by)}.")) } - group_vars <- c(group_vars[!group_vars == "group_id"], group_var) - group_matched <- group_var - - group_id_by_group_var <- matched %>% - dplyr::distinct(.data$group_id, !!!rlang::syms(group_var)) - - data <- data %>% - dplyr::inner_join( - group_id_by_group_var, - by = c("group_id") - ) %>% - dplyr::distinct( - dplyr::across(-"group_id") - ) + group_vars <- c(.by, group_vars) } matched <- matched %>% dplyr::select( dplyr::all_of( - c(group_matched, "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector") + c( + .by, + "id_loan", + "loan_size_outstanding", + "loan_size_outstanding_currency", + "name_abcd", + "sector" + ) ) ) %>% dplyr::summarise( loan_size_outstanding = sum(.data$loan_size_outstanding, na.rm = TRUE), - .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency", "name_abcd", "sector")) + .by = dplyr::all_of( + c( + .env$.by, + "loan_size_outstanding_currency", + "name_abcd", + "sector" + ) + ) ) %>% dplyr::mutate( exposure_weight = .data$loan_size_outstanding / sum(.data$loan_size_outstanding, na.rm = TRUE), - .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency")) + .by = dplyr::all_of(c(.env$.by, "loan_size_outstanding_currency")) ) aggregate_exposure_company <- data %>% dplyr::inner_join( matched, - by = c(group_matched, "name_abcd", "sector") + by = c("name_abcd", "sector"), + relationship = "many-to-many" ) sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>% @@ -123,8 +129,11 @@ aggregate_alignment_loanbook_exposure <- function(data, n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE), .by = dplyr::all_of( c( - group_vars[!group_vars == "direction"], "name_abcd", "sector", - "activity_unit", "loan_size_outstanding_currency" + group_vars[!group_vars == "direction"], + "name_abcd", + "sector", + "activity_unit", + "loan_size_outstanding_currency" ) ) ) @@ -170,13 +179,16 @@ aggregate_alignment_loanbook_exposure <- function(data, dplyr::relocate( dplyr::all_of( c( - group_vars, "n_companies", "n_companies_aligned", - "share_companies_aligned", "exposure_weighted_net_alignment" + group_vars, + "n_companies", + "n_companies_aligned", + "share_companies_aligned", + "exposure_weighted_net_alignment" ) ) ) %>% dplyr::arrange( - !!!rlang::syms(group_matched), + !!!rlang::syms(.by), .data$scenario, .data$region, .data$sector, @@ -188,19 +200,28 @@ aggregate_alignment_loanbook_exposure <- function(data, validate_input_data_aggregate_alignment_loanbook_exposure <- function(data, matched, - group_vars) { + group_vars, + .by = NULL) { validate_data_has_expected_cols( data = data, expected_columns = c( - group_vars, "name_abcd", "activity_unit", "scenario_source", "alignment_metric" + group_vars, + "name_abcd", + "activity_unit", + "scenario_source", + "alignment_metric" ) ) validate_data_has_expected_cols( data = matched, expected_columns = c( - "group_id", "id_loan", "loan_size_outstanding", - "loan_size_outstanding_currency", "name_abcd", "sector" + "id_loan", + "loan_size_outstanding", + "loan_size_outstanding_currency", + "name_abcd", + "sector", + .by ) ) diff --git a/R/calculate_company_alignment_metric.R b/R/calculate_company_alignment_metric.R index f76b0ec..77936fc 100644 --- a/R/calculate_company_alignment_metric.R +++ b/R/calculate_company_alignment_metric.R @@ -129,8 +129,12 @@ remove_tech_no_plans_no_target <- function(data, target_scenario) { data_to_remove <- data %>% dplyr::group_by( - .data$group_id, .data$name_abcd, .data$region, .data$scenario_source, - .data$sector, .data$technology, .data$year + .data$name_abcd, + .data$region, + .data$scenario_source, + .data$sector, + .data$technology, + .data$year ) %>% dplyr::rename(target = !!rlang::sym(target_scenario)) %>% dplyr::summarise( @@ -147,7 +151,14 @@ remove_tech_no_plans_no_target <- function(data, data <- data %>% dplyr::anti_join( data_to_remove, - by = c("group_id", "name_abcd", "region", "scenario_source", "sector", "technology", "year") + by = c( + "name_abcd", + "region", + "scenario_source", + "sector", + "technology", + "year" + ) ) return(data) @@ -157,8 +168,11 @@ remove_sector_no_target <- function(data, target_scenario) { data_to_remove_no_target_in_sector <- data %>% dplyr::group_by( - .data$group_id, .data$name_abcd, .data$region, .data$scenario_source, - .data$sector, .data$year + .data$name_abcd, + .data$region, + .data$scenario_source, + .data$sector, + .data$year ) %>% dplyr::rename(target = !!rlang::sym(target_scenario)) %>% dplyr::summarise( @@ -171,7 +185,13 @@ remove_sector_no_target <- function(data, data <- data %>% dplyr::anti_join( data_to_remove_no_target_in_sector, - by = c("group_id", "name_abcd", "region", "scenario_source", "sector", "year") + by = c( + "name_abcd", + "region", + "scenario_source", + "sector", + "year" + ) ) return(data) @@ -264,7 +284,6 @@ calculate_company_aggregate_alignment_tms <- function(data, # arrange output data <- data %>% dplyr::arrange( - .data$group_id, .data$sector, .data$name_abcd, .data$region, @@ -281,8 +300,12 @@ add_net_absolute_scenario_value <- function(data, dplyr::mutate( net_absolute_scenario_value = sum(!!rlang::sym(target_scenario), na.rm = TRUE), .by = c( - "group_id", "name_abcd", "scenario_source", "region", "sector", - "activity_unit", "year" + "name_abcd", + "scenario_source", + "region", + "sector", + "activity_unit", + "year" ) ) @@ -294,8 +317,14 @@ add_total_deviation <- function(data) { dplyr::summarise( total_deviation = sum(.data$total_tech_deviation, na.rm = TRUE), .by = c( - "group_id", "name_abcd", "scenario_source", "region", "sector", - "activity_unit", "year", "net_absolute_scenario_value", "direction" + "name_abcd", + "scenario_source", + "region", + "sector", + "activity_unit", + "year", + "net_absolute_scenario_value", + "direction" ) ) @@ -311,8 +340,15 @@ calculate_company_alignment_metric <- function(data, ) %>% dplyr::select( c( - "group_id", "name_abcd", "sector", "activity_unit", "region", - "scenario_source", "scenario", "year", "direction", "total_deviation", + "name_abcd", + "sector", + "activity_unit", + "region", + "scenario_source", + "scenario", + "year", + "direction", + "total_deviation", "alignment_metric" ) ) @@ -324,12 +360,12 @@ fill_missing_direction <- function(data) { # there is currently no way to use data masking inside tidyr::nesting() # see https://github.com/tidyverse/tidyr/issues/971#issuecomment-985671947 # the following line is a workaround to avoid the R CMD NOTE - group_id <- name_abcd <- sector <- activity_unit <- region <- scenario_source <- scenario <- year <- NULL + name_abcd <- sector <- activity_unit <- region <- scenario_source <- scenario <- year <- NULL data <- data %>% tidyr::complete( tidyr::nesting( - group_id, name_abcd, sector, activity_unit, region, scenario_source, scenario, year # nolint: object_usage_linter. + name_abcd, sector, activity_unit, region, scenario_source, scenario, year # nolint: object_usage_linter. ), .data$direction, fill = list( @@ -397,7 +433,6 @@ calculate_company_aggregate_alignment_sda <- function(data, add_total_deviation_sda() %>% calculate_company_alignment_metric(scenario = scenario) %>% dplyr::arrange( - .data$group_id, .data$sector, .data$name_abcd, .data$region, @@ -438,7 +473,12 @@ add_total_deviation_sda <- function(data) { dplyr::mutate( total_deviation = (.data$projected - .data$net_absolute_scenario_value) * -1, .by = c( - "group_id", "name_abcd", "scenario_source", "region", "sector", "activity_unit", "year" + "name_abcd", + "scenario_source", + "region", + "sector", + "activity_unit", + "year" ) ) @@ -551,7 +591,7 @@ validate_input_data_calculate_company_tech_deviation <- function(data, expected_columns = c( "sector", "technology", "year", "region", "scenario_source", "name_abcd", "metric", "production", "technology_share", "scope", - "percentage_of_initial_production_by_scope", "group_id" + "percentage_of_initial_production_by_scope" ) ) @@ -614,7 +654,7 @@ validate_input_data_calculate_company_aggregate_alignment_tms <- function(data, data = data, expected_columns = c( "sector", "technology", "year", "region", "scenario_source", "name_abcd", - "group_id", "projected", paste0("target_", scenario), "direction", + "projected", paste0("target_", scenario), "direction", "total_tech_deviation", "activity_unit" ) ) @@ -693,7 +733,7 @@ validate_input_data_calculate_company_aggregate_alignment_sda <- function(data) data = data, expected_columns = c( "sector", "year", "region", "scenario_source", "name_abcd", - "emission_factor_metric", "emission_factor_value", "group_id" + "emission_factor_metric", "emission_factor_value" ) ) diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd index 9e6ff73..8b2aa84 100644 --- a/man/aggregate_alignment_loanbook_exposure.Rd +++ b/man/aggregate_alignment_loanbook_exposure.Rd @@ -8,7 +8,7 @@ aggregate_alignment_loanbook_exposure( data, matched, level = c("net", "bo_po"), - group_var = NULL + .by = NULL ) } \arguments{ @@ -20,12 +20,11 @@ aggregate_alignment_loanbook_exposure( metric should be returned based on the net technology deviations (\code{net}) or disaggregated into buildout and phaseout technologies (\code{bo_po}).} -\item{group_var}{Character. Optional vector of length >= 1 that indicates the -name of one or more variables the results should be aggregated by, instead -of using group_id as the main dimension of aggregation. All names indicated -must be available variables in the \code{matched} data set. The intended use -case is to allow analyzing the loan books by additional traits of interest, -such as types of financial institutions.} +\item{.by}{\if{html}{\out{}} Character. Optionally, a selection of columns to +group by, instead. All columns indicated must be available variables in the +\code{matched} data set. The intended use case is to allow analyzing the loan +books by additional traits of interest, such as types of financial +institutions.} } \description{ Return loan book level aggregation of company alignment metrics by exposure diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index 77f9bb1..7cd2f75 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -117,15 +117,11 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr # styler: off test_data_company_net <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, - "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3, - "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, - "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, - "test_lbk_2", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, - "test_lbk_3", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, - "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1, - "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1, - "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, + "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1 ) test_matched_group_var <- tibble::tribble( @@ -148,7 +144,7 @@ test_that("net aggregate results with a group_var returns results for each group aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_net, - group_var = "foo" + .by = "foo" ) expect_equal( @@ -164,7 +160,7 @@ test_that("net aggregate results with a group_var returns results for each group aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_net, - group_var = c("foo", "bar") + .by = c("foo", "bar") ) expect_equal( @@ -175,46 +171,31 @@ test_that("net aggregate results with a group_var returns results for each group # styler: off test_data_company_bopo <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, - "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, - "test_lbk_1", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25, - "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, - "test_lbk_1", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, - "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, - "test_lbk_2", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, - "test_lbk_2", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, - "test_lbk_3", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, - "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, - "test_lbk_3", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05, - "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, - "test_lbk_4", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05, - "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, - "test_lbk_4", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, + "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05 ) # styler: on -test_that("bopo aggregate results with a group_var returns results for each available combination of buildout/phaseout and group", { - group_var_by_group_id <- dplyr::distinct(test_matched_group_var, .data$group_id, .data$foo) - direction_by_group_id <- dplyr::distinct(test_data_company_bopo, .data$group_id, .data$direction) - - n_groups <- group_var_by_group_id %>% - dplyr::inner_join( - direction_by_group_id, - by = "group_id" - ) %>% - dplyr::distinct(.data$direction, .data$foo) %>% - nrow() +test_that("bopo aggregate results grouped by foo returns results for each available combination of buildout/phaseout and group foo", { + n_groups <- dplyr::distinct(test_matched_group_var, .data$foo) + n_directions <- dplyr::distinct(test_data_company_bopo, .data$direction) test_output_with_group_var <- test_data_company_bopo %>% aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_bopo, - group_var = "foo" + .by = "foo" ) expect_equal( nrow(test_output_with_group_var), - n_groups + nrow(n_groups) * nrow(n_directions) ) }) @@ -223,14 +204,14 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_bopo, - group_var = "foo" + .by = "foo" ) test_output_with_group_var_net <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_net, - group_var = "foo" + .by = "foo" ) expect_equal( @@ -239,12 +220,12 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu ) }) -test_that("net aggregated loan size by group_var foo equals sum of matched loan size by group_var foo", { +test_that("net aggregated loan size by foo equals sum of matched loan size by foo", { test_output_with_group_var_net <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( matched = test_matched_group_var, level = test_level_net, - group_var = "foo" + .by = "foo" ) expect_equal( @@ -253,54 +234,4 @@ test_that("net aggregated loan size by group_var foo equals sum of matched loan ) }) -test_that("aggregated net alignment by group_var that mirrors group_id is equivalent to aggregated net alignment", { - test_output_net <- test_data_company_net %>% - aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, - level = test_level_net - ) - - test_matched_group_var_same_as_group_id <- test_matched_group_var %>% - dplyr::mutate(baz = .data$group_id) - - test_output_with_group_var_net <- test_data_company_net %>% - aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var_same_as_group_id, - level = test_level_net, - group_var = "baz" - ) %>% - dplyr::rename(group_id = "baz") %>% - dplyr::select(names(test_output_net)) - - expect_equal( - test_output_with_group_var_net, - test_output_net - ) -}) - -test_that("aggregated bopo alignment by group_var that mirrors group_id is equivalent to aggregated bopo alignment", { - test_output_bopo <- test_data_company_bopo %>% - aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, - level = test_level_bopo - ) - - test_matched_group_var_same_as_group_id <- test_matched_group_var %>% - dplyr::mutate(baz = .data$group_id) - - test_output_with_group_var_bopo <- test_data_company_bopo %>% - aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var_same_as_group_id, - level = test_level_bopo, - group_var = "baz" - ) %>% - dplyr::rename(group_id = "baz") %>% - dplyr::select(names(test_output_bopo)) - - expect_equal( - test_output_with_group_var_bopo, - test_output_bopo - ) -}) - # nolint end diff --git a/tests/testthat/test-calculate_company_alignment_metric.R b/tests/testthat/test-calculate_company_alignment_metric.R index 97f94fc..a57fee3 100644 --- a/tests/testthat/test-calculate_company_alignment_metric.R +++ b/tests/testthat/test-calculate_company_alignment_metric.R @@ -2,12 +2,19 @@ # nolint start: indentation_linter. # styler: off +# test_data_calculate_company_tech_deviation <- tibble::tribble( +# ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, ~group_id, +# "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, "test_group", +# "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, "test_group", +# "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, "test_group", +# "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01, "test_group" +# ) test_data_calculate_company_tech_deviation <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, "test_group", - "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, ~group_id, ~foo, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, "test_group", "Yes", + "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, "test_group", "Yes", + "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, "test_group", "No", + "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01, "test_group", "No" ) test_technology_direction <- tibble::tribble( @@ -477,12 +484,14 @@ test_output_calculate_company_aggregate_alignment_sda <- calculate_company_aggre ) added_columns <- c("activity_unit", "scenario", "direction", "total_deviation", "alignment_metric") -dropped_columns <- c("emission_factor_metric", "emission_factor_value") +dropped_columns <- c("emission_factor_metric", "emission_factor_value", "group_id") +# dropped_columns <- c("emission_factor_metric", "emission_factor_value") expected_output_columns <- c(names(test_data_calculate_company_aggregate_alignment_sda), added_columns) expected_output_columns <- expected_output_columns[!expected_output_columns %in% dropped_columns] expected_output_rows <- test_data_calculate_company_aggregate_alignment_sda %>% - dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>% + dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd) %>% + # dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>% nrow() test_that("calculate_company_aggregate_alignment_sda returns expected structure of outputs", { From 4357ac79d72dbb323b53ac5a7439e035bf8d6a59 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Thu, 11 Apr 2024 15:55:06 +0200 Subject: [PATCH 07/12] rm group_id from tests where not required --- ...st-aggregate_alignment_loanbook_exposure.R | 36 ++-- .../test-calculate_company_alignment_metric.R | 176 +++++++++--------- 2 files changed, 111 insertions(+), 101 deletions(-) diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index 7cd2f75..d9d4309 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -3,35 +3,35 @@ # nolint start: indentation_linter. # styler: off test_data_aggregate_alignment_loanbook_exposure_net <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, - "test_group", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3, - "test_group", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, - "test_group", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, - "test_group", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -110, -0.3, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -70, -0.4, + "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", -40, -0.2, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1 ) # styler: on # styler: off test_data_aggregate_alignment_loanbook_exposure_bopo <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, - "test_group", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, - "test_group", "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25, - "test_group", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, - "test_group", "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, - "test_group", "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, - "test_group", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, - "test_group", "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -10, -0.05, + "test_company_1", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -100, -0.25, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", -50, -0.35, + "test_company_2", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -20, -0.05, + "test_company_3", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -40, -0.2, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "buildout", 60, 0.15, + "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "phaseout", -10, -0.05 ) # styler: on # styler: off test_matched <- tibble::tribble( - ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency, ~name_abcd, ~sector, - "test_group", "L1", 300000, "USD", "test_company_1", "power", - "test_group", "L2", 700000, "USD", "test_company_2", "power", - "test_group", "L3", 1000000, "USD", "test_company_3", "power", - "test_group", "L4", 500000, "USD", "test_company_4", "power" + ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency, ~name_abcd, ~sector, + "L1", 300000, "USD", "test_company_1", "power", + "L2", 700000, "USD", "test_company_2", "power", + "L3", 1000000, "USD", "test_company_3", "power", + "L4", 500000, "USD", "test_company_4", "power" ) # styler: on diff --git a/tests/testthat/test-calculate_company_alignment_metric.R b/tests/testthat/test-calculate_company_alignment_metric.R index a57fee3..954ae2e 100644 --- a/tests/testthat/test-calculate_company_alignment_metric.R +++ b/tests/testthat/test-calculate_company_alignment_metric.R @@ -2,19 +2,12 @@ # nolint start: indentation_linter. # styler: off -# test_data_calculate_company_tech_deviation <- tibble::tribble( -# ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, ~group_id, -# "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, "test_group", -# "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, "test_group", -# "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, "test_group", -# "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01, "test_group" -# ) test_data_calculate_company_tech_deviation <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, ~group_id, ~foo, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, "test_group", "Yes", - "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, "test_group", "Yes", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, "test_group", "No", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01, "test_group", "No" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~metric, ~production, ~technology_share, ~scope, ~percentage_of_initial_production_by_scope, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", "projected", 25, 0.25, "sector", 0.005, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", "target_scenario", 20, 0.25, "sector", 0.01, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", "projected", 75, 0.75, "technology", 0.005, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", "target_scenario", 60, 0.75, "technology", 0.01 ) test_technology_direction <- tibble::tribble( @@ -49,41 +42,41 @@ test_target_scenario <- paste0("target_", test_scenario) # 1) zero projected and target values # styler: off test_data_remove_tech_no_plans_no_target_1 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 0, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 0 ) # 2) zero projected value, positive target test_data_remove_tech_no_plans_no_target_2 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 10, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 10 ) # 3) positive projected value, zero target test_data_remove_tech_no_plans_no_target_3 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 0, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 0 ) # 4) positive projected and target values test_data_remove_tech_no_plans_no_target_4 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 75, 60, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 20, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 75, 60 ) # 5) NAs in projected and target values # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero test_data_remove_tech_no_plans_no_target_5 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_ ) # 6) NA in one of projected and target values # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero test_data_remove_tech_no_plans_no_target_6 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 40, 20, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", NA_real_, NA_real_, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 40, 20 ) # styler: on @@ -125,28 +118,28 @@ test_that("only rows with zero values in both projected and target values are re # styler: off # 1) zero values in all target values of sector test_data_remove_sector_no_target_1 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 0, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 0, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 0, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 0 ) # 2) zero values in some target values of sector test_data_remove_sector_no_target_2 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 0, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 5, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, 0, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, 5 ) # 3) zero values in all projected values of sector, but not target test_data_remove_sector_no_target_3 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 0, 0, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 5, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 0, 0, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 0, 5 ) # 4) NA values in all target values of sector # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero test_data_remove_sector_no_target_4 <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~group_id, - "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, NA_real_, "test_group", - "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, NA_real_, "test_group" + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, + "automotive", "electric", 2027, "global", "scenario_source", "test_company", 25, NA_real_, + "automotive", "ice", 2027, "global", "scenario_source", "test_company", 10, NA_real_ ) # styler: on @@ -267,11 +260,11 @@ test_that("total_tech_deviation is less or equal 0 for all technologies in bridg # styler: off # TODO: add a case of a pure phaseout sector test_data_calculate_company_aggregate_alignment_tms <- tibble::tribble( - ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~group_id, ~projected, ~target_scenario, ~direction, ~total_tech_deviation, ~activity_unit, - "power", "gascap", 2027, "global", "test_source", "test_company", "test_group", 100, 80, "phaseout", -20, "MW", - "power", "renewablescap", 2027, "global", "test_source", "test_company", "test_group", 32, 40, "buildout", -8, "MW", + ~sector, ~technology, ~year, ~region, ~scenario_source, ~name_abcd, ~projected, ~target_scenario, ~direction, ~total_tech_deviation, ~activity_unit, + "power", "gascap", 2027, "global", "test_source", "test_company", 100, 80, "phaseout", -20, "MW", + "power", "renewablescap", 2027, "global", "test_source", "test_company", 32, 40, "buildout", -8, "MW", # additional case where a company is only active in one of the two directions (# 89) - "power", "renewablescap", 2027, "global", "test_source", "test_company_2", "test_group", 50, 75, "buildout", -25, "MW" + "power", "renewablescap", 2027, "global", "test_source", "test_company_2", 50, 75, "buildout", -25, "MW" ) # styler: on @@ -302,10 +295,15 @@ test_that("calculate_company_aggregate_alignment_tms returns expected directions # number of units analysed n_groups_net <- test_data_calculate_company_aggregate_alignment_tms %>% dplyr::distinct( - .data$group_id, .data$name_abcd, .data$scenario_source, .data$region, - .data$year, .data$sector, .data$activity_unit + .data$name_abcd, + .data$scenario_source, + .data$region, + .data$year, + .data$sector, + .data$activity_unit ) %>% nrow() + n_groups_bopo <- n_groups_net * 2 test_that("calculate_company_aggregate_alignment_tms returns expected number of rows", { @@ -355,12 +353,12 @@ test_that("consistency checks of calculate_company_aggregate_alignment_tms() pas ## add_net_absolute_scenario_value---- # styler: off test_data_add_net_absolute_scenario_value <- tibble::tribble( - ~group_id, ~name_abcd, ~scenario_source, ~region, ~sector, ~technology, ~activity_unit, ~year, ~target_scenario, ~direction, ~total_tech_deviation, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_A", "output unit", 2027, 20, "buildout", -5, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_B", "output unit", 2027, 50, "phaseout", 10, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_C", "output unit", 2027, 30, "phaseout", -10, - "test_group", "test_company", "test_source", "somewhere", "other_sector", "tech_X", "output unit", 2027, 30, "buildout", -20, - "test_group", "test_company", "test_source", "somewhere", "other_sector", "tech_Y", "output unit", 2027, NA_real_, "phaseout", NA_real_ + ~name_abcd, ~scenario_source, ~region, ~sector, ~technology, ~activity_unit, ~year, ~target_scenario, ~direction, ~total_tech_deviation, + "test_company", "test_source", "somewhere", "test_sector", "tech_A", "output unit", 2027, 20, "buildout", -5, + "test_company", "test_source", "somewhere", "test_sector", "tech_B", "output unit", 2027, 50, "phaseout", 10, + "test_company", "test_source", "somewhere", "test_sector", "tech_C", "output unit", 2027, 30, "phaseout", -10, + "test_company", "test_source", "somewhere", "other_sector", "tech_X", "output unit", 2027, 30, "buildout", -20, + "test_company", "test_source", "somewhere", "other_sector", "tech_Y", "output unit", 2027, NA_real_, "phaseout", NA_real_ ) # styler: on @@ -381,12 +379,12 @@ test_that("add_net_absolute_scenario_value adds sum of scenario values as expect ## add_total_deviation---- # styler: off test_data_add_total_deviation_bo_po <- tibble::tribble( - ~group_id, ~name_abcd, ~scenario_source, ~region, ~sector, ~technology, ~activity_unit, ~year, ~net_absolute_scenario_value, ~direction, ~total_tech_deviation, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_A", "output unit", 2027, 100, "buildout", -5, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_B", "output unit", 2027, 100, "phaseout", 10, - "test_group", "test_company", "test_source", "somewhere", "test_sector", "tech_C", "output unit", 2027, 100, "phaseout", -10, - "test_group", "test_company", "test_source", "somewhere", "other_sector", "tech_X", "output unit", 2027, 30, "buildout", -20, - "test_group", "test_company", "test_source", "somewhere", "other_sector", "tech_Y", "output unit", 2027, 30, "phaseout", 5 + ~name_abcd, ~scenario_source, ~region, ~sector, ~technology, ~activity_unit, ~year, ~net_absolute_scenario_value, ~direction, ~total_tech_deviation, + "test_company", "test_source", "somewhere", "test_sector", "tech_A", "output unit", 2027, 100, "buildout", -5, + "test_company", "test_source", "somewhere", "test_sector", "tech_B", "output unit", 2027, 100, "phaseout", 10, + "test_company", "test_source", "somewhere", "test_sector", "tech_C", "output unit", 2027, 100, "phaseout", -10, + "test_company", "test_source", "somewhere", "other_sector", "tech_X", "output unit", 2027, 30, "buildout", -20, + "test_company", "test_source", "somewhere", "other_sector", "tech_Y", "output unit", 2027, 30, "phaseout", 5 ) # styler: on test_data_add_total_deviation_net <- test_data_add_total_deviation_bo_po %>% @@ -413,10 +411,10 @@ test_that("add_total_deviation adds deviation by sector and direction as expecte ## calculate_company_alignment_metric---- # styler: off test_data_calculate_company_alignment_metric <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~year, ~direction, ~total_deviation, ~net_absolute_scenario_value, - "test_group", "test_company", "sector_a", "output_unit", "somewhere", "that_source", 2027, "net", 20, 40, - "test_group", "test_company", "sector_b", "output_unit", "somewhere", "that_source", 2027, "net", 50, 40, - "test_group", "some_company", "sector_a", "output_unit", "somewhere", "that_source", 2027, "net", 30, 30 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~year, ~direction, ~total_deviation, ~net_absolute_scenario_value, + "test_company", "sector_a", "output_unit", "somewhere", "that_source", 2027, "net", 20, 40, + "test_company", "sector_b", "output_unit", "somewhere", "that_source", 2027, "net", 50, 40, + "some_company", "sector_a", "output_unit", "somewhere", "that_source", 2027, "net", 30, 30 ) # styler: on test_scenario <- "some_scenario" @@ -436,9 +434,9 @@ test_that("calculate_company_alignment_metric calculates company alignment metri ## fill_missing_direction (# 89)---- # styler: off test_data_fill_missing_direction <- tibble::tribble( - ~group_id, ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, - "test_group", "test_company_1", "power", "MW", "global", "test_source", "scenario", 2027, "buildout", -1, -0.5, - "test_group", "test_company_2", "power", "MW", "global", "test_source", "scenario", 2027, "phaseout", 1, 0.1 + ~name_abcd, ~sector, ~activity_unit, ~region, ~scenario_source, ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric, + "test_company_1", "power", "MW", "global", "test_source", "scenario", 2027, "buildout", -1, -0.5, + "test_company_2", "power", "MW", "global", "test_source", "scenario", 2027, "phaseout", 1, 0.1 ) # styler: on @@ -449,8 +447,12 @@ test_output_fill_missing_direction <- fill_missing_direction( # number of units of analysis n_units <- test_data_fill_missing_direction %>% dplyr::distinct( - .data$group_id, .data$name_abcd, .data$scenario_source, .data$region, - .data$year, .data$sector, .data$activity_unit + .data$name_abcd, + .data$scenario_source, + .data$region, + .data$year, + .data$sector, + .data$activity_unit ) %>% nrow() @@ -464,11 +466,11 @@ test_that("fill_missing_direction returns both directions for all units of analy # calculate_company_aggregate_alignment_sda---- # styler: off test_data_calculate_company_aggregate_alignment_sda <- tibble::tribble( - ~sector, ~year, ~region, ~scenario_source, ~name_abcd, ~group_id, ~emission_factor_metric, ~emission_factor_value, - "steel", 2027, "global", "test_source", "company_A", "test_group", "projected", 0.8, - "steel", 2027, "global", "test_source", "company_A", "test_group", "target_scenario", 0.7, - "steel", 2027, "global", "test_source", "company_B", "test_group", "projected", 0.55, - "steel", 2027, "global", "test_source", "company_B", "test_group", "target_scenario", 0.6 + ~sector, ~year, ~region, ~scenario_source, ~name_abcd, ~emission_factor_metric, ~emission_factor_value, + "steel", 2027, "global", "test_source", "company_A", "projected", 0.8, + "steel", 2027, "global", "test_source", "company_A", "target_scenario", 0.7, + "steel", 2027, "global", "test_source", "company_B", "projected", 0.55, + "steel", 2027, "global", "test_source", "company_B", "target_scenario", 0.6 ) # styler: on @@ -484,14 +486,18 @@ test_output_calculate_company_aggregate_alignment_sda <- calculate_company_aggre ) added_columns <- c("activity_unit", "scenario", "direction", "total_deviation", "alignment_metric") -dropped_columns <- c("emission_factor_metric", "emission_factor_value", "group_id") -# dropped_columns <- c("emission_factor_metric", "emission_factor_value") +dropped_columns <- c("emission_factor_metric", "emission_factor_value") expected_output_columns <- c(names(test_data_calculate_company_aggregate_alignment_sda), added_columns) expected_output_columns <- expected_output_columns[!expected_output_columns %in% dropped_columns] expected_output_rows <- test_data_calculate_company_aggregate_alignment_sda %>% - dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd) %>% - # dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>% + dplyr::distinct( + .data$sector, + .data$year, + .data$region, + .data$scenario_source, + .data$name_abcd + ) %>% nrow() test_that("calculate_company_aggregate_alignment_sda returns expected structure of outputs", { @@ -586,7 +592,11 @@ test_that("output columns replace emission_factor_* cols with projected and targ test_nrows <- nrow(test_output_prep_and_wrangle_aggregate_alignment_sda_1) expected_nrows <- test_data_prep_and_wrangle_aggregate_alignment_sda_1 %>% - dplyr::distinct(.data$scenario_source, .data$name_abcd, .data$year) %>% + dplyr::distinct( + .data$scenario_source, + .data$name_abcd, + .data$year + ) %>% nrow() test_that("number of output rows are distinct number of input rows that do not contain emission_factor_* data", { @@ -634,10 +644,10 @@ test_that("years outside of start_year and start_year + time_frame are dropped", # add_total_deviation_sda # styler: off test_data_add_total_deviation_sda <- tibble::tribble( - ~group_id, ~name_abcd, ~scenario_source, ~region, ~sector, ~activity_unit, ~year, ~projected, ~net_absolute_scenario_value, - "test_group", "company_A", "scenario_source", "some_place", "sector_1", "output_unit", 2027, 0.8, 0.7, - "test_group", "company_A", "scenario_source", "some_place", "sector_2", "output_unit", 2027, 1.1, 0.8, - "test_group", "company_B", "scenario_source", "some_place", "sector_1", "output_unit", 2027, 0.6, 0.7 + ~name_abcd, ~scenario_source, ~region, ~sector, ~activity_unit, ~year, ~projected, ~net_absolute_scenario_value, + "company_A", "scenario_source", "some_place", "sector_1", "output_unit", 2027, 0.8, 0.7, + "company_A", "scenario_source", "some_place", "sector_2", "output_unit", 2027, 1.1, 0.8, + "company_B", "scenario_source", "some_place", "sector_1", "output_unit", 2027, 0.6, 0.7 ) # styler: on From 0333f0ece4b5509c922aa8a1f0d86c0983194a83 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Fri, 12 Apr 2024 11:45:17 +0200 Subject: [PATCH 08/12] aggregate_alignment_loanbook_exposure outputs results at comp and lbk level --- R/aggregate_alignment_loanbook_exposure.R | 94 ++++++++++--------- ...st-aggregate_alignment_loanbook_exposure.R | 42 ++++----- 2 files changed, 70 insertions(+), 66 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 161bc09..2ae7631 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -77,6 +77,52 @@ aggregate_alignment_loanbook_exposure <- function(data, relationship = "many-to-many" ) + # if a company only has technologies going in one direction in a sector with + # high carbon and low carbon technologies, add an empty entry for the other + # direction to ensure the aggregation is correct + if (level == "bo_po") { + aggregate_exposure_company <- aggregate_exposure_company %>% + dplyr::mutate( + n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE), + .by = dplyr::all_of( + c( + group_vars[!group_vars == "direction"], + "name_abcd", + "sector", + "activity_unit", + "loan_size_outstanding_currency" + ) + ) + ) + + single_direction <- aggregate_exposure_company %>% + dplyr::filter( + .data$n_directions == 1, + .data$direction %in% c("buildout", "phaseout"), + .data$sector %in% c("automotive", "hdv", "power") + ) + + opposite_direction <- single_direction %>% + dplyr::mutate( + direction = dplyr::if_else( + .data$direction == "buildout", + "phaseout", + "buildout" + ), + total_deviation = 0, + alignment_metric = 0 + ) + + aggregate_exposure_company <- aggregate_exposure_company %>% + dplyr::bind_rows(opposite_direction) %>% + dplyr::select(-"n_directions") + } + + out_company <- aggregate_exposure_company %>% + dplyr::relocate( + dplyr::all_of(c(.by)) + ) + sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>% dplyr::mutate( n_companies = dplyr::n_distinct(.data$name_abcd), @@ -120,47 +166,6 @@ aggregate_alignment_loanbook_exposure <- function(data, ) } - # if a company only has technologies going in one direction in a sector with - # high carbon and low carbon technologies, add an empty entry for the other - # direction to ensure the aggregation is correct - if (level == "bo_po") { - aggregate_exposure_company <- aggregate_exposure_company %>% - dplyr::mutate( - n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE), - .by = dplyr::all_of( - c( - group_vars[!group_vars == "direction"], - "name_abcd", - "sector", - "activity_unit", - "loan_size_outstanding_currency" - ) - ) - ) - - single_direction <- aggregate_exposure_company %>% - dplyr::filter( - .data$n_directions == 1, - .data$direction %in% c("buildout", "phaseout"), - .data$sector %in% c("automotive", "hdv", "power") - ) - - opposite_direction <- single_direction %>% - dplyr::mutate( - direction = dplyr::if_else( - .data$direction == "buildout", - "phaseout", - "buildout" - ), - total_deviation = 0, - alignment_metric = 0 - ) - - aggregate_exposure_company <- aggregate_exposure_company %>% - dplyr::bind_rows(opposite_direction) %>% - dplyr::select(-"n_directions") - } - sector_aggregate_exposure_loanbook_alignment <- aggregate_exposure_company %>% dplyr::summarise( exposure_weighted_net_alignment = stats::weighted.mean( @@ -171,7 +176,7 @@ aggregate_alignment_loanbook_exposure <- function(data, .by = dplyr::all_of(group_vars) ) - out <- sector_aggregate_exposure_loanbook_summary %>% + out_aggregate <- sector_aggregate_exposure_loanbook_summary %>% dplyr::inner_join( sector_aggregate_exposure_loanbook_alignment, by = group_vars @@ -195,6 +200,11 @@ aggregate_alignment_loanbook_exposure <- function(data, .data$year ) + out <- list( + company = out_company, + aggregate = out_aggregate + ) + return(out) } diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index d9d4309..fe4b210 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -52,14 +52,14 @@ test_output_aggregate_alignment_loanbook_exposure_bopo <- test_data_aggregate_al test_that("aggregated net alignment equals sum of aggregated buildout and phaseout alignments", { expect_equal( - test_output_aggregate_alignment_loanbook_exposure_net$exposure_weighted_net_alignment, - sum(test_output_aggregate_alignment_loanbook_exposure_bopo$exposure_weighted_net_alignment, na.rm = TRUE) + test_output_aggregate_alignment_loanbook_exposure_net$aggregate$exposure_weighted_net_alignment, + sum(test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE) ) }) test_that("net aggregated loan size equals sum of matched loan size", { expect_equal( - sum(test_output_aggregate_alignment_loanbook_exposure_net$sum_loan_size_outstanding, na.rm = TRUE), + sum(test_output_aggregate_alignment_loanbook_exposure_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE), sum(test_matched$loan_size_outstanding, na.rm = TRUE) ) }) @@ -68,38 +68,32 @@ test_that("number of identified companies equals unique list of companies in inp n_companies_input_net <- length(unique(test_data_aggregate_alignment_loanbook_exposure_net$name_abcd)) expect_equal( - test_output_aggregate_alignment_loanbook_exposure_net$n_companies, + test_output_aggregate_alignment_loanbook_exposure_net$aggregate$n_companies, n_companies_input_net ) }) -test_that("number of identified companies equals unique list of companies in input data", { - n_companies_input_buildout <- test_data_aggregate_alignment_loanbook_exposure_bopo %>% - dplyr::filter(.data$direction == "buildout") %>% +test_that("number of identified companies per direction equals unique list of companies in input data", { + n_companies_input_bopo <- test_data_aggregate_alignment_loanbook_exposure_bopo %>% dplyr::distinct(.data$name_abcd) %>% nrow() - n_output_buildout <- test_output_aggregate_alignment_loanbook_exposure_bopo %>% + n_output_buildout <- test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate %>% dplyr::filter(.data$direction == "buildout") %>% dplyr::pull(.data$n_companies) expect_equal( n_output_buildout, - n_companies_input_buildout + n_companies_input_bopo ) - n_companies_input_phaseout <- test_data_aggregate_alignment_loanbook_exposure_bopo %>% - dplyr::filter(.data$direction == "phaseout") %>% - dplyr::distinct(.data$name_abcd) %>% - nrow() - - n_output_phaseout <- test_output_aggregate_alignment_loanbook_exposure_bopo %>% + n_output_phaseout <- test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate %>% dplyr::filter(.data$direction == "phaseout") %>% dplyr::pull(.data$n_companies) expect_equal( n_output_phaseout, - n_companies_input_phaseout + n_companies_input_bopo ) }) @@ -107,8 +101,8 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr exposure_columns <- c("sum_loan_size_outstanding", "sum_exposure_companies_aligned", "share_exposure_aligned") expect_equal( - c(names(test_output_aggregate_alignment_loanbook_exposure_bopo), exposure_columns), - names(test_output_aggregate_alignment_loanbook_exposure_net) + c(names(test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate), exposure_columns), + names(test_output_aggregate_alignment_loanbook_exposure_net$aggregate) ) }) @@ -148,7 +142,7 @@ test_that("net aggregate results with a group_var returns results for each group ) expect_equal( - nrow(test_output_with_group_var), + nrow(test_output_with_group_var$aggregate), n_groups ) }) @@ -164,7 +158,7 @@ test_that("net aggregate results with a group_var returns results for each group ) expect_equal( - nrow(test_output_with_group_var_2), + nrow(test_output_with_group_var_2$aggregate), n_groups_2 ) }) @@ -194,7 +188,7 @@ test_that("bopo aggregate results grouped by foo returns results for each availa ) expect_equal( - nrow(test_output_with_group_var), + nrow(test_output_with_group_var$aggregate), nrow(n_groups) * nrow(n_directions) ) }) @@ -215,8 +209,8 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu ) expect_equal( - sum(test_output_with_group_var_bopo$exposure_weighted_net_alignment, na.rm = TRUE), - sum(test_output_with_group_var_net$exposure_weighted_net_alignment, na.rm = TRUE) + sum(test_output_with_group_var_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE), + sum(test_output_with_group_var_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE) ) }) @@ -229,7 +223,7 @@ test_that("net aggregated loan size by foo equals sum of matched loan size by fo ) expect_equal( - sum(test_output_with_group_var_net$sum_loan_size_outstanding, na.rm = TRUE), + sum(test_output_with_group_var_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE), sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE) ) }) From 3d69be1294ec61b7a90bef3fe467c150d3583dd2 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Fri, 12 Apr 2024 12:23:00 +0200 Subject: [PATCH 09/12] add test for .by variables in matched data --- R/utils.R | 4 ++-- .../test-aggregate_alignment_loanbook_exposure.R | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index b770e0e..37df56f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,9 +18,9 @@ validate_data_has_expected_cols <- function(data, if (!data_has_expected_columns) { affected_cols <- glue::glue_collapse(sort(setdiff(expected_columns, names(data))), sep = ", ") rlang::abort(c( - "Must include expected columns in data set.", + "Must include expected columns in input data set.", x = glue::glue("Missing columns: {affected_cols}."), - i = "Please check that data have expected columns." + i = "Please check that data has expected columns." )) } invisible() diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index fe4b210..833df7f 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -131,6 +131,20 @@ test_matched_group_var <- tibble::tribble( ) # styler: on +test_that("net aggregate results with grouped with bad .by returns ", { + expect_error( + { + test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_group_var, + level = test_level_net, + .by = "bad" + ) + }, + regexp = "Must include expected columns in input data set." + ) +}) + test_that("net aggregate results with a group_var returns results for each group", { n_groups <- length(unique(test_matched_group_var$foo)) From 5a9908d3aa80780760c9765afec8f40183f16c96 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Wed, 17 Apr 2024 10:58:16 +0200 Subject: [PATCH 10/12] documentation --- R/aggregate_alignment_loanbook_exposure.R | 13 ++-- man/aggregate_alignment_loanbook_exposure.Rd | 13 ++-- ...st-aggregate_alignment_loanbook_exposure.R | 60 +++++++++---------- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 2ae7631..cf00a2e 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -1,15 +1,20 @@ #' Return loan book level aggregation of company alignment metrics by exposure #' -#' @param data data.frame. Holds output pf company indicators -#' @param matched data.frame. Holds matched and prioritised loan book +#' @param data data.frame. Holds output of company indicators. Contains columns +#' `"name_abcd"`, `"sector"`, `"activity_unit"`, `"region"`, +#' `"scenario_source"`, `"scenario"`, `"year"`, `"direction"`, +#' `"total_deviation"` and `"alignment_metric"`. +#' @param matched data.frame. Holds matched and prioritized loan book data. #' @param level Character. Vector that indicates if the aggregate alignment #' metric should be returned based on the net technology deviations (`net`) or #' disaggregated into buildout and phaseout technologies (`bo_po`). #' @param .by Character. Optionally, a selection of columns to -#' group by, instead. All columns indicated must be available variables in the +#' group by. All columns indicated must be available variables in the #' `matched` data set. The intended use case is to allow analyzing the loan #' books by additional traits of interest, such as types of financial -#' institutions. +#' institutions. Default is `NULL`, which means the aggregation is done at the +#' meta level. It is not possible to group by the critical columns of the +#' `data` and `matched` inputs. #' #' @return NULL #' @export diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd index 8b2aa84..7c974fd 100644 --- a/man/aggregate_alignment_loanbook_exposure.Rd +++ b/man/aggregate_alignment_loanbook_exposure.Rd @@ -12,19 +12,24 @@ aggregate_alignment_loanbook_exposure( ) } \arguments{ -\item{data}{data.frame. Holds output pf company indicators} +\item{data}{data.frame. Holds output of company indicators. Contains columns +\code{"name_abcd"}, \code{"sector"}, \code{"activity_unit"}, \code{"region"}, +\code{"scenario_source"}, \code{"scenario"}, \code{"year"}, \code{"direction"}, +\code{"total_deviation"} and \code{"alignment_metric"}.} -\item{matched}{data.frame. Holds matched and prioritised loan book} +\item{matched}{data.frame. Holds matched and prioritized loan book data.} \item{level}{Character. Vector that indicates if the aggregate alignment metric should be returned based on the net technology deviations (\code{net}) or disaggregated into buildout and phaseout technologies (\code{bo_po}).} \item{.by}{\if{html}{\out{}} Character. Optionally, a selection of columns to -group by, instead. All columns indicated must be available variables in the +group by. All columns indicated must be available variables in the \code{matched} data set. The intended use case is to allow analyzing the loan books by additional traits of interest, such as types of financial -institutions.} +institutions. Default is \code{NULL}, which means the aggregation is done at the +meta level. It is not possible to group by the critical columns of the +\code{data} and \code{matched} inputs.} } \description{ Return loan book level aggregation of company alignment metrics by exposure diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index 833df7f..e8a1c4a 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -106,8 +106,8 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr ) }) -# When an additional variable is passed via group_var, add group_var to -# variables considered in aggregation (GH: 33) +# When an additional variable is passed via .by, add .by to variables considered +# in aggregation (GH: 33) # styler: off test_data_company_net <- tibble::tribble( @@ -118,7 +118,7 @@ test_data_company_net <- tibble::tribble( "test_company_4", "power", "MW", "global", "test_source", "test_scenario", 2027, "net", 50, 0.1 ) -test_matched_group_var <- tibble::tribble( +test_matched_dot_by <- tibble::tribble( ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency, ~name_abcd, ~sector, ~foo, ~bar, "test_lbk_1", "L1", 300000, "USD", "test_company_1", "power", "Yes", "Yes", "test_lbk_1", "L2", 700000, "USD", "test_company_2", "power", "Yes", "Yes", @@ -136,7 +136,7 @@ test_that("net aggregate results with grouped with bad .by returns ", { { test_data_company_net %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_net, .by = "bad" ) @@ -145,34 +145,34 @@ test_that("net aggregate results with grouped with bad .by returns ", { ) }) -test_that("net aggregate results with a group_var returns results for each group", { - n_groups <- length(unique(test_matched_group_var$foo)) +test_that("net aggregate results with .by specified returns results for each group", { + n_groups <- length(unique(test_matched_dot_by$foo)) - test_output_with_group_var <- test_data_company_net %>% + test_output_with_dot_by <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_net, .by = "foo" ) expect_equal( - nrow(test_output_with_group_var$aggregate), + nrow(test_output_with_dot_by$aggregate), n_groups ) }) -test_that("net aggregate results with a group_var returns results for each group for multiple variables", { - n_groups_2 <- nrow(dplyr::distinct(test_matched_group_var, .data$foo, .data$bar)) +test_that("net aggregate results with multiple variables specified in .by returns results for each combination of groups", { + n_groups_2 <- nrow(dplyr::distinct(test_matched_dot_by, .data$foo, .data$bar)) - test_output_with_group_var_2 <- test_data_company_net %>% + test_output_with_dot_by_2 <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_net, .by = c("foo", "bar") ) expect_equal( - nrow(test_output_with_group_var_2$aggregate), + nrow(test_output_with_dot_by_2$aggregate), n_groups_2 ) }) @@ -191,54 +191,54 @@ test_data_company_bopo <- tibble::tribble( # styler: on test_that("bopo aggregate results grouped by foo returns results for each available combination of buildout/phaseout and group foo", { - n_groups <- dplyr::distinct(test_matched_group_var, .data$foo) + n_groups <- dplyr::distinct(test_matched_dot_by, .data$foo) n_directions <- dplyr::distinct(test_data_company_bopo, .data$direction) - test_output_with_group_var <- test_data_company_bopo %>% + test_output_with_dot_by <- test_data_company_bopo %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_bopo, .by = "foo" ) expect_equal( - nrow(test_output_with_group_var$aggregate), + nrow(test_output_with_dot_by$aggregate), nrow(n_groups) * nrow(n_directions) ) }) -test_that("aggregated net alignment by group_var foo equals sum of aggregated buildout and phaseout alignments by group_var foo", { - test_output_with_group_var_bopo <- test_data_company_bopo %>% +test_that("aggregated net alignment grouped by foo equals sum of aggregated buildout and phaseout alignments grouped by foo", { + test_output_with_dot_by_bopo <- test_data_company_bopo %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_bopo, .by = "foo" ) - test_output_with_group_var_net <- test_data_company_net %>% + test_output_with_dot_by_net <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_net, .by = "foo" ) expect_equal( - sum(test_output_with_group_var_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE), - sum(test_output_with_group_var_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE) + sum(test_output_with_dot_by_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE), + sum(test_output_with_dot_by_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE) ) }) -test_that("net aggregated loan size by foo equals sum of matched loan size by foo", { - test_output_with_group_var_net <- test_data_company_net %>% +test_that("net aggregated loan size grouped by foo equals sum of matched loan size grouped by foo", { + test_output_with_dot_by_net <- test_data_company_net %>% aggregate_alignment_loanbook_exposure( - matched = test_matched_group_var, + matched = test_matched_dot_by, level = test_level_net, .by = "foo" ) expect_equal( - sum(test_output_with_group_var_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE), - sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE) + sum(test_output_with_dot_by_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE), + sum(test_matched_dot_by$loan_size_outstanding, na.rm = TRUE) ) }) From 39e292e6d30807e48cadc94b57c39fd0c0dea727 Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Wed, 17 Apr 2024 11:24:16 +0200 Subject: [PATCH 11/12] correct documentation --- R/aggregate_alignment_loanbook_exposure.R | 14 +++++++------- man/aggregate_alignment_loanbook_exposure.Rd | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index cf00a2e..9944092 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -8,13 +8,13 @@ #' @param level Character. Vector that indicates if the aggregate alignment #' metric should be returned based on the net technology deviations (`net`) or #' disaggregated into buildout and phaseout technologies (`bo_po`). -#' @param .by Character. Optionally, a selection of columns to -#' group by. All columns indicated must be available variables in the -#' `matched` data set. The intended use case is to allow analyzing the loan -#' books by additional traits of interest, such as types of financial -#' institutions. Default is `NULL`, which means the aggregation is done at the -#' meta level. It is not possible to group by the critical columns of the -#' `data` and `matched` inputs. +#' @param .by Character. Optionally, a selection of columns to group by. All +#' columns indicated must be available variables in the `matched` data set. +#' The intended use case is to allow analyzing the loan books by additional +#' traits of interest, such as types of financial institutions. Default is +#' `NULL`, which means the aggregation is done at the meta level. It is not +#' possible to group by the critical columns of the `data` and `matched` +#' inputs. #' #' @return NULL #' @export diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd index 7c974fd..e101e19 100644 --- a/man/aggregate_alignment_loanbook_exposure.Rd +++ b/man/aggregate_alignment_loanbook_exposure.Rd @@ -23,13 +23,13 @@ aggregate_alignment_loanbook_exposure( metric should be returned based on the net technology deviations (\code{net}) or disaggregated into buildout and phaseout technologies (\code{bo_po}).} -\item{.by}{\if{html}{\out{}} Character. Optionally, a selection of columns to -group by. All columns indicated must be available variables in the -\code{matched} data set. The intended use case is to allow analyzing the loan -books by additional traits of interest, such as types of financial -institutions. Default is \code{NULL}, which means the aggregation is done at the -meta level. It is not possible to group by the critical columns of the -\code{data} and \code{matched} inputs.} +\item{.by}{Character. Optionally, a selection of columns to group by. All +columns indicated must be available variables in the \code{matched} data set. +The intended use case is to allow analyzing the loan books by additional +traits of interest, such as types of financial institutions. Default is +\code{NULL}, which means the aggregation is done at the meta level. It is not +possible to group by the critical columns of the \code{data} and \code{matched} +inputs.} } \description{ Return loan book level aggregation of company alignment metrics by exposure From fc709059c25a963bca5c56e37568f20ba268255d Mon Sep 17 00:00:00 2001 From: jacobvjk Date: Wed, 17 Apr 2024 11:55:01 +0200 Subject: [PATCH 12/12] do not allow grouping by crucial variables --- R/aggregate_alignment_loanbook_exposure.R | 24 +++++++++++++++++++ ...st-aggregate_alignment_loanbook_exposure.R | 17 ++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R index 9944092..8158092 100644 --- a/R/aggregate_alignment_loanbook_exposure.R +++ b/R/aggregate_alignment_loanbook_exposure.R @@ -240,5 +240,29 @@ validate_input_data_aggregate_alignment_loanbook_exposure <- function(data, ) ) + if (!is.null(.by)) { + if ( + any( + .by %in% c( + group_vars, + "name_abcd", + "activity_unit", + "scenario_source", + "alignment_metric", + "id_loan", + "loan_size_outstanding", + "loan_size_outstanding_currency", + "name_abcd", + "sector" + ) + ) + ) { + stop( + "It is not possible to group by the critical columns of the `data` and + `matched` inputs. Please check your .by argument!" + ) + } + } + invisible() } diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R index e8a1c4a..c4b2d3b 100644 --- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R +++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R @@ -131,7 +131,7 @@ test_matched_dot_by <- tibble::tribble( ) # styler: on -test_that("net aggregate results with grouped with bad .by returns ", { +test_that("net aggregate results with bad .by returns error", { expect_error( { test_data_company_net %>% @@ -145,6 +145,21 @@ test_that("net aggregate results with grouped with bad .by returns ", { ) }) +test_that("net aggregate results with .by arg as crucial variable returns error", { + expect_error( + { + test_data_company_net %>% + aggregate_alignment_loanbook_exposure( + matched = test_matched_dot_by, + level = test_level_net, + .by = "loan_size_outstanding" + ) + }, + regexp = "It is not possible to group by the critical columns of the `data` and + `matched` inputs." + ) +}) + test_that("net aggregate results with .by specified returns results for each group", { n_groups <- length(unique(test_matched_dot_by$foo))