From 5be00e79c5b4b04289a8b113bb182c27cf118b57 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Tue, 9 Apr 2024 14:44:27 +0200
Subject: [PATCH 01/12] add basic aggregate/filter/group functionality

---
 R/aggregate_alignment_loanbook_exposure.R     |  41 +++++--
 man/aggregate_alignment_loanbook_exposure.Rd  |  14 ++-
 ...st-aggregate_alignment_loanbook_exposure.R | 107 ++++++++++++++++++
 3 files changed, 154 insertions(+), 8 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 9f0625e..ae87a12 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -5,13 +5,21 @@
 #' @param level Character. Vector that indicates if the aggregate alignment
 #'   metric should be returned based on the net technology deviations (`net`) or
 #'   disaggregated into buildout and phaseout technologies (`bo_po`).
+#' @param group_var Character. Optional vector of length >= 1 that indicates the
+#'   name of one or more variables the results should be aggregated by, instead
+#'   of using group_id as the main dimension of aggregation. All names indicated
+#'   must be available variables in the `matched` data set. The intended use
+#'   case is to allow analyzing the loan books by additional traits of interest,
+#'   such as types of financial institutions.
 #'
 #' @return NULL
 #' @export
 aggregate_alignment_loanbook_exposure <- function(data,
                                                   matched,
-                                                  level = c("net", "bo_po")) {
+                                                  level = c("net", "bo_po"),
+                                                  group_var = NULL) {
   group_vars <- c("group_id", "scenario", "region", "sector", "year", "direction")
+  group_matched <- "group_id"
   level <- rlang::arg_match(level)
 
   # validate input data sets
@@ -21,30 +29,49 @@ aggregate_alignment_loanbook_exposure <- function(data,
     group_vars = group_vars
   )
 
+  # optionally extend by `group_var`. this currently only allows aggregation of supersets for group_var
+  if (!is.null(group_var)) {
+    if (!inherits(group_var, "character")) {
+      stop(glue::glue("`group_var` must be a character vector. Your input is {class(group_var)}."))
+    }
+    group_vars <- c(group_vars[!group_vars == "group_id"], group_var)
+    group_matched <- group_var
+
+    group_id_by_group_var <- matched %>%
+      dplyr::distinct(.data$group_id, !!!rlang::syms(group_var))
+
+    data <- data %>%
+      dplyr::inner_join(
+        group_id_by_group_var,
+        by = c("group_id")
+      )
+  }
+
   matched <- matched %>%
     dplyr::select(
       dplyr::all_of(
-        c("group_id", "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector")
+        c(group_matched, "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector")
       )
     ) %>%
     dplyr::summarise(
       loan_size_outstanding = sum(.data$loan_size_outstanding, na.rm = TRUE),
-      .by = c("group_id", "loan_size_outstanding_currency", "name_abcd", "sector")
+      .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency", "name_abcd", "sector"))
     ) %>%
     dplyr::mutate(
       exposure_weight = .data$loan_size_outstanding / sum(.data$loan_size_outstanding, na.rm = TRUE),
-      .by = c("group_id", "loan_size_outstanding_currency")
+      .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency"))
     )
 
   aggregate_exposure_company <- data %>%
     dplyr::inner_join(
       matched,
-      by = c("group_id", "name_abcd", "sector")
+      by = c(group_matched, "name_abcd", "sector")
     )
 
   sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>%
     dplyr::mutate(
-      n_companies = dplyr::n(),
+      # TODO: must be n_distinct
+      n_companies = dplyr::n_distinct(.data$name_abcd),
       .by = dplyr::all_of(group_vars)
     ) %>%
     dplyr::mutate(
@@ -140,7 +167,7 @@ aggregate_alignment_loanbook_exposure <- function(data,
         "share_companies_aligned", "exposure_weighted_net_alignment"
       )
     ) %>%
-    dplyr::arrange(.data$group_id, .data$scenario, .data$region, .data$sector, .data$year)
+    dplyr::arrange(!!!rlang::syms(group_matched), .data$scenario, .data$region, .data$sector, .data$year)
 
   return(out)
 }
diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd
index 425d54e..9e6ff73 100644
--- a/man/aggregate_alignment_loanbook_exposure.Rd
+++ b/man/aggregate_alignment_loanbook_exposure.Rd
@@ -4,7 +4,12 @@
 \alias{aggregate_alignment_loanbook_exposure}
 \title{Return loan book level aggregation of company alignment metrics by exposure}
 \usage{
-aggregate_alignment_loanbook_exposure(data, matched, level = c("net", "bo_po"))
+aggregate_alignment_loanbook_exposure(
+  data,
+  matched,
+  level = c("net", "bo_po"),
+  group_var = NULL
+)
 }
 \arguments{
 \item{data}{data.frame. Holds output pf company indicators}
@@ -14,6 +19,13 @@ aggregate_alignment_loanbook_exposure(data, matched, level = c("net", "bo_po"))
 \item{level}{Character. Vector that indicates if the aggregate alignment
 metric should be returned based on the net technology deviations (\code{net}) or
 disaggregated into buildout and phaseout technologies (\code{bo_po}).}
+
+\item{group_var}{Character. Optional vector of length >= 1 that indicates the
+name of one or more variables the results should be aggregated by, instead
+of using group_id as the main dimension of aggregation. All names indicated
+must be available variables in the \code{matched} data set. The intended use
+case is to allow analyzing the loan books by additional traits of interest,
+such as types of financial institutions.}
 }
 \description{
 Return loan book level aggregation of company alignment metrics by exposure
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index 81f1657..45146e1 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -111,4 +111,111 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr
     names(test_output_aggregate_alignment_loanbook_exposure_net)
   )
 })
+
+# When an additional variable is passed via group_var, add group_var to
+# variables considered in aggregation (GH: 33)
+
+# styler: off
+test_data_company_net <- tibble::tribble(
+  ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3,
+  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
+  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
+  "test_lbk_2", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
+  "test_lbk_3", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
+  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1,
+  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1,
+  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3
+)
+
+test_matched_group_var <- tibble::tribble(
+     ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency,       ~name_abcd, ~sector,  ~foo, ~bar,
+  "test_lbk_1",     "L1",                 300000,                           "USD", "test_company_1", "power", "Yes", "Yes",
+  "test_lbk_1",     "L2",                 700000,                           "USD", "test_company_2", "power", "Yes", "Yes",
+  "test_lbk_2",     "L2",                 700000,                           "USD", "test_company_2", "power",  "No", "Yes",
+  "test_lbk_2",     "L3",                1000000,                           "USD", "test_company_3", "power",  "No", "Yes",
+  "test_lbk_3",     "L3",                1000000,                           "USD", "test_company_3", "power",  "No", "Yes",
+  "test_lbk_3",     "L4",                 500000,                           "USD", "test_company_4", "power",  "No", "Yes",
+  "test_lbk_4",     "L1",                 300000,                           "USD", "test_company_1", "power", "Yes",  "No",
+  "test_lbk_4",     "L4",                 500000,                           "USD", "test_company_4", "power", "Yes",  "No"
+)
+# styler: on
+
+test_that("net aggregate results with a group_var returns results for each group", {
+  n_groups <- length(unique(test_matched_group_var$foo))
+
+  test_output_with_group_var <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_net,
+      group_var = "foo"
+    )
+
+  expect_equal(
+    nrow(test_output_with_group_var),
+    n_groups
+  )
+})
+
+test_that("net aggregate results with a group_var returns results for each group for multiple variables", {
+  n_groups_2 <- nrow(dplyr::distinct(test_matched_group_var, .data$foo, .data$bar))
+
+  test_output_with_group_var_2 <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_net,
+      group_var = c("foo", "bar")
+    )
+
+  expect_equal(
+    nrow(test_output_with_group_var_2),
+    n_groups_2
+  )
+})
+
+# styler: off
+test_data_company_bopo <- tibble::tribble(
+     ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
+  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25,
+  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
+  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
+  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
+  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
+  "test_lbk_2", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
+  "test_lbk_3", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
+  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
+  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05,
+  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
+  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05,
+  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
+  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25
+)
+# styler: on
+
+test_that("bopo aggregate results with a group_var returns results for each available combination of buildout/phaseout and group", {
+  group_var_by_group_id <- dplyr::distinct(test_matched_group_var, .data$group_id, .data$foo)
+  direction_by_group_id <- dplyr::distinct(test_data_company_bopo, .data$group_id, .data$direction)
+
+  n_groups <- group_var_by_group_id %>%
+    dplyr::inner_join(
+      direction_by_group_id,
+      by = "group_id"
+    ) %>%
+    dplyr::distinct(.data$direction, .data$foo) %>%
+    nrow()
+
+  test_output_with_group_var <- test_data_company_bopo %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_bopo,
+      group_var = "foo"
+    )
+
+  expect_equal(
+    nrow(test_output_with_group_var),
+    n_groups
+  )
+})
+
 # nolint end

From 1c7f4ea63fad1b76dbd9139c7dfdc9672af18a49 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Tue, 9 Apr 2024 14:47:26 +0200
Subject: [PATCH 02/12] address tidyselect issue

---
 R/aggregate_alignment_loanbook_exposure.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index ae87a12..0521330 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -163,7 +163,7 @@ aggregate_alignment_loanbook_exposure <- function(data,
     ) %>%
     dplyr::relocate(
       c(
-        group_vars, "n_companies", "n_companies_aligned",
+        dplyr::all_of(group_vars), "n_companies", "n_companies_aligned",
         "share_companies_aligned", "exposure_weighted_net_alignment"
       )
     ) %>%

From c5d5d125878c495e0267515f8d7c28811c7c8d46 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Tue, 9 Apr 2024 15:05:27 +0200
Subject: [PATCH 03/12] bump dev version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index bda22eb..3a810ac 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: pacta.multi.loanbook.analysis
 Title: Tools to Calculate Climate Metrics for Multiple Loanbooks
-Version: 0.0.0.9002
+Version: 0.0.0.9003
 Authors@R: 
     c(person(given = "Jacob",
              family = "Kastl",

From a013f259dab2798a36a9e5b14bd145798a204eea Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Tue, 9 Apr 2024 15:17:46 +0200
Subject: [PATCH 04/12] prettify

---
 R/aggregate_alignment_loanbook_exposure.R | 25 ++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 0521330..3215c0c 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -70,7 +70,6 @@ aggregate_alignment_loanbook_exposure <- function(data,
 
   sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>%
     dplyr::mutate(
-      # TODO: must be n_distinct
       n_companies = dplyr::n_distinct(.data$name_abcd),
       .by = dplyr::all_of(group_vars)
     ) %>%
@@ -146,13 +145,17 @@ aggregate_alignment_loanbook_exposure <- function(data,
       )
 
     aggregate_exposure_company <- aggregate_exposure_company %>%
-      bind_rows(opposite_direction) %>%
+      dplyr::bind_rows(opposite_direction) %>%
       dplyr::select(-"n_directions")
   }
 
   sector_aggregate_exposure_loanbook_alignment <- aggregate_exposure_company %>%
     dplyr::summarise(
-      exposure_weighted_net_alignment = stats::weighted.mean(.data$alignment_metric, w = .data$exposure_weight, na.rm = TRUE),
+      exposure_weighted_net_alignment = stats::weighted.mean(
+        x = .data$alignment_metric,
+        w = .data$exposure_weight,
+        na.rm = TRUE
+      ),
       .by = dplyr::all_of(group_vars)
     )
 
@@ -162,12 +165,20 @@ aggregate_alignment_loanbook_exposure <- function(data,
       by = group_vars
     ) %>%
     dplyr::relocate(
-      c(
-        dplyr::all_of(group_vars), "n_companies", "n_companies_aligned",
-        "share_companies_aligned", "exposure_weighted_net_alignment"
+      dplyr::all_of(
+        c(
+          group_vars, "n_companies", "n_companies_aligned",
+          "share_companies_aligned", "exposure_weighted_net_alignment"
+        )
       )
     ) %>%
-    dplyr::arrange(!!!rlang::syms(group_matched), .data$scenario, .data$region, .data$sector, .data$year)
+    dplyr::arrange(
+      !!!rlang::syms(group_matched),
+      .data$scenario,
+      .data$region,
+      .data$sector,
+      .data$year
+    )
 
   return(out)
 }

From f932e886ceebffda3ba75747eb7ade820467a4ce Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Wed, 10 Apr 2024 14:56:02 +0200
Subject: [PATCH 05/12] add tests and update handling

---
 R/aggregate_alignment_loanbook_exposure.R     |  5 +-
 ...st-aggregate_alignment_loanbook_exposure.R | 85 +++++++++++++++++++
 2 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 3215c0c..3799db6 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -44,6 +44,9 @@ aggregate_alignment_loanbook_exposure <- function(data,
       dplyr::inner_join(
         group_id_by_group_var,
         by = c("group_id")
+      ) %>%
+      dplyr::distinct(
+        dplyr::across(-"group_id")
       )
   }
 
@@ -117,7 +120,7 @@ aggregate_alignment_loanbook_exposure <- function(data,
   if (level == "bo_po") {
     aggregate_exposure_company <- aggregate_exposure_company %>%
       dplyr::mutate(
-        n_directions = dplyr::n(),
+        n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE),
         .by = dplyr::all_of(
           c(
             group_vars[!group_vars == "direction"], "name_abcd", "sector",
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index 45146e1..77f9bb1 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -218,4 +218,89 @@ test_that("bopo aggregate results with a group_var returns results for each avai
   )
 })
 
+test_that("aggregated net alignment by group_var foo equals sum of aggregated buildout and phaseout alignments by group_var foo", {
+  test_output_with_group_var_bopo <- test_data_company_bopo %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_bopo,
+      group_var = "foo"
+    )
+
+  test_output_with_group_var_net <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_net,
+      group_var = "foo"
+    )
+
+  expect_equal(
+    sum(test_output_with_group_var_bopo$exposure_weighted_net_alignment, na.rm = TRUE),
+    sum(test_output_with_group_var_net$exposure_weighted_net_alignment, na.rm = TRUE)
+  )
+})
+
+test_that("net aggregated loan size by group_var foo equals sum of matched loan size by group_var foo", {
+  test_output_with_group_var_net <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_net,
+      group_var = "foo"
+    )
+
+  expect_equal(
+    sum(test_output_with_group_var_net$sum_loan_size_outstanding, na.rm = TRUE),
+    sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE)
+  )
+})
+
+test_that("aggregated net alignment by group_var that mirrors group_id is equivalent to aggregated net alignment", {
+  test_output_net <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_net
+    )
+
+  test_matched_group_var_same_as_group_id <- test_matched_group_var %>%
+    dplyr::mutate(baz = .data$group_id)
+
+  test_output_with_group_var_net <- test_data_company_net %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var_same_as_group_id,
+      level = test_level_net,
+      group_var = "baz"
+    ) %>%
+    dplyr::rename(group_id = "baz") %>%
+    dplyr::select(names(test_output_net))
+
+  expect_equal(
+    test_output_with_group_var_net,
+    test_output_net
+  )
+})
+
+test_that("aggregated bopo alignment by group_var that mirrors group_id is equivalent to aggregated bopo alignment", {
+  test_output_bopo <- test_data_company_bopo %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var,
+      level = test_level_bopo
+    )
+
+  test_matched_group_var_same_as_group_id <- test_matched_group_var %>%
+    dplyr::mutate(baz = .data$group_id)
+
+  test_output_with_group_var_bopo <- test_data_company_bopo %>%
+    aggregate_alignment_loanbook_exposure(
+      matched = test_matched_group_var_same_as_group_id,
+      level = test_level_bopo,
+      group_var = "baz"
+    ) %>%
+    dplyr::rename(group_id = "baz") %>%
+    dplyr::select(names(test_output_bopo))
+
+  expect_equal(
+    test_output_with_group_var_bopo,
+    test_output_bopo
+  )
+})
+
 # nolint end

From 03e2a4e271c8e94724cc154a501d8ea6fa5891dc Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Thu, 11 Apr 2024 15:29:24 +0200
Subject: [PATCH 06/12] rm group_id expectation and change group argument name
 to .by

---
 R/aggregate_alignment_loanbook_exposure.R     | 103 +++++++++------
 R/calculate_company_alignment_metric.R        |  80 +++++++++---
 man/aggregate_alignment_loanbook_exposure.Rd  |  13 +-
 ...st-aggregate_alignment_loanbook_exposure.R | 117 ++++--------------
 .../test-calculate_company_alignment_metric.R |  23 ++--
 5 files changed, 168 insertions(+), 168 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 3799db6..161bc09 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -5,70 +5,76 @@
 #' @param level Character. Vector that indicates if the aggregate alignment
 #'   metric should be returned based on the net technology deviations (`net`) or
 #'   disaggregated into buildout and phaseout technologies (`bo_po`).
-#' @param group_var Character. Optional vector of length >= 1 that indicates the
-#'   name of one or more variables the results should be aggregated by, instead
-#'   of using group_id as the main dimension of aggregation. All names indicated
-#'   must be available variables in the `matched` data set. The intended use
-#'   case is to allow analyzing the loan books by additional traits of interest,
-#'   such as types of financial institutions.
+#' @param .by <tidy-select> Character. Optionally, a selection of columns to
+#'   group by, instead. All columns indicated must be available variables in the
+#'   `matched` data set. The intended use case is to allow analyzing the loan
+#'   books by additional traits of interest, such as types of financial
+#'   institutions.
 #'
 #' @return NULL
 #' @export
 aggregate_alignment_loanbook_exposure <- function(data,
                                                   matched,
                                                   level = c("net", "bo_po"),
-                                                  group_var = NULL) {
-  group_vars <- c("group_id", "scenario", "region", "sector", "year", "direction")
-  group_matched <- "group_id"
+                                                  .by = NULL) {
+  group_vars <- c(
+    "scenario",
+    "region",
+    "sector",
+    "year",
+    "direction"
+  )
   level <- rlang::arg_match(level)
 
   # validate input data sets
   validate_input_data_aggregate_alignment_loanbook_exposure(
     data = data,
     matched = matched,
-    group_vars = group_vars
+    group_vars = group_vars,
+    .by = .by
   )
 
-  # optionally extend by `group_var`. this currently only allows aggregation of supersets for group_var
-  if (!is.null(group_var)) {
-    if (!inherits(group_var, "character")) {
-      stop(glue::glue("`group_var` must be a character vector. Your input is {class(group_var)}."))
+  if (!is.null(.by)) {
+    if (!inherits(.by, "character")) {
+      stop(glue::glue("`.by` must be a character vector. Your input is {class(.by)}."))
     }
-    group_vars <- c(group_vars[!group_vars == "group_id"], group_var)
-    group_matched <- group_var
-
-    group_id_by_group_var <- matched %>%
-      dplyr::distinct(.data$group_id, !!!rlang::syms(group_var))
-
-    data <- data %>%
-      dplyr::inner_join(
-        group_id_by_group_var,
-        by = c("group_id")
-      ) %>%
-      dplyr::distinct(
-        dplyr::across(-"group_id")
-      )
+    group_vars <- c(.by, group_vars)
   }
 
   matched <- matched %>%
     dplyr::select(
       dplyr::all_of(
-        c(group_matched, "id_loan", "loan_size_outstanding", "loan_size_outstanding_currency", "name_abcd", "sector")
+        c(
+          .by,
+          "id_loan",
+          "loan_size_outstanding",
+          "loan_size_outstanding_currency",
+          "name_abcd",
+          "sector"
+        )
       )
     ) %>%
     dplyr::summarise(
       loan_size_outstanding = sum(.data$loan_size_outstanding, na.rm = TRUE),
-      .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency", "name_abcd", "sector"))
+      .by = dplyr::all_of(
+        c(
+          .env$.by,
+          "loan_size_outstanding_currency",
+          "name_abcd",
+          "sector"
+        )
+      )
     ) %>%
     dplyr::mutate(
       exposure_weight = .data$loan_size_outstanding / sum(.data$loan_size_outstanding, na.rm = TRUE),
-      .by = dplyr::all_of(c(group_matched, "loan_size_outstanding_currency"))
+      .by = dplyr::all_of(c(.env$.by, "loan_size_outstanding_currency"))
     )
 
   aggregate_exposure_company <- data %>%
     dplyr::inner_join(
       matched,
-      by = c(group_matched, "name_abcd", "sector")
+      by = c("name_abcd", "sector"),
+      relationship = "many-to-many"
     )
 
   sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>%
@@ -123,8 +129,11 @@ aggregate_alignment_loanbook_exposure <- function(data,
         n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE),
         .by = dplyr::all_of(
           c(
-            group_vars[!group_vars == "direction"], "name_abcd", "sector",
-            "activity_unit", "loan_size_outstanding_currency"
+            group_vars[!group_vars == "direction"],
+            "name_abcd",
+            "sector",
+            "activity_unit",
+            "loan_size_outstanding_currency"
           )
         )
       )
@@ -170,13 +179,16 @@ aggregate_alignment_loanbook_exposure <- function(data,
     dplyr::relocate(
       dplyr::all_of(
         c(
-          group_vars, "n_companies", "n_companies_aligned",
-          "share_companies_aligned", "exposure_weighted_net_alignment"
+          group_vars,
+          "n_companies",
+          "n_companies_aligned",
+          "share_companies_aligned",
+          "exposure_weighted_net_alignment"
         )
       )
     ) %>%
     dplyr::arrange(
-      !!!rlang::syms(group_matched),
+      !!!rlang::syms(.by),
       .data$scenario,
       .data$region,
       .data$sector,
@@ -188,19 +200,28 @@ aggregate_alignment_loanbook_exposure <- function(data,
 
 validate_input_data_aggregate_alignment_loanbook_exposure <- function(data,
                                                                       matched,
-                                                                      group_vars) {
+                                                                      group_vars,
+                                                                      .by = NULL) {
   validate_data_has_expected_cols(
     data = data,
     expected_columns = c(
-      group_vars, "name_abcd", "activity_unit", "scenario_source", "alignment_metric"
+      group_vars,
+      "name_abcd",
+      "activity_unit",
+      "scenario_source",
+      "alignment_metric"
     )
   )
 
   validate_data_has_expected_cols(
     data = matched,
     expected_columns = c(
-      "group_id", "id_loan", "loan_size_outstanding",
-      "loan_size_outstanding_currency", "name_abcd", "sector"
+      "id_loan",
+      "loan_size_outstanding",
+      "loan_size_outstanding_currency",
+      "name_abcd",
+      "sector",
+      .by
     )
   )
 
diff --git a/R/calculate_company_alignment_metric.R b/R/calculate_company_alignment_metric.R
index f76b0ec..77936fc 100644
--- a/R/calculate_company_alignment_metric.R
+++ b/R/calculate_company_alignment_metric.R
@@ -129,8 +129,12 @@ remove_tech_no_plans_no_target <- function(data,
                                            target_scenario) {
   data_to_remove <- data %>%
     dplyr::group_by(
-      .data$group_id, .data$name_abcd, .data$region, .data$scenario_source,
-      .data$sector, .data$technology, .data$year
+      .data$name_abcd,
+      .data$region,
+      .data$scenario_source,
+      .data$sector,
+      .data$technology,
+      .data$year
     ) %>%
     dplyr::rename(target = !!rlang::sym(target_scenario)) %>%
     dplyr::summarise(
@@ -147,7 +151,14 @@ remove_tech_no_plans_no_target <- function(data,
   data <- data %>%
     dplyr::anti_join(
       data_to_remove,
-      by = c("group_id", "name_abcd", "region", "scenario_source", "sector", "technology", "year")
+      by = c(
+        "name_abcd",
+        "region",
+        "scenario_source",
+        "sector",
+        "technology",
+        "year"
+      )
     )
 
   return(data)
@@ -157,8 +168,11 @@ remove_sector_no_target <- function(data,
                                     target_scenario) {
   data_to_remove_no_target_in_sector <- data %>%
     dplyr::group_by(
-      .data$group_id, .data$name_abcd, .data$region, .data$scenario_source,
-      .data$sector, .data$year
+      .data$name_abcd,
+      .data$region,
+      .data$scenario_source,
+      .data$sector,
+      .data$year
     ) %>%
     dplyr::rename(target = !!rlang::sym(target_scenario)) %>%
     dplyr::summarise(
@@ -171,7 +185,13 @@ remove_sector_no_target <- function(data,
   data <- data %>%
     dplyr::anti_join(
       data_to_remove_no_target_in_sector,
-      by = c("group_id", "name_abcd", "region", "scenario_source", "sector", "year")
+      by = c(
+        "name_abcd",
+        "region",
+        "scenario_source",
+        "sector",
+        "year"
+      )
     )
 
   return(data)
@@ -264,7 +284,6 @@ calculate_company_aggregate_alignment_tms <- function(data,
   # arrange output
   data <- data %>%
     dplyr::arrange(
-      .data$group_id,
       .data$sector,
       .data$name_abcd,
       .data$region,
@@ -281,8 +300,12 @@ add_net_absolute_scenario_value <- function(data,
     dplyr::mutate(
       net_absolute_scenario_value = sum(!!rlang::sym(target_scenario), na.rm = TRUE),
       .by = c(
-        "group_id", "name_abcd", "scenario_source", "region", "sector",
-        "activity_unit", "year"
+        "name_abcd",
+        "scenario_source",
+        "region",
+        "sector",
+        "activity_unit",
+        "year"
       )
     )
 
@@ -294,8 +317,14 @@ add_total_deviation <- function(data) {
     dplyr::summarise(
       total_deviation = sum(.data$total_tech_deviation, na.rm = TRUE),
       .by = c(
-        "group_id", "name_abcd", "scenario_source", "region", "sector",
-        "activity_unit", "year", "net_absolute_scenario_value", "direction"
+        "name_abcd",
+        "scenario_source",
+        "region",
+        "sector",
+        "activity_unit",
+        "year",
+        "net_absolute_scenario_value",
+        "direction"
       )
     )
 
@@ -311,8 +340,15 @@ calculate_company_alignment_metric <- function(data,
     ) %>%
     dplyr::select(
       c(
-        "group_id", "name_abcd", "sector", "activity_unit", "region",
-        "scenario_source", "scenario", "year", "direction", "total_deviation",
+        "name_abcd",
+        "sector",
+        "activity_unit",
+        "region",
+        "scenario_source",
+        "scenario",
+        "year",
+        "direction",
+        "total_deviation",
         "alignment_metric"
       )
     )
@@ -324,12 +360,12 @@ fill_missing_direction <- function(data) {
   # there is currently no way to use data masking inside tidyr::nesting()
   # see https://github.com/tidyverse/tidyr/issues/971#issuecomment-985671947
   # the following line is a workaround to avoid the R CMD NOTE
-  group_id <- name_abcd <- sector <- activity_unit <- region <- scenario_source <- scenario <- year <- NULL
+  name_abcd <- sector <- activity_unit <- region <- scenario_source <- scenario <- year <- NULL
 
   data <- data %>%
     tidyr::complete(
       tidyr::nesting(
-        group_id, name_abcd, sector, activity_unit, region, scenario_source, scenario, year # nolint: object_usage_linter.
+        name_abcd, sector, activity_unit, region, scenario_source, scenario, year # nolint: object_usage_linter.
       ),
       .data$direction,
       fill = list(
@@ -397,7 +433,6 @@ calculate_company_aggregate_alignment_sda <- function(data,
     add_total_deviation_sda() %>%
     calculate_company_alignment_metric(scenario = scenario) %>%
     dplyr::arrange(
-      .data$group_id,
       .data$sector,
       .data$name_abcd,
       .data$region,
@@ -438,7 +473,12 @@ add_total_deviation_sda <- function(data) {
     dplyr::mutate(
       total_deviation = (.data$projected - .data$net_absolute_scenario_value) * -1,
       .by = c(
-        "group_id", "name_abcd", "scenario_source", "region", "sector", "activity_unit", "year"
+        "name_abcd",
+        "scenario_source",
+        "region",
+        "sector",
+        "activity_unit",
+        "year"
       )
     )
 
@@ -551,7 +591,7 @@ validate_input_data_calculate_company_tech_deviation <- function(data,
     expected_columns = c(
       "sector", "technology", "year", "region", "scenario_source", "name_abcd",
       "metric", "production", "technology_share", "scope",
-      "percentage_of_initial_production_by_scope", "group_id"
+      "percentage_of_initial_production_by_scope"
     )
   )
 
@@ -614,7 +654,7 @@ validate_input_data_calculate_company_aggregate_alignment_tms <- function(data,
     data = data,
     expected_columns = c(
       "sector", "technology", "year", "region", "scenario_source", "name_abcd",
-      "group_id", "projected", paste0("target_", scenario), "direction",
+      "projected", paste0("target_", scenario), "direction",
       "total_tech_deviation", "activity_unit"
     )
   )
@@ -693,7 +733,7 @@ validate_input_data_calculate_company_aggregate_alignment_sda <- function(data)
     data = data,
     expected_columns = c(
       "sector", "year", "region", "scenario_source", "name_abcd",
-      "emission_factor_metric", "emission_factor_value", "group_id"
+      "emission_factor_metric", "emission_factor_value"
     )
   )
 
diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd
index 9e6ff73..8b2aa84 100644
--- a/man/aggregate_alignment_loanbook_exposure.Rd
+++ b/man/aggregate_alignment_loanbook_exposure.Rd
@@ -8,7 +8,7 @@ aggregate_alignment_loanbook_exposure(
   data,
   matched,
   level = c("net", "bo_po"),
-  group_var = NULL
+  .by = NULL
 )
 }
 \arguments{
@@ -20,12 +20,11 @@ aggregate_alignment_loanbook_exposure(
 metric should be returned based on the net technology deviations (\code{net}) or
 disaggregated into buildout and phaseout technologies (\code{bo_po}).}
 
-\item{group_var}{Character. Optional vector of length >= 1 that indicates the
-name of one or more variables the results should be aggregated by, instead
-of using group_id as the main dimension of aggregation. All names indicated
-must be available variables in the \code{matched} data set. The intended use
-case is to allow analyzing the loan books by additional traits of interest,
-such as types of financial institutions.}
+\item{.by}{\if{html}{\out{<tidy-select>}} Character. Optionally, a selection of columns to
+group by, instead. All columns indicated must be available variables in the
+\code{matched} data set. The intended use case is to allow analyzing the loan
+books by additional traits of interest, such as types of financial
+institutions.}
 }
 \description{
 Return loan book level aggregation of company alignment metrics by exposure
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index 77f9bb1..7cd2f75 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -117,15 +117,11 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr
 
 # styler: off
 test_data_company_net <- tibble::tribble(
-  ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
-  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3,
-  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
-  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
-  "test_lbk_2", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
-  "test_lbk_3", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
-  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1,
-  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1,
-  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3
+        ~name_abcd, ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
+  "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1
 )
 
 test_matched_group_var <- tibble::tribble(
@@ -148,7 +144,7 @@ test_that("net aggregate results with a group_var returns results for each group
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_net,
-      group_var = "foo"
+      .by = "foo"
     )
 
   expect_equal(
@@ -164,7 +160,7 @@ test_that("net aggregate results with a group_var returns results for each group
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_net,
-      group_var = c("foo", "bar")
+      .by = c("foo", "bar")
     )
 
   expect_equal(
@@ -175,46 +171,31 @@ test_that("net aggregate results with a group_var returns results for each group
 
 # styler: off
 test_data_company_bopo <- tibble::tribble(
-     ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
-  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
-  "test_lbk_1", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25,
-  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
-  "test_lbk_1", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
-  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
-  "test_lbk_2", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
-  "test_lbk_2", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
-  "test_lbk_3", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
-  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
-  "test_lbk_3", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05,
-  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
-  "test_lbk_4", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05,
-  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
-  "test_lbk_4", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25
+  ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
+  "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05
 )
 # styler: on
 
-test_that("bopo aggregate results with a group_var returns results for each available combination of buildout/phaseout and group", {
-  group_var_by_group_id <- dplyr::distinct(test_matched_group_var, .data$group_id, .data$foo)
-  direction_by_group_id <- dplyr::distinct(test_data_company_bopo, .data$group_id, .data$direction)
-
-  n_groups <- group_var_by_group_id %>%
-    dplyr::inner_join(
-      direction_by_group_id,
-      by = "group_id"
-    ) %>%
-    dplyr::distinct(.data$direction, .data$foo) %>%
-    nrow()
+test_that("bopo aggregate results grouped by foo returns results for each available combination of buildout/phaseout and group foo", {
+  n_groups <- dplyr::distinct(test_matched_group_var, .data$foo)
+  n_directions <- dplyr::distinct(test_data_company_bopo, .data$direction)
 
   test_output_with_group_var <- test_data_company_bopo %>%
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_bopo,
-      group_var = "foo"
+      .by = "foo"
     )
 
   expect_equal(
     nrow(test_output_with_group_var),
-    n_groups
+    nrow(n_groups) * nrow(n_directions)
   )
 })
 
@@ -223,14 +204,14 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_bopo,
-      group_var = "foo"
+      .by = "foo"
     )
 
   test_output_with_group_var_net <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_net,
-      group_var = "foo"
+      .by = "foo"
     )
 
   expect_equal(
@@ -239,12 +220,12 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu
   )
 })
 
-test_that("net aggregated loan size by group_var foo equals sum of matched loan size by group_var foo", {
+test_that("net aggregated loan size by foo equals sum of matched loan size by foo", {
   test_output_with_group_var_net <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
       matched = test_matched_group_var,
       level = test_level_net,
-      group_var = "foo"
+      .by = "foo"
     )
 
   expect_equal(
@@ -253,54 +234,4 @@ test_that("net aggregated loan size by group_var foo equals sum of matched loan
   )
 })
 
-test_that("aggregated net alignment by group_var that mirrors group_id is equivalent to aggregated net alignment", {
-  test_output_net <- test_data_company_net %>%
-    aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
-      level = test_level_net
-    )
-
-  test_matched_group_var_same_as_group_id <- test_matched_group_var %>%
-    dplyr::mutate(baz = .data$group_id)
-
-  test_output_with_group_var_net <- test_data_company_net %>%
-    aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var_same_as_group_id,
-      level = test_level_net,
-      group_var = "baz"
-    ) %>%
-    dplyr::rename(group_id = "baz") %>%
-    dplyr::select(names(test_output_net))
-
-  expect_equal(
-    test_output_with_group_var_net,
-    test_output_net
-  )
-})
-
-test_that("aggregated bopo alignment by group_var that mirrors group_id is equivalent to aggregated bopo alignment", {
-  test_output_bopo <- test_data_company_bopo %>%
-    aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
-      level = test_level_bopo
-    )
-
-  test_matched_group_var_same_as_group_id <- test_matched_group_var %>%
-    dplyr::mutate(baz = .data$group_id)
-
-  test_output_with_group_var_bopo <- test_data_company_bopo %>%
-    aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var_same_as_group_id,
-      level = test_level_bopo,
-      group_var = "baz"
-    ) %>%
-    dplyr::rename(group_id = "baz") %>%
-    dplyr::select(names(test_output_bopo))
-
-  expect_equal(
-    test_output_with_group_var_bopo,
-    test_output_bopo
-  )
-})
-
 # nolint end
diff --git a/tests/testthat/test-calculate_company_alignment_metric.R b/tests/testthat/test-calculate_company_alignment_metric.R
index 97f94fc..a57fee3 100644
--- a/tests/testthat/test-calculate_company_alignment_metric.R
+++ b/tests/testthat/test-calculate_company_alignment_metric.R
@@ -2,12 +2,19 @@
 
 # nolint start: indentation_linter.
 # styler: off
+# test_data_calculate_company_tech_deviation <- tibble::tribble(
+#        ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,    ~group_id,
+#   "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005, "test_group",
+#   "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01, "test_group",
+#   "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005, "test_group",
+#   "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01, "test_group"
+# )
 test_data_calculate_company_tech_deviation <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005, "test_group",
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01, "test_group"
+  ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,    ~group_id, ~foo,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005, "test_group", "Yes",
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01, "test_group", "Yes",
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005, "test_group", "No",
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01, "test_group", "No"
 )
 
 test_technology_direction <- tibble::tribble(
@@ -477,12 +484,14 @@ test_output_calculate_company_aggregate_alignment_sda <- calculate_company_aggre
 )
 
 added_columns <- c("activity_unit", "scenario", "direction", "total_deviation", "alignment_metric")
-dropped_columns <- c("emission_factor_metric", "emission_factor_value")
+dropped_columns <- c("emission_factor_metric", "emission_factor_value", "group_id")
+# dropped_columns <- c("emission_factor_metric", "emission_factor_value")
 expected_output_columns <- c(names(test_data_calculate_company_aggregate_alignment_sda), added_columns)
 expected_output_columns <- expected_output_columns[!expected_output_columns %in% dropped_columns]
 
 expected_output_rows <- test_data_calculate_company_aggregate_alignment_sda %>%
-  dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>%
+  dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd) %>%
+  # dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>%
   nrow()
 
 test_that("calculate_company_aggregate_alignment_sda returns expected structure of outputs", {

From 4357ac79d72dbb323b53ac5a7439e035bf8d6a59 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Thu, 11 Apr 2024 15:55:06 +0200
Subject: [PATCH 07/12] rm group_id from tests where not required

---
 ...st-aggregate_alignment_loanbook_exposure.R |  36 ++--
 .../test-calculate_company_alignment_metric.R | 176 +++++++++---------
 2 files changed, 111 insertions(+), 101 deletions(-)

diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index 7cd2f75..d9d4309 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -3,35 +3,35 @@
 # nolint start: indentation_linter.
 # styler: off
 test_data_aggregate_alignment_loanbook_exposure_net <- tibble::tribble(
-     ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
-  "test_group", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3,
-  "test_group", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
-  "test_group", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
-  "test_group", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1
+        ~name_abcd, ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",             -110,              -0.3,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -70,              -0.4,
+  "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",              -40,              -0.2,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1
 )
 # styler: on
 
 # styler: off
 test_data_aggregate_alignment_loanbook_exposure_bopo <- tibble::tribble(
-     ~group_id, ~name_abcd,       ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
-  "test_group", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
-  "test_group", "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25,
-  "test_group", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
-  "test_group", "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
-  "test_group", "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
-  "test_group", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
-  "test_group", "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05
+        ~name_abcd, ~sector, ~activity_unit,  ~region, ~scenario_source,       ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -10,             -0.05,
+  "test_company_1", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",             -100,             -0.25,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",              -50,             -0.35,
+  "test_company_2", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -20,             -0.05,
+  "test_company_3", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -40,              -0.2,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "buildout",               60,              0.15,
+  "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027, "phaseout",              -10,             -0.05
 
 )
 # styler: on
 
 # styler: off
 test_matched <- tibble::tribble(
-     ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency,       ~name_abcd, ~sector,
-  "test_group",     "L1",                 300000,                           "USD", "test_company_1", "power",
-  "test_group",     "L2",                 700000,                           "USD", "test_company_2", "power",
-  "test_group",     "L3",                1000000,                           "USD", "test_company_3", "power",
-  "test_group",     "L4",                 500000,                           "USD", "test_company_4", "power"
+  ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency,       ~name_abcd, ~sector,
+      "L1",                 300000,                           "USD", "test_company_1", "power",
+      "L2",                 700000,                           "USD", "test_company_2", "power",
+      "L3",                1000000,                           "USD", "test_company_3", "power",
+      "L4",                 500000,                           "USD", "test_company_4", "power"
 )
 # styler: on
 
diff --git a/tests/testthat/test-calculate_company_alignment_metric.R b/tests/testthat/test-calculate_company_alignment_metric.R
index a57fee3..954ae2e 100644
--- a/tests/testthat/test-calculate_company_alignment_metric.R
+++ b/tests/testthat/test-calculate_company_alignment_metric.R
@@ -2,19 +2,12 @@
 
 # nolint start: indentation_linter.
 # styler: off
-# test_data_calculate_company_tech_deviation <- tibble::tribble(
-#        ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,    ~group_id,
-#   "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005, "test_group",
-#   "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01, "test_group",
-#   "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005, "test_group",
-#   "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01, "test_group"
-# )
 test_data_calculate_company_tech_deviation <- tibble::tribble(
-  ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,    ~group_id, ~foo,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005, "test_group", "Yes",
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01, "test_group", "Yes",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005, "test_group", "No",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01, "test_group", "No"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd,           ~metric, ~production, ~technology_share,       ~scope, ~percentage_of_initial_production_by_scope,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",       "projected",          25,              0.25,     "sector",                                      0.005,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company", "target_scenario",          20,              0.25,     "sector",                                       0.01,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",       "projected",          75,              0.75, "technology",                                      0.005,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company", "target_scenario",          60,              0.75, "technology",                                       0.01
 )
 
 test_technology_direction <- tibble::tribble(
@@ -49,41 +42,41 @@ test_target_scenario <- paste0("target_", test_scenario)
 # 1) zero projected and target values
 # styler: off
 test_data_remove_tech_no_plans_no_target_1 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,                0, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,                0
 )
 # 2) zero projected value, positive target
 test_data_remove_tech_no_plans_no_target_2 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,               10, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,               10
 )
 # 3) positive projected value, zero target
 test_data_remove_tech_no_plans_no_target_3 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                0, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                0
 )
 # 4) positive projected and target values
 test_data_remove_tech_no_plans_no_target_4 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         75,               60, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,               20,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         75,               60
 )
 # 5) NAs in projected and target values
 # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero
 test_data_remove_tech_no_plans_no_target_5 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_
 )
 # 6) NA in one of projected and target values
 # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero
 test_data_remove_tech_no_plans_no_target_6 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         40,               20, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",   NA_real_,         NA_real_,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         40,               20
 )
 # styler: on
 
@@ -125,28 +118,28 @@ test_that("only rows with zero values in both projected and target values are re
 # styler: off
 # 1) zero values in all target values of sector
 test_data_remove_sector_no_target_1 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,                0, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                0, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,                0,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                0
 )
 # 2) zero values in some target values of sector
 test_data_remove_sector_no_target_2 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,                0, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                5, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,                0,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,                5
 )
 # 3) zero values in all projected values of sector, but not target
 test_data_remove_sector_no_target_3 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",          0,                0, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,                5, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",          0,                0,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",          0,                5
 )
 # 4) NA values in all target values of sector
 # TODO: reconsider if this needs to be handled somewhere else, effectively NAs are treated like zero
 test_data_remove_sector_no_target_4 <- tibble::tribble(
-       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,    ~group_id,
-  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,         NA_real_, "test_group",
-  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,         NA_real_, "test_group"
+       ~sector, ~technology, ~year,  ~region,  ~scenario_source,     ~name_abcd, ~projected, ~target_scenario,
+  "automotive",  "electric",  2027, "global", "scenario_source", "test_company",         25,         NA_real_,
+  "automotive",       "ice",  2027, "global", "scenario_source", "test_company",         10,         NA_real_
 )
 # styler: on
 
@@ -267,11 +260,11 @@ test_that("total_tech_deviation is less or equal 0 for all technologies in bridg
 # styler: off
 # TODO: add a case of a pure phaseout sector
 test_data_calculate_company_aggregate_alignment_tms <- tibble::tribble(
-  ~sector,     ~technology, ~year,  ~region, ~scenario_source,       ~name_abcd,    ~group_id, ~projected, ~target_scenario, ~direction, ~total_tech_deviation, ~activity_unit,
-  "power",        "gascap",  2027, "global",    "test_source",   "test_company", "test_group",        100,               80, "phaseout",                   -20,           "MW",
-  "power", "renewablescap",  2027, "global",    "test_source",   "test_company", "test_group",         32,               40, "buildout",                    -8,           "MW",
+  ~sector,     ~technology, ~year,  ~region, ~scenario_source,       ~name_abcd, ~projected, ~target_scenario, ~direction, ~total_tech_deviation, ~activity_unit,
+  "power",        "gascap",  2027, "global",    "test_source",   "test_company",        100,               80, "phaseout",                   -20,           "MW",
+  "power", "renewablescap",  2027, "global",    "test_source",   "test_company",         32,               40, "buildout",                    -8,           "MW",
   # additional case where a company is only active in one of the two directions (# 89)
-  "power", "renewablescap",  2027, "global",    "test_source", "test_company_2", "test_group",         50,               75, "buildout",                   -25,           "MW"
+  "power", "renewablescap",  2027, "global",    "test_source", "test_company_2",         50,               75, "buildout",                   -25,           "MW"
 )
 # styler: on
 
@@ -302,10 +295,15 @@ test_that("calculate_company_aggregate_alignment_tms returns expected directions
 # number of units analysed
 n_groups_net <- test_data_calculate_company_aggregate_alignment_tms %>%
   dplyr::distinct(
-    .data$group_id, .data$name_abcd, .data$scenario_source, .data$region,
-    .data$year, .data$sector, .data$activity_unit
+    .data$name_abcd,
+    .data$scenario_source,
+    .data$region,
+    .data$year,
+    .data$sector,
+    .data$activity_unit
   ) %>%
   nrow()
+
 n_groups_bopo <- n_groups_net * 2
 
 test_that("calculate_company_aggregate_alignment_tms returns expected number of rows", {
@@ -355,12 +353,12 @@ test_that("consistency checks of calculate_company_aggregate_alignment_tms() pas
 ## add_net_absolute_scenario_value----
 # styler: off
 test_data_add_net_absolute_scenario_value <- tibble::tribble(
-     ~group_id,     ~name_abcd, ~scenario_source,     ~region,        ~sector, ~technology, ~activity_unit, ~year, ~target_scenario, ~direction, ~total_tech_deviation,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_A",  "output unit",  2027,               20, "buildout",                    -5,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_B",  "output unit",  2027,               50, "phaseout",                    10,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_C",  "output unit",  2027,               30, "phaseout",                   -10,
-  "test_group", "test_company",    "test_source", "somewhere", "other_sector",    "tech_X",  "output unit",  2027,               30, "buildout",                   -20,
-  "test_group", "test_company",    "test_source", "somewhere", "other_sector",    "tech_Y",  "output unit",  2027,         NA_real_, "phaseout",              NA_real_
+      ~name_abcd, ~scenario_source,     ~region,        ~sector, ~technology, ~activity_unit, ~year, ~target_scenario, ~direction, ~total_tech_deviation,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_A",  "output unit",  2027,               20, "buildout",                    -5,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_B",  "output unit",  2027,               50, "phaseout",                    10,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_C",  "output unit",  2027,               30, "phaseout",                   -10,
+  "test_company",    "test_source", "somewhere", "other_sector",    "tech_X",  "output unit",  2027,               30, "buildout",                   -20,
+  "test_company",    "test_source", "somewhere", "other_sector",    "tech_Y",  "output unit",  2027,         NA_real_, "phaseout",              NA_real_
 )
 # styler: on
 
@@ -381,12 +379,12 @@ test_that("add_net_absolute_scenario_value adds sum of scenario values as expect
 ## add_total_deviation----
 # styler: off
 test_data_add_total_deviation_bo_po <- tibble::tribble(
-     ~group_id,     ~name_abcd, ~scenario_source,     ~region,        ~sector, ~technology, ~activity_unit, ~year, ~net_absolute_scenario_value, ~direction, ~total_tech_deviation,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_A",  "output unit",  2027,                          100, "buildout",                    -5,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_B",  "output unit",  2027,                          100, "phaseout",                    10,
-  "test_group", "test_company",    "test_source", "somewhere",  "test_sector",    "tech_C",  "output unit",  2027,                          100, "phaseout",                   -10,
-  "test_group", "test_company",    "test_source", "somewhere", "other_sector",    "tech_X",  "output unit",  2027,                           30, "buildout",                   -20,
-  "test_group", "test_company",    "test_source", "somewhere", "other_sector",    "tech_Y",  "output unit",  2027,                           30, "phaseout",                     5
+      ~name_abcd, ~scenario_source,     ~region,        ~sector, ~technology, ~activity_unit, ~year, ~net_absolute_scenario_value, ~direction, ~total_tech_deviation,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_A",  "output unit",  2027,                          100, "buildout",                    -5,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_B",  "output unit",  2027,                          100, "phaseout",                    10,
+  "test_company",    "test_source", "somewhere",  "test_sector",    "tech_C",  "output unit",  2027,                          100, "phaseout",                   -10,
+  "test_company",    "test_source", "somewhere", "other_sector",    "tech_X",  "output unit",  2027,                           30, "buildout",                   -20,
+  "test_company",    "test_source", "somewhere", "other_sector",    "tech_Y",  "output unit",  2027,                           30, "phaseout",                     5
 )
 # styler: on
 test_data_add_total_deviation_net <- test_data_add_total_deviation_bo_po %>%
@@ -413,10 +411,10 @@ test_that("add_total_deviation adds deviation by sector and direction as expecte
 ## calculate_company_alignment_metric----
 # styler: off
 test_data_calculate_company_alignment_metric <- tibble::tribble(
-     ~group_id,     ~name_abcd,    ~sector, ~activity_unit,     ~region, ~scenario_source, ~year, ~direction, ~total_deviation, ~net_absolute_scenario_value,
-  "test_group", "test_company", "sector_a",  "output_unit", "somewhere",    "that_source",  2027,      "net",               20,                           40,
-  "test_group", "test_company", "sector_b",  "output_unit", "somewhere",    "that_source",  2027,      "net",               50,                           40,
-  "test_group", "some_company", "sector_a",  "output_unit", "somewhere",    "that_source",  2027,      "net",               30,                           30
+      ~name_abcd,    ~sector, ~activity_unit,     ~region, ~scenario_source, ~year, ~direction, ~total_deviation, ~net_absolute_scenario_value,
+  "test_company", "sector_a",  "output_unit", "somewhere",    "that_source",  2027,      "net",               20,                           40,
+  "test_company", "sector_b",  "output_unit", "somewhere",    "that_source",  2027,      "net",               50,                           40,
+  "some_company", "sector_a",  "output_unit", "somewhere",    "that_source",  2027,      "net",               30,                           30
 )
 # styler: on
 test_scenario <- "some_scenario"
@@ -436,9 +434,9 @@ test_that("calculate_company_alignment_metric calculates company alignment metri
 ## fill_missing_direction (# 89)----
 # styler: off
 test_data_fill_missing_direction <- tibble::tribble(
-     ~group_id,       ~name_abcd, ~sector, ~activity_unit,  ~region, ~scenario_source,  ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
-  "test_group", "test_company_1", "power",           "MW", "global",    "test_source", "scenario",  2027, "buildout",               -1,              -0.5,
-  "test_group", "test_company_2", "power",           "MW", "global",    "test_source", "scenario",  2027, "phaseout",                1,               0.1
+        ~name_abcd, ~sector, ~activity_unit,  ~region, ~scenario_source,  ~scenario, ~year, ~direction, ~total_deviation, ~alignment_metric,
+  "test_company_1", "power",           "MW", "global",    "test_source", "scenario",  2027, "buildout",               -1,              -0.5,
+  "test_company_2", "power",           "MW", "global",    "test_source", "scenario",  2027, "phaseout",                1,               0.1
 )
 # styler: on
 
@@ -449,8 +447,12 @@ test_output_fill_missing_direction <- fill_missing_direction(
 # number of units of analysis
 n_units <- test_data_fill_missing_direction %>%
   dplyr::distinct(
-    .data$group_id, .data$name_abcd, .data$scenario_source, .data$region,
-    .data$year, .data$sector, .data$activity_unit
+    .data$name_abcd,
+    .data$scenario_source,
+    .data$region,
+    .data$year,
+    .data$sector,
+    .data$activity_unit
   ) %>%
   nrow()
 
@@ -464,11 +466,11 @@ test_that("fill_missing_direction returns both directions for all units of analy
 # calculate_company_aggregate_alignment_sda----
 # styler: off
 test_data_calculate_company_aggregate_alignment_sda <- tibble::tribble(
-  ~sector, ~year,  ~region, ~scenario_source,  ~name_abcd,    ~group_id, ~emission_factor_metric, ~emission_factor_value,
-  "steel",  2027, "global",    "test_source", "company_A", "test_group",             "projected",                    0.8,
-  "steel",  2027, "global",    "test_source", "company_A", "test_group",       "target_scenario",                    0.7,
-  "steel",  2027, "global",    "test_source", "company_B", "test_group",             "projected",                   0.55,
-  "steel",  2027, "global",    "test_source", "company_B", "test_group",       "target_scenario",                    0.6
+  ~sector, ~year,  ~region, ~scenario_source,  ~name_abcd, ~emission_factor_metric, ~emission_factor_value,
+  "steel",  2027, "global",    "test_source", "company_A",             "projected",                    0.8,
+  "steel",  2027, "global",    "test_source", "company_A",       "target_scenario",                    0.7,
+  "steel",  2027, "global",    "test_source", "company_B",             "projected",                   0.55,
+  "steel",  2027, "global",    "test_source", "company_B",       "target_scenario",                    0.6
 )
 # styler: on
 
@@ -484,14 +486,18 @@ test_output_calculate_company_aggregate_alignment_sda <- calculate_company_aggre
 )
 
 added_columns <- c("activity_unit", "scenario", "direction", "total_deviation", "alignment_metric")
-dropped_columns <- c("emission_factor_metric", "emission_factor_value", "group_id")
-# dropped_columns <- c("emission_factor_metric", "emission_factor_value")
+dropped_columns <- c("emission_factor_metric", "emission_factor_value")
 expected_output_columns <- c(names(test_data_calculate_company_aggregate_alignment_sda), added_columns)
 expected_output_columns <- expected_output_columns[!expected_output_columns %in% dropped_columns]
 
 expected_output_rows <- test_data_calculate_company_aggregate_alignment_sda %>%
-  dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd) %>%
-  # dplyr::distinct(.data$sector, .data$year, .data$region, .data$scenario_source, .data$name_abcd, .data$group_id) %>%
+  dplyr::distinct(
+    .data$sector,
+    .data$year,
+    .data$region,
+    .data$scenario_source,
+    .data$name_abcd
+  ) %>%
   nrow()
 
 test_that("calculate_company_aggregate_alignment_sda returns expected structure of outputs", {
@@ -586,7 +592,11 @@ test_that("output columns replace emission_factor_* cols with projected and targ
 
 test_nrows <- nrow(test_output_prep_and_wrangle_aggregate_alignment_sda_1)
 expected_nrows <- test_data_prep_and_wrangle_aggregate_alignment_sda_1 %>%
-  dplyr::distinct(.data$scenario_source, .data$name_abcd, .data$year) %>%
+  dplyr::distinct(
+    .data$scenario_source,
+    .data$name_abcd,
+    .data$year
+  ) %>%
   nrow()
 
 test_that("number of output rows are distinct number of input rows that do not contain emission_factor_* data", {
@@ -634,10 +644,10 @@ test_that("years outside of start_year and start_year + time_frame are dropped",
 # add_total_deviation_sda
 # styler: off
 test_data_add_total_deviation_sda <- tibble::tribble(
-     ~group_id,  ~name_abcd,  ~scenario_source,      ~region,    ~sector, ~activity_unit, ~year, ~projected, ~net_absolute_scenario_value,
-  "test_group", "company_A", "scenario_source", "some_place", "sector_1",  "output_unit",  2027,        0.8,                          0.7,
-  "test_group", "company_A", "scenario_source", "some_place", "sector_2",  "output_unit",  2027,        1.1,                          0.8,
-  "test_group", "company_B", "scenario_source", "some_place", "sector_1",  "output_unit",  2027,        0.6,                          0.7
+   ~name_abcd,  ~scenario_source,      ~region,    ~sector, ~activity_unit, ~year, ~projected, ~net_absolute_scenario_value,
+  "company_A", "scenario_source", "some_place", "sector_1",  "output_unit",  2027,        0.8,                          0.7,
+  "company_A", "scenario_source", "some_place", "sector_2",  "output_unit",  2027,        1.1,                          0.8,
+  "company_B", "scenario_source", "some_place", "sector_1",  "output_unit",  2027,        0.6,                          0.7
 )
 # styler: on
 

From 0333f0ece4b5509c922aa8a1f0d86c0983194a83 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Fri, 12 Apr 2024 11:45:17 +0200
Subject: [PATCH 08/12] aggregate_alignment_loanbook_exposure outputs results
 at comp and lbk level

---
 R/aggregate_alignment_loanbook_exposure.R     | 94 ++++++++++---------
 ...st-aggregate_alignment_loanbook_exposure.R | 42 ++++-----
 2 files changed, 70 insertions(+), 66 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 161bc09..2ae7631 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -77,6 +77,52 @@ aggregate_alignment_loanbook_exposure <- function(data,
       relationship = "many-to-many"
     )
 
+  # if a company only has technologies going in one direction in a sector with
+  # high carbon and low carbon technologies, add an empty entry for the other
+  # direction to ensure the aggregation is correct
+  if (level == "bo_po") {
+    aggregate_exposure_company <- aggregate_exposure_company %>%
+      dplyr::mutate(
+        n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE),
+        .by = dplyr::all_of(
+          c(
+            group_vars[!group_vars == "direction"],
+            "name_abcd",
+            "sector",
+            "activity_unit",
+            "loan_size_outstanding_currency"
+          )
+        )
+      )
+
+    single_direction <- aggregate_exposure_company %>%
+      dplyr::filter(
+        .data$n_directions == 1,
+        .data$direction %in% c("buildout", "phaseout"),
+        .data$sector %in% c("automotive", "hdv", "power")
+      )
+
+    opposite_direction <- single_direction %>%
+      dplyr::mutate(
+        direction = dplyr::if_else(
+          .data$direction == "buildout",
+          "phaseout",
+          "buildout"
+        ),
+        total_deviation = 0,
+        alignment_metric = 0
+      )
+
+    aggregate_exposure_company <- aggregate_exposure_company %>%
+      dplyr::bind_rows(opposite_direction) %>%
+      dplyr::select(-"n_directions")
+  }
+
+  out_company <- aggregate_exposure_company %>%
+    dplyr::relocate(
+      dplyr::all_of(c(.by))
+    )
+
   sector_aggregate_exposure_loanbook_summary <- aggregate_exposure_company %>%
     dplyr::mutate(
       n_companies = dplyr::n_distinct(.data$name_abcd),
@@ -120,47 +166,6 @@ aggregate_alignment_loanbook_exposure <- function(data,
       )
   }
 
-  # if a company only has technologies going in one direction in a sector with
-  # high carbon and low carbon technologies, add an empty entry for the other
-  # direction to ensure the aggregation is correct
-  if (level == "bo_po") {
-    aggregate_exposure_company <- aggregate_exposure_company %>%
-      dplyr::mutate(
-        n_directions = dplyr::n_distinct(.data$direction, na.rm = TRUE),
-        .by = dplyr::all_of(
-          c(
-            group_vars[!group_vars == "direction"],
-            "name_abcd",
-            "sector",
-            "activity_unit",
-            "loan_size_outstanding_currency"
-          )
-        )
-      )
-
-    single_direction <- aggregate_exposure_company %>%
-      dplyr::filter(
-        .data$n_directions == 1,
-        .data$direction %in% c("buildout", "phaseout"),
-        .data$sector %in% c("automotive", "hdv", "power")
-      )
-
-    opposite_direction <- single_direction %>%
-      dplyr::mutate(
-        direction = dplyr::if_else(
-          .data$direction == "buildout",
-          "phaseout",
-          "buildout"
-        ),
-        total_deviation = 0,
-        alignment_metric = 0
-      )
-
-    aggregate_exposure_company <- aggregate_exposure_company %>%
-      dplyr::bind_rows(opposite_direction) %>%
-      dplyr::select(-"n_directions")
-  }
-
   sector_aggregate_exposure_loanbook_alignment <- aggregate_exposure_company %>%
     dplyr::summarise(
       exposure_weighted_net_alignment = stats::weighted.mean(
@@ -171,7 +176,7 @@ aggregate_alignment_loanbook_exposure <- function(data,
       .by = dplyr::all_of(group_vars)
     )
 
-  out <- sector_aggregate_exposure_loanbook_summary %>%
+  out_aggregate <- sector_aggregate_exposure_loanbook_summary %>%
     dplyr::inner_join(
       sector_aggregate_exposure_loanbook_alignment,
       by = group_vars
@@ -195,6 +200,11 @@ aggregate_alignment_loanbook_exposure <- function(data,
       .data$year
     )
 
+  out <- list(
+    company = out_company,
+    aggregate = out_aggregate
+  )
+
   return(out)
 }
 
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index d9d4309..fe4b210 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -52,14 +52,14 @@ test_output_aggregate_alignment_loanbook_exposure_bopo <- test_data_aggregate_al
 
 test_that("aggregated net alignment equals sum of aggregated buildout and phaseout alignments", {
   expect_equal(
-    test_output_aggregate_alignment_loanbook_exposure_net$exposure_weighted_net_alignment,
-    sum(test_output_aggregate_alignment_loanbook_exposure_bopo$exposure_weighted_net_alignment, na.rm = TRUE)
+    test_output_aggregate_alignment_loanbook_exposure_net$aggregate$exposure_weighted_net_alignment,
+    sum(test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE)
   )
 })
 
 test_that("net aggregated loan size equals sum of matched loan size", {
   expect_equal(
-    sum(test_output_aggregate_alignment_loanbook_exposure_net$sum_loan_size_outstanding, na.rm = TRUE),
+    sum(test_output_aggregate_alignment_loanbook_exposure_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE),
     sum(test_matched$loan_size_outstanding, na.rm = TRUE)
   )
 })
@@ -68,38 +68,32 @@ test_that("number of identified companies equals unique list of companies in inp
   n_companies_input_net <- length(unique(test_data_aggregate_alignment_loanbook_exposure_net$name_abcd))
 
   expect_equal(
-    test_output_aggregate_alignment_loanbook_exposure_net$n_companies,
+    test_output_aggregate_alignment_loanbook_exposure_net$aggregate$n_companies,
     n_companies_input_net
   )
 })
 
-test_that("number of identified companies equals unique list of companies in input data", {
-  n_companies_input_buildout <- test_data_aggregate_alignment_loanbook_exposure_bopo %>%
-    dplyr::filter(.data$direction == "buildout") %>%
+test_that("number of identified companies per direction equals unique list of companies in input data", {
+  n_companies_input_bopo <- test_data_aggregate_alignment_loanbook_exposure_bopo %>%
     dplyr::distinct(.data$name_abcd) %>%
     nrow()
 
-  n_output_buildout <- test_output_aggregate_alignment_loanbook_exposure_bopo %>%
+  n_output_buildout <- test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate %>%
     dplyr::filter(.data$direction == "buildout") %>%
     dplyr::pull(.data$n_companies)
 
   expect_equal(
     n_output_buildout,
-    n_companies_input_buildout
+    n_companies_input_bopo
   )
 
-  n_companies_input_phaseout <- test_data_aggregate_alignment_loanbook_exposure_bopo %>%
-    dplyr::filter(.data$direction == "phaseout") %>%
-    dplyr::distinct(.data$name_abcd) %>%
-    nrow()
-
-  n_output_phaseout <- test_output_aggregate_alignment_loanbook_exposure_bopo %>%
+  n_output_phaseout <- test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate %>%
     dplyr::filter(.data$direction == "phaseout") %>%
     dplyr::pull(.data$n_companies)
 
   expect_equal(
     n_output_phaseout,
-    n_companies_input_phaseout
+    n_companies_input_bopo
   )
 })
 
@@ -107,8 +101,8 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr
   exposure_columns <- c("sum_loan_size_outstanding", "sum_exposure_companies_aligned", "share_exposure_aligned")
 
   expect_equal(
-    c(names(test_output_aggregate_alignment_loanbook_exposure_bopo), exposure_columns),
-    names(test_output_aggregate_alignment_loanbook_exposure_net)
+    c(names(test_output_aggregate_alignment_loanbook_exposure_bopo$aggregate), exposure_columns),
+    names(test_output_aggregate_alignment_loanbook_exposure_net$aggregate)
   )
 })
 
@@ -148,7 +142,7 @@ test_that("net aggregate results with a group_var returns results for each group
     )
 
   expect_equal(
-    nrow(test_output_with_group_var),
+    nrow(test_output_with_group_var$aggregate),
     n_groups
   )
 })
@@ -164,7 +158,7 @@ test_that("net aggregate results with a group_var returns results for each group
     )
 
   expect_equal(
-    nrow(test_output_with_group_var_2),
+    nrow(test_output_with_group_var_2$aggregate),
     n_groups_2
   )
 })
@@ -194,7 +188,7 @@ test_that("bopo aggregate results grouped by foo returns results for each availa
     )
 
   expect_equal(
-    nrow(test_output_with_group_var),
+    nrow(test_output_with_group_var$aggregate),
     nrow(n_groups) * nrow(n_directions)
   )
 })
@@ -215,8 +209,8 @@ test_that("aggregated net alignment by group_var foo equals sum of aggregated bu
     )
 
   expect_equal(
-    sum(test_output_with_group_var_bopo$exposure_weighted_net_alignment, na.rm = TRUE),
-    sum(test_output_with_group_var_net$exposure_weighted_net_alignment, na.rm = TRUE)
+    sum(test_output_with_group_var_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE),
+    sum(test_output_with_group_var_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE)
   )
 })
 
@@ -229,7 +223,7 @@ test_that("net aggregated loan size by foo equals sum of matched loan size by fo
     )
 
   expect_equal(
-    sum(test_output_with_group_var_net$sum_loan_size_outstanding, na.rm = TRUE),
+    sum(test_output_with_group_var_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE),
     sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE)
   )
 })

From 3d69be1294ec61b7a90bef3fe467c150d3583dd2 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Fri, 12 Apr 2024 12:23:00 +0200
Subject: [PATCH 09/12] add test for .by variables in matched data

---
 R/utils.R                                          |  4 ++--
 .../test-aggregate_alignment_loanbook_exposure.R   | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index b770e0e..37df56f 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -18,9 +18,9 @@ validate_data_has_expected_cols <- function(data,
   if (!data_has_expected_columns) {
     affected_cols <- glue::glue_collapse(sort(setdiff(expected_columns, names(data))), sep = ", ")
     rlang::abort(c(
-      "Must include expected columns in data set.",
+      "Must include expected columns in input data set.",
       x = glue::glue("Missing columns: {affected_cols}."),
-      i = "Please check that data have expected columns."
+      i = "Please check that data has expected columns."
     ))
   }
   invisible()
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index fe4b210..833df7f 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -131,6 +131,20 @@ test_matched_group_var <- tibble::tribble(
 )
 # styler: on
 
+test_that("net aggregate results with grouped with bad .by returns ", {
+  expect_error(
+    {
+      test_data_company_net %>%
+        aggregate_alignment_loanbook_exposure(
+          matched = test_matched_group_var,
+          level = test_level_net,
+          .by = "bad"
+      )
+    },
+    regexp = "Must include expected columns in input data set."
+  )
+})
+
 test_that("net aggregate results with a group_var returns results for each group", {
   n_groups <- length(unique(test_matched_group_var$foo))
 

From 5a9908d3aa80780760c9765afec8f40183f16c96 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Wed, 17 Apr 2024 10:58:16 +0200
Subject: [PATCH 10/12] documentation

---
 R/aggregate_alignment_loanbook_exposure.R     | 13 ++--
 man/aggregate_alignment_loanbook_exposure.Rd  | 13 ++--
 ...st-aggregate_alignment_loanbook_exposure.R | 60 +++++++++----------
 3 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 2ae7631..cf00a2e 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -1,15 +1,20 @@
 #' Return loan book level aggregation of company alignment metrics by exposure
 #'
-#' @param data data.frame. Holds output pf company indicators
-#' @param matched data.frame. Holds matched and prioritised loan book
+#' @param data data.frame. Holds output of company indicators. Contains columns
+#'   `"name_abcd"`,	`"sector"`,	`"activity_unit"`,	`"region"`,
+#'   `"scenario_source"`,	`"scenario"`,	`"year"`,	`"direction"`,
+#'   `"total_deviation"` and	`"alignment_metric"`.
+#' @param matched data.frame. Holds matched and prioritized loan book data.
 #' @param level Character. Vector that indicates if the aggregate alignment
 #'   metric should be returned based on the net technology deviations (`net`) or
 #'   disaggregated into buildout and phaseout technologies (`bo_po`).
 #' @param .by <tidy-select> Character. Optionally, a selection of columns to
-#'   group by, instead. All columns indicated must be available variables in the
+#'   group by. All columns indicated must be available variables in the
 #'   `matched` data set. The intended use case is to allow analyzing the loan
 #'   books by additional traits of interest, such as types of financial
-#'   institutions.
+#'   institutions. Default is `NULL`, which means the aggregation is done at the
+#'   meta level. It is not possible to group by the critical columns of the
+#'   `data` and `matched` inputs.
 #'
 #' @return NULL
 #' @export
diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd
index 8b2aa84..7c974fd 100644
--- a/man/aggregate_alignment_loanbook_exposure.Rd
+++ b/man/aggregate_alignment_loanbook_exposure.Rd
@@ -12,19 +12,24 @@ aggregate_alignment_loanbook_exposure(
 )
 }
 \arguments{
-\item{data}{data.frame. Holds output pf company indicators}
+\item{data}{data.frame. Holds output of company indicators. Contains columns
+\code{"name_abcd"},    \code{"sector"},    \code{"activity_unit"},    \code{"region"},
+\code{"scenario_source"},    \code{"scenario"},    \code{"year"},    \code{"direction"},
+\code{"total_deviation"} and    \code{"alignment_metric"}.}
 
-\item{matched}{data.frame. Holds matched and prioritised loan book}
+\item{matched}{data.frame. Holds matched and prioritized loan book data.}
 
 \item{level}{Character. Vector that indicates if the aggregate alignment
 metric should be returned based on the net technology deviations (\code{net}) or
 disaggregated into buildout and phaseout technologies (\code{bo_po}).}
 
 \item{.by}{\if{html}{\out{<tidy-select>}} Character. Optionally, a selection of columns to
-group by, instead. All columns indicated must be available variables in the
+group by. All columns indicated must be available variables in the
 \code{matched} data set. The intended use case is to allow analyzing the loan
 books by additional traits of interest, such as types of financial
-institutions.}
+institutions. Default is \code{NULL}, which means the aggregation is done at the
+meta level. It is not possible to group by the critical columns of the
+\code{data} and \code{matched} inputs.}
 }
 \description{
 Return loan book level aggregation of company alignment metrics by exposure
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index 833df7f..e8a1c4a 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -106,8 +106,8 @@ test_that("net aggregate results have the same columns as buildout/phaseout aggr
   )
 })
 
-# When an additional variable is passed via group_var, add group_var to
-# variables considered in aggregation (GH: 33)
+# When an additional variable is passed via .by, add .by to variables considered
+# in aggregation (GH: 33)
 
 # styler: off
 test_data_company_net <- tibble::tribble(
@@ -118,7 +118,7 @@ test_data_company_net <- tibble::tribble(
   "test_company_4", "power",           "MW", "global",    "test_source", "test_scenario",  2027,      "net",               50,               0.1
 )
 
-test_matched_group_var <- tibble::tribble(
+test_matched_dot_by <- tibble::tribble(
      ~group_id, ~id_loan, ~loan_size_outstanding, ~loan_size_outstanding_currency,       ~name_abcd, ~sector,  ~foo, ~bar,
   "test_lbk_1",     "L1",                 300000,                           "USD", "test_company_1", "power", "Yes", "Yes",
   "test_lbk_1",     "L2",                 700000,                           "USD", "test_company_2", "power", "Yes", "Yes",
@@ -136,7 +136,7 @@ test_that("net aggregate results with grouped with bad .by returns ", {
     {
       test_data_company_net %>%
         aggregate_alignment_loanbook_exposure(
-          matched = test_matched_group_var,
+          matched = test_matched_dot_by,
           level = test_level_net,
           .by = "bad"
       )
@@ -145,34 +145,34 @@ test_that("net aggregate results with grouped with bad .by returns ", {
   )
 })
 
-test_that("net aggregate results with a group_var returns results for each group", {
-  n_groups <- length(unique(test_matched_group_var$foo))
+test_that("net aggregate results with .by specified returns results for each group", {
+  n_groups <- length(unique(test_matched_dot_by$foo))
 
-  test_output_with_group_var <- test_data_company_net %>%
+  test_output_with_dot_by <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_net,
       .by = "foo"
     )
 
   expect_equal(
-    nrow(test_output_with_group_var$aggregate),
+    nrow(test_output_with_dot_by$aggregate),
     n_groups
   )
 })
 
-test_that("net aggregate results with a group_var returns results for each group for multiple variables", {
-  n_groups_2 <- nrow(dplyr::distinct(test_matched_group_var, .data$foo, .data$bar))
+test_that("net aggregate results with multiple variables specified in .by returns results for each combination of groups", {
+  n_groups_2 <- nrow(dplyr::distinct(test_matched_dot_by, .data$foo, .data$bar))
 
-  test_output_with_group_var_2 <- test_data_company_net %>%
+  test_output_with_dot_by_2 <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_net,
       .by = c("foo", "bar")
     )
 
   expect_equal(
-    nrow(test_output_with_group_var_2$aggregate),
+    nrow(test_output_with_dot_by_2$aggregate),
     n_groups_2
   )
 })
@@ -191,54 +191,54 @@ test_data_company_bopo <- tibble::tribble(
 # styler: on
 
 test_that("bopo aggregate results grouped by foo returns results for each available combination of buildout/phaseout and group foo", {
-  n_groups <- dplyr::distinct(test_matched_group_var, .data$foo)
+  n_groups <- dplyr::distinct(test_matched_dot_by, .data$foo)
   n_directions <- dplyr::distinct(test_data_company_bopo, .data$direction)
 
-  test_output_with_group_var <- test_data_company_bopo %>%
+  test_output_with_dot_by <- test_data_company_bopo %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_bopo,
       .by = "foo"
     )
 
   expect_equal(
-    nrow(test_output_with_group_var$aggregate),
+    nrow(test_output_with_dot_by$aggregate),
     nrow(n_groups) * nrow(n_directions)
   )
 })
 
-test_that("aggregated net alignment by group_var foo equals sum of aggregated buildout and phaseout alignments by group_var foo", {
-  test_output_with_group_var_bopo <- test_data_company_bopo %>%
+test_that("aggregated net alignment grouped by foo equals sum of aggregated buildout and phaseout alignments grouped by foo", {
+  test_output_with_dot_by_bopo <- test_data_company_bopo %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_bopo,
       .by = "foo"
     )
 
-  test_output_with_group_var_net <- test_data_company_net %>%
+  test_output_with_dot_by_net <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_net,
       .by = "foo"
     )
 
   expect_equal(
-    sum(test_output_with_group_var_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE),
-    sum(test_output_with_group_var_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE)
+    sum(test_output_with_dot_by_bopo$aggregate$exposure_weighted_net_alignment, na.rm = TRUE),
+    sum(test_output_with_dot_by_net$aggregate$exposure_weighted_net_alignment, na.rm = TRUE)
   )
 })
 
-test_that("net aggregated loan size by foo equals sum of matched loan size by foo", {
-  test_output_with_group_var_net <- test_data_company_net %>%
+test_that("net aggregated loan size grouped by foo equals sum of matched loan size grouped by foo", {
+  test_output_with_dot_by_net <- test_data_company_net %>%
     aggregate_alignment_loanbook_exposure(
-      matched = test_matched_group_var,
+      matched = test_matched_dot_by,
       level = test_level_net,
       .by = "foo"
     )
 
   expect_equal(
-    sum(test_output_with_group_var_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE),
-    sum(test_matched_group_var$loan_size_outstanding, na.rm = TRUE)
+    sum(test_output_with_dot_by_net$aggregate$sum_loan_size_outstanding, na.rm = TRUE),
+    sum(test_matched_dot_by$loan_size_outstanding, na.rm = TRUE)
   )
 })
 

From 39e292e6d30807e48cadc94b57c39fd0c0dea727 Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Wed, 17 Apr 2024 11:24:16 +0200
Subject: [PATCH 11/12] correct documentation

---
 R/aggregate_alignment_loanbook_exposure.R    | 14 +++++++-------
 man/aggregate_alignment_loanbook_exposure.Rd | 14 +++++++-------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index cf00a2e..9944092 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -8,13 +8,13 @@
 #' @param level Character. Vector that indicates if the aggregate alignment
 #'   metric should be returned based on the net technology deviations (`net`) or
 #'   disaggregated into buildout and phaseout technologies (`bo_po`).
-#' @param .by <tidy-select> Character. Optionally, a selection of columns to
-#'   group by. All columns indicated must be available variables in the
-#'   `matched` data set. The intended use case is to allow analyzing the loan
-#'   books by additional traits of interest, such as types of financial
-#'   institutions. Default is `NULL`, which means the aggregation is done at the
-#'   meta level. It is not possible to group by the critical columns of the
-#'   `data` and `matched` inputs.
+#' @param .by Character. Optionally, a selection of columns to group by. All
+#'   columns indicated must be available variables in the `matched` data set.
+#'   The intended use case is to allow analyzing the loan books by additional
+#'   traits of interest, such as types of financial institutions. Default is
+#'   `NULL`, which means the aggregation is done at the meta level. It is not
+#'   possible to group by the critical columns of the `data` and `matched`
+#'   inputs.
 #'
 #' @return NULL
 #' @export
diff --git a/man/aggregate_alignment_loanbook_exposure.Rd b/man/aggregate_alignment_loanbook_exposure.Rd
index 7c974fd..e101e19 100644
--- a/man/aggregate_alignment_loanbook_exposure.Rd
+++ b/man/aggregate_alignment_loanbook_exposure.Rd
@@ -23,13 +23,13 @@ aggregate_alignment_loanbook_exposure(
 metric should be returned based on the net technology deviations (\code{net}) or
 disaggregated into buildout and phaseout technologies (\code{bo_po}).}
 
-\item{.by}{\if{html}{\out{<tidy-select>}} Character. Optionally, a selection of columns to
-group by. All columns indicated must be available variables in the
-\code{matched} data set. The intended use case is to allow analyzing the loan
-books by additional traits of interest, such as types of financial
-institutions. Default is \code{NULL}, which means the aggregation is done at the
-meta level. It is not possible to group by the critical columns of the
-\code{data} and \code{matched} inputs.}
+\item{.by}{Character. Optionally, a selection of columns to group by. All
+columns indicated must be available variables in the \code{matched} data set.
+The intended use case is to allow analyzing the loan books by additional
+traits of interest, such as types of financial institutions. Default is
+\code{NULL}, which means the aggregation is done at the meta level. It is not
+possible to group by the critical columns of the \code{data} and \code{matched}
+inputs.}
 }
 \description{
 Return loan book level aggregation of company alignment metrics by exposure

From fc709059c25a963bca5c56e37568f20ba268255d Mon Sep 17 00:00:00 2001
From: jacobvjk <jacob.kastl@gmail.com>
Date: Wed, 17 Apr 2024 11:55:01 +0200
Subject: [PATCH 12/12] do not allow grouping by crucial variables

---
 R/aggregate_alignment_loanbook_exposure.R     | 24 +++++++++++++++++++
 ...st-aggregate_alignment_loanbook_exposure.R | 17 ++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/R/aggregate_alignment_loanbook_exposure.R b/R/aggregate_alignment_loanbook_exposure.R
index 9944092..8158092 100644
--- a/R/aggregate_alignment_loanbook_exposure.R
+++ b/R/aggregate_alignment_loanbook_exposure.R
@@ -240,5 +240,29 @@ validate_input_data_aggregate_alignment_loanbook_exposure <- function(data,
     )
   )
 
+  if (!is.null(.by)) {
+    if (
+      any(
+        .by %in% c(
+          group_vars,
+          "name_abcd",
+          "activity_unit",
+          "scenario_source",
+          "alignment_metric",
+          "id_loan",
+          "loan_size_outstanding",
+          "loan_size_outstanding_currency",
+          "name_abcd",
+          "sector"
+        )
+      )
+    ) {
+      stop(
+        "It is not possible to group by the critical columns of the `data` and
+        `matched` inputs. Please check your .by argument!"
+      )
+    }
+  }
+
   invisible()
 }
diff --git a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
index e8a1c4a..c4b2d3b 100644
--- a/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
+++ b/tests/testthat/test-aggregate_alignment_loanbook_exposure.R
@@ -131,7 +131,7 @@ test_matched_dot_by <- tibble::tribble(
 )
 # styler: on
 
-test_that("net aggregate results with grouped with bad .by returns ", {
+test_that("net aggregate results with bad .by returns error", {
   expect_error(
     {
       test_data_company_net %>%
@@ -145,6 +145,21 @@ test_that("net aggregate results with grouped with bad .by returns ", {
   )
 })
 
+test_that("net aggregate results with .by arg as crucial variable returns error", {
+  expect_error(
+    {
+      test_data_company_net %>%
+        aggregate_alignment_loanbook_exposure(
+          matched = test_matched_dot_by,
+          level = test_level_net,
+          .by = "loan_size_outstanding"
+        )
+    },
+    regexp = "It is not possible to group by the critical columns of the `data` and
+        `matched` inputs."
+  )
+})
+
 test_that("net aggregate results with .by specified returns results for each group", {
   n_groups <- length(unique(test_matched_dot_by$foo))