diff --git a/vignettes/epi_df.Rmd b/vignettes/epi_df.Rmd
index 15f76338..519a2330 100644
--- a/vignettes/epi_df.Rmd
+++ b/vignettes/epi_df.Rmd
@@ -209,12 +209,65 @@ Epidata API](https://cmu-delphi.github.io/delphi-epidata/api/flusurv.html)
 
 ```{r}
 library(epidatr)
-flu_data <- pub_flusurv(
+flu_data_api <- pub_flusurv(
   locations = "ca",
   epiweeks = epirange(201801, 202001),
-) %>%
-  select(location, epiweek, issue, rate_age_0, rate_age_1, rate_age_2, rate_age_3, rate_age_4) %>%
-  tidyr::pivot_longer(cols = starts_with("rate_age_"), names_to = "age_group", values_to = "rate")
+)
+flu_data <- flu_data_api %>%
+  select(location, epiweek, issue, rate_age_0, rate_age_1, rate_age_2, rate_age_3, rate_age_4, rate_overall) %>%
+  tidyr::pivot_longer(
+    cols = starts_with("rate_age_"), names_to = "age_group",
+    names_prefix = "rate_age_", names_transform = function(x) paste0("age_group_", x),
+    values_to = "rate"
+  ) %>%
+  inner_join(
+    # tibble(
+    #   location = "CA",
+    #   age_group = paste0("age_group_", 0:4),
+    #   # population_proportion = c(0.06187, 0.16343, 0.43359, 0.19666, 0.14445)
+    #   # ^ (estimated via single regression)
+    #   # population_proportion = c(201265, 520077, 1725382, 699145, 551243) %>% {. / sum(.)}
+    #   # ^ (from https://www.cdc.gov/nchs/nvss/bridged_race.htm vintage 2020 year 2018 data alone)
+    #   population_proportion = c(198705, 518211, 1724467, 696784, 568237) %>% {. / sum(.)}
+    #   # ^ (from https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-detail.html, only taking estimates for one year)
+    # ),
+    # by = c("location", "age_group"),
+    # prepared from https://www.cdc.gov/nchs/nvss/bridged_race.htm vintage 2020(?) years 2017 and 2018 data:
+    # tribble(
+    #   ~location, ~age_group,    ~epiweek,              ~pop,
+    #   "CA",      "age_group_0", as.Date("2017-07-01"), 203813,
+    #   "CA",      "age_group_1", as.Date("2017-07-01"), 521827,
+    #   "CA",      "age_group_2", as.Date("2017-07-01"), 1722399,
+    #   "CA",      "age_group_3", as.Date("2017-07-01"), 700090,
+    #   "CA",      "age_group_4", as.Date("2017-07-01"), 534789,
+    #   "CA",      "age_group_0", as.Date("2018-07-01"), 201265,
+    #   "CA",      "age_group_1", as.Date("2018-07-01"), 520077,
+    #   "CA",      "age_group_2", as.Date("2018-07-01"), 1725382,
+    #   "CA",      "age_group_3", as.Date("2018-07-01"), 699145,
+    #   "CA",      "age_group_4", as.Date("2018-07-01"), 551243,
+    #   ),
+    # join_by(location, closest(y$epiweek <= x$epiweek), age_group),
+    # suffix = c("", "_for_pop"),
+    # relationship = "many-to-one", unmatched = "error"
+    # prepared from https://www.census.gov/programs-surveys/popest/technical-documentation/research/evaluation-estimates/2020-evaluation-estimates/2010s-county-detail.html vintage 2020(?) years 2017 and 2018 data (identical to above)
+    tribble(
+      ~location, ~age_group,    ~epiweek,              ~pop,
+      "CA",      "age_group_0", as.Date("2017-07-01"), 203813,
+      "CA",      "age_group_1", as.Date("2017-07-01"), 521827,
+      "CA",      "age_group_2", as.Date("2017-07-01"), 1722399,
+      "CA",      "age_group_3", as.Date("2017-07-01"), 700090,
+      "CA",      "age_group_4", as.Date("2017-07-01"), 534789,
+      "CA",      "age_group_0", as.Date("2018-07-01"), 201265,
+      "CA",      "age_group_1", as.Date("2018-07-01"), 520077,
+      "CA",      "age_group_2", as.Date("2018-07-01"), 1725382,
+      "CA",      "age_group_3", as.Date("2018-07-01"), 699145,
+      "CA",      "age_group_4", as.Date("2018-07-01"), 551243,
+    ),
+    join_by(location, closest(y$epiweek <= x$epiweek), age_group),
+    suffix = c("", "_for_pop"),
+    relationship = "many-to-one", unmatched = "error"
+  ) %>%
+  select(-epiweek_for_pop)
 flu_data
 ```
 
@@ -234,9 +287,26 @@ time).
 Now we can aggregate the data by `age_group`, if we want to compute the total:
 
 ```{r}
-group_cols <- key_colnames(exclude = "age_group")
-flu_data %>%
-  sum_groups_epi_df("rate", group_cols = group_cols)
+group_cols <- key_colnames(flu_data, exclude = "age_group")
+rate_overall_recalc_edf <-
+  flu_data %>%
+  # mutate(weighted_rate = rate * population_proportion) %>%
+  group_by(geo_value, time_value) %>%
+  mutate(weighted_rate = rate * pop / sum(pop)) %>%
+  ungroup() %>%
+  sum_groups_epi_df("weighted_rate", group_cols = group_cols) %>%
+  rename(rate_overall_recalc = weighted_rate) %>%
+  mutate(rate_overall_recalc = round(rate_overall_recalc, 1)) %>%
+  # compare to published overall rates:
+  inner_join(
+    flu_data_api %>%
+      select(geo_value = location, time_value = epiweek, rate_overall),
+    by = c("geo_value", "time_value"),
+    relationship = "one-to-one", unmatched = "error"
+  )
+max(abs(rate_overall_recalc_edf$rate_overall - rate_overall_recalc_edf$rate_overall_recalc))
+rate_overall_recalc_edf %>%
+  slice_max(abs(rate_overall_recalc_edf$rate_overall - rate_overall_recalc_edf$rate_overall_recalc))
 ```
 
 ## Detecting and filling time gaps with `complete.epi_df`
@@ -515,4 +585,3 @@ Engineering. Copyright Johns Hopkins University 2020.
 API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html):
 These signals are taken directly from the JHU CSSE [COVID-19 GitHub
 repository](https://github.com/CSSEGISandData/COVID-19) without changes.
-