diff --git a/04_calculus.qmd b/04_calculus.qmd index 5561376..a1517ff 100644 --- a/04_calculus.qmd +++ b/04_calculus.qmd @@ -522,8 +522,8 @@ Suppose we want to determine the area $A(R)$ of a region $R$ defined by a curve #| fig-cap: The Riemann Integral as a Sum of Evaluations f3 <- function(x) -15 * (x - 5) + (x - 5)^3 + 50 -d1 <- tibble(x = seq(0, 10, 1)) %>% mutate(f = f3(x)) -d2 <- tibble(x = seq(0, 10, 0.1)) %>% mutate(f = f3(x)) +d1 <- tibble(x = seq(0, 10, 1)) |> mutate(f = f3(x)) +d2 <- tibble(x = seq(0, 10, 0.1)) |> mutate(f = f3(x)) range <- tibble::tibble(x = c(0, 10)) diff --git a/11_data-handling_counting.qmd b/11_data-handling_counting.qmd index 07c64d5..b0ca608 100644 --- a/11_data-handling_counting.qmd +++ b/11_data-handling_counting.qmd @@ -289,7 +289,7 @@ These `tidyverse` commands from the `dplyr` package are newer and not built-in, - 80 percent of your data wrangling needs might be doable with these basic `dplyr` functions: `select`, `mutate`, `group_by`, `summarize`, and `arrange`. - These verbs roughly correspond to the same commands in SQL, another important language in data science. -- The `%>%` symbol is a pipe. It takes the thing on the left side and pipes it down to the function on the right side. We could have done `count(cen10, race)` as `cen10 %>% count(race)`. That means take `cen10` and pass it on to the function `count`, which will count observations by race and return a collapsed dataset with the categories in its own variable and their respective counts in `n`. +- The `|>` symbol is a pipe. It takes the thing on the left side and pipes it down to the function on the right side. We could have done `count(cen10, race)` as `cen10 |> count(race)`. That means take `cen10` and pass it on to the function `count`, which will count observations by race and return a collapsed dataset with the categories in its own variable and their respective counts in `n`. ### Extra: A sneak peak at Ober's data diff --git a/12_matricies-manipulation.qmd b/12_matricies-manipulation.qmd index 69bfa3d..b2508e7 100644 --- a/12_matricies-manipulation.qmd +++ b/12_matricies-manipulation.qmd @@ -323,14 +323,14 @@ But a subset of your data -- individual values-- can be considered a matrix too. # Below two lines of code do the same thing cen10[1:20, ] -cen10 %>% slice(1:20) +cen10 |> slice(1:20) # Of the first 20 rows of the entire data, look at values of just race and age # Below two lines of code do the same thing cen10[1:20, c("race", "age")] -cen10 %>% - slice(1:20) %>% +cen10 |> + slice(1:20) |> select(race, age) ``` @@ -340,14 +340,14 @@ A vector is a special type of matrix with only one column or only one row # One column cen10[1:10, c("age")] -cen10 %>% - slice(1:10) %>% +cen10 |> + slice(1:10) |> select(c("age")) # One row cen10[2, ] -cen10 %>% slice(2) +cen10 |> slice(2) ``` What if we want a special subset of the data? For example, what if I only want the records of individuals in California? What if I just want the age and race of individuals in California? @@ -356,15 +356,15 @@ What if we want a special subset of the data? For example, what if I only want t # subset for CA rows ca_subset <- cen10[cen10$state == "California", ] -ca_subset_tidy <- cen10 %>% filter(state == "California") +ca_subset_tidy <- cen10 |> filter(state == "California") all_equal(ca_subset, ca_subset_tidy) # subset for CA rows and select age and race ca_subset_age_race <- cen10[cen10$state == "California", c("age", "race")] -ca_subset_age_race_tidy <- cen10 %>% - filter(state == "California") %>% +ca_subset_age_race_tidy <- cen10 |> + filter(state == "California") |> select(age, race) all_equal(ca_subset_age_race, ca_subset_age_race_tidy) @@ -375,22 +375,22 @@ Some common operators that can be used to filter or to use as a condition. Remem ```{r} # all individuals older than 30 and younger than 70 s1 <- cen10[cen10$age > 30 & cen10$age < 70, ] -s2 <- cen10 %>% filter(age > 30 & age < 70) +s2 <- cen10 |> filter(age > 30 & age < 70) all_equal(s1, s2) # all individuals in either New York or California s3 <- cen10[cen10$state == "New York" | cen10$state == "California", ] -s4 <- cen10 %>% filter(state == "New York" | state == "California") +s4 <- cen10 |> filter(state == "New York" | state == "California") all_equal(s3, s4) # all individuals in any of the following states: California, Ohio, Nevada, Michigan s5 <- cen10[cen10$state %in% c("California", "Ohio", "Nevada", "Michigan"), ] -s6 <- cen10 %>% filter(state %in% c("California", "Ohio", "Nevada", "Michigan")) +s6 <- cen10 |> filter(state %in% c("California", "Ohio", "Nevada", "Michigan")) all_equal(s5, s6) # all individuals NOT in any of the following states: California, Ohio, Nevada, Michigan s7 <- cen10[!(cen10$state %in% c("California", "Ohio", "Nevada", "Michigan")), ] -s8 <- cen10 %>% filter(!state %in% c("California", "Ohio", "Nevada", "Michigan")) +s8 <- cen10 |> filter(!state %in% c("California", "Ohio", "Nevada", "Michigan")) all_equal(s7, s8) ``` diff --git a/13_functions_obj_loops.qmd b/13_functions_obj_loops.qmd index 1c07124..6ac3fcd 100644 --- a/13_functions_obj_loops.qmd +++ b/13_functions_obj_loops.qmd @@ -157,10 +157,10 @@ Anything can be an object! Even graphs (in `ggplot`) can be assigned, re-assigne ```{r} #| warning: false -grp_race <- group_by(cen10, race) %>% +grp_race <- group_by(cen10, race) |> summarize(count = n()) -grp_race_ordered <- arrange(grp_race, count) %>% +grp_race_ordered <- arrange(grp_race, count) |> mutate(race = forcats::as_factor(race)) gg_tab <- ggplot(data = grp_race_ordered) + diff --git a/14_visualization.qmd b/14_visualization.qmd index 22e166b..ad4a4b1 100644 --- a/14_visualization.qmd +++ b/14_visualization.qmd @@ -37,7 +37,7 @@ Up till now, you should have covered: - Reading and handling data - Matrices and Vectors - What does `:` mean in R? What about `==`? `,`?, `!=` , `&`, `|`, `%in%` - - What does `%>%` do? + - What does `|>` do? Today we'll cover: @@ -115,8 +115,8 @@ count(cen10, race, sort = TRUE) `count` is a kind of shorthand for `group_by()` and `summarize`. This code would have done the same. ```{r} -cen10 %>% - group_by(race) %>% +cen10 |> + group_by(race) |> summarize(n = n()) ``` @@ -184,7 +184,7 @@ In ggplot you might do this by: ```{r} library(forcats) -grp_race_ordered <- arrange(grp_race, n) %>% +grp_race_ordered <- arrange(grp_race, n) |> mutate(race = as_factor(race)) ggplot(data = grp_race_ordered, aes(x = race, y = n)) + @@ -234,7 +234,7 @@ Check out each of these table objects in your console and familiarize yourself w How would you make the same figure with `ggplot()`? First, we want a count for each state $\times$ race combination. So group by those two factors and count how many observations are in each two-way categorization. `group_by()` can take any number of variables, separated by commas. ```{r} -grp_race_state <- cen10 %>% +grp_race_state <- cen10 |> count(race, state) ``` diff --git a/23_solution_programming.qmd b/23_solution_programming.qmd index aad08fc..4c72804 100644 --- a/23_solution_programming.qmd +++ b/23_solution_programming.qmd @@ -21,10 +21,10 @@ cen10 <- readRDS("data/input/usc2010_001percent.Rds") Group by state, noting that the mean of a set of logicals is a mean of 1s (`TRUE`) and 0s (`FALSE`). ```{r} -grp_st <- cen10 %>% - group_by(state) %>% - summarize(prop = mean(age >= 65)) %>% - arrange(prop) %>% +grp_st <- cen10 |> + group_by(state) |> + summarize(prop = mean(age >= 65)) |> + arrange(prop) |> mutate(state = as_factor(state)) ``` @@ -52,12 +52,12 @@ justices <- read_csv("data/input/justices_court-median.csv") Keep justices who are in the dataset in 2016, ```{r} -in_2017 <- justices %>% - filter(term >= 2016) %>% - distinct(justice) %>% # unique values +in_2017 <- justices |> + filter(term >= 2016) |> + distinct(justice) |> # unique values mutate(present_2016 = 1) # keep an indicator to distinguish from rest after merge -df_indicator <- justices %>% +df_indicator <- justices |> left_join(in_2017) ``` @@ -95,10 +95,10 @@ sample_acs <- read_csv("data/input/acs2015_1percent.csv") ### Checkpoint #3 {.unnumbered} ```{r} -cen10 %>% - group_by(state) %>% - summarise(avg_age = mean(age)) %>% - arrange(desc(avg_age)) %>% +cen10 |> + group_by(state) |> + summarise(avg_age = mean(age)) |> + arrange(desc(avg_age)) |> slice(1:10) ``` @@ -108,7 +108,7 @@ cen10 %>% states_of_interest <- c("California", "Massachusetts", "New Hampshire", "Washington") for (state_i in states_of_interest) { - state_subset <- cen10 %>% filter(state == state_i) + state_subset <- cen10 |> filter(state == state_i) print(state_i) @@ -180,20 +180,20 @@ merged_mid_polity <- left_join(polity, #| eval: false # don't include the -88, -77, -66 values in calculating the mean of polity -mean_polity_by_year <- merged_mid_polity %>% - group_by(year) %>% +mean_polity_by_year <- merged_mid_polity |> + group_by(year) |> summarise(mean_polity = mean(polity[which(polity < 11 & polity > -11)])) mean_polity_by_year_ordered <- arrange(mean_polity_by_year, year) -mean_polity_by_year_mid <- merged_mid_polity %>% - group_by(year, dispute) %>% +mean_polity_by_year_mid <- merged_mid_polity |> + group_by(year, dispute) |> summarise(mean_polity_mid = mean(polity[which(polity < 11 & polity > -11)])) mean_polity_by_year_mid_ordered <- arrange(mean_polity_by_year_mid, year) -mean_polity_no_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 0) -mean_polity_yes_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 1) +mean_polity_no_mid <- mean_polity_by_year_mid_ordered |> filter(dispute == 0) +mean_polity_yes_mid <- mean_polity_by_year_mid_ordered |> filter(dispute == 1) answer <- ggplot(data = mean_polity_by_year_ordered, aes(x = year, y = mean_polity)) + geom_line() + diff --git a/R_exercises/02_matrices-manipulation.R b/R_exercises/02_matrices-manipulation.R index ebae870..7b03a59 100644 --- a/R_exercises/02_matrices-manipulation.R +++ b/R_exercises/02_matrices-manipulation.R @@ -77,14 +77,14 @@ cross_tab[6, 2] # Below two lines of code do the same thing cen10[1:20, ] -cen10 %>% slice(1:20) +cen10 |> slice(1:20) # Of the first 20 rows of the entire data, look at values of just race and age # Below two lines of code do the same thing cen10[1:20, c("race", "age")] -cen10 %>% - slice(1:20) %>% +cen10 |> + slice(1:20) |> select(race, age) @@ -93,14 +93,14 @@ cen10 %>% # One column cen10[1:10, c("age")] -cen10 %>% - slice(1:10) %>% +cen10 |> + slice(1:10) |> select(c("age")) # One row cen10[2, ] -cen10 %>% slice(2) +cen10 |> slice(2) # What if we want a special subset of the data? For example, what if I only want the @@ -110,15 +110,15 @@ cen10 %>% slice(2) # subset for CA rows ca_subset <- cen10[cen10$state == "California", ] -ca_subset_tidy <- cen10 %>% filter(state == "California") +ca_subset_tidy <- cen10 |> filter(state == "California") all_equal(ca_subset, ca_subset_tidy) # subset for CA rows and select age and race ca_subset_age_race <- cen10[cen10$state == "California", c("age", "race")] -ca_subset_age_race_tidy <- cen10 %>% - filter(state == "California") %>% +ca_subset_age_race_tidy <- cen10 |> + filter(state == "California") |> select(age, race) all_equal(ca_subset_age_race, ca_subset_age_race_tidy) @@ -130,23 +130,23 @@ all_equal(ca_subset_age_race, ca_subset_age_race_tidy) # all individuals older than 30 and younger than 70 s1 <- cen10[cen10$age > 30 & cen10$age < 70, ] -s2 <- cen10 %>% filter(age > 30 & age < 70) +s2 <- cen10 |> filter(age > 30 & age < 70) all_equal(s1, s2) # all individuals in either New York or California s3 <- cen10[cen10$state == "New York" | cen10$state == "California", ] -s4 <- cen10 %>% filter(state == "New York" | state == "California") +s4 <- cen10 |> filter(state == "New York" | state == "California") all_equal(s3, s4) # all individuals in any of the following states: California, Ohio, Nevada, Michigan s5 <- cen10[cen10$state %in% c("California", "Ohio", "Nevada", "Michigan"), ] -s6 <- cen10 %>% filter(state %in% c("California", "Ohio", "Nevada", "Michigan")) +s6 <- cen10 |> filter(state %in% c("California", "Ohio", "Nevada", "Michigan")) all_equal(s5, s6) # all individuals NOT in any of the following states: California, Ohio, Nevada, Michigan s7 <- cen10[!(cen10$state %in% c("California", "Ohio", "Nevada", "Michigan")), ] -s8 <- cen10 %>% filter(!state %in% c("California", "Ohio", "Nevada", "Michigan")) +s8 <- cen10 |> filter(!state %in% c("California", "Ohio", "Nevada", "Michigan")) all_equal(s7, s8) diff --git a/R_exercises/03_functions_obj_loops.R b/R_exercises/03_functions_obj_loops.R index e2c7cae..d941286 100644 --- a/R_exercises/03_functions_obj_loops.R +++ b/R_exercises/03_functions_obj_loops.R @@ -144,10 +144,10 @@ class(ols) # Anything can be an object! Even graphs (in `ggplot`) can be assigned, re-assigned, and edited. -grp_race <- group_by(cen10, race) %>% +grp_race <- group_by(cen10, race) |> summarize(count = n()) -grp_race_ordered <- arrange(grp_race, count) %>% +grp_race_ordered <- arrange(grp_race, count) |> mutate(race = forcats::as_factor(race)) gg_tab <- ggplot(data = grp_race_ordered) + diff --git a/R_exercises/04_visualization.R b/R_exercises/04_visualization.R index 2c237d5..1c22b4e 100644 --- a/R_exercises/04_visualization.R +++ b/R_exercises/04_visualization.R @@ -16,7 +16,7 @@ library(scales) # * Reading and handling data # * Matrices and Vectors # * What does `:` mean in R? What about `==`? `,`?, `!=` , `&`, `|`, `%in% ` -# * What does `%>%` do? +# * What does `|>` do? # # # Today we'll cover: @@ -92,8 +92,8 @@ count(cen10, race, sort = TRUE) # `count` is a kind of shorthand for `group_by()` and `summarize`. This code would have done the same. -cen10 %>% - group_by(race) %>% +cen10 |> + group_by(race) |> summarize(n = n()) @@ -169,7 +169,7 @@ barplot(sort(table(cen10$race)), # sort numbers # In ggplot you might do this by: library(forcats) -grp_race_ordered <- arrange(grp_race, n) %>% +grp_race_ordered <- arrange(grp_race, n) |> mutate(race = as_factor(race)) ggplot(data = grp_race_ordered, aes(x = race, y = n)) + @@ -224,7 +224,7 @@ ptab_race_state # `group_by()` can take any number of variables, separated by commas. -grp_race_state <- cen10 %>% +grp_race_state <- cen10 |> count(race, state) diff --git a/_book/Math-Prefresher-for-Political-Scientists.pdf b/_book/Math-Prefresher-for-Political-Scientists.pdf index c84e7e8..fe507bc 100644 Binary files a/_book/Math-Prefresher-for-Political-Scientists.pdf and b/_book/Math-Prefresher-for-Political-Scientists.pdf differ