base pipe |>

IQSS · Jul 5, 2024 · fd6c57f · fd6c57f
1 parent 76f5bba
commit fd6c57f
Show file tree

Hide file tree

Showing 10 changed files with 62 additions and 62 deletions.
diff --git a/04_calculus.qmd b/04_calculus.qmd
@@ -522,8 +522,8 @@ Suppose we want to determine the area $A(R)$ of a region $R$ defined by a curve
 #| fig-cap: The Riemann Integral as a Sum of Evaluations
 f3 <- function(x) -15 * (x - 5) + (x - 5)^3 + 50
 
-d1 <- tibble(x = seq(0, 10, 1)) %>% mutate(f = f3(x))
-d2 <- tibble(x = seq(0, 10, 0.1)) %>% mutate(f = f3(x))
+d1 <- tibble(x = seq(0, 10, 1)) |> mutate(f = f3(x))
+d2 <- tibble(x = seq(0, 10, 0.1)) |> mutate(f = f3(x))
 
 range <- tibble::tibble(x = c(0, 10))
 

diff --git a/11_data-handling_counting.qmd b/11_data-handling_counting.qmd
@@ -289,7 +289,7 @@ These `tidyverse` commands from the `dplyr` package are newer and not built-in,
 
 -   80 percent of your data wrangling needs might be doable with these basic `dplyr` functions: `select`, `mutate`, `group_by`, `summarize`, and `arrange`.
 -   These verbs roughly correspond to the same commands in SQL, another important language in data science.
--   The `%>%` symbol is a pipe. It takes the thing on the left side and pipes it down to the function on the right side. We could have done `count(cen10, race)` as `cen10 %>% count(race)`. That means take `cen10` and pass it on to the function `count`, which will count observations by race and return a collapsed dataset with the categories in its own variable and their respective counts in `n`.
+-   The `|>` symbol is a pipe. It takes the object on the left side and passes it as the first argument to the function on the right side. We could have written `count(cen10, race)` as `cen10 |> count(race)`. That means: take `cen10` and pass it to the function `count`, which counts observations by race and returns a collapsed dataset with the categories in their own variable and their respective counts in `n`.
 
 ### Extra: A sneak peek at Ober's data
 

diff --git a/12_matricies-manipulation.qmd b/12_matricies-manipulation.qmd
@@ -323,14 +323,14 @@ But a subset of your data -- individual values-- can be considered a matrix too.
 # Below two lines of code do the same thing
 cen10[1:20, ]
 
-cen10 %>% slice(1:20)
+cen10 |> slice(1:20)
 
 # Of the first 20 rows of the entire data, look at values of just race and age
 # Below two lines of code do the same thing
 cen10[1:20, c("race", "age")]
 
-cen10 %>%
-  slice(1:20) %>%
+cen10 |>
+  slice(1:20) |>
   select(race, age)
 ```
 
@@ -340,14 +340,14 @@ A vector is a special type of matrix with only one column or only one row
 # One column
 cen10[1:10, c("age")]
 
-cen10 %>%
-  slice(1:10) %>%
+cen10 |>
+  slice(1:10) |>
   select(c("age"))
 
 # One row
 cen10[2, ]
 
-cen10 %>% slice(2)
+cen10 |> slice(2)
 ```
 
 What if we want a special subset of the data? For example, what if I only want the records of individuals in California? What if I just want the age and race of individuals in California?
@@ -356,15 +356,15 @@ What if we want a special subset of the data? For example, what if I only want t
 # subset for CA rows
 ca_subset <- cen10[cen10$state == "California", ]
 
-ca_subset_tidy <- cen10 %>% filter(state == "California")
+ca_subset_tidy <- cen10 |> filter(state == "California")
 
 all_equal(ca_subset, ca_subset_tidy)
 
 # subset for CA rows and select age and race
 ca_subset_age_race <- cen10[cen10$state == "California", c("age", "race")]
 
-ca_subset_age_race_tidy <- cen10 %>%
-  filter(state == "California") %>%
+ca_subset_age_race_tidy <- cen10 |>
+  filter(state == "California") |>
   select(age, race)
 
 all_equal(ca_subset_age_race, ca_subset_age_race_tidy)
@@ -375,22 +375,22 @@ Some common operators that can be used to filter or to use as a condition. Remem
 ```{r}
 # all individuals older than 30 and younger than 70
 s1 <- cen10[cen10$age > 30 & cen10$age < 70, ]
-s2 <- cen10 %>% filter(age > 30 & age < 70)
+s2 <- cen10 |> filter(age > 30 & age < 70)
 all_equal(s1, s2)
 
 # all individuals in either New York or California
 s3 <- cen10[cen10$state == "New York" | cen10$state == "California", ]
-s4 <- cen10 %>% filter(state == "New York" | state == "California")
+s4 <- cen10 |> filter(state == "New York" | state == "California")
 all_equal(s3, s4)
 
 # all individuals in any of the following states: California, Ohio, Nevada, Michigan
 s5 <- cen10[cen10$state %in% c("California", "Ohio", "Nevada", "Michigan"), ]
-s6 <- cen10 %>% filter(state %in% c("California", "Ohio", "Nevada", "Michigan"))
+s6 <- cen10 |> filter(state %in% c("California", "Ohio", "Nevada", "Michigan"))
 all_equal(s5, s6)
 
 # all individuals NOT in any of the following states: California, Ohio, Nevada, Michigan
 s7 <- cen10[!(cen10$state %in% c("California", "Ohio", "Nevada", "Michigan")), ]
-s8 <- cen10 %>% filter(!state %in% c("California", "Ohio", "Nevada", "Michigan"))
+s8 <- cen10 |> filter(!state %in% c("California", "Ohio", "Nevada", "Michigan"))
 all_equal(s7, s8)
 ```
 

diff --git a/13_functions_obj_loops.qmd b/13_functions_obj_loops.qmd
@@ -157,10 +157,10 @@ Anything can be an object! Even graphs (in `ggplot`) can be assigned, re-assigne
 ```{r}
 #| warning: false
 
-grp_race <- group_by(cen10, race) %>%
+grp_race <- group_by(cen10, race) |>
   summarize(count = n())
 
-grp_race_ordered <- arrange(grp_race, count) %>%
+grp_race_ordered <- arrange(grp_race, count) |>
   mutate(race = forcats::as_factor(race))
 
 gg_tab <- ggplot(data = grp_race_ordered) +

diff --git a/14_visualization.qmd b/14_visualization.qmd
@@ -37,7 +37,7 @@ Up till now, you should have covered:
 -   Reading and handling data
 -   Matrices and Vectors
     -   What does `:` mean in R? What about `==`? `,`?, `!=` , `&`, `|`, `%in%`
-    -   What does `%>%` do?
+    -   What does `|>` do?
 
 Today we'll cover:
 
@@ -115,8 +115,8 @@ count(cen10, race, sort = TRUE)
 `count` is a kind of shorthand for `group_by()` and `summarize`. This code would have done the same.
 
 ```{r}
-cen10 %>%
-  group_by(race) %>%
+cen10 |>
+  group_by(race) |>
   summarize(n = n())
 ```
 
@@ -184,7 +184,7 @@ In ggplot you might do this by:
 ```{r}
 library(forcats)
 
-grp_race_ordered <- arrange(grp_race, n) %>%
+grp_race_ordered <- arrange(grp_race, n) |>
   mutate(race = as_factor(race))
 
 ggplot(data = grp_race_ordered, aes(x = race, y = n)) +
@@ -234,7 +234,7 @@ Check out each of these table objects in your console and familiarize yourself w
 How would you make the same figure with `ggplot()`? First, we want a count for each state $\times$ race combination. So group by those two factors and count how many observations are in each two-way categorization. `group_by()` can take any number of variables, separated by commas.
 
 ```{r}
-grp_race_state <- cen10 %>%
+grp_race_state <- cen10 |>
   count(race, state)
 ```
 

diff --git a/23_solution_programming.qmd b/23_solution_programming.qmd
@@ -21,10 +21,10 @@ cen10 <- readRDS("data/input/usc2010_001percent.Rds")
 Group by state, noting that the mean of a set of logicals is a mean of 1s (`TRUE`) and 0s (`FALSE`).
 
 ```{r}
-grp_st <- cen10 %>%
-  group_by(state) %>%
-  summarize(prop = mean(age >= 65)) %>%
-  arrange(prop) %>%
+grp_st <- cen10 |>
+  group_by(state) |>
+  summarize(prop = mean(age >= 65)) |>
+  arrange(prop) |>
   mutate(state = as_factor(state))
 ```
 
@@ -52,12 +52,12 @@ justices <- read_csv("data/input/justices_court-median.csv")
 Keep justices who are in the dataset in 2016 or later (`term >= 2016`):
 
 ```{r}
-in_2017 <- justices %>%
-  filter(term >= 2016) %>%
-  distinct(justice) %>% # unique values
+in_2017 <- justices |>
+  filter(term >= 2016) |>
+  distinct(justice) |> # unique values
   mutate(present_2016 = 1) # keep an indicator to distinguish from rest after merge
 
-df_indicator <- justices %>%
+df_indicator <- justices |>
   left_join(in_2017)
 ```
 
@@ -95,10 +95,10 @@ sample_acs <- read_csv("data/input/acs2015_1percent.csv")
 ### Checkpoint #3 {.unnumbered}
 
 ```{r}
-cen10 %>%
-  group_by(state) %>%
-  summarise(avg_age = mean(age)) %>%
-  arrange(desc(avg_age)) %>%
+cen10 |>
+  group_by(state) |>
+  summarise(avg_age = mean(age)) |>
+  arrange(desc(avg_age)) |>
   slice(1:10)
 ```
 
@@ -108,7 +108,7 @@ cen10 %>%
 states_of_interest <- c("California", "Massachusetts", "New Hampshire", "Washington")
 
 for (state_i in states_of_interest) {
-  state_subset <- cen10 %>% filter(state == state_i)
+  state_subset <- cen10 |> filter(state == state_i)
 
   print(state_i)
 
@@ -180,20 +180,20 @@ merged_mid_polity <- left_join(polity,
 #| eval: false
 
 # don't include the -88, -77, -66 values in calculating the mean of polity
-mean_polity_by_year <- merged_mid_polity %>%
-  group_by(year) %>%
+mean_polity_by_year <- merged_mid_polity |>
+  group_by(year) |>
   summarise(mean_polity = mean(polity[which(polity < 11 & polity > -11)]))
 
 mean_polity_by_year_ordered <- arrange(mean_polity_by_year, year)
 
-mean_polity_by_year_mid <- merged_mid_polity %>%
-  group_by(year, dispute) %>%
+mean_polity_by_year_mid <- merged_mid_polity |>
+  group_by(year, dispute) |>
   summarise(mean_polity_mid = mean(polity[which(polity < 11 & polity > -11)]))
 
 mean_polity_by_year_mid_ordered <- arrange(mean_polity_by_year_mid, year)
 
-mean_polity_no_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 0)
-mean_polity_yes_mid <- mean_polity_by_year_mid_ordered %>% filter(dispute == 1)
+mean_polity_no_mid <- mean_polity_by_year_mid_ordered |> filter(dispute == 0)
+mean_polity_yes_mid <- mean_polity_by_year_mid_ordered |> filter(dispute == 1)
 
 answer <- ggplot(data = mean_polity_by_year_ordered, aes(x = year, y = mean_polity)) +
   geom_line() +

diff --git a/R_exercises/02_matrices-manipulation.R b/R_exercises/02_matrices-manipulation.R
@@ -77,14 +77,14 @@ cross_tab[6, 2]
 # Below two lines of code do the same thing
 cen10[1:20, ]
 
-cen10 %>% slice(1:20)
+cen10 |> slice(1:20)
 
 # Of the first 20 rows of the entire data, look at values of just race and age
 # Below two lines of code do the same thing
 cen10[1:20, c("race", "age")]
 
-cen10 %>%
-  slice(1:20) %>%
+cen10 |>
+  slice(1:20) |>
   select(race, age)
 
 
@@ -93,14 +93,14 @@ cen10 %>%
 # One column
 cen10[1:10, c("age")]
 
-cen10 %>%
-  slice(1:10) %>%
+cen10 |>
+  slice(1:10) |>
   select(c("age"))
 
 # One row
 cen10[2, ]
 
-cen10 %>% slice(2)
+cen10 |> slice(2)
 
 
 # What if we want a special subset of the data? For example, what if I only want the
@@ -110,15 +110,15 @@ cen10 %>% slice(2)
 # subset for CA rows
 ca_subset <- cen10[cen10$state == "California", ]
 
-ca_subset_tidy <- cen10 %>% filter(state == "California")
+ca_subset_tidy <- cen10 |> filter(state == "California")
 
 all_equal(ca_subset, ca_subset_tidy)
 
 # subset for CA rows and select age and race
 ca_subset_age_race <- cen10[cen10$state == "California", c("age", "race")]
 
-ca_subset_age_race_tidy <- cen10 %>%
-  filter(state == "California") %>%
+ca_subset_age_race_tidy <- cen10 |>
+  filter(state == "California") |>
   select(age, race)
 
 all_equal(ca_subset_age_race, ca_subset_age_race_tidy)
@@ -130,23 +130,23 @@ all_equal(ca_subset_age_race, ca_subset_age_race_tidy)
 
 # all individuals older than 30 and younger than 70
 s1 <- cen10[cen10$age > 30 & cen10$age < 70, ]
-s2 <- cen10 %>% filter(age > 30 & age < 70)
+s2 <- cen10 |> filter(age > 30 & age < 70)
 all_equal(s1, s2)
 
 # all individuals in either New York or California
 s3 <- cen10[cen10$state == "New York" | cen10$state == "California", ]
-s4 <- cen10 %>% filter(state == "New York" | state == "California")
+s4 <- cen10 |> filter(state == "New York" | state == "California")
 all_equal(s3, s4)
 
 
 # all individuals in any of the following states: California, Ohio, Nevada, Michigan
 s5 <- cen10[cen10$state %in% c("California", "Ohio", "Nevada", "Michigan"), ]
-s6 <- cen10 %>% filter(state %in% c("California", "Ohio", "Nevada", "Michigan"))
+s6 <- cen10 |> filter(state %in% c("California", "Ohio", "Nevada", "Michigan"))
 all_equal(s5, s6)
 
 # all individuals NOT in any of the following states: California, Ohio, Nevada, Michigan
 s7 <- cen10[!(cen10$state %in% c("California", "Ohio", "Nevada", "Michigan")), ]
-s8 <- cen10 %>% filter(!state %in% c("California", "Ohio", "Nevada", "Michigan"))
+s8 <- cen10 |> filter(!state %in% c("California", "Ohio", "Nevada", "Michigan"))
 all_equal(s7, s8)
 
 

diff --git a/R_exercises/03_functions_obj_loops.R b/R_exercises/03_functions_obj_loops.R
@@ -144,10 +144,10 @@ class(ols)
 
 # Anything can be an object! Even graphs (in `ggplot`) can be assigned, re-assigned, and edited.
 
-grp_race <- group_by(cen10, race) %>%
+grp_race <- group_by(cen10, race) |>
   summarize(count = n())
 
-grp_race_ordered <- arrange(grp_race, count) %>%
+grp_race_ordered <- arrange(grp_race, count) |>
   mutate(race = forcats::as_factor(race))
 
 gg_tab <- ggplot(data = grp_race_ordered) +

diff --git a/R_exercises/04_visualization.R b/R_exercises/04_visualization.R
@@ -16,7 +16,7 @@ library(scales)
 # * Reading and handling data
 # * Matrices and Vectors
 # * What does `:` mean in R? What about `==`? `,`?, `!=` , `&`, `|`, `%in% `
-# * What does `%>%` do?
+# * What does `|>` do?
 #
 #
 # Today we'll cover:
@@ -92,8 +92,8 @@ count(cen10, race, sort = TRUE)
 
 # `count` is a kind of shorthand for `group_by()` and `summarize`. This code would have done the same.
 
-cen10 %>%
-  group_by(race) %>%
+cen10 |>
+  group_by(race) |>
   summarize(n = n())
 
 
@@ -169,7 +169,7 @@ barplot(sort(table(cen10$race)), # sort numbers
 # In ggplot you might do this by:
 library(forcats)
 
-grp_race_ordered <- arrange(grp_race, n) %>%
+grp_race_ordered <- arrange(grp_race, n) |>
   mutate(race = as_factor(race))
 
 ggplot(data = grp_race_ordered, aes(x = race, y = n)) +
@@ -224,7 +224,7 @@ ptab_race_state
 # `group_by()` can take any number of variables, separated by commas.
 
 
-grp_race_state <- cen10 %>%
+grp_race_state <- cen10 |>
   count(race, state)
 
 

diff --git a/_book/Math-Prefresher-for-Political-Scientists.pdf b/_book/Math-Prefresher-for-Political-Scientists.pdf