library(rvest)
+library(xml2)
+
+# Reading the HTML page (which contains the table) with the function xml2::read_html
+covid <- read_html(
+ x = "https://en.wikipedia.org/w/index.php?title=COVID-19_pandemic_death_rates_by_country&oldid=1117643862"
+ )
+
+# Let's see the output
+covid
{html_document}
<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-language-alert-in-sidebar-enabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled vector-feature-zebra-design-disabled" lang="en" dir="ltr">
@@ -1599,9 +1609,9 @@ Web scraping raw HTML: Example (cont 1.)
Web scraping with xml2
and the rvest
package (cont. 2)
Now that we know what the path is, let’s use it to extract the table
-table <- xml_find_all(covid, xpath = '//*[@id="covid-19-pandemic-cases-and-mortality-by-country"]/div[5]/table')
-table <- html_table(table) # This returns a list of tables
-head(table[[1]])
+table <- xml_find_all(covid, xpath = '//*[@id="covid-19-pandemic-cases-and-mortality-by-country"]/div[5]/table')
+table <- html_table(table) # This returns a list of tables
+head(table[[1]])
# A tibble: 6 × 4
Country `Deaths / million` Deaths Cases
@@ -1658,48 +1668,48 @@ GA: Some examples
GitHub Actions: Workflow
The workflow file (stored under .github/workflows
)
-# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
-# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
-on:
- push:
- branches: [main, master]
- schedule:
- - cron: '0 0 * * 0' # https://crontab.guru/
-
-name: Build it
-
-jobs:
- Build:
- runs-on: ubuntu-latest
- container: rocker/tidyverse:4.2.2
- env:
- GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
- GITHUB_REPO: ${{ github.event.repository.name }}
- steps:
- - uses: actions/checkout@v3
- with:
- fetch-depth: 0
-
- # Installing quarto
- - uses: quarto-dev/quarto-actions/setup@v2
- with:
- version: 0.3.71
-
- - name: Install packags and render
- run: |
- install2.r xml2 quarto
- quarto render README.qmd
-
- # There's an error with EndBug, need to use the safe.directory
- # option. More here
- # https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
- - name: Dealing with GitConfig
- run: |
- git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
-
- - uses: EndBug/add-and-commit@v9
- with:
- add: README.md
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+ push:
+ branches: [main, master]
+ schedule:
+ - cron: '0 0 * * 0' # https://crontab.guru/
+
+name: Build it
+
+jobs:
+ Build:
+ runs-on: ubuntu-latest
+ container: rocker/tidyverse:4.2.2
+ env:
+ GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+ GITHUB_REPO: ${{ github.event.repository.name }}
+ steps:
+ - uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+
+ # Installing quarto
+ - uses: quarto-dev/quarto-actions/setup@v2
+ with:
+ version: 0.3.71
+
+ - name: Install packags and render
+ run: |
+ install2.r xml2 quarto
+ quarto render README.qmd
+
+ # There's an error with EndBug, need to use the safe.directory
+ # option. More here
+ # https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
+ - name: Dealing with GitConfig
+ run: |
+ git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
+
+ - uses: EndBug/add-and-commit@v9
+ with:
+ add: README.md
Let’s see bit by bit
@@ -1711,11 +1721,11 @@ GA: Trigger
When there’s a push to the main or master branches.
And once a week, every Sunday at 00:00 (UTC).
-
+
-- uses: actions/checkout@v3
- with:
- fetch-depth: 0
-
-- uses: quarto-dev/quarto-actions/setup@v2
- with:
- version: 0.3.71
-
-- name: Install packags and render
- run: |
- install2.r xml2 quarto
- quarto render README.qmd
-
-- name: Dealing with GitConfig
- run: |
- git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
-
-- uses: EndBug/add-and-commit@v9
- with:
- add: README.md
+- uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+
+- uses: quarto-dev/quarto-actions/setup@v2
+ with:
+ version: 0.3.71
+
+- name: Install packags and render
+ run: |
+ install2.r xml2 quarto
+ quarto render README.qmd
+
+- name: Dealing with GitConfig
+ run: |
+ git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
+
+- uses: EndBug/add-and-commit@v9
+ with:
+ add: README.md