From c70c7a3c4be479b44c73baf83765c461b1952fa0 Mon Sep 17 00:00:00 2001 From: gvegayon Date: Fri, 21 Apr 2023 22:16:12 +0000 Subject: [PATCH] Re-build examples --- docs/week-10-slides.html | 162 +++++++++++++++++++++------------------ 1 file changed, 86 insertions(+), 76 deletions(-) diff --git a/docs/week-10-slides.html b/docs/week-10-slides.html index 8dda7b1..bb1a92b 100644 --- a/docs/week-10-slides.html +++ b/docs/week-10-slides.html @@ -1576,6 +1576,16 @@

Fundamentals of Web Scraping

Web scraping raw HTML: Example

We want to directly capture the table of COVID-19 death rates per country from Wikipedia.

+
library(rvest)
+library(xml2)
+
+# Reading the HTML table with the function xml2::read_html
+covid <- read_html(
+  x = "https://en.wikipedia.org/w/index.php?title=COVID-19_pandemic_death_rates_by_country&oldid=1117643862"
+  )
+
+# Let's see the output
+covid
{html_document}
 <html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-language-alert-in-sidebar-enabled vector-feature-sticky-header-disabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled vector-feature-zebra-design-disabled" lang="en" dir="ltr">
@@ -1599,9 +1609,9 @@ 

Web scraping raw HTML: Example (cont. 1)

Web scraping with xml2 and the rvest package (cont. 2)

Now that we know what the path is, let’s use it to extract the table:

-
table <- xml_find_all(covid, xpath = '//*[@id="covid-19-pandemic-cases-and-mortality-by-country"]/div[5]/table')
-table <- html_table(table) # This returns a list of tables
-head(table[[1]])
+
table <- xml_find_all(covid, xpath = '//*[@id="covid-19-pandemic-cases-and-mortality-by-country"]/div[5]/table')
+table <- html_table(table) # This returns a list of tables
+head(table[[1]])
# A tibble: 6 × 4
   Country                `Deaths / million` Deaths    Cases      
@@ -1658,48 +1668,48 @@ 

GA: Some examples

GitHub Actions: Workflow

The workflow file (stored under .github/workflows)

-
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
-# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
-on:
-  push:
-    branches: [main, master]
-  schedule:
-    - cron: '0 0 * * 0' # https://crontab.guru/
-
-name: Build it
-
-jobs:
-  Build:
-    runs-on: ubuntu-latest
-    container: rocker/tidyverse:4.2.2
-    env:
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
-      GITHUB_REPO: ${{ github.event.repository.name }}
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-    
-      # Installing quarto
-      - uses: quarto-dev/quarto-actions/setup@v2
-        with:
-          version: 0.3.71
-    
-      - name: Install packags and render
-        run: |
-          install2.r xml2 quarto
-          quarto render README.qmd
-    
-      # There's an error with EndBug, need to use the safe.directory
-      # option. More here
-      # https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
-      - name: Dealing with GitConfig
-        run: |
-          git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
-          
-      - uses: EndBug/add-and-commit@v9
-        with:
-          add: README.md
+
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  schedule:
+    - cron: '0 0 * * 0' # https://crontab.guru/
+
+name: Build it
+
+jobs:
+  Build:
+    runs-on: ubuntu-latest
+    container: rocker/tidyverse:4.2.2
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      GITHUB_REPO: ${{ github.event.repository.name }}
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+    
+      # Installing quarto
+      - uses: quarto-dev/quarto-actions/setup@v2
+        with:
+          version: 0.3.71
+    
+      - name: Install packags and render
+        run: |
+          install2.r xml2 quarto
+          quarto render README.qmd
+    
+      # There's an error with EndBug, need to use the safe.directory
+      # option. More here
+      # https://git-scm.com/docs/git-config#Documentation/git-config.txt-safedirectory
+      - name: Dealing with GitConfig
+        run: |
+          git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
+          
+      - uses: EndBug/add-and-commit@v9
+        with:
+          add: README.md

Let’s go through it bit by bit

@@ -1711,11 +1721,11 @@

GA: Trigger

  • When there’s a push to the main or master branches.

  • And once a week, every Sunday at 00:00 (UTC).

  • -
    on:
    -  push:
    -    branches: [main, master]
    -  schedule:
    -    - cron: '0 0 * * 0' # https://crontab.guru/
    +
    on:
    +  push:
    +    branches: [main, master]
    +  schedule:
    +    - cron: '0 0 * * 0' # https://crontab.guru/
    @@ -1724,12 +1734,12 @@

    GA: Configuration of the Jobs

  • It runs on the latest version of Ubuntu

  • But within a container (rocker/tidyverse:4.2.2)

  • -
    Build:
    -    runs-on: ubuntu-latest
    -    container: rocker/tidyverse:4.2.2
    -    env:
    -      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    -      GITHUB_REPO: ${{ github.event.repository.name }}
    +
    Build:
    +    runs-on: ubuntu-latest
    +    container: rocker/tidyverse:4.2.2
    +    env:
    +      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    +      GITHUB_REPO: ${{ github.event.repository.name }}
    • It sets two environment variables (accessible with the dollar sign): GITHUB_PAT and GITHUB_REPO.
    @@ -1766,26 +1776,26 @@

    GA: Steps

    -
    - uses: actions/checkout@v3
    -  with:
    -    fetch-depth: 0
    -
    -- uses: quarto-dev/quarto-actions/setup@v2
    -  with:
    -    version: 0.3.71
    -
    -- name: Install packags and render
    -  run: |
    -    install2.r xml2 quarto
    -    quarto render README.qmd
    -
    -- name: Dealing with GitConfig
    -  run: |
    -    git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
    -    
    -- uses: EndBug/add-and-commit@v9
    -  with:
    -    add: README.md
    +
    - uses: actions/checkout@v3
    +  with:
    +    fetch-depth: 0
    +
    +- uses: quarto-dev/quarto-actions/setup@v2
    +  with:
    +    version: 0.3.71
    +
    +- name: Install packags and render
    +  run: |
    +    install2.r xml2 quarto
    +    quarto render README.qmd
    +
    +- name: Dealing with GitConfig
    +  run: |
    +    git config --global --add safe.directory /__w/${GITHUB_REPO}/${GITHUB_REPO}
    +    
    +- uses: EndBug/add-and-commit@v9
    +  with:
    +    add: README.md