# Captured from the GitHub Actions web UI: workflow "scraperv3", run #100.
# Workflow file for this run:

# Scheduled pipeline with two sequential jobs:
#   1. scraping   - runs scraper.py under Playwright/Firefox and commits
#                   history/*.json.
#   2. forwarding - runs wrangler.R, then foreclose.R and taxdeed.R with the
#                   sheet/credential secrets, and commits the resulting
#                   *.rds and history CSV files.
name: scraperv3

on:
  schedule:
    # GitHub evaluates cron expressions in UTC; 04:00 UTC is midnight in UTC-4.
    - cron: '0 4 * * 2'  # Tuesday 04:00 UTC (midnight Monday-to-Tuesday, UTC-4)
    - cron: '0 4 * * 4'  # Thursday
    - cron: '0 4 * * 6'  # Saturday

# Both jobs push commits with the built-in GITHUB_TOKEN, which needs write
# access to repository contents; nothing else is requested.
permissions:
  contents: write

jobs:
  # Job 1: run the Python scraper and commit history/*.json.
  scraping:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
      - name: Setup python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string, not a float.
          python-version: '3.8'
      - name: Install python module
        run: |
          python -m pip install --upgrade pip
          pip install playwright
          playwright install --with-deps firefox
      - name: Run scraper
        run: python scraper.py
      - name: Commit result
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add history/*.json
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          # Nothing staged means nothing to publish: end the step successfully.
          git commit -m "Scraping updated: ${timestamp}" || exit 0
          git push

  # Job 2: run the R scripts once the scraping job has finished.
  forwarding:
    runs-on: ubuntu-latest
    needs: scraping
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4
        with:
          # Without an explicit ref, checkout pins the SHA that triggered the
          # run, which predates the commit pushed by the scraping job. Naming
          # the branch checks out its current tip instead.
          ref: ${{ github.ref }}
      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.1.1'
      - name: Install R library
        run: |
          # Refresh the package index and pass -y: the runner has no terminal
          # on which to confirm the installation.
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev
          Rscript -e 'install.packages(c("dplyr", "googlesheets4", "jsonlite"))'
      - name: Run wrangler
        run: Rscript wrangler.R
      - name: Run forwarder
        env:
          CRED_PATH: ${{ secrets.CRED_PATH }}
          SECRET_TOKEN: ${{ secrets.SECRET_TOKEN }}
          SHEETS_FORECLOSE: ${{ secrets.SHEETS_FORECLOSE }}
          SHEETS_TAXDEED: ${{ secrets.SHEETS_TAXDEED }}
          SHEETS_TEST: ${{ secrets.SHEETS_TEST }}
        run: |
          # Remove the credentials file on every exit path, including a
          # failing R script.
          trap 'rm -f "$CRED_PATH"' EXIT
          # Quote both expansions so the shell neither word-splits nor globs
          # the token, and so a path containing spaces still works.
          printf '%s\n' "$SECRET_TOKEN" >> "$CRED_PATH"
          Rscript foreclose.R
          Rscript taxdeed.R
      - name: Commit data
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Pick up anything pushed to the branch since this job's checkout.
          git pull
          git add *.rds history/foreclose/*.csv history/taxdeed/*.csv
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          # Nothing staged means nothing to publish: end the step successfully.
          git commit -m "Data updated: ${timestamp}" || exit 0
          git push