# Skip to content
#
# scraper-3rd-bidder-work #59
#
# scraper-3rd-bidder-work
#
# scraper-3rd-bidder-work #59

name: scraper-3rd-bidder-work

on:
  schedule:
    # 22:00 UTC (10:00 PM UTC) every weekday (Monday to Friday).
    - cron: '0 22 * * 1-5'

# Both jobs push commits back to the repository, so the workflow token
# needs write access to repository contents.
permissions:
  contents: write

jobs:
  # Installs Playwright, runs scraper.py and commits history/*.json.
  scraping:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          ref: scrapdev2  # Ensure we're checking out the scrapdev2 branch
          fetch-depth: 1  # Fetch only the latest commit

      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      - name: Cache pip
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install python modules
        run: |
          python -m pip install --upgrade pip
          pip install playwright
          playwright install --with-deps firefox

      - name: Run scraper
        run: python scraper.py

      - name: Commit result
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add history/*.json
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          # If the commit exits non-zero (e.g. nothing was staged), end the
          # step successfully without pushing.
          git commit -m "Scraping updated: ${timestamp}" || exit 0
          git push

  # Runs the R scripts (wrangler.R, foreclose.R, taxdeed.R) after the
  # scraping job has finished, then commits the resulting .rds/.csv files.
  forwarding:
    runs-on: ubuntu-latest
    needs: scraping
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          ref: scrapdev2  # Ensure we're checking out the scrapdev2 branch
          fetch-depth: 1  # Fetch only the latest commit

      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.1.1'

      # NOTE(review): setup-r may install packages into the directory named
      # by R_LIBS_USER rather than this path - confirm the cache is being hit.
      - name: Cache R packages
        uses: actions/cache@v3
        with:
          path: ~/R/x86_64-pc-linux-gnu-library/4.1
          key: ${{ runner.os }}-R-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-R-

      - name: Install R libraries
        run: |
          # Refresh the package index and install without prompting; the
          # runner has no interactive terminal to answer a confirmation.
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev
          Rscript -e 'install.packages(c("dplyr", "googlesheets4", "jsonlite"))'

      - name: Run wrangler
        run: Rscript wrangler.R

      - name: Run forwarder
        env:
          CRED_PATH: ${{ secrets.CRED_PATH }}
          SECRET_TOKEN: ${{ secrets.SECRET_TOKEN }}
          SHEETS_FORECLOSE: ${{ secrets.SHEETS_FORECLOSE }}
          SHEETS_TAXDEED: ${{ secrets.SHEETS_TAXDEED }}
          SHEETS_TEST: ${{ secrets.SHEETS_TEST }}
        run: |
          # Delete the credentials file when the step exits, including when
          # one of the R scripts fails.
          trap 'rm -f "$CRED_PATH"' EXIT
          # Quote both expansions so the token is written verbatim (no word
          # splitting or globbing) and overwrite any file already at the path.
          printf '%s\n' "$SECRET_TOKEN" > "$CRED_PATH"
          Rscript foreclose.R
          Rscript taxdeed.R

      - name: Commit data
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          # Bring in anything pushed to the branch since this job's checkout.
          git pull
          git add *.rds history/foreclose/*.csv history/taxdeed/*.csv
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          # If the commit exits non-zero (e.g. nothing was staged), end the
          # step successfully without pushing.
          git commit -m "Data updated: ${timestamp}" || exit 0
          git push