# scraper-3rd-bidder-work #54
# This file contains bidirectional Unicode text that may be interpreted or compiled differently
# than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled workflow: the only trigger defined here is the cron schedule below.
name: scraper-3rd-bidder-work
on:
  schedule:
    # Fields are minute, hour, day-of-month, month, day-of-week.
    # Hour 22 means 22:00 (10:00 PM) UTC, every weekday (Monday to Friday).
    # NOTE(review): if a 9:00 PM UTC run was intended, the hour field should be 21.
    - cron: '0 22 * * 1-5'
# Two jobs run in sequence: `scraping` runs the Python scraper and commits
# history/*.json; `forwarding` (needs: scraping) runs the R scripts and commits
# the resulting .rds/.csv files. Both operate on the scrapdev2 branch.
jobs:
  # Installs Playwright with Firefox, runs scraper.py, and pushes any staged
  # changes under history/*.json.
  scraping:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          ref: scrapdev2  # Ensure we're checking out the scrapdev2 branch
          fetch-depth: 1  # Fetch only the latest commit
      - name: Setup python
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Cache pip
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          # NOTE(review): the key hashes requirements.txt, but the install step
          # below does not read that file -- confirm the key is still meaningful.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-
      - name: Install python modules
        run: |
          python -m pip install --upgrade pip
          pip install playwright
          playwright install --with-deps firefox
      - name: Run scraper
        run: python scraper.py
      - name: Commit result
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add history/*.json
          # Stop early only when nothing is staged; a genuine commit or push
          # failure must fail the step instead of being silently ignored.
          if git diff --cached --quiet; then
            echo "No scraping changes to commit"
            exit 0
          fi
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          git commit -m "Scraping updated: ${timestamp}"
          git push

  # Runs wrangler.R, foreclose.R and taxdeed.R, then pushes updated .rds files
  # and the CSVs under history/foreclose and history/taxdeed.
  forwarding:
    runs-on: ubuntu-latest
    needs: scraping  # Start only after the scraping job has succeeded
    steps:
      - name: Checkout repo
        uses: actions/checkout@v3
        with:
          ref: scrapdev2  # Ensure we're checking out the scrapdev2 branch
          fetch-depth: 1  # Fetch only the latest commit
      - name: Setup R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.1.1'
      - name: Cache R packages
        uses: actions/cache@v3
        with:
          # NOTE(review): confirm this is the library directory that
          # install.packages() actually writes to on the runner.
          path: ~/R/x86_64-pc-linux-gnu-library/4.1
          key: ${{ runner.os }}-R-${{ hashFiles('**/DESCRIPTION') }}
          restore-keys: |
            ${{ runner.os }}-R-
      - name: Install R libraries
        run: |
          # Refresh the package index and answer prompts automatically so the
          # install cannot stall or abort in the non-interactive runner.
          sudo apt-get update
          sudo apt-get install -y libcurl4-openssl-dev
          Rscript -e 'install.packages(c("dplyr", "googlesheets4", "jsonlite"))'
      - name: Run wrangler
        run: Rscript wrangler.R
      - name: Run forwarder
        env:
          CRED_PATH: ${{ secrets.CRED_PATH }}
          SECRET_TOKEN: ${{ secrets.SECRET_TOKEN }}
          SHEETS_FORECLOSE: ${{ secrets.SHEETS_FORECLOSE }}
          SHEETS_TAXDEED: ${{ secrets.SHEETS_TAXDEED }}
          SHEETS_TEST: ${{ secrets.SHEETS_TEST }}
        run: |
          # Delete the credential file on every exit path, including a failing
          # R script, so the token never outlives this step.
          trap 'rm -f "$CRED_PATH"' EXIT
          # Quote both expansions: unquoted, the shell would word-split and
          # glob-expand the token before it is written.
          printf '%s\n' "$SECRET_TOKEN" >> "$CRED_PATH"
          Rscript foreclose.R
          Rscript taxdeed.R
      - name: Commit data
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git pull
          git add *.rds history/foreclose/*.csv history/taxdeed/*.csv
          # Stop early only when nothing is staged; a genuine commit or push
          # failure must fail the step instead of being silently ignored.
          if git diff --cached --quiet; then
            echo "No data changes to commit"
            exit 0
          fi
          timestamp=$(TZ='America/New_York' date +'%a, %F at %H:%M %Z')
          git commit -m "Data updated: ${timestamp}"
          git push