Skip to content

Scrape latest data #755

Scrape latest data

Scrape latest data #755

Workflow file for this run

# Downloads the DOAJ journal CSV and commits it to this repository whenever it
# changes, using csv-diff to produce a one-line summary as the commit message.
name: Scrape latest data

# Triggers: every push, manual dispatch, and a daily schedule (11:21 UTC).
on:
  push:
  workflow_dispatch:
  schedule:
    - cron: '21 11 * * *'

jobs:
  scheduled:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents; declaring it avoids depending on repo defaults.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v3
        with:
          # 0 fetches the complete history instead of a shallow clone.
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v4.3.0
        with:
          # Quoted so YAML keeps the version a string rather than a float
          # (an unquoted 3.10 would be read as 3.1).
          python-version: '3.8'
      - name: Install Python dependencies
        # Install csv-diff with https://github.com/simonw/csv-diff/pull/19
        run: python3 -m pip install git+https://github.com/mikecoop83/csv-diff@c3d32f758343a2ba3737d612e6e906fd9d77322b
      - name: Fetch latest data
        run: |-
          # Keep the previous snapshot so csv-diff has a baseline to compare.
          cp doaj.csv doaj-old.csv
          # --fail turns an HTTP error status into a failed step instead of
          # saving the server's error page over doaj.csv.
          curl --fail -o doaj.csv -L https://doaj.org/csv
          # Generate commit message using csv-diff (first summary line only)
          csv-diff doaj-old.csv doaj.csv --key "URL in DOAJ" --encoding "utf-8" --singular=journal --plural=journals | head -n 1 > message.txt
      - name: Commit and push if it changed
        run: |-
          git config user.name "Automated"
          git config user.email "actions@users.noreply.github.com"
          git add doaj.csv
          # git commit exits non-zero when there is nothing to commit; in that
          # (or any other commit-failure) case end the step successfully
          # without pushing.
          git commit -F message.txt || exit 0
          git push