Skip to content

Replace dataset with correction #1029

Replace dataset with correction

Replace dataset with correction #1029

Workflow file for this run

# Runs process_dataset.py on all new and changed files.
#
# Submissions from forks only trigger the "validation" step to validate the new
# data. After merging into a new branch in the parent repo, the workflow will
# run again and trigger the "update" step to add reaction IDs, etc. in
# preparation for merging into the "main" branch.
name: Submission
# Triggered by both pull requests and pushes; the jobs below branch on
# github.event_name to decide what to run.
on:
  - pull_request
  - push
env:
  # Git ref of Open-Reaction-Database/ord-schema checked out by the jobs.
  ORD_SCHEMA_TAG: v0.3.93
jobs:
count_reactions:
  # Runs ord-schema's badges/reactions.py to write badges/reactions.svg, then
  # commits the result and pushes it to the pull request's head branch.
  # Only runs for pull requests whose head repository is not a fork.
  if: >-
    github.event_name == 'pull_request' &&
    ! github.event.pull_request.head.repo.fork
  runs-on: ubuntu-latest
  steps:
    - name: Checkout ord-data
      # checkout@v2 targets the retired Node 12 action runtime; v4 accepts the
      # same `ref`/`lfs`/`repository`/`path` inputs.
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        lfs: true
      if: github.event_name == 'pull_request'
    - name: Checkout ord-schema
      # Checked out into ./ord-schema at the tag pinned by ORD_SCHEMA_TAG.
      uses: actions/checkout@v4
      with:
        repository: Open-Reaction-Database/ord-schema
        ref: ${{ env.ORD_SCHEMA_TAG }}
        path: ord-schema
    - uses: actions/setup-python@v4
      with:
        python-version: '3.11'
    - name: Install ord_schema
      run: |
        cd "${GITHUB_WORKSPACE}/ord-schema"
        python -m pip install --upgrade pip
        python -m pip install wheel
        python -m pip install .
    - name: Update reactions badge
      # NOTE(review): this step defines no `env:` entry for GITHUB_TOKEN, so
      # confirm the push below authenticates as intended (presumably through
      # the credentials persisted by actions/checkout).
      run: |
        cd "${GITHUB_WORKSPACE}"
        python ord-schema/badges/reactions.py --root=data --output=badges/reactions.svg
        git add badges/reactions.svg
        git config user.name github-actions
        git config user.email github-actions@github.com
        # Fail gracefully if there is nothing to commit: an exit status of 1
        # from `git commit` is accepted, any other failure still aborts.
        git commit -a -m "Update badges" || (( $? == 1 ))
        git push "https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "HEAD:${GITHUB_HEAD_REF}"
check_file_types:
  # For pull requests from forks, fails when any file that differs from
  # upstream/main is not a *.pb / *.pbtxt file (optionally with a .gz suffix).
  runs-on: ubuntu-latest
  steps:
    # checkout@v2 targets the retired Node 12 action runtime; v4 is a drop-in
    # replacement when no inputs are given.
    - uses: actions/checkout@v4
    - name: Add upstream for comparisons to HEAD
      run: |
        cd "${GITHUB_WORKSPACE}"
        git remote add upstream \
          "https://github.com/Open-Reaction-Database/${GITHUB_REPOSITORY##*/}.git"
        echo "Current branch: $(git rev-parse --abbrev-ref HEAD)"
        git fetch --no-tags --prune --depth=1 upstream \
          +refs/heads/*:refs/remotes/upstream/*
    - name: Check changed files (PR from fork)
      run: |
        cd "${GITHUB_WORKSPACE}"
        git diff --name-only upstream/main > changed_files.txt
        # `grep -v` prints (and succeeds on) every path that is NOT a dataset
        # file. The dot before "gz" is escaped so that only a literal ".gz"
        # suffix is accepted (previously any character matched).
        if grep -vE '\.pb(txt)?(\.gz)?$' changed_files.txt; then
          echo "Error: submissions should only contain (gzipped) *.pbtxt or *.pb files"
          exit 1
        fi
      if: >-
        github.event_name == 'pull_request' &&
        github.event.pull_request.head.repo.fork
check_target_branch:
  # Fails fork pull requests whose base branch is "main".
  runs-on: ubuntu-latest
  steps:
    - name: Check target branch
      if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork
      run: |
        echo "Target branch: ${GITHUB_BASE_REF}"
        # The step's exit status is the result of this comparison.
        [ "${GITHUB_BASE_REF}" != "main" ]
process_submission:
  # Runs ord-schema's process_dataset.py on the dataset files that differ from
  # upstream/main. Pushes and fork pull requests run the "Validate submission"
  # step; pull requests from the same repository run "Update submission",
  # which also commits and pushes the result to the PR head branch.
  runs-on: ubuntu-latest
  steps:
    # Exactly one of the two ord-data checkouts runs, depending on the event.
    # checkout@v2 targets the retired Node 12 action runtime; v4 accepts the
    # same inputs.
    - name: Checkout ord-data
      uses: actions/checkout@v4
      with:
        lfs: true
      if: github.event_name == 'push'
    - name: Checkout ord-data
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        lfs: true
      if: github.event_name == 'pull_request'
    - name: Add upstream for comparisons to HEAD
      run: |
        cd "${GITHUB_WORKSPACE}"
        git remote add upstream \
          "https://github.com/Open-Reaction-Database/${GITHUB_REPOSITORY##*/}.git"
        echo "Current branch: $(git rev-parse --abbrev-ref HEAD)"
        git fetch --no-tags --prune --depth=1 upstream \
          +refs/heads/*:refs/remotes/upstream/*
    - name: Checkout ord-schema
      # Checked out into ./ord-schema at the tag pinned by ORD_SCHEMA_TAG.
      uses: actions/checkout@v4
      with:
        repository: Open-Reaction-Database/ord-schema
        ref: ${{ env.ORD_SCHEMA_TAG }}
        path: ord-schema
    - name: Identify changed files
      # NOTE(kearnes): This sets the NUM_CHANGED_FILES variable.
      run: |
        cd "${GITHUB_WORKSPACE}"
        git diff --name-status upstream/main > changed_files.txt
        echo "Found $(wc -l < changed_files.txt | tr -d ' ') changed files"
        cat changed_files.txt
        # Use `|| (( $? == 1 ))` in case no lines match (exit code is nonzero).
        # The dot before "gz" is escaped so only a literal ".gz" suffix matches.
        grep -E '\.pb(txt)?(\.gz)?$' changed_files.txt > changed_data_files.txt || (( $? == 1 ))
        # Use LOCAL_NUM_CHANGED since values written to GITHUB_ENV are only
        # available to subsequent steps, not to the rest of this script.
        LOCAL_NUM_CHANGED="$(wc -l < changed_data_files.txt | tr -d ' ')"
        echo "NUM_CHANGED_FILES=${LOCAL_NUM_CHANGED}" >> "${GITHUB_ENV}"
        echo "Found ${LOCAL_NUM_CHANGED} changed dataset files"
        cat changed_data_files.txt
    # Python setup and installation are skipped when no dataset files changed.
    - uses: actions/setup-python@v4
      with:
        python-version: '3.11'
      if: env.NUM_CHANGED_FILES != '0'
    - name: Install ord_schema
      run: |
        cd "${GITHUB_WORKSPACE}/ord-schema"
        python -m pip install --upgrade pip
        python -m pip install wheel
        python -m pip install .
      if: env.NUM_CHANGED_FILES != '0'
    - name: Validate submission
      # Runs for every event except pull requests from the same repository.
      run: |
        cd "${GITHUB_WORKSPACE}"
        python ./ord-schema/ord_schema/scripts/process_dataset.py \
          --input_file=changed_data_files.txt \
          --base=upstream/main
      if: >-
        env.NUM_CHANGED_FILES != '0' &&
        ! (github.event_name == 'pull_request' &&
        ! github.event.pull_request.head.repo.fork)
    - name: Update submission
      # Runs only for pull requests from the same repository.
      # NOTE(review): this step defines no `env:` entry for GITHUB_TOKEN, so
      # confirm the push below authenticates as intended (presumably through
      # the credentials persisted by actions/checkout).
      run: |
        cd "${GITHUB_WORKSPACE}"
        python ./ord-schema/ord_schema/scripts/process_dataset.py \
          --input_file=changed_data_files.txt \
          --update \
          --cleanup \
          --base=upstream/main \
          --issue=${{ github.event.number }} \
          --token=${{ secrets.GITHUB_TOKEN }}
        git config user.name github-actions
        git config user.email github-actions@github.com
        # Fail gracefully if there is nothing to commit.
        git commit -a -m "Update submission" || (( $? == 1 ))
        git push "https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" "HEAD:${GITHUB_HEAD_REF}"
      if: >-
        env.NUM_CHANGED_FILES != '0' &&
        github.event_name == 'pull_request' &&
        ! github.event.pull_request.head.repo.fork