# Match Data Web Scraping #340
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Scheduled workflow: scrapes match data with Selenium/Chromium, commits any
# changes under data/source/ back to the repository, and then publishes a new
# version of the Kaggle dataset stored in data/source/match_dataset.
name: Match Data Web Scraping

on:
  schedule:
    # Once a day at 01:00 (GitHub evaluates cron schedules in UTC).
    - cron: '0 1 * * *'

jobs:
  web_scraping:
    runs-on: ubuntu-latest
    # The job pushes commits with GITHUB_TOKEN, so it needs write access to
    # repository contents; no other token scopes are requested.
    permissions:
      contents: write
    steps:
      - name: Check out this repo
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: '3.9'

      # Diagnostic only: records which system packages the runner image has.
      - name: Installed package list
        run: apt list --installed

      - name: Download ChromeDriver
        env:
          # Single place to bump the ChromeDriver build.
          # NOTE(review): the Chromium installed by a later step is whatever
          # the package manager provides; confirm its major version matches
          # this driver.
          CHROMEDRIVER_VERSION: '131.0.6778.0'
        run: |
          wget -q "https://storage.googleapis.com/chrome-for-testing-public/${CHROMEDRIVER_VERSION}/linux64/chromedriver-linux64.zip" -O chromedriver.zip
          unzip chromedriver.zip
          # Copy into place and mark executable in one step; sudo because
          # /usr/local/bin is a system directory.
          sudo install -m 0755 chromedriver-linux64/chromedriver /usr/local/bin/chromedriver

      # -y keeps the package steps non-interactive; apt-get is used because
      # it is the interface intended for scripts.
      - name: Remove Chrome
        run: sudo apt-get purge -y google-chrome-stable

      - name: Remove default Chromium
        run: sudo apt-get purge -y chromium-browser

      - name: Install a new Chromium
        run: |
          # Refresh the package index first so the install does not fail on
          # stale package metadata.
          sudo apt-get update
          sudo apt-get install -y chromium-browser

      - name: Install all necessary packages
        run: pip install requests beautifulsoup4 pandas webdriver_manager selenium kaggle

      - name: Run the latest matches scraping script
        run: python scripts/000_Web_Scraping/001_Scraping_Latest_Matches.py

      - name: Run the fixtures scraping script
        run: python scripts/000_Web_Scraping/002_Scraping_Future_Matches.py

      - name: Commit changes
        env:
          # Secrets are handed to the shell as environment variables instead
          # of being spliced into the script text, so values containing
          # quotes or `$` cannot alter the commands.
          GIT_EMAIL: ${{ secrets.GIT_EMAIL }}
          GIT_USERNAME: ${{ secrets.GIT_USERNAME }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          git config --global user.email "$GIT_EMAIL"
          git config --global user.name "$GIT_USERNAME"
          # Stage first: a plain `git diff` ignores untracked files, so newly
          # created data files would otherwise never be committed.
          git add -A data/source/
          if git diff --cached --quiet -- data/source/; then
            echo "No changes in data/source/ directory. Skipping commit."
          else
            git commit -m "Update data/source/"
            git remote set-url origin "https://x-access-token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}"
            git push
          fi

      - name: Create .kaggle directory
        run: mkdir -p /home/runner/.kaggle

      - name: Set up Kaggle credentials
        env:
          KAGGLE_JSON: ${{ secrets.KAGGLE_JSON }}
        run: |
          # Write the credentials file, then restrict it to the owner.
          echo "$KAGGLE_JSON" > /home/runner/.kaggle/kaggle.json
          chmod 600 /home/runner/.kaggle/kaggle.json

      - name: Update Kaggle
        # Run from the dataset folder so `-p .` points at it.
        working-directory: data/source/match_dataset
        run: kaggle datasets version -p . -m "Updated data"