Include a prefix to the beam pipeline's stages #2361
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous integration workflow for the mlcroissant Python package.
# Triggers: pushes to main, pull requests targeting main, and merge-queue
# (merge_group) events.
name: Continuous Integration - mlcroissant (CI)

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  # No filters: run for every merge-queue group.
  merge_group:
# All jobs run on ubuntu-latest and, unless noted otherwise, execute their
# shell steps from ./python/mlcroissant.
jobs:
  # Unit tests followed by the linters (pylint, pyflakes, flake8), once per
  # Python version in the matrix.
  python-test:
    name: Format / Unit Tests / Python ${{ matrix.python-version }}
    strategy:
      # Let the other Python versions finish even if one of them fails.
      fail-fast: false
      matrix:
        # Tests currently fail for 3.8 and 3.9.
        python-version: ['3.10', '3.11']
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install library
        # Refresh the package index first so apt does not fail on stale
        # mirrors; the extras specifier is quoted to keep the shell from
        # treating the brackets as a glob pattern.
        run: |
          sudo apt-get update
          sudo apt-get install -y libgraphviz-dev git-lfs
          pip install '.[dev]'
      - name: PyTest
        run: make pytest
      # Pylint is not compatible with Apache Beam in Python 3.11:
      # https://github.com/pylint-dev/pylint/blob/02616372282fd84862636d58071e6f3c62b53559/pyproject.toml#L38
      - name: Install Pylint separately.
        run: pip install pylint
      - name: PyLint
        run: make pylint
      # Pyflakes can see unused imports
      - name: PyFlakes
        run: make pyflakes
      - name: Docstrings are defined
        run: make flake8

  # Executes every notebook under recipes/ (with one explicit exclusion)
  # through nbconvert, once per Python version in the matrix.
  notebook-test:
    name: Notebook Tests / Python ${{ matrix.python-version }}
    strategy:
      fail-fast: false
      matrix:
        # Tests currently fail for 3.8 and 3.9.
        python-version: ['3.10', '3.11']
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    env:
      # NOTE(review): this expression reads the pull_request event payload, so
      # it is presumably empty for push and merge_group runs - confirm the
      # notebooks cope with an empty value.
      GITHUB_REPOSITORY: ${{ github.event.pull_request.head.repo.full_name }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install library
        run: |
          sudo apt-get update
          sudo apt-get install -y libgraphviz-dev
      - run: pip install ipython ipykernel nbconvert
      # Notebooks are in the recipes/ folder.
      - name: Run notebook
        run: |
          GITHUB_REPOSITORY="${{ env.GITHUB_REPOSITORY }}"
          ipython kernel install --user --name croissant-notebook
          for notebook in recipes/*ipynb
          do
            if [ "$notebook" = "recipes/flores200_datapipes.ipynb" ]
            then
              echo "Skipping notebook=${notebook}"
            else
              jupyter nbconvert \
                --ExecutePreprocessor.timeout=600 \
                --ExecutePreprocessor.kernel_name=croissant-notebook \
                --to notebook \
                --execute "$notebook"
            fi
          done

  # Formatting checks only: black in --check mode and isort, both scoped to
  # ./python/mlcroissant.
  python-format:
    name: Python format
    runs-on: ubuntu-latest
    steps:
      # Same action versions as the other jobs in this workflow.
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - uses: psf/black@stable
        with:
          options: '--check --line-length 88 --preview'
          src: './python/mlcroissant'
          version: '23.11.0'
      - uses: isort/isort-action@master
        with:
          sort-paths: './python/mlcroissant'

  # Static type checking with pytype. Dependencies are installed explicitly;
  # mlcroissant itself is not pip-installed in this job.
  pytype-test:
    name: PyType
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies except mlcroissant itself
        # Literal block scalar: newlines are preserved, so the trailing
        # backslashes act as ordinary shell line continuations.
        run: |
          pip install absl-py \
            apache-beam \
            'etils[epath]' \
            GitPython \
            jsonpath_rw \
            mypy \
            networkx \
            pandas \
            pyarrow \
            pytest \
            pytype \
            rdflib \
            requests \
            torchdata \
            tqdm
      - name: PyType
        run: make pytype

  # Static type checking with mypy. Uses the same explicit dependency list as
  # the pytype job.
  mypy-test:
    name: MyPy
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install dependencies except mlcroissant itself
        # Literal block scalar: newlines are preserved, so the trailing
        # backslashes act as ordinary shell line continuations.
        run: |
          pip install absl-py \
            apache-beam \
            'etils[epath]' \
            GitPython \
            jsonpath_rw \
            mypy \
            networkx \
            pandas \
            pyarrow \
            pytest \
            pytype \
            rdflib \
            requests \
            torchdata \
            tqdm
      - name: MyPy
        run: make mypy

  # Runs the validate.py script on every *.json file under ../../datasets/,
  # skipping anything inside a data/ directory.
  validation-test:
    name: Validation / JSON-LD Tests / Python 3.11
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install library
        run: pip install '.[git]'
      - name: Validate JSON-LD files
        # NUL-delimited iteration keeps paths containing whitespace or glob
        # characters intact. The validator's stdin is redirected so it cannot
        # consume the file list that the loop is reading.
        run: |
          find ../../datasets/ -type f -name "*.json" ! -path '*/data/*' -print0 |
            while IFS= read -r -d '' file
            do
              echo "Validating ${file}..."
              python mlcroissant/scripts/validate.py --jsonld "${file}" < /dev/null
            done

  # Loads records from two sample datasets (Titanic and PASS) for each dataset
  # version directory listed in the matrix.
  generation-test:
    name: Generation / JSON-LD Tests / Python 3.11
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./python/mlcroissant
    strategy:
      matrix:
        # Quoted so the values stay strings (1.0 would otherwise be a float).
        version: ['0.8', '1.0']
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Install library
        run: pip install .
      - name: Generate JSON-LD files - Titanic
        run: |
          mlcroissant load \
            --jsonld ../../datasets/${{ matrix.version }}/titanic/metadata.json \
            --record_set passengers
      - name: Generate JSON-LD files - PASS
        run: |
          pip install '.[image]'
          python mlcroissant/scripts/load.py \
            --jsonld ../../datasets/${{ matrix.version }}/pass-mini/metadata.json \
            --record_set images