From fd6736b6008cd4a9ae050d192baac890d9d7e906 Mon Sep 17 00:00:00 2001
From: Anthony Mahanna
Date: Tue, 17 Oct 2023 19:17:37 -0400
Subject: [PATCH] checkpoint

---
 .circleci/config.yml          |  10 +--
 .github/workflows/build.yml   |  56 ----------------
 .github/workflows/release.yml |  72 +++++++-------------
 adbcug_adapter/adapter.py     |   8 ++-
 tests/conftest.py             |  26 ++------
 tests/test_adapter.py         | 120 ++++++++++++++++++++--------------
 6 files changed, 110 insertions(+), 182 deletions(-)
 delete mode 100644 .github/workflows/build.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 28f31bd..29cd5c7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -27,15 +27,15 @@ jobs:
       - run:
           name: Run black
           command: black --check --verbose --diff --color --config=pyproject.toml ./adbcug_adapter ./tests/
+      - run:
+          name: Run flake8
+          command: flake8 ./adbcug_adapter ./tests
       - run:
           name: Run isort
           command: isort --check ./adbcug_adapter ./tests/
-      # - run:
-      #     name: Run mypy
-      #     command: mypy ./adbcug_adapter ./tests
       - run:
-          name: Run flake8
-          command: flake8 ./adbcug_adapter ./tests
+          name: Run mypy
+          command: mypy ./adbcug_adapter
       - save_cache:
           key: pip-and-local-cache
           paths:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index 762f8f3..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: build
-on:
-  workflow_dispatch:
-  push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
-env:
-  PACKAGE_DIR: adbcug_adapter
-  TESTS_DIR: tests
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    container: ${{ matrix.container }}
-    strategy:
-      matrix:
-        include:
-          - python: "3.8"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.8"
-
-          - python: "3.9"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.9"
-
-          - python: "3.10"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.10"
-
-    name: Python ${{ matrix.python }}
-    steps:
-      - uses: actions/checkout@v3
-      - name: Setup Python ${{ matrix.python }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python }}
-      - name: Install packages
-        run: pip install -e '.[dev]'
-      # - name: Run black
-      #   run: black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run flake8
-      #   run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run isort
-      #   run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run mypy
-      #   run: mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Setup Docker
-      #   uses: docker/setup-buildx-action@v2.8.0
-      # - name: Set up ArangoDB Instance via Docker
-      #   run: docker create --name adb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb
-      # - name: Start ArangoDB Instance
-      #   run: docker start adb
-      - name: Run pytest
-        run: pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes
-      - name: Publish to coveralls.io
-        if: matrix.python == '3.8'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: conda run -n ${{ matrix.python }} coveralls --service=github
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index aa79339..9774930 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -7,50 +7,8 @@ env:
   PACKAGE_DIR: adbcug_adapter
   TESTS_DIR: tests
 jobs:
-  build:
-    runs-on: self-hosted
-    defaults:
-      run:
-        shell: bash -l {0}
-    strategy:
-      matrix:
-        include:
-          - python: "3.7"
-            DB_NAME: "py37"
-
-          - python: "3.8"
-            DB_NAME: "py38"
-
-          - python: "3.9"
-            DB_NAME: "py39"
-
-    name: gpu
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up pip & install packages
-        run: |
-          source ~/anaconda3/etc/profile.d/conda.sh
-          conda activate ${{ matrix.python }}
-          python -m pip install --upgrade pip setuptools wheel
-          pip install .[dev]
-      - name: Run black
-        run: conda run -n ${{ matrix.python }} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run flake8
-        run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run isort
-        run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run mypy
-        run: conda run -n ${{ matrix.python }} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run pytest in conda env
-        run: conda run -n ${{ matrix.python }} pytest --dbName ${{ matrix.DB_NAME }} --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes
-      - name: Publish to coveralls.io
-        if: matrix.python == '3.8'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: conda run -n ${{ matrix.python }} coveralls --service=github
   release:
-    needs: build
-    runs-on: self-hosted
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash -l {0}
@@ -63,13 +21,27 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: "3.9"
-      - name: Conda Build
-        run: |
-          source ~/anaconda3/etc/profile.d/conda.sh
-          conda activate condabuild39
-          anaconda logout
-          conda config --set anaconda_upload yes
-          conda-build --token '${{ secrets.CONDA_TOKEN }}' --user arangodb .
+
+      - name: Install packages
+        run: pip install setuptools wheel twine setuptools-scm[toml]
+
+      - name: Remove (old) distribution
+        run: rm -rf dist
+
+      - name: Build distribution
+        run: python setup.py sdist bdist_wheel
+
+      - name: Publish to Test PyPi
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
+        run: twine upload --repository testpypi dist/* #--skip-existing
+
+      - name: Publish to PyPi
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
+        run: twine upload --repository pypi dist/* #--skip-existing
 
   changelog:
     needs: release
diff --git a/adbcug_adapter/adapter.py b/adbcug_adapter/adapter.py
index 52918b8..7cf9272 100644
--- a/adbcug_adapter/adapter.py
+++ b/adbcug_adapter/adapter.py
@@ -322,7 +322,7 @@ def cugraph_to_arangodb(
             name, overwrite_graph, edge_definitions, orphan_collections
         )
 
-        adb_v_cols: List[str] = adb_graph.vertex_collections()  # type: ignore
+        adb_v_cols: List[str] = adb_graph.vertex_collections()
         adb_e_cols: List[str] = [
             c["edge_collection"] for c in adb_graph.edge_definitions()  # type: ignore
         ]
@@ -389,7 +389,11 @@ def cugraph_to_arangodb(
         bar_progress = get_bar_progress("(CUG → ADB): Edges", "#5E3108")
         bar_progress_task = bar_progress.add_task("Edges", total=len(cug_edges))
 
-        cug_weights = cug_edges[edge_attr] if cug_graph.is_weighted() else None
+        cug_weights = (
+            cug_edges[edge_attr]
+            if cug_graph.is_weighted() and edge_attr is not None
+            else None
+        )
 
         with Live(Group(bar_progress, spinner_progress)):
             for i in range(len(cug_edges)):
diff --git a/tests/conftest.py b/tests/conftest.py
index 260703a..b66f64f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,8 @@
 import logging
-import os
 import subprocess
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import Any, List
 
-from adb_cloud_connector import get_temp_credentials
 from arango import ArangoClient
 from arango.database import StandardDatabase
 from cudf import DataFrame
@@ -13,7 +11,7 @@
 
 from adbcug_adapter import ADBCUG_Adapter
 from adbcug_adapter.controller import ADBCUG_Controller
-from adbcug_adapter.typings import CUGId, Json
+from adbcug_adapter.typings import CUGId
 
 PROJECT_DIR = Path(__file__).parent.parent
 
@@ -56,7 +54,6 @@ def pytest_configure(config: Any) -> None:
     global adbcug_adapter, bipartite_adbcug_adapter, likes_adbcug_adapter
     adbcug_adapter = ADBCUG_Adapter(db, logging_lvl=logging.DEBUG)
     bipartite_adbcug_adapter = ADBCUG_Adapter(db, Bipartite_ADBCUG_Controller())
-    likes_adbcug_adapter = ADBCUG_Adapter(db, Likes_ADBCUG_Controller())
 
     if db.has_graph("fraud-detection") is False:
         arango_restore(con, "examples/data/fraud_dump")
@@ -138,6 +135,9 @@ def get_bipartite_graph() -> CUGGraph:
 
 
 class Bipartite_ADBCUG_Controller(ADBCUG_Controller):
+    def _identify_cugraph_node(self, cug_node_id: CUGId, adb_v_cols: List[str]) -> str:
+        return str(cug_node_id).split("/")[0]
+
     def _keyify_cugraph_node(self, cug_node_id: CUGId, col: str) -> str:
         return self._string_to_arangodb_key_helper(str(cug_node_id).split("/")[1])
 
@@ -164,19 +164,3 @@ def get_likes_graph() -> CUGGraph:
         edges, source="src", destination="dst", edge_attr="likes"
     )
     return cug_graph
-
-
-class Likes_ADBCUG_Controller(ADBCUG_Controller):
-    def _identify_cugraph_edge(
-        self,
-        from_cug_node: Json,
-        to_cug_node: Json,
-        adb_e_cols: List[str],
-        weight: Optional[Any] = None,
-    ) -> str:
-        if weight is True:
-            return "likes"
-        elif weight is False:
-            return "dislikes_intentional_typo_here"
-        else:
-            raise ValueError(f"Unrecognized 'weight' value: {weight}")
diff --git a/tests/test_adapter.py b/tests/test_adapter.py
index 3ed600c..e45d5e3 100644
--- a/tests/test_adapter.py
+++ b/tests/test_adapter.py
@@ -5,8 +5,8 @@
 from cugraph import Graph as CUGGraph
 from cugraph import MultiGraph as CUGMultiGraph
 
-from adbcug_adapter import ADBCUG_Adapter
-from adbcug_adapter.typings import ADBMetagraph, Json
+from adbcug_adapter import ADBCUG_Adapter, ADBCUG_Controller
+from adbcug_adapter.typings import ADBMetagraph, CUGId, Json
 
 from .conftest import (
     adbcug_adapter,
@@ -16,7 +16,6 @@
     get_divisibility_graph,
     get_drivers_graph,
     get_likes_graph,
-    likes_adbcug_adapter,
 )
 
 
@@ -133,7 +132,7 @@ def test_adb_graph_to_cug(
     v_cols = arango_graph.vertex_collections()
     e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()}
 
-    cug_g = adapter.arangodb_graph_to_cugraph(name)
+    cug_g = adapter.arangodb_graph_to_cugraph(name, batch_size=10)
     assert_cugraph_data(
         cug_g,
         metagraph={
@@ -145,7 +144,7 @@
 
 @pytest.mark.parametrize(
     "adapter, name, cug_g, edge_definitions, orphan_collections, \
-        keyify_nodes, keyify_edges, overwrite_graph, edge_attr, adb_import_kwargs",
+        overwrite_graph, batch_size, edge_attr, adb_import_kwargs",
     [
         (
             adbcug_adapter,
             "DivisibilityGraph",
             get_divisibility_graph(),
             [
                 {
                     "edge_collection": "is_divisible_by",
                     "from_vertex_collections": ["Numbers"],
                     "to_vertex_collections": ["Numbers"],
                 }
             ],
             None,
-            True,
-            False,
             False,
+            50,
             "quotient",
-            {"batch_size": 100, "on_duplicate": "replace"},
+            {"on_duplicate": "replace"},
         ),
         (
             adbcug_adapter,
             "DivisibilityGraph",
             get_divisibility_graph(),
             None,
             None,
             False,
-            False,
-            False,
+            None,
             "quotient",
             {"overwrite": True},
         ),
         (
             bipartite_adbcug_adapter,
             "SampleBipartiteGraph",
             get_bipartite_graph(),
             [
                 {
                     "edge_collection": "to",
                     "from_vertex_collections": ["col_a"],
                     "to_vertex_collections": ["col_b"],
                 }
             ],
             None,
             True,
-            False,
-            True,
-            "",
+            1,
+            None,
             {"overwrite": True},
         ),
     ],
 )
 def test_cug_to_adb(
     adapter: ADBCUG_Adapter,
     name: str,
     cug_g: CUGGraph,
     edge_definitions: Optional[List[Json]],
     orphan_collections: Optional[List[str]],
-    keyify_nodes: bool,
-    keyify_edges: bool,
     overwrite_graph: bool,
-    edge_attr: str,
+    batch_size: int,
+    edge_attr: Optional[str],
     adb_import_kwargs: Dict[str, Any],
 ) -> None:
     adb_g = adapter.cugraph_to_arangodb(
         name,
         cug_g,
         edge_definitions,
         orphan_collections,
-        keyify_nodes,
-        keyify_edges,
         overwrite_graph,
-        edge_attr,
+        batch_size=batch_size,
+        edge_attr=edge_attr,
         **adb_import_kwargs,
     )
     assert_arangodb_data(
         adapter,
         cug_g,
         adb_g,
-        keyify_nodes,
-        keyify_edges,
         edge_attr,
     )
 
 
 def test_cug_to_adb_invalid_collections() -> None:
+    db.delete_graph("Drivers", ignore_missing=True, drop_collections=True)
+
     cug_g_1 = get_drivers_graph()
     e_d_1 = [
         {
             "edge_collection": "drives",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Car"],
         }
     ]
+
+    # Raise NotImplementedError on missing vertex collection identification
+    with pytest.raises(NotImplementedError):
+        adbcug_adapter.cugraph_to_arangodb("Drivers", cug_g_1, e_d_1)
+
+    class Custom_ADBCUG_Controller(ADBCUG_Controller):
+        def _identify_cugraph_node(
+            self, cug_node_id: CUGId, adb_v_cols: List[str]
+        ) -> str:
+            return "invalid_vertex_collection"
+
+    custom_adbcug_adapter = ADBCUG_Adapter(db, Custom_ADBCUG_Controller())
+
+    # Raise ValueError on invalid vertex collection identification
     with pytest.raises(ValueError):
-        adbcug_adapter.cugraph_to_arangodb(
-            "Drivers", cug_g_1, e_d_1, on_duplicate="replace"
-        )
+        custom_adbcug_adapter.cugraph_to_arangodb("Drivers", cug_g_1, e_d_1)
+
+    db.delete_graph("Drivers", ignore_missing=True, drop_collections=True)
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
 
     cug_g_2 = get_likes_graph()
     e_d_2 = [
         {
             "edge_collection": "likes",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Person"],
         },
         {
             "edge_collection": "dislikes",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Person"],
         },
     ]
+
+    # Raise NotImplementedError on missing edge collection identification
+    with pytest.raises(NotImplementedError):
+        adbcug_adapter.cugraph_to_arangodb("Feelings", cug_g_2, e_d_2)
+
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
+
+    class Custom_ADBCUG_Controller(ADBCUG_Controller):
+        def _identify_cugraph_node(
+            self, cug_node_id: CUGId, adb_v_cols: List[str]
+        ) -> str:
+            return str(cug_node_id).split("/")[0]
+
+        def _identify_cugraph_edge(
+            self,
+            from_node_id: CUGId,
+            to_node_id: CUGId,
+            cug_map: Dict[CUGId, str],
+            adb_e_cols: List[str],
+        ) -> str:
+            return "invalid_edge_collection"
+
+    custom_adbcug_adapter = ADBCUG_Adapter(db, Custom_ADBCUG_Controller())
+
+    # Raise ValueError on invalid edge collection identification
     with pytest.raises(ValueError):
-        likes_adbcug_adapter.cugraph_to_arangodb(
-            "Feelings", cug_g_2, e_d_2, on_duplicate="replace"
-        )
+        custom_adbcug_adapter.cugraph_to_arangodb("Feelings", cug_g_2, e_d_2)
+
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
 
 
 def assert_arangodb_data(
     adapter: ADBCUG_Adapter,
     cug_g: CUGGraph,
     adb_g: ADBGraph,
-    keyify_nodes: bool,
-    keyify_edges: bool,
-    edge_attr: str,
+    edge_attr: Optional[str],
 ) -> None:
     cug_map = dict()
 
@@ -287,37 +316,32 @@ def assert_arangodb_data(
             if has_one_vcol
             else adapter.cntrl._identify_cugraph_node(cug_id, adb_v_cols)
         )
-        key = (
-            adapter.cntrl._keyify_cugraph_node(cug_id, col) if keyify_nodes else str(i)
-        )
+        key = adapter.cntrl._keyify_cugraph_node(i, cug_id, col)
 
-        adb_v_id = col + "/" + key
-        cug_map[cug_id] = {
-            "cug_id": cug_id,
-            "adb_id": adb_v_id,
-            "adb_col": col,
-            "adb_key": key,
-        }
+        adb_v_id = f"{col}/{key}"
+        cug_map[cug_id] = adb_v_id
 
         assert adb_g.vertex_collection(col).has(key)
 
     for from_node_id, to_node_id, *weight in cug_g.view_edge_list().values_host:
-        from_n = cug_map[from_node_id]
-        to_n = cug_map[to_node_id]
-
         col = (
             adb_e_cols[0]
             if has_one_ecol
-            else adapter.cntrl._identify_cugraph_edge(from_n, to_n, adb_e_cols)
+            else adapter.cntrl._identify_cugraph_edge(
+                from_node_id, to_node_id, cug_map, adb_e_cols
+            )
         )
+
         adb_edges = adb_g.edge_collection(col).find(
             {
-                "_from": from_n["adb_id"],
-                "_to": to_n["adb_id"],
+                "_from": cug_map[from_node_id],
+                "_to": cug_map[to_node_id],
             }
         )
-        assert len(adb_edges) > 0
+        assert len(adb_edges) == 1
+        if edge_attr:
+            assert adb_edges[0][edge_attr] == weight[0]
 
 
 def assert_cugraph_data(cug_g: CUGMultiGraph, metagraph: ADBMetagraph) -> None: