From fd6736b6008cd4a9ae050d192baac890d9d7e906 Mon Sep 17 00:00:00 2001
From: Anthony Mahanna
Date: Tue, 17 Oct 2023 19:17:37 -0400
Subject: [PATCH] checkpoint

---
 .circleci/config.yml          |  10 +--
 .github/workflows/build.yml   |  56 ----------------
 .github/workflows/release.yml |  72 +++++++-------------
 adbcug_adapter/adapter.py     |   8 ++-
 tests/conftest.py             |  26 ++------
 tests/test_adapter.py         | 120 ++++++++++++++++++++--------------
 6 files changed, 110 insertions(+), 182 deletions(-)
 delete mode 100644 .github/workflows/build.yml

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 28f31bd..29cd5c7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -27,15 +27,15 @@ jobs:
       - run:
           name: Run black
           command: black --check --verbose --diff --color --config=pyproject.toml ./adbcug_adapter ./tests/
+      - run:
+          name: Run flake8
+          command: flake8 ./adbcug_adapter ./tests
       - run:
           name: Run isort
           command: isort --check ./adbcug_adapter ./tests/
-      # - run:
-      #     name: Run mypy
-      #     command: mypy ./adbcug_adapter ./tests
       - run:
-          name: Run flake8
-          command: flake8 ./adbcug_adapter ./tests
+          name: Run mypy
+          command: mypy ./adbcug_adapter
       - save_cache:
           key: pip-and-local-cache
           paths:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
deleted file mode 100644
index 762f8f3..0000000
--- a/.github/workflows/build.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-name: build
-on:
-  workflow_dispatch:
-  push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
-env:
-  PACKAGE_DIR: adbcug_adapter
-  TESTS_DIR: tests
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    container: ${{ matrix.container }}
-    strategy:
-      matrix:
-        include:
-          - python: "3.8"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.8"
-
-          - python: "3.9"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.9"
-
-          - python: "3.10"
-            container: "rapidsai/rapidsai:cuda11.8-runtime-ubuntu22.04-py3.10"
-
-    name: Python ${{ matrix.python }}
-    steps:
-      - uses: actions/checkout@v3
-      - name: Setup Python ${{ matrix.python }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python }}
-      - name: Install packages
-        run: pip install -e '.[dev]'
-      # - name: Run black
-      #   run: black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run flake8
-      #   run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run isort
-      #   run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Run mypy
-      #   run: mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      # - name: Setup Docker
-      #   uses: docker/setup-buildx-action@v2.8.0
-      # - name: Set up ArangoDB Instance via Docker
-      #   run: docker create --name adb -p 8529:8529 -e ARANGO_ROOT_PASSWORD= arangodb/arangodb
-      # - name: Start ArangoDB Instance
-      #   run: docker start adb
-      - name: Run pytest
-        run: pytest --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes
-      - name: Publish to coveralls.io
-        if: matrix.python == '3.8'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: conda run -n ${{ matrix.python }} coveralls --service=github
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index aa79339..9774930 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -7,50 +7,8 @@ env:
   PACKAGE_DIR: adbcug_adapter
   TESTS_DIR: tests
 jobs:
-  build:
-    runs-on: self-hosted
-    defaults:
-      run:
-        shell: bash -l {0}
-    strategy:
-      matrix:
-        include:
-          - python: "3.7"
-            DB_NAME: "py37"
-
-          - python: "3.8"
-            DB_NAME: "py38"
-
-          - python: "3.9"
-            DB_NAME: "py39"
-
-    name: gpu
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up pip & install packages
-        run: |
-          source ~/anaconda3/etc/profile.d/conda.sh
-          conda activate ${{ matrix.python }}
-          python -m pip install --upgrade pip setuptools wheel
-          pip install .[dev]
-      - name: Run black
-        run: conda run -n ${{ matrix.python }} black --check --verbose --diff --color ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run flake8
-        run: flake8 ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run isort
-        run: isort --check --profile=black ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run mypy
-        run: conda run -n ${{ matrix.python }} mypy ${{env.PACKAGE_DIR}} ${{env.TESTS_DIR}}
-      - name: Run pytest in conda env
-        run: conda run -n ${{ matrix.python }} pytest --dbName ${{ matrix.DB_NAME }} --cov=${{env.PACKAGE_DIR}} --cov-report xml --cov-report term-missing -v --color=yes --no-cov-on-fail --code-highlight=yes
-      - name: Publish to coveralls.io
-        if: matrix.python == '3.8'
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: conda run -n ${{ matrix.python }} coveralls --service=github
   release:
-    needs: build
-    runs-on: self-hosted
+    runs-on: ubuntu-latest
     defaults:
       run:
         shell: bash -l {0}
@@ -63,13 +21,27 @@ jobs:
         uses: actions/setup-python@v2
         with:
           python-version: "3.9"
-      - name: Conda Build
-        run: |
-          source ~/anaconda3/etc/profile.d/conda.sh
-          conda activate condabuild39
-          anaconda logout
-          conda config --set anaconda_upload yes
-          conda-build --token '${{ secrets.CONDA_TOKEN }}' --user arangodb .
+
+      - name: Install packages
+        run: pip install setuptools wheel twine setuptools-scm[toml]
+
+      - name: Remove (old) distribution
+        run: rm -rf dist
+
+      - name: Build distribution
+        run: python setup.py sdist bdist_wheel
+
+      - name: Publish to Test PyPi
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD_TEST }}
+        run: twine upload --repository testpypi dist/* #--skip-existing
+
+      - name: Publish to PyPi
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
+        run: twine upload --repository pypi dist/* #--skip-existing
 
   changelog:
     needs: release
diff --git a/adbcug_adapter/adapter.py b/adbcug_adapter/adapter.py
index 52918b8..7cf9272 100644
--- a/adbcug_adapter/adapter.py
+++ b/adbcug_adapter/adapter.py
@@ -322,7 +322,7 @@ def cugraph_to_arangodb(
             name, overwrite_graph, edge_definitions, orphan_collections
         )
 
-        adb_v_cols: List[str] = adb_graph.vertex_collections()  # type: ignore
+        adb_v_cols: List[str] = adb_graph.vertex_collections()
         adb_e_cols: List[str] = [
             c["edge_collection"] for c in adb_graph.edge_definitions()  # type: ignore
         ]
@@ -389,7 +389,11 @@ def cugraph_to_arangodb(
         bar_progress = get_bar_progress("(CUG → ADB): Edges", "#5E3108")
         bar_progress_task = bar_progress.add_task("Edges", total=len(cug_edges))
 
-        cug_weights = cug_edges[edge_attr] if cug_graph.is_weighted() else None
+        cug_weights = (
+            cug_edges[edge_attr]
+            if cug_graph.is_weighted() and edge_attr is not None
+            else None
+        )
 
         with Live(Group(bar_progress, spinner_progress)):
             for i in range(len(cug_edges)):
diff --git a/tests/conftest.py b/tests/conftest.py
index 260703a..b66f64f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,10 +1,8 @@
 import logging
-import os
 import subprocess
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import Any, List
 
-from adb_cloud_connector import get_temp_credentials
 from arango import ArangoClient
 from arango.database import StandardDatabase
 from cudf import DataFrame
@@ -13,7 +11,7 @@
 
 from adbcug_adapter import ADBCUG_Adapter
 from adbcug_adapter.controller import ADBCUG_Controller
-from adbcug_adapter.typings import CUGId, Json
+from adbcug_adapter.typings import CUGId
 
 PROJECT_DIR = Path(__file__).parent.parent
 
@@ -56,7 +54,6 @@ def pytest_configure(config: Any) -> None:
     global adbcug_adapter, bipartite_adbcug_adapter, likes_adbcug_adapter
     adbcug_adapter = ADBCUG_Adapter(db, logging_lvl=logging.DEBUG)
     bipartite_adbcug_adapter = ADBCUG_Adapter(db, Bipartite_ADBCUG_Controller())
-    likes_adbcug_adapter = ADBCUG_Adapter(db, Likes_ADBCUG_Controller())
 
     if db.has_graph("fraud-detection") is False:
         arango_restore(con, "examples/data/fraud_dump")
@@ -138,6 +135,9 @@ def get_bipartite_graph() -> CUGGraph:
 
 
 class Bipartite_ADBCUG_Controller(ADBCUG_Controller):
+    def _identify_cugraph_node(self, cug_node_id: CUGId, adb_v_cols: List[str]) -> str:
+        return str(cug_node_id).split("/")[0]
+
     def _keyify_cugraph_node(self, cug_node_id: CUGId, col: str) -> str:
         return self._string_to_arangodb_key_helper(str(cug_node_id).split("/")[1])
 
@@ -164,19 +164,3 @@ def get_likes_graph() -> CUGGraph:
         edges, source="src", destination="dst", edge_attr="likes"
     )
     return cug_graph
-
-
-class Likes_ADBCUG_Controller(ADBCUG_Controller):
-    def _identify_cugraph_edge(
-        self,
-        from_cug_node: Json,
-        to_cug_node: Json,
-        adb_e_cols: List[str],
-        weight: Optional[Any] = None,
-    ) -> str:
-        if weight is True:
-            return "likes"
-        elif weight is False:
-            return "dislikes_intentional_typo_here"
-        else:
-            raise ValueError(f"Unrecognized 'weight' value: {weight}")
diff --git a/tests/test_adapter.py b/tests/test_adapter.py
index 3ed600c..e45d5e3 100644
--- a/tests/test_adapter.py
+++ b/tests/test_adapter.py
@@ -5,8 +5,8 @@
 from cugraph import Graph as CUGGraph
 from cugraph import MultiGraph as CUGMultiGraph
 
-from adbcug_adapter import ADBCUG_Adapter
-from adbcug_adapter.typings import ADBMetagraph, Json
+from adbcug_adapter import ADBCUG_Adapter, ADBCUG_Controller
+from adbcug_adapter.typings import ADBMetagraph, CUGId, Json
 
 from .conftest import (
     adbcug_adapter,
@@ -16,7 +16,6 @@
     get_divisibility_graph,
     get_drivers_graph,
     get_likes_graph,
-    likes_adbcug_adapter,
 )
 
 
@@ -133,7 +132,7 @@ def test_adb_graph_to_cug(
     v_cols = arango_graph.vertex_collections()
     e_cols = {col["edge_collection"] for col in arango_graph.edge_definitions()}
 
-    cug_g = adapter.arangodb_graph_to_cugraph(name)
+    cug_g = adapter.arangodb_graph_to_cugraph(name, batch_size=10)
     assert_cugraph_data(
         cug_g,
         metagraph={
@@ -145,7 +144,7 @@
 
 @pytest.mark.parametrize(
     "adapter, name, cug_g, edge_definitions, orphan_collections, \
-        keyify_nodes, keyify_edges, overwrite_graph, edge_attr, adb_import_kwargs",
+        overwrite_graph, batch_size, edge_attr, adb_import_kwargs",
     [
         (
             adbcug_adapter,
             "DivisibilityGraph",
             get_divisibility_graph(),
             [
                 {
                     "edge_collection": "is_divisible_by",
                     "from_vertex_collections": ["Numbers"],
                     "to_vertex_collections": ["Numbers"],
                 }
             ],
             None,
-            True,
-            False,
             False,
+            50,
             "quotient",
-            {"batch_size": 100, "on_duplicate": "replace"},
+            {"on_duplicate": "replace"},
         ),
         (
             adbcug_adapter,
             "DivisibilityGraph",
             get_divisibility_graph(),
             None,
             None,
             False,
-            False,
-            False,
+            None,
             "quotient",
             {"overwrite": True},
         ),
         (
             bipartite_adbcug_adapter,
             "SampleBipartiteGraph",
             get_bipartite_graph(),
             [
                 {
                     "edge_collection": "to",
                     "from_vertex_collections": ["col_a"],
                     "to_vertex_collections": ["col_b"],
                 }
             ],
             None,
             True,
-            False,
-            True,
-            "",
+            1,
+            None,
             {"overwrite": True},
         ),
     ],
 )
 def test_cug_to_adb(
     adapter: ADBCUG_Adapter,
     name: str,
     cug_g: CUGGraph,
     edge_definitions: Optional[List[Json]],
     orphan_collections: Optional[List[str]],
-    keyify_nodes: bool,
-    keyify_edges: bool,
     overwrite_graph: bool,
-    edge_attr: str,
+    batch_size: int,
+    edge_attr: Optional[str],
     adb_import_kwargs: Dict[str, Any],
 ) -> None:
     adb_g = adapter.cugraph_to_arangodb(
         name,
         cug_g,
         edge_definitions,
         orphan_collections,
-        keyify_nodes,
-        keyify_edges,
         overwrite_graph,
-        edge_attr,
+        batch_size=batch_size,
+        edge_attr=edge_attr,
         **adb_import_kwargs,
     )
     assert_arangodb_data(
         adapter,
         cug_g,
         adb_g,
-        keyify_nodes,
-        keyify_edges,
         edge_attr,
     )
 
 
 def test_cug_to_adb_invalid_collections() -> None:
+    db.delete_graph("Drivers", ignore_missing=True, drop_collections=True)
+
     cug_g_1 = get_drivers_graph()
     e_d_1 = [
         {
             "edge_collection": "drives",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Car"],
         }
     ]
+
+    # Raise NotImplementedError on missing vertex collection identification
+    with pytest.raises(NotImplementedError):
+        adbcug_adapter.cugraph_to_arangodb("Drivers", cug_g_1, e_d_1)
+
+    class Custom_ADBCUG_Controller(ADBCUG_Controller):
+        def _identify_cugraph_node(
+            self, cug_node_id: CUGId, adb_v_cols: List[str]
+        ) -> str:
+            return "invalid_vertex_collection"
+
+    custom_adbcug_adapter = ADBCUG_Adapter(db, Custom_ADBCUG_Controller())
+
+    # Raise ValueError on invalid vertex collection identification
     with pytest.raises(ValueError):
-        adbcug_adapter.cugraph_to_arangodb(
-            "Drivers", cug_g_1, e_d_1, on_duplicate="replace"
-        )
+        custom_adbcug_adapter.cugraph_to_arangodb("Drivers", cug_g_1, e_d_1)
+
+    db.delete_graph("Drivers", ignore_missing=True, drop_collections=True)
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
 
     cug_g_2 = get_likes_graph()
     e_d_2 = [
         {
             "edge_collection": "likes",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Person"],
         },
         {
             "edge_collection": "dislikes",
             "from_vertex_collections": ["Person"],
             "to_vertex_collections": ["Person"],
         },
     ]
+
+    # Raise NotImplementedError on missing edge collection identification
+    with pytest.raises(NotImplementedError):
+        adbcug_adapter.cugraph_to_arangodb("Feelings", cug_g_2, e_d_2)
+
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
+
+    class Custom_ADBCUG_Controller(ADBCUG_Controller):
+        def _identify_cugraph_node(
+            self, cug_node_id: CUGId, adb_v_cols: List[str]
+        ) -> str:
+            return str(cug_node_id).split("/")[0]
+
+        def _identify_cugraph_edge(
+            self,
+            from_node_id: CUGId,
+            to_node_id: CUGId,
+            cug_map: Dict[CUGId, str],
+            adb_e_cols: List[str],
+        ) -> str:
+            return "invalid_edge_collection"
+
+    custom_adbcug_adapter = ADBCUG_Adapter(db, Custom_ADBCUG_Controller())
+
+    # Raise ValueError on invalid edge collection identification
     with pytest.raises(ValueError):
-        likes_adbcug_adapter.cugraph_to_arangodb(
-            "Feelings", cug_g_2, e_d_2, on_duplicate="replace"
-        )
+        custom_adbcug_adapter.cugraph_to_arangodb("Feelings", cug_g_2, e_d_2)
+
+    db.delete_graph("Feelings", ignore_missing=True, drop_collections=True)
 
 
 def assert_arangodb_data(
     adapter: ADBCUG_Adapter,
     cug_g: CUGGraph,
     adb_g: ADBGraph,
-    keyify_nodes: bool,
-    keyify_edges: bool,
-    edge_attr: str,
+    edge_attr: Optional[str],
 ) -> None:
     cug_map = dict()
 
@@ -287,37 +316,32 @@ def assert_arangodb_data(
             if has_one_vcol
             else adapter.cntrl._identify_cugraph_node(cug_id, adb_v_cols)
         )
-        key = (
-            adapter.cntrl._keyify_cugraph_node(cug_id, col) if keyify_nodes else str(i)
-        )
+        key = adapter.cntrl._keyify_cugraph_node(i, cug_id, col)
 
-        adb_v_id = col + "/" + key
-        cug_map[cug_id] = {
-            "cug_id": cug_id,
-            "adb_id": adb_v_id,
-            "adb_col": col,
-            "adb_key": key,
-        }
+        adb_v_id = f"{col}/{key}"
+        cug_map[cug_id] = adb_v_id
 
         assert adb_g.vertex_collection(col).has(key)
 
     for from_node_id, to_node_id, *weight in cug_g.view_edge_list().values_host:
-        from_n = cug_map[from_node_id]
-        to_n = cug_map[to_node_id]
-
         col = (
             adb_e_cols[0]
             if has_one_ecol
-            else adapter.cntrl._identify_cugraph_edge(from_n, to_n, adb_e_cols)
+            else adapter.cntrl._identify_cugraph_edge(
+                from_node_id, to_node_id, cug_map, adb_e_cols
+            )
         )
+
         adb_edges = adb_g.edge_collection(col).find(
             {
-                "_from": from_n["adb_id"],
-                "_to": to_n["adb_id"],
+                "_from": cug_map[from_node_id],
+                "_to": cug_map[to_node_id],
             }
         )
-        assert len(adb_edges) > 0
+        assert len(adb_edges) == 1
+        if edge_attr:
+            assert adb_edges[0][edge_attr] == weight[0]
 
 
 def assert_cugraph_data(cug_g: CUGMultiGraph, metagraph: ADBMetagraph) -> None: