From e95e9f7794b6bac1212eca766a3a416f1e4d075f Mon Sep 17 00:00:00 2001
From: Rob Rossmiller <robert.rossmiller@tigergraph.com>
Date: Tue, 21 May 2024 12:12:16 -0400
Subject: [PATCH] update degree dataset

---
 ...{CompleteUnweighted.json => Complete.json} | 18 +++---
 .../degree_centrality/CompleteWeighted.json   | 38 ------------
 tests/data/create_baseline.py                 | 29 ++++-----
 tests/run.sh                                  |  6 +-
 tests/test/test_centrality.py                 | 60 ++++++++++++++-----
 5 files changed, 73 insertions(+), 78 deletions(-)
 rename tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/{CompleteUnweighted.json => Complete.json} (53%)
 delete mode 100644 tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteWeighted.json

diff --git a/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteUnweighted.json b/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/Complete.json
similarity index 53%
rename from tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteUnweighted.json
rename to tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/Complete.json
index 2228929b..ab4692cd 100644
--- a/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteUnweighted.json
+++ b/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/Complete.json
@@ -1,37 +1,37 @@
 [
   {
-    "@@top_scores_heap": [
+    "top_scores": [
       {
         "Vertex_ID": "A",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "B",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "C",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "D",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "E",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "F",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "G",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       },
       {
         "Vertex_ID": "H",
-        "score": 1.2857142857142856
+        "score": 1.1428571428571428
       }
     ]
   }
diff --git a/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteWeighted.json b/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteWeighted.json
deleted file mode 100644
index 2228929b..00000000
--- a/tests/data/baseline/graph_algorithms_baselines/centrality/degree_centrality/CompleteWeighted.json
+++ /dev/null
@@ -1,38 +0,0 @@
-[
-  {
-    "@@top_scores_heap": [
-      {
-        "Vertex_ID": "A",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "B",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "C",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "D",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "E",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "F",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "G",
-        "score": 1.2857142857142856
-      },
-      {
-        "Vertex_ID": "H",
-        "score": 1.2857142857142856
-      }
-    ]
-  }
-]
\ No newline at end of file
diff --git a/tests/data/create_baseline.py b/tests/data/create_baseline.py
index 6cd7bf1e..43c55cc6 100644
--- a/tests/data/create_baseline.py
+++ b/tests/data/create_baseline.py
@@ -3,26 +3,27 @@
 
 import networkx as nx
 import numpy as np
+from matplotlib import pyplot as plt
 
 baseline_path_root = "baseline/graph_algorithms_baselines"
 
 
 def run_degree_baseline(g: nx.Graph):
-    res = nx.centrality.degree_centrality(g)
-    nx.centrality.degree_centrality
+    # Degree centrality normalised by 1 / (n - 1); a self-loop is counted once.
+    s = 1.0 / (len(g) - 1.0)
+    res = {n: (d - g.number_of_edges(n, n)) * s for n, d in g.degree()}  # nx counts a self-loop twice in degree(); subtract it once, only where it exists
 
     out = []
     for k, v in res.items():
         out.append({"Vertex_ID": k, "score": v})
 
-    out = [{"@@top_scores_heap": out}]
+    out = [{"top_scores": out}]
     return out
 
 
-def create_graph(path, edges, weights):
+def create_graph(edges, weights):
     g = nx.Graph()
-    # include edge weights if they exist
-    if weights is not None:
+    if weights:
         g.add_weighted_edges_from(edges)
     else:
         g.add_edges_from(edges)
@@ -30,25 +31,25 @@ def create_graph(path, edges, weights):
 
 
 def create_degree_baseline():
-    # input, output
+    # input, output, weighted
     paths = [
         (
             "unweighted_edges/complete_edges.csv",
-            f"{baseline_path_root}/centrality/degree_centrality/CompleteUnweighted.json",
+            f"{baseline_path_root}/centrality/degree_centrality/Complete.json",
             False,
         ),
-        (
-            "weighted_edges/complete_edges.csv",
-            f"{baseline_path_root}/centrality/degree_centrality/CompleteWeighted.json",
-            True,
-        ),
+        # (
+        #     "weighted_edges/complete_edges.csv",
+        #     f"{baseline_path_root}/centrality/weighted_degree_centrality/CompleteWeighted.json",
+        #     True,
+        # ),
     ]
 
     for p, o_path, w in paths:
         with open(p) as f:
             edges = np.array(list(csv.reader(f)))
 
-        g = create_graph(p, edges, w)
+        g = create_graph(edges, w)
 
         res = run_degree_baseline(g)
         with open(o_path, "w") as f:
diff --git a/tests/run.sh b/tests/run.sh
index df88fcc5..298110a6 100755
--- a/tests/run.sh
+++ b/tests/run.sh
@@ -1,4 +1,6 @@
 clear
-python test/setup.py &&
-	pytest
+# python test/setup.py &&
+pytest test/test_centrality.py::TestCentrality::test_degree_centrality4
+# pytest test/test_centrality.py
+	# pytest
 # pytest --junitxml "output.xml" #-n 4
diff --git a/tests/test/test_centrality.py b/tests/test/test_centrality.py
index d06036e4..a14bfcf9 100644
--- a/tests/test/test_centrality.py
+++ b/tests/test/test_centrality.py
@@ -7,7 +7,13 @@
 class TestCentrality:
     feat = util.get_featurizer()
     # undirected graphs
-    graph_types1 = ["Empty", "Line", "Ring", "Hub_Spoke", "Tree"]
+    graph_types1 = [
+        "Empty",
+        "Line",
+        "Ring",
+        "Hub_Spoke",
+        "Tree",
+    ]
     # directed graphs
     graph_types2 = [
         "Line_Directed",
@@ -21,7 +27,6 @@ class TestCentrality:
         "Ring_Weighted",
         "Hub_Spoke_Weighted",
         "Tree_Weighted",
-        "CompleteWeighted",
     ]
     # weighted directed graphs
     graph_types4 = [
@@ -29,8 +34,9 @@ class TestCentrality:
         "Ring_Directed_Weighted",
         "Hub_Spoke_Directed_Weighted",
         "Tree_Directed_Weighted",
-        "CompleteUnweighted",
     ]
+    # Complete Graphs
+    graph_types5 = ["Complete"]
 
     @pytest.mark.parametrize("test_name", graph_types1)
     def test_degree_centrality1(self, test_name):
@@ -57,7 +63,7 @@ def test_degree_centrality1(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -87,7 +93,7 @@ def test_degree_centrality2(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -117,11 +123,35 @@ def test_degree_centrality3(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
 
+    @pytest.mark.parametrize("test_name", graph_types5)
+    def test_degree_centrality4(self, test_name):
+        """Check tg_degree_cent out-degree scores against the stored baseline for test_name."""
+        params = {
+            "v_type_set": ["V8"],
+            "e_type_set": [test_name],
+            "reverse_e_type_set": ["reverse_" + test_name],
+            "in_degree": False,
+            "out_degree": True,
+            "print_results": True,
+        }
+        with open(
+            f"data/baseline/graph_algorithms_baselines/centrality/degree_centrality/{test_name}.json"
+        ) as f:
+            baseline = json.load(f)
+
+        result = self.feat.runAlgorithm("tg_degree_cent", params=params)
+        scores = {r["Vertex_ID"]: r["score"] for r in result[0]["top_scores"]}
+
+        for b in baseline[0]["top_scores"]:
+            # A baseline vertex missing from the result must fail, not pass silently.
+            assert b["Vertex_ID"] in scores, f'missing vertex {b["Vertex_ID"]}'
+            assert scores[b["Vertex_ID"]] == pytest.approx(b["score"])
+
     @pytest.mark.parametrize("test_name", graph_types3)
     def test_weighted_degree_centrality1(self, test_name):
         params = {
@@ -147,7 +177,7 @@ def test_weighted_degree_centrality1(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -177,7 +207,7 @@ def test_weighted_degree_centrality2(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -207,7 +237,7 @@ def test_weighted_degree_centrality3(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -237,7 +267,7 @@ def test_closeness_centrality(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -267,7 +297,7 @@ def test_closeness_centrality2(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -297,7 +327,7 @@ def test_harmonic_centrality(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -327,7 +357,7 @@ def test_harmonic_centrality2(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -358,7 +388,7 @@ def test_article_rank(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()
@@ -390,7 +420,7 @@ def test_pagerank(self, test_name):
         for b in baseline:
             found = False
             for r in result:
-                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == r["score"]:
+                if r["Vertex_ID"] == b["Vertex_ID"] and r["score"] == b["score"]:
                     found = True
             if not found:
                 pytest.fail()