Merge pull request #138 from tigergraph/3.8-migrate

3.8 Publish
tigergraph · Nov 15, 2022 · 8383057 · 8383057
2 parents 3d5d022 + 7a0a424
commit 8383057
Show file tree

Hide file tree

Showing 123 changed files with 7,490 additions and 1,838 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/auto_request_review.yml b/.github/auto_request_review.yml
@@ -9,17 +9,17 @@ reviewers:
   # Reviewer groups each of which has a list of GitHub usernames
   groups:
     group1:
-      - YimingPan-Code
-      - Boyu1997
-    group2:
+      - yimingpantg
+      - TannerW
       - wyatt-joyner-tg
-      - tommytgraph
-      - harshadindigal
+      - parkererickson
+      - lennessyy
+      - a-m-thomas
     secondary:
-      - YimingPan-Code
+      - yimingpantg
       - wyatt-joyner-tg
     tertiary:
-      - xchang2014
+      - xinyuchtg
       - victor-gsl
 
 files:

diff --git a/GDBMS_ALGO/centrality/article_rank.gsql b/GDBMS_ALGO/centrality/article_rank.gsql
@@ -0,0 +1,113 @@
+CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.article_rank (STRING v_type, STRING e_type,
+    FLOAT max_change = 0.001, INT maximum_iteration = 25, FLOAT damping = 0.85, INT top_k = 100,
+    BOOL print_results = TRUE, STRING result_attribute =    "", STRING file_path = "") SYNTAX V1 {
+
+    /*
+    First Author: karimsaraipour
+    First Commit Date: Oct 4, 2021
+
+    Recent Author: abdullaTigerGraph
+    Recent Commit Date: Apr 4, 2022
+
+
+    Repository:
+        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality
+
+    Maturity:
+        Production
+
+    Description: 
+        Compute the article rank score for each vertex in the GRAPH
+        In each iteration, compute a score for each vertex:
+            score = (1-damping) + damping*average outdegree*sum(received scores FROM its neighbors/average outdegree+Outdegree).
+        The article Rank algorithm stops when either of the following is true:
+        a) it reaches maximum_iteration iterations;
+        b) the max score change for any vertex compared to the last iteration <= max_change.
+
+        This query supports only taking in a single edge for the time being (8/13/2020).
+
+    Publications:
+        https://www.emerald.com/insight/content/doi/10.1108/00012530911005544/full/html
+
+    TigerGraph Documentation:
+        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/article-rank
+
+    Parameters:
+        v_type:
+            vertex types to traverse
+        print_results:
+            If True, print JSON output
+        e_type:
+            edge types to traverse
+        result_attribute:
+            INT attribute to store results to
+        maximum_iteration:
+            max #iterations
+        file_path:
+            file to write CSV output to
+        top_k:
+            #top scores to output
+        display_edges:
+            output edges for visualization
+        max_change:
+            max allowed change between iterations to achieve convergence
+        damping:
+            importance of traversal vs. random teleport
+    */
+
+
+    TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
+    HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
+    MaxAccum<FLOAT> @@max_diff = 9999;  # max score change in an iteration
+    SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
+    SumAccum<FLOAT> @sum_score = 0.15;  # initial score for every vertex is 0.15.
+    SetAccum<EDGE> @@edge_Set;  # list of all edges, if display is needed
+    AvgAccum @@avg_out;
+    SumAccum<INT> @sum_out_degree;
+    FILE f (file_path);
+
+    # PageRank iterations    
+    Start = {v_type};   # Start with all vertices of specified type(s)
+    Start = SELECT s 
+        FROM Start:s 
+        ACCUM 
+        s.@sum_out_degree += s.outdegree(e_type),
+        @@avg_out += s.outdegree(e_type);
+
+    WHILE @@max_diff > max_change 
+    LIMIT maximum_iteration DO @@max_diff = 0;
+
+        V = SELECT s 
+            FROM Start:s -(e_type:e)- v_type:t
+            ACCUM t.@sum_recvd_score += s.@sum_score/(@@avg_out+s.@sum_out_degree) 
+            POST-ACCUM 
+            s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score*@@avg_out,
+            s.@sum_recvd_score = 0,
+            @@max_diff += abs(s.@sum_score - s.@sum_score');
+
+    END; # END WHILE loop
+
+    # Output
+    IF file_path != "" THEN
+        f.println("Vertex_ID", "article Rank");
+    END;
+
+    V = SELECT s 
+        FROM Start:s
+        POST-ACCUM 
+            IF result_attribute != "" THEN 
+                s.setAttr(result_attribute, s.@sum_score) 
+            END,
+
+            IF file_path != "" THEN 
+                f.println(s, s.@sum_score) 
+            END,
+
+            IF print_results THEN 
+                @@top_scores_heap += Vertex_Score(s, s.@sum_score) 
+            END;
+
+    IF print_results THEN
+        PRINT @@top_scores_heap;
+    END;
+}
diff --git a/GDBMS_ALGO/centrality/betweenness_cent.gsql b/GDBMS_ALGO/centrality/betweenness_cent.gsql
@@ -0,0 +1,200 @@
+CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.betweenness_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING reverse_e_type,INT max_hops = 10,
+    INT top_k = 100, BOOL print_results = True, STRING result_attribute = "",
+    STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 { 
+
+    /*
+    First Author: karimsaraipour
+    First Commit Date: Sep 2, 2021
+
+    Recent Author: Boyu Jiang
+    Recent Commit Date: Mar 14, 2022
+
+
+    Repository:
+        https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality
+
+    Maturity:
+        Production
+
+    Description: 
+        Compute Betweenness Centrality for each VERTEX. 
+        Use multi-source BFS.
+
+    Publications:
+        http://www.vldb.org/pvldb/vol8/p449-then.pdf
+
+    TigerGraph Documentation:
+        https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/betweenness-centrality
+
+    Parameters:
+        v_type_set:
+            vertex types to traverse
+        print_results:
+            If True, print JSON output
+        e_type_set:
+            edge types to traverse
+        result_attribute:
+            INT attribute to store results to
+        reverse_e_type:
+            reverse edge type in directed graph, in undirected graph set reverse_e_type=e_type_set
+        max_hops:
+            look only this far from each vertex
+        file_path:
+            file to write CSV output to
+        top_k:
+            report only this many top scores
+        display_edges:
+            If True, output edges for visualization
+    */
+
+    TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score; #tuple to store betweenness centrality score
+    HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap; #heap to store top K score
+    SumAccum<INT> @@sum_curr_dist; #current distance
+    BitwiseOrAccum @bitwise_or_visit_next; #use bitwise instead of setAccum
+    BitwiseOrAccum @bitwise_or_seen;
+    BitwiseOrAccum @bitwise_or_visit; 
+    SumAccum<INT> @@sum_count = 1;#used to set unique ID
+    SumAccum<INT> @sum_id; #store the unique ID
+    SetAccum<INT> @@batch_set; #used to set unique ID
+    MapAccum<INT,INT> @@map; #used to set unique ID 
+    SetAccum<EDGE> @@edge_set;
+    SumAccum<FLOAT> @sum_delta = 0;
+    MapAccum<INT,BitwiseOrAccum> @times_map;
+    MapAccum<INT,SumAccum<INT>> @sigma_map;
+
+    INT empty=0;
+    FILE f (file_path);
+    INT num_vert;
+    INT batch_number;
+
+    # Compute betweenness	
+    all = {v_type_set};
+    num_vert = all.size();
+    batch_number = num_vert/60;
+
+    IF batch_number == 0 THEN 
+        batch_number = 1;
+    END;
+
+    #Calculate the sum of distance to other vertex for each vertex
+    FOREACH i IN RANGE[0, batch_number-1] DO
+        Current = SELECT s 
+            FROM all:s
+            WHERE getvid(s)%batch_number == i
+            POST-ACCUM 
+                @@map+=(getvid(s)->0),
+                @@batch_set+=getvid(s);
+
+        FOREACH ver in @@batch_set DO 
+            @@map += (ver->@@sum_count); @@sum_count += 1;
+        END; #set a unique ID for each vertex, ID from 1-63
+
+        Start = SELECT s 
+            FROM Current:s 
+            POST-ACCUM 
+                s.@sum_id=@@map.get(getvid(s));
+
+        Start = SELECT s 
+            FROM Current:s
+            POST-ACCUM 
+                s.@bitwise_or_seen = 1<<s.@sum_id,
+                s.@bitwise_or_visit = s.@bitwise_or_seen,
+                s.@sigma_map += (0->1),
+                s.@times_map += (0->s.@bitwise_or_visit); # set initial seen and visit
+
+        @@batch_set.clear();
+        @@map.clear();
+        @@sum_count=0;
+
+        WHILE (Start.size() > 0) LIMIT max_hops DO
+            @@sum_curr_dist+=1;
+
+            Start = SELECT t 
+                FROM Start:s -(reverse_e_type:e)-v_type_set:t
+                WHERE s.@bitwise_or_visit&-t.@bitwise_or_seen-1>0 AND s!=t #use -t.@seen-1 to get the trverse of t.@seen
+                ACCUM                               #updatevisitNext
+                    INT c = s.@bitwise_or_visit&-t.@bitwise_or_seen-1,
+                    IF c>0 THEN 
+                        t.@bitwise_or_visit_next+=c,
+                        t.@bitwise_or_seen+=c
+                    END,
+                    t.@sigma_map+=(@@sum_curr_dist->s.@sigma_map.get(@@sum_curr_dist-1)) #set sigma based on depth
+                POST-ACCUM 
+                    t.@bitwise_or_visit=t.@bitwise_or_visit_next,
+                    t.@times_map+=(@@sum_curr_dist->t.@bitwise_or_visit),
+                    t.@bitwise_or_visit_next=0;
+        END;
+
+        @@sum_curr_dist+=-1;
+
+        Start = SELECT s 
+            FROM all:s 
+            WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;
+
+        WHILE (Start.size()>0) LIMIT max_hops DO
+            @@sum_curr_dist+=-1;
+            Start = SELECT t 
+                FROM Start:s -(reverse_e_type:e)- v_type_set:t
+                WHERE t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1)!=0  
+                ACCUM 
+                    FLOAT currValue=t.@sigma_map.get(@@sum_curr_dist)/(s.@sigma_map.get(@@sum_curr_dist+1)*(1+s.@sum_delta)),
+                    INT r=t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1),
+                    INT plus=0,
+                    WHILE r>0 DO 
+                        r=r&(r-1),plus=plus+1 #count how many 1 in the number, same as setAccum,size()
+                    END,
+                    FLOAT value = currValue*plus/2.0,
+                    t.@sum_delta+=value;
+
+            Start = SELECT s 
+                FROM all:s 
+                WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;	
+        END;
+
+        @@sum_curr_dist=0;
+        Start = SELECT s 
+            FROM all:s 
+            POST-ACCUM 
+                s.@bitwise_or_seen=0,
+                s.@bitwise_or_visit=0,
+                s.@sigma_map.clear(),
+                s.@times_map.clear();
+    END;
+
+    #Output
+    IF file_path != "" THEN
+        f.println("Vertex_ID", "Betweenness");
+    END;
+
+    Start = SELECT s 
+        FROM all:s
+        POST-ACCUM 
+            IF result_attribute != "" THEN 
+                s.setAttr(result_attribute, s.@sum_delta) 
+            END,
+
+            IF print_results THEN 
+                @@top_scores_heap += Vertex_Score(s, s.@sum_delta) 
+            END,
+
+            IF file_path != "" THEN 
+                f.println(s, s.@sum_delta) 
+            END;
+
+    IF print_results THEN
+        PRINT @@top_scores_heap AS top_scores;
+
+        IF display_edges THEN
+            PRINT Start[Start.@sum_delta];
+
+            Start = SELECT s
+                FROM Start:s -(e_type_set:e)-:t
+                ACCUM 
+                    @@edge_set += e;
+
+            PRINT @@edge_set;
+
+        END;
+    END;
+
+}