Merge pull request #138 from tigergraph/3.8-migrate
3.8 Publish
TannerW authored Nov 15, 2022
2 parents 3d5d022 + 7a0a424 commit 8383057
Showing 123 changed files with 7,490 additions and 1,838 deletions.
Binary file added .DS_Store
14 changes: 7 additions & 7 deletions .github/auto_request_review.yml
@@ -9,17 +9,17 @@ reviewers:
# Reviewer groups each of which has a list of GitHub usernames
groups:
group1:
- YimingPan-Code
- Boyu1997
group2:
- yimingpantg
- TannerW
- wyatt-joyner-tg
- tommytgraph
- harshadindigal
- parkererickson
- lennessyy
- a-m-thomas
secondary:
- YimingPan-Code
- yimingpantg
- wyatt-joyner-tg
tertiary:
- xchang2014
- xinyuchtg
- victor-gsl

files:
113 changes: 113 additions & 0 deletions GDBMS_ALGO/centrality/article_rank.gsql
@@ -0,0 +1,113 @@
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.article_rank (STRING v_type, STRING e_type,
FLOAT max_change = 0.001, INT maximum_iteration = 25, FLOAT damping = 0.85, INT top_k = 100,
BOOL print_results = TRUE, STRING result_attribute = "", STRING file_path = "") SYNTAX V1 {

/*
First Author: karimsaraipour
First Commit Date: Oct 4, 2021

Recent Author: abdullaTigerGraph
Recent Commit Date: Apr 4, 2022


Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

Maturity:
Production

Description:
    Compute the ArticleRank score for each vertex in the graph.
    In each iteration, compute a score for each vertex:
        score = (1 - damping) + damping * avg_outdegree * sum(neighbor score / (avg_outdegree + neighbor outdegree))
    The ArticleRank algorithm stops when either of the following is true:
    a) it has run maximum_iteration iterations;
    b) the maximum score change of any vertex since the last iteration is <= max_change.

    This query currently supports only a single edge type (as of 8/13/2020).

Publications:
https://www.emerald.com/insight/content/doi/10.1108/00012530911005544/full/html

TigerGraph Documentation:
https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/article-rank

Parameters:
    v_type:
        vertex type to traverse
    e_type:
        edge type to traverse
    max_change:
        maximum change allowed between iterations to achieve convergence
    maximum_iteration:
        maximum number of iterations
    damping:
        importance of traversal vs. random teleport
    top_k:
        number of top scores to output
    print_results:
        if True, print JSON output
    result_attribute:
        FLOAT attribute to store results to
    file_path:
        file to write CSV output to
*/
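
/*
    Example usage (an illustrative sketch only): the graph name "social", the
    vertex type "Person", and the edge type "Friend" below are assumptions;
    substitute the types from your own schema. From the GSQL shell, the
    packaged template query can typically be installed and run along these
    lines (positional arguments follow the signature above):

        USE GRAPH social
        INSTALL QUERY GDBMS_ALGO.centrality.article_rank
        RUN QUERY GDBMS_ALGO.centrality.article_rank("Person", "Friend", 0.001, 25, 0.85, 100, TRUE, "", "")

    With print_results = TRUE and result_attribute and file_path left empty,
    only the top_k scores are returned as JSON.
*/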


TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score;
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap;
MaxAccum<FLOAT> @@max_diff = 9999; # max score change in an iteration
SumAccum<FLOAT> @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors
SumAccum<FLOAT> @sum_score = 0.15; # initial score for every vertex is 0.15 (i.e., 1 - default damping)
SetAccum<EDGE> @@edge_Set; # list of all edges, if display is needed
AvgAccum @@avg_out;
SumAccum<INT> @sum_out_degree;
FILE f (file_path);

# ArticleRank iterations
Start = {v_type}; # Start with all vertices of specified type(s)
Start = SELECT s
FROM Start:s
ACCUM
s.@sum_out_degree += s.outdegree(e_type),
@@avg_out += s.outdegree(e_type);

WHILE @@max_diff > max_change LIMIT maximum_iteration DO
    @@max_diff = 0;

V = SELECT s
FROM Start:s -(e_type:e)- v_type:t
ACCUM t.@sum_recvd_score += s.@sum_score/(@@avg_out+s.@sum_out_degree)
POST-ACCUM
s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score*@@avg_out,
s.@sum_recvd_score = 0,
@@max_diff += abs(s.@sum_score - s.@sum_score');

END; # END WHILE loop

# Output
IF file_path != "" THEN
f.println("Vertex_ID", "article Rank");
END;

V = SELECT s
FROM Start:s
POST-ACCUM
IF result_attribute != "" THEN
s.setAttr(result_attribute, s.@sum_score)
END,

IF file_path != "" THEN
f.println(s, s.@sum_score)
END,

IF print_results THEN
@@top_scores_heap += Vertex_Score(s, s.@sum_score)
END;

IF print_results THEN
PRINT @@top_scores_heap;
END;
}
200 changes: 200 additions & 0 deletions GDBMS_ALGO/centrality/betweenness_cent.gsql
@@ -0,0 +1,200 @@
CREATE TEMPLATE QUERY GDBMS_ALGO.centrality.betweenness_cent(SET<STRING> v_type_set, SET<STRING> e_type_set, STRING reverse_e_type, INT max_hops = 10,
  INT top_k = 100, BOOL print_results = TRUE, STRING result_attribute = "",
  STRING file_path = "", BOOL display_edges = FALSE) SYNTAX V1 {

/*
First Author: karimsaraipour
First Commit Date: Sep 2, 2021

Recent Author: Boyu Jiang
Recent Commit Date: Mar 14, 2022


Repository:
https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality

Maturity:
Production

Description:
    Compute the betweenness centrality score for each vertex,
    using multi-source BFS (each batch of up to ~60 source vertices is encoded as bit flags in an INT).

Publications:
http://www.vldb.org/pvldb/vol8/p449-then.pdf

TigerGraph Documentation:
https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/betweenness-centrality

Parameters:
    v_type_set:
        vertex types to traverse
    e_type_set:
        edge types to traverse
    reverse_e_type:
        reverse edge type to traverse in a directed graph; for an undirected graph, set reverse_e_type = e_type_set
    max_hops:
        look only this far from each vertex
    top_k:
        report only this many top scores
    print_results:
        if True, print JSON output
    result_attribute:
        FLOAT attribute to store results to
    file_path:
        file to write CSV output to
    display_edges:
        if True, output edges for visualization
*/
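
/*
    Example usage (an illustrative sketch only): the graph name "social",
    vertex type "Person", and edge type "Friend" are assumptions; substitute
    your own schema. For an undirected edge type, pass the same type name as
    reverse_e_type, as the parameter notes above suggest:

        USE GRAPH social
        INSTALL QUERY GDBMS_ALGO.centrality.betweenness_cent
        RUN QUERY GDBMS_ALGO.centrality.betweenness_cent(["Person"], ["Friend"], "Friend", 10, 100, TRUE, "", "", FALSE)
*/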

TYPEDEF TUPLE<VERTEX Vertex_ID, FLOAT score> Vertex_Score; #tuple to store betweenness centrality score
HeapAccum<Vertex_Score>(top_k, score DESC) @@top_scores_heap; #heap to store the top K scores
SumAccum<INT> @@sum_curr_dist; #current distance
BitwiseOrAccum @bitwise_or_visit_next; #use bitwise flags instead of a SetAccum
BitwiseOrAccum @bitwise_or_seen;
BitwiseOrAccum @bitwise_or_visit;
SumAccum<INT> @@sum_count = 1; #used to assign unique IDs
SumAccum<INT> @sum_id; #stores the unique ID
SetAccum<INT> @@batch_set; #used to assign unique IDs
MapAccum<INT,INT> @@map; #used to assign unique IDs
SetAccum<EDGE> @@edge_set;
SumAccum<FLOAT> @sum_delta = 0;
MapAccum<INT,BitwiseOrAccum> @times_map;
MapAccum<INT,SumAccum<INT>> @sigma_map;

INT empty=0;
FILE f (file_path);
INT num_vert;
INT batch_number;

# Compute betweenness
all = {v_type_set};
num_vert = all.size();
batch_number = num_vert/60; # process source vertices in batches of ~60 so each batch's bit flags fit in a 64-bit INT

IF batch_number == 0 THEN
batch_number = 1;
END;

# For each batch of source vertices, run a multi-source BFS and accumulate betweenness contributions
FOREACH i IN RANGE[0, batch_number-1] DO
Current = SELECT s
FROM all:s
WHERE getvid(s)%batch_number == i
POST-ACCUM
@@map+=(getvid(s)->0),
@@batch_set+=getvid(s);

FOREACH ver in @@batch_set DO
    @@map += (ver->@@sum_count); @@sum_count += 1;
END; # assign each vertex in the batch a unique ID (1-63)

Start = SELECT s
FROM Current:s
POST-ACCUM
s.@sum_id=@@map.get(getvid(s));

Start = SELECT s
FROM Current:s
POST-ACCUM
s.@bitwise_or_seen = 1<<s.@sum_id,
s.@bitwise_or_visit = s.@bitwise_or_seen,
s.@sigma_map += (0->1),
s.@times_map += (0->s.@bitwise_or_visit); # set initial seen and visit

@@batch_set.clear();
@@map.clear();
@@sum_count=0;

WHILE (Start.size() > 0) LIMIT max_hops DO
@@sum_curr_dist+=1;

Start = SELECT t
FROM Start:s -(reverse_e_type:e)-v_type_set:t
WHERE s.@bitwise_or_visit&-t.@bitwise_or_seen-1>0 AND s!=t # -t.@bitwise_or_seen-1 is the bitwise complement (inverse) of t.@bitwise_or_seen
ACCUM # update visit_next
INT c = s.@bitwise_or_visit&-t.@bitwise_or_seen-1,
IF c>0 THEN
t.@bitwise_or_visit_next+=c,
t.@bitwise_or_seen+=c
END,
t.@sigma_map+=(@@sum_curr_dist->s.@sigma_map.get(@@sum_curr_dist-1)) #set sigma based on depth
POST-ACCUM
t.@bitwise_or_visit=t.@bitwise_or_visit_next,
t.@times_map+=(@@sum_curr_dist->t.@bitwise_or_visit),
t.@bitwise_or_visit_next=0;
END;

@@sum_curr_dist+=-1;

Start = SELECT s
FROM all:s
WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;

WHILE (Start.size()>0) LIMIT max_hops DO
@@sum_curr_dist+=-1;
Start = SELECT t
FROM Start:s -(reverse_e_type:e)- v_type_set:t
WHERE t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1)!=0
ACCUM
FLOAT currValue=t.@sigma_map.get(@@sum_curr_dist)/(s.@sigma_map.get(@@sum_curr_dist+1)*(1+s.@sum_delta)),
INT r=t.@times_map.get(@@sum_curr_dist)&s.@times_map.get(@@sum_curr_dist+1),
INT plus=0,
WHILE r>0 DO
r=r&(r-1),plus=plus+1 # count how many 1 bits are in the number (popcount), equivalent to SetAccum.size()
END,
FLOAT value = currValue*plus/2.0,
t.@sum_delta+=value;

Start = SELECT s
FROM all:s
WHERE s.@sigma_map.get(@@sum_curr_dist)!=0;
END;

@@sum_curr_dist=0;
Start = SELECT s
FROM all:s
POST-ACCUM
s.@bitwise_or_seen=0,
s.@bitwise_or_visit=0,
s.@sigma_map.clear(),
s.@times_map.clear();
END;

#Output
IF file_path != "" THEN
f.println("Vertex_ID", "Betweenness");
END;

Start = SELECT s
FROM all:s
POST-ACCUM
IF result_attribute != "" THEN
s.setAttr(result_attribute, s.@sum_delta)
END,

IF print_results THEN
@@top_scores_heap += Vertex_Score(s, s.@sum_delta)
END,

IF file_path != "" THEN
f.println(s, s.@sum_delta)
END;

IF print_results THEN
PRINT @@top_scores_heap AS top_scores;

IF display_edges THEN
PRINT Start[Start.@sum_delta];

Start = SELECT s
FROM Start:s -(e_type_set:e)-:t
ACCUM
@@edge_set += e;

PRINT @@edge_set;

END;
END;

}