-
Notifications
You must be signed in to change notification settings - Fork 1
/
cypher_queries.txt
226 lines (173 loc) · 7.77 KB
/
cypher_queries.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
Query 1 - Top 3 cited papers
// For every proceeding, rank its papers by the number of incoming CITED
// relationships and return the three most-cited papers with their counts.
MATCH (pro:Proceeding)<-[:IN_COLLECTION]-(p:Paper)<-[citation:CITED]-()
WITH pro, p, count(citation) AS cite_num
ORDER BY cite_num DESC
// Relies on collect() keeping the sorted row order, so [0..3] is the top three.
RETURN pro, collect(p)[0..3], collect(cite_num)[0..3]
---
Query 2 - Community
// Pair every author with every proceeding they contributed a paper to, and
// keep the pairs backed by more than three PUBLISHED_IN matches.
// NOTE(review): count(pub_in) counts paper/year rows, not distinct years. If
// "community" is meant to require four different editions, a DISTINCT count
// over the year would be needed -- confirm the intended definition.
MATCH (a:Author)-[:CONTRIBUTED]->(paper:Paper)-[:IN_COLLECTION]->(pro:Proceeding)
MATCH (paper)-[pub_in:PUBLISHED_IN]->(:Year)
WITH a, pro, count(pub_in) AS tot_pubs
WHERE tot_pubs > 3
RETURN a, pro, tot_pubs
---
// To calculate the impact factor, we first count the citations received by papers that were published in Year0. Year0 is then pipelined down to the next MATCH to count the number of publications in each journal in Year0-1, and then again in Year0-2. These values are finally used to calculate the impact factor. Any journal and year combination for which a Year0, Year0-1, and Year0-2 combination does not exist is automatically excluded from the calculation, because the corresponding MATCH finds nothing. The RETURN therefore only provides values where there were citations and publications in all of these years.
Query 3 - Impact Factor
// Impact factor per journal and year: citations received by the journal's
// papers published in year y0, divided by the number of papers the journal
// published in the two preceding years (y0-1 and y0-2).
// A journal/year pair without publications in either preceding year yields no
// row, because the corresponding MATCH finds nothing.
MATCH ()-[cite:CITED]->(p0:Paper)-[:IN_COLLECTION]->(jnl:Journal)-[:IN_YEAR]-(y0:Year)
MATCH (p0)-[:PUBLISHED_IN]->(y0)
WITH jnl, y0, count(cite) AS cite_num
// Publications of the same journal one year earlier.
MATCH (p1:Paper)-[:IN_COLLECTION]->(jnl)
MATCH (p1)-[pub1:PUBLISHED_IN]->(y1:Year)
WHERE y1.year = y0.year - 1
WITH jnl, y0, cite_num, count(pub1) AS pub_num1
// Publications of the same journal two years earlier.
MATCH (p2:Paper)-[:IN_COLLECTION]->(jnl)
MATCH (p2)-[pub2:PUBLISHED_IN]->(y2:Year)
WHERE y2.year = y0.year - 2
WITH jnl, y0, cite_num, pub_num1, count(pub2) AS pub_num2
// count() yields integers and integer division truncates in Cypher, so the
// numerator is cast to a float to keep the fractional part.
RETURN jnl AS Journal, y0 AS ImpactFactorYear, toFloat(cite_num) / (pub_num1 + pub_num2) AS ImpactFactor
----
Query 4 - H-index
// H-index per author: the largest h such that the author has h papers with at
// least h citations each. Only papers with at least one citation are matched.
MATCH ()-[c:CITED]->(p:Paper),
      (a:Author)-[:CONTRIBUTED]->(p)
// The rank test below is only valid when the citation counts are in
// descending order, so sort before collecting them per author.
WITH a, p, count(c) AS total_cited
ORDER BY total_cited DESC
WITH a, collect(total_cited) AS list_cited
// Keep every 1-based rank x whose paper has at least x citations.
WITH a, [x IN range(1, size(list_cited)) WHERE x <= list_cited[x-1] | [list_cited[x-1], x]] AS h_index_list
// The last surviving rank is the h-index.
WITH a, h_index_list[-1][1] AS h_index
RETURN a, h_index ORDER BY h_index DESC
- Query 4-v2 H-index with citations sorted
// H-index per author, also returning the intermediate values (paper count,
// sorted citation list, qualifying ranks) for inspection.
MATCH (a:Author)-[:CONTRIBUTED]->(p:Paper)<-[citation:CITED]-()
WITH a, p, count(citation) AS total_cited
ORDER BY total_cited DESC
WITH a, count(p) AS papers, collect(total_cited) AS list_cited
// Each entry is [citations, position] for a position whose paper has at least
// that many citations; the last entry's position is the h-index.
WITH a, papers, list_cited,
     [pos IN range(1, size(list_cited)) WHERE pos <= list_cited[pos - 1] | [list_cited[pos - 1], pos]] AS h_index_list
RETURN a, papers, list_cited, h_index_list, h_index_list[-1][1] AS h_index
ref: https://iprapas.github.io/posts/neo4j-recommender/
--- Queries for C Graph Algorithms (Louvain)
-- You will need to install plugins
-- Louvain ref: https://neo4j.com/docs/graph-data-science/1.8/algorithms/louvain/
- Create in-memory graph and store it in the graph catalog
// Create the in-memory graph 'yoGraph' in the graph catalog for Louvain.
// CONTRIBUTED relationships run from Author to Paper, and a native projection
// drops relationships whose endpoints are not both projected, so Paper nodes
// are projected alongside Author; with Author alone the graph has no
// relationships at all.
CALL gds.graph.create(
  'yoGraph',
  ['Author', 'Paper'],
  {
    CONTRIBUTED: {
      orientation: 'UNDIRECTED'
    }
  }
)
- Stream Results
// Run Louvain on 'yoGraph' and list each node's name with its community ids,
// ordered by name.
CALL gds.louvain.stream('yoGraph')
YIELD nodeId, communityId, intermediateCommunityIds
WITH gds.util.asNode(nodeId) AS member, communityId, intermediateCommunityIds
RETURN member.name AS name, communityId, intermediateCommunityIds
ORDER BY name
- Write results in the Node as property community
// Run Louvain on 'yoGraph' and store each node's community id in its
// `community` property. Cypher map literals do not accept a trailing comma.
CALL gds.louvain.write('yoGraph', {
  writeProperty: 'community'
})
YIELD communityCount, modularity, modularities
-- Query for C Graph Algorithm (ArticleRank)
// Create the in-memory graph 'PaperRank' from Paper nodes and CITED
// relationships.
CALL gds.graph.create('PaperRank', 'Paper', 'CITED')
------------ D Recommender ------------
--- --- Part 1 - Define Research Community
// Tag the keywords that define research community 1.
WITH ['design', 'product design', 'computer aided design', 'surveys', 'humans', 'friction stir welding', 'machine design'] AS community_keywords
MATCH (k:Keyword)
WHERE k.keyword IN community_keywords
SET k.community = 1
RETURN k
--- --- Part 2 - Find Journals part of the community
// Flag as community 1 every journal in which at least 90% of the collected
// papers are linked to a community-1 keyword.
// The per-journal total is computed first; matching Keyword and Journal side
// by side would build a keyword x journal cartesian product only to recompute
// the same total once per keyword.
MATCH (j:Journal)<-[in_col:IN_COLLECTION]-()
WITH j, count(in_col) AS tot_papers
MATCH (:Keyword {community: 1})-[]-(p:Paper)-[]-(j)
// DISTINCT because one paper can be linked to several community keywords.
WITH j AS journal, count(DISTINCT p) / toFloat(tot_papers) AS papers_in_keyword, tot_papers
WHERE papers_in_keyword >= 0.9
SET journal.community = 1
RETURN journal, papers_in_keyword, tot_papers ORDER BY tot_papers DESC
--- --- Part 3 - Find top papers with highest page rank
- Page rank info:
The PageRank algorithm measures the importance of each node within the graph, based on the number of incoming relationships
and the importance of the corresponding source nodes. We are running it on a monopartite graph. In our case, the importance
of a paper is based on how many times it is cited by papers of the same community.
// Copy the community of each community-1 journal onto the papers that point
// to it.
MATCH (j:Journal {community: 1})<--(p:Paper)
SET p.community = j.community
RETURN p, j
// Create the in-memory graph 'Paper' containing only community-1 papers and
// the citations between them.
// CITED points from the citing paper to the cited paper, so the citing paper
// must be the projected source: PageRank scores a node by its incoming
// relationships, and swapping the two would reward papers for citing others.
CALL gds.graph.create.cypher(
  'Paper',
  'MATCH (p:Paper)-->(j:Journal {community: 1}) RETURN id(p) AS id',
  'MATCH (citing:Paper {community: 1})-[:CITED]->(cited:Paper {community: 1})
   RETURN id(citing) AS source, id(cited) AS target'
)
YIELD
  graphName AS graph, nodeQuery, nodeCount AS nodes, relationshipQuery, relationshipCount AS rels
// Temporary check: stream PageRank scores for the 'Paper' graph, highest
// score first and ties broken by name.
CALL gds.pageRank.stream('Paper')
YIELD nodeId, score
WITH gds.util.asNode(nodeId) AS paper, score
RETURN paper.name AS name, score
ORDER BY score DESC, name
// Run PageRank on the filtered 'Paper' graph and store each score in the
// node's `pagerank` property.
CALL gds.pageRank.write(
  'Paper',
  {writeProperty: 'pagerank', maxIterations: 20, dampingFactor: 0.85}
)
YIELD nodePropertiesWritten, ranIterations
// Show the five community-1 papers with the highest stored PageRank.
MATCH (p:Paper)
WHERE p.community = 1
RETURN p, p.pagerank
ORDER BY p.pagerank DESC
LIMIT 5
--- --- Part 4 - authors
// Mark the authors of the five highest-ranked community-1 papers as eligible
// reviewers for that community.
MATCH (a:Author)-->(p:Paper {community: 1})
// One row per paper, so LIMIT 5 selects papers rather than authors.
WITH p, p.pagerank AS pagerank, collect(a) AS authors
ORDER BY pagerank DESC
LIMIT 5
UNWIND authors AS one_author
SET one_author.can_review = 'community1'
RETURN one_author, p, pagerank
// Gurus of community 1 ("design"): authors who appear on more than one of the
// five highest-ranked community-1 papers.
MATCH (a:Author)-->(p:Paper {community: 1})
// One row per paper, so LIMIT 5 selects papers rather than authors.
WITH p, p.pagerank AS pagerank, collect(a) AS authors
ORDER BY pagerank DESC
LIMIT 5
UNWIND authors AS one_author
WITH one_author, count(p) AS papers_of_auth
WHERE papers_of_auth > 1
SET one_author.guru = true
RETURN one_author, papers_of_auth
--- Good Helper Queries
// Helper: call the schema procedure (no arguments) to inspect the graph.
CALL db.schema.visualization
This query lets you view the schema of the graph.
------ Testing
- Shows intermediate state for: Find Journals part of the community
// Intermediate state of the community-journal search, restricted to a single
// journal: each community keyword, the papers linking it to the journal, and
// the number of IN_COLLECTION rows behind every result.
WITH ['design', 'product design', 'computer aided design', 'surveys', 'humans', 'friction stir welding', 'machine design'] AS community_keywords,
     "Journal of Mechanical Design, Transactions of the ASME" AS journal_title
MATCH (k:Keyword)
WHERE k.keyword IN community_keywords
MATCH (j:Journal {title: journal_title})<-[in_col:IN_COLLECTION]-()
MATCH (k)-[]-(p:Paper)-[]-(jj:Journal {title: journal_title})
RETURN jj, p, k, j, count(in_col) AS tot_papers
- Show papers that are cited by papers from community
// Pairs (p, p2) where p2 cites p and both papers point to a community-1
// journal. Kept as a single pattern so all three relationships stay distinct.
MATCH (:Journal {community: 1})<--(p2:Paper)-[:CITED]->(p:Paper)-->(:Journal {community: 1})
RETURN p, p2
// Test variant of the community-journal search with a looser 40% threshold
// and the keyword list inlined; nothing is written back.
// The per-journal total is computed first; matching Keyword and Journal side
// by side would build a keyword x journal cartesian product only to recompute
// the same total once per keyword.
MATCH (j:Journal)<-[in_col:IN_COLLECTION]-()
WITH j, count(in_col) AS tot_papers
MATCH (k:Keyword)-[]-(p:Paper)-[]-(j)
WHERE k.keyword IN ['design', 'product design', 'computer aided design', 'surveys', 'humans', 'friction stir welding',
                    'machine design']
// DISTINCT because one paper can be linked to several of the keywords.
WITH j, count(DISTINCT p) / toFloat(tot_papers) AS papers_in_keyword, tot_papers
WHERE papers_in_keyword >= 0.4
RETURN j, papers_in_keyword, tot_papers ORDER BY tot_papers DESC
Data for testing CASE
// Sample data: five Person nodes ...
MERGE (alice:Person {name: "Alice", age: 38, eyes: "brown"})
MERGE (bob:Person {name: "bob", age: 25, eyes: "blue"})
MERGE (charlie:Person {name: "Charlie", age: 53, eyes: "Green"})
MERGE (daniel:Person {name: "Daniel", age: 20, eyes: "brown"})
MERGE (eskil:Person {name: "Eskil", age: 41, eyes: "blue", array: ['one', 'two', 'three']})
// ... and the Knows relationships between them.
MERGE (alice)-[:Knows]->(bob)
MERGE (alice)-[:Knows]->(charlie)
MERGE (charlie)-[:Knows]->(daniel)
MERGE (bob)-[:Knows]->(daniel)
MERGE (bob)-[:Knows]->(eskil)