src/cypher_queries.txt

Query 1 - Top 3 cited papers

// Per proceeding: its (up to) three most cited papers and their citation counts.
MATCH (pro:Proceeding)<-[:IN_COLLECTION]-(p:Paper)<-[r:CITED]-()
WITH pro, p, count(r) AS cite_num
ORDER BY cite_num DESC
// Rows arrive most-cited first, so the first three collected entries per
// proceeding are its top three papers.
RETURN pro,
       collect(p)[0..3],
       collect(cite_num)[0..3]
---

Query 2 - Community

// Authors with more than three publications in the same proceeding.
// Every PUBLISHED_IN relationship of an author's paper in that proceeding
// contributes one to tot_pubs.
MATCH (y:Year)<-[pub_in:PUBLISHED_IN]-(p:Paper)<-[:CONTRIBUTED]-(a:Author)
MATCH (p)-[in_col:IN_COLLECTION]->(pro:Proceeding)
WITH a, pro, count(pub_in) AS tot_pubs
WHERE tot_pubs > 3
RETURN a, pro, tot_pubs

---
// To calculate the impact factor, we count the citations received by papers published in year0. Year0 is then piped down to the next MATCH to count the publications of each journal in year0-1, and then again in year0-2. These values are finally used to calculate the impact factor. Any journal and year combination for which year0, year0-1, and year0-2 data do not all exist is automatically excluded, because the corresponding MATCH finds nothing. So the RETURN only provides values where there were publications and citations in all three years.

Query 3 - Impact Factor

// Impact factor of a journal for year y0:
//   citations received by the journal's papers published in y0
//   / (papers the journal published in y0-1 + papers it published in y0-2)
// Journal/year combinations without publications in both earlier years drop
// out, because the corresponding MATCH finds nothing.

// Step 1: citations received by the journal's papers published in y0.
MATCH ()-[cite:CITED]->(p0:Paper)-[:IN_COLLECTION]->(jnl:Journal)-[:IN_YEAR]-(y0:Year)
MATCH (p0)-[:PUBLISHED_IN]->(y0)
WITH jnl, y0, count(cite) AS cite_num

// Step 2: publications of the same journal one year earlier.
MATCH (p1:Paper)-[:IN_COLLECTION]->(jnl)
MATCH (p1)-[pub1:PUBLISHED_IN]->(y1:Year)
    WHERE y1.year = y0.year - 1
WITH jnl, y0, cite_num, y1, count(pub1) AS pub_num1

// Step 3: publications of the same journal two years earlier.
MATCH (p2:Paper)-[:IN_COLLECTION]->(jnl)
MATCH (p2)-[pub2:PUBLISHED_IN]->(y2:Year)
    WHERE y2.year = y0.year - 2
WITH jnl, y0, y1, y2, cite_num, pub_num1, count(pub2) AS pub_num2

// toFloat() forces floating-point division: dividing two integers in Cypher
// truncates the result, which would round every impact factor down.
RETURN jnl AS Journal,
       y0 AS ImpactFactorYear,
       toFloat(cite_num) / (pub_num1 + pub_num2) AS ImpactFactor


----

Query 4 - H-index

// H-index per author: the largest h such that the author has h papers with at
// least h citations each. Only papers with at least one incoming CITED
// relationship are matched, so authors without any cited paper are not returned.
MATCH (a:Author)-[:CONTRIBUTED]->(p:Paper)<-[c:CITED]-()
// The rank test below is only valid on citation counts sorted in descending
// order, so sort before collecting.
WITH a, p, count(c) AS total_cited
ORDER BY total_cited DESC
WITH a, collect(total_cited) AS list_cited
// Rank x (1-based) qualifies while the x-th most cited paper has at least x
// citations. On a descending list the qualifying ranks form a prefix, so
// their count is the h-index.
WITH a, size([x IN range(1, size(list_cited)) WHERE x <= list_cited[x - 1]]) AS h_index
RETURN a, h_index
ORDER BY h_index DESC

- Query 4-v2 H-index with citations sorted
// H-index per author, with the intermediate values kept for inspection.
MATCH (a:Author)-[:CONTRIBUTED]->(p:Paper)<-[c:CITED]-()
WITH a, p, count(c) AS total_cited
ORDER BY total_cited DESC
// papers     = number of cited papers of the author
// list_cited = their citation counts, most cited first
WITH a,
     count(p) AS papers,
     collect(total_cited) AS list_cited
// Keep a [citations, rank] pair for every 1-based rank x whose paper has at
// least x citations; the rank stored in the last pair is the h-index.
WITH a,
     papers,
     list_cited,
     [x IN range(1, size(list_cited)) WHERE x <= list_cited[x - 1] | [list_cited[x - 1], x]] AS h_index_list
RETURN a, papers, list_cited, h_index_list, h_index_list[-1][1] AS h_index
ref: https://iprapas.github.io/posts/neo4j-recommender/

--- Queries for C Graph Algorithms (Louvain)

-- You will need to install plugins
-- Louvain ref: https://neo4j.com/docs/graph-data-science/1.8/algorithms/louvain/
- Create in-memory graph and store it in the graph catalog
// Project the co-authorship graph 'yoGraph' into the graph catalog.
// CONTRIBUTED links an Author to a Paper, so a native projection restricted to
// 'Author' nodes would drop every relationship (a relationship is only loaded
// when both of its end nodes are projected). The Cypher projection below keeps
// the node set Author-only and connects two authors when they contributed to
// the same paper. The symmetric pattern yields each pair in both directions,
// which gives the undirected graph Louvain should see.
CALL gds.graph.create.cypher(
    'yoGraph',
    'MATCH (a:Author) RETURN id(a) AS id',
    'MATCH (a1:Author)-[:CONTRIBUTED]->(:Paper)<-[:CONTRIBUTED]-(a2:Author)
     RETURN id(a1) AS source, id(a2) AS target'
)

- Stream Results
// Stream the Louvain result for every node of 'yoGraph': the node's name, its
// final community and the intermediate community ids, ordered by name.
CALL gds.louvain.stream('yoGraph')
YIELD nodeId, communityId, intermediateCommunityIds
WITH gds.util.asNode(nodeId) AS node, communityId, intermediateCommunityIds
RETURN node.name AS name,
       communityId,
       intermediateCommunityIds
ORDER BY name

- Write results in the Node as property community
// Run Louvain on 'yoGraph' and store each node's community id in the node
// property `community`. (A Cypher map literal must not end with a trailing
// comma, so the configuration map has none.)
CALL gds.louvain.write('yoGraph', {
    writeProperty: 'community'
})
YIELD communityCount, modularity, modularities


-- Query for C Graph Algorithm (ArticleRank)
// Native projection 'PaperRank': Paper nodes connected by CITED relationships,
// with no extra projection configuration.
CALL gds.graph.create('PaperRank', 'Paper', 'CITED')


------------ D Recommender ------------

--- --- Part 1 - Define Research Community
// Mark the keywords that define research community 1.
WITH [
    'design',
    'product design',
    'computer aided design',
    'surveys',
    'humans',
    'friction stir welding',
    'machine design'
] AS community_keywords
MATCH (k:Keyword)
WHERE k.keyword IN community_keywords
SET k.community = 1
RETURN k

--- --- Part 2 - Find Journals part of the community
// Flag journals in which at least 90% of the papers are connected to a
// community-1 keyword.
// The total paper count is aggregated per journal first. Matching the
// keywords and the journals as two disconnected patterns would build a
// Keyword x Journal cartesian product before the aggregation.
MATCH (j:Journal)<-[in_col:IN_COLLECTION]-()
WITH j, count(in_col) AS tot_papers

// Papers of the journal that are connected to any community-1 keyword.
MATCH (k:Keyword {community: 1})-[]-(p:Paper)-[]-(j)
// toFloat() keeps the ratio fractional; DISTINCT counts a paper once even
// when it is connected to several community keywords.
WITH j AS journal, count(DISTINCT p) / toFloat(tot_papers) AS papers_in_keyword, tot_papers
WHERE papers_in_keyword >= 0.9
SET journal.community = 1
RETURN journal, papers_in_keyword, tot_papers ORDER BY tot_papers DESC

--- --- Part 3 - Find top papers with highest page rank
- Page rank info:
The PageRank algorithm measures the importance of each node within the graph, based on the number of incoming
relationships and the importance of the corresponding source nodes. We are running it on a monopartite graph. In our
case, the importance of a paper is based on how many times it is cited by other papers in the same community.

// Copy the community of a community-1 journal onto every paper that has an
// outgoing relationship to it (so the papers get community 1 as well).
MATCH (j:Journal {community: 1})<--(p:Paper)
SET p.community = j.community
RETURN p, j

// Create the in-memory graph 'Paper' containing only community papers and the
// citations between them.
// Node query: papers that point at a community-1 journal.
// Relationship query: citing paper -> cited paper. PageRank scores a node by
// its incoming relationships, so the cited paper has to be the target;
// returning it as the source would reward papers for citing others.
CALL gds.graph.create.cypher(
  'Paper',
  'MATCH (p:Paper)-->(j:Journal {community: 1}) RETURN id(p) AS id',
  'MATCH (citing:Paper {community: 1})-[c:CITED]->(cited:Paper {community: 1})
  RETURN id(citing) AS source, id(cited) AS target'
)
YIELD
  graphName AS graph, nodeQuery, nodeCount AS nodes, relationshipQuery, relationshipCount AS rels

// Temporary check: stream the PageRank score of every node in the 'Paper'
// graph, highest score first; ties are ordered by name.
CALL gds.pageRank.stream('Paper')
YIELD nodeId, score
WITH gds.util.asNode(nodeId) AS node, score
RETURN node.name AS name, score
ORDER BY score DESC, name

// Run PageRank on the filtered 'Paper' graph and store each node's score in
// the node property `pagerank`.
CALL gds.pageRank.write(
    'Paper',
    {
        writeProperty: 'pagerank',
        dampingFactor: 0.85,
        maxIterations: 20
    }
)
YIELD nodePropertiesWritten, ranIterations

// The five community-1 papers with the highest stored PageRank score.
MATCH (p:Paper)
WHERE p.community = 1
RETURN p, p.pagerank
ORDER BY p.pagerank DESC
LIMIT 5

--- --- Part 4 - authors
// Authors of the top-5 papers: group the authors per community-1 paper, keep
// the five papers with the highest PageRank, then mark each of their authors
// as a possible reviewer for community 1.
MATCH (p:Paper {community: 1})<-[]-(a:Author)
WITH p, p.pagerank AS pagerank, collect(a) AS authors
ORDER BY pagerank DESC
LIMIT 5
UNWIND authors AS one_author
SET one_author.can_review = 'community1'
RETURN one_author, p, pagerank

// Gurus of community 1 ("design"): authors with more than one paper among the
// five community-1 papers with the highest PageRank.
MATCH (p:Paper {community: 1})<-[]-(a:Author)
WITH p, p.pagerank AS pagerank, collect(a) AS authors
ORDER BY pagerank DESC
LIMIT 5
UNWIND authors AS one_author
// Count in how many of the top-5 papers each author appears.
WITH one_author, count(p) AS papers_of_auth
WHERE papers_of_auth > 1
SET one_author.guru = true
RETURN one_author, papers_of_auth


--- Good Helper Queries

// Show the schema of the graph.
CALL db.schema.visualization()
This query lets you see the schema of the graph.

------ Testing

- Shows intermediate state for: Find Journals part of the community
// Intermediate state of "Find Journals part of the community" for one journal:
// every (journal, paper, keyword) combination, together with the number of
// IN_COLLECTION relationships matched for it.
WITH [
    'design',
    'product design',
    'computer aided design',
    'surveys',
    'humans',
    'friction stir welding',
    'machine design'
] AS community_keywords,
     "Journal of Mechanical Design, Transactions of the ASME" AS journal_title
MATCH (k:Keyword)
WHERE k.keyword IN community_keywords

MATCH (j:Journal {title: journal_title})<-[in_col:IN_COLLECTION]-()
WITH j, k, in_col, journal_title
MATCH (k)-[]-(p:Paper)-[]-(jj:Journal {title: journal_title})
RETURN jj, p, k, j, count(in_col) AS tot_papers

- Show papers that are cited by papers from community
// p:  paper pointing at a community-1 journal that is cited by p2
// p2: citing paper that also points at a community-1 journal
MATCH (j2:Journal {community: 1})<--(p2:Paper)-[c:CITED]->(p:Paper)-->(j:Journal {community: 1})
RETURN p, p2

// Journals in which at least 40% of the papers are connected to one of the
// listed keywords (read-only variant of the community-journal query).
// The total paper count is aggregated per journal first. Matching the
// keywords and the journals as two disconnected patterns would build a
// Keyword x Journal cartesian product before the aggregation.
MATCH (j:Journal)<-[in_col:IN_COLLECTION]-()
WITH j, count(in_col) AS tot_papers

// Papers of the journal that are connected to any of the listed keywords.
MATCH (k:Keyword)-[]-(p:Paper)-[]-(j)
WHERE k.keyword IN ['design', 'product design', 'computer aided design', 'surveys', 'humans', 'friction stir welding',
                    'machine design']
// toFloat() keeps the ratio fractional; DISTINCT counts a paper once even
// when it is connected to several of the keywords.
WITH j, count(DISTINCT p) / toFloat(tot_papers) AS papers_in_keyword, tot_papers
WHERE papers_in_keyword >= 0.4
RETURN j, papers_in_keyword, tot_papers ORDER BY tot_papers DESC


Data for testing CASE
// Sample Person nodes for experimenting with CASE expressions.
MERGE (alice:Person   {name: "Alice",   age: 38, eyes: "brown"})
MERGE (bob:Person     {name: "bob",     age: 25, eyes: "blue"})
MERGE (charlie:Person {name: "Charlie", age: 53, eyes: "Green"})
MERGE (daniel:Person  {name: "Daniel",  age: 20, eyes: "brown"})
MERGE (eskil:Person   {name: "Eskil",   age: 41, eyes: "blue", array: ['one', 'two', 'three'] })

// Knows relationships between the sample persons.
MERGE (alice)-[:Knows]->(bob)
MERGE (alice)-[:Knows]->(charlie)
MERGE (charlie)-[:Knows]->(daniel)
MERGE (bob)-[:Knows]->(daniel)
MERGE (bob)-[:Knows]->(eskil)