Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files: browse the repository at this point in the history
  • Loading branch information
Kwirtz committed Mar 19, 2023
2 parents (9d906b4 + ee0593e), commit aa91604
Show file tree
Hide file tree
Showing 13 changed files with 221 additions and 112 deletions.
2 changes: 1 addition & 1 deletion novelpy/indicators/Author_proximity.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pymongo import UpdateOne
import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import json
import json
import os
import bson
import math
Expand Down
19 changes: 13 additions & 6 deletions novelpy/indicators/Foster2015.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
import itertools
import numpy as np
import networkx as nx
import community as community_louvain
from packaging import version
from collections import defaultdict
from scipy.sparse import lil_matrix, spdiags
from scipy.sparse import lil_matrix
import community as community_louvain
from novelpy.utils.run_indicator_tools import create_output



class Foster2015(create_output):


Expand All @@ -22,11 +24,12 @@ def __init__(self,
variable,
sub_variable,
focal_year,
starting_year,
starting_year = None,
community_algorithm = "Louvain",
client_name = None,
db_name = None,
density = False):
density = False,
list_ids = None):

'''
Description
Expand Down Expand Up @@ -75,7 +78,8 @@ def __init__(self,
sub_variable = sub_variable,
focal_year = focal_year,
starting_year = starting_year,
density = density)
density = density,
list_ids = list_ids)

self.path_score = "Data/score/foster/{}".format(self.variable)

Expand Down Expand Up @@ -158,7 +162,10 @@ def get_indicator(self):
'''
self.get_data()
self.g = nx.from_scipy_sparse_matrix(self.current_adj, edge_attribute='weight')
if version.parse(nx.__version__) < version.parse("3.0"):
self.g = nx.from_scipy_sparse_matrix(self.current_adj, edge_attribute='weight')
else:
self.g = nx.from_scipy_sparse_array(self.current_adj, edge_attribute='weight')
print("Create empty df ...")
self.generate_commu_adj_matrix()
print("Empty df created !")
Expand Down
6 changes: 4 additions & 2 deletions novelpy/indicators/Lee2015.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def __init__(self,
focal_year,
client_name = None,
db_name = None,
density = False):
density = False,
list_ids = None):
"""
Description
-----------
Expand Down Expand Up @@ -65,7 +66,8 @@ def __init__(self,
variable = variable,
sub_variable = sub_variable,
focal_year = focal_year,
density = density)
density = density,
list_ids = list_ids)


self.path_score = "Data/score/lee/{}".format(variable)
Expand Down
6 changes: 4 additions & 2 deletions novelpy/indicators/Uzzi2013.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,8 @@ def __init__(self,
client_name = None,
db_name = None,
nb_sample = 20,
density = False):
density = False,
list_ids = None):
"""
Description
-----------
Expand Down Expand Up @@ -292,7 +293,8 @@ def __init__(self,
variable = variable,
sub_variable = sub_variable,
focal_year = focal_year,
density = density)
density = density,
list_ids = list_ids)


self.path_sample = "Data/cooc_sample/{}/".format(self.variable)
Expand Down
8 changes: 5 additions & 3 deletions novelpy/indicators/Wang2017.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ def __init__(self,
focal_year,
time_window_cooc,
n_reutilisation,
starting_year,
starting_year = None,
client_name = None,
db_name = None,
keep_item_percentile = 50,
density = False):
density = False,
list_ids = None):
"""
Description
Expand Down Expand Up @@ -100,7 +101,8 @@ def __init__(self,
n_reutilisation = n_reutilisation,
starting_year = starting_year,
density = density,
keep_item_percentile = keep_item_percentile)
keep_item_percentile = keep_item_percentile,
list_ids = list_ids)

self.path_score = "Data/score/wang/{}/".format(self.variable + "_" + str(self.time_window_cooc) + "_" + str(self.n_reutilisation)+ self.restricted )

Expand Down
62 changes: 36 additions & 26 deletions novelpy/indicators/WuBuBornmann.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ def __init__(self,
refs_list_variable,
cits_list_variable,
year_variable,
variable = None,
client_name = None,
db_name = None):
db_name = None,
list_ids = None):
"""
Description
Expand Down Expand Up @@ -64,7 +66,11 @@ def __init__(self,
self.client_name = client_name
self.db_name = db_name
self.collection_name = collection_name

self.list_ids = list_ids
if variable == None:
self.variable = "citations"
else:
self.variable = variable

create_output.__init__(
self,
Expand All @@ -73,8 +79,9 @@ def __init__(self,
collection_name = collection_name,
id_variable = id_variable,
year_variable = year_variable,
variable = 'citations',
focal_year = focal_year)
variable = self.variable,
focal_year = focal_year,
list_ids = list_ids)

if client_name:
self.tomongo = True
Expand Down Expand Up @@ -109,11 +116,14 @@ def get_citation_network(self):
"""
if self.tomongo:
docs = self.collection.find({self.year_variable:self.focal_year})
self.papers_items = {doc[self.id_variable]:doc["citations"] for doc in docs}
if self.list_ids:
self.papers_items = {doc[self.id_variable]:doc[self.variable] for doc in docs if doc[self.id_variable] in self.list_ids}
else:
self.papers_items = {doc[self.id_variable]:doc[self.variable] for doc in docs}
else:
self.citation_network = pickle.load(open('Data/docs/{}.pkl'.format(self.collection_name),'rb'))
self.papers_items = {pmid:self.citation_network[pmid] for pmid in self.citation_network if self.citation_network[pmid][self.year_variable] == self.focal_year}

def compute_scores(self,
focal_paper_id,
focal_paper_refs,
Expand Down Expand Up @@ -148,7 +158,7 @@ def compute_scores(self,
client = pymongo.MongoClient(kwargs['client_name'])
db = client[kwargs['db_name']]
collection = db[kwargs['collection_name']]
focal_paper_id = int(focal_paper_id)
focal_paper_id = focal_paper_id

# papers that cites our focal paper

Expand All @@ -157,8 +167,8 @@ def compute_scores(self,
for citer in focal_paper_cits:
doc = collection.find_one({self.id_variable:citer})
if doc:
if 'citations' in doc:
citing_focal_paper.update({doc[self.id_variable]: doc['citations'][self.refs_list_variable]})
if self.variable in doc:
citing_focal_paper.update({doc[self.id_variable]: doc[self.variable][self.refs_list_variable]})
ids.update([citer])


Expand All @@ -169,18 +179,18 @@ def compute_scores(self,
for ref in focal_paper_refs:
doc = collection.find_one({self.id_variable:ref})
if doc:
if 'citations' in doc:
for citing_paper in doc['citations'][self.cits_list_variable]:
if self.variable in doc:
for citing_paper in doc[self.variable][self.cits_list_variable]:
if all([citing_paper != focal_paper_id,
citing_paper not in ids,
doc['year'] >= self.focal_year]):
doc[self.year_variable] >= self.focal_year]):

ref_citers = collection.find_one({self.id_variable:citing_paper})
if ref_citers:
if 'citations' in ref_citers.keys():
if self.variable in ref_citers.keys():
if ref_citers[self.id_variable] != focal_paper_id:
citing_ref_from_focal_paper.update({
ref_citers[self.id_variable]: ref_citers['citations'][self.refs_list_variable]
ref_citers[self.id_variable]: ref_citers[self.variable][self.refs_list_variable]
})
ids.update([ref_citers[self.id_variable]])
else:
Expand All @@ -192,22 +202,22 @@ def compute_scores(self,
#try:
if citer not in ids:
doc = self.citation_network[citer]
citing_focal_paper.update({citer: doc['citations'][self.refs_list_variable]})
citing_focal_paper.update({citer: doc[self.variable][self.refs_list_variable]})
ids.update([citer])

# papers that cite refs from focal paper
citing_ref_from_focal_paper = dict()
ids = set()
for ref in focal_paper_refs:
try:
citing_ref_from_fp = self.citation_network[ref]['citations'][self.cits_list_variable]
citing_ref_from_fp = self.citation_network[ref][self.variable][self.cits_list_variable]
for citing_paper in citing_ref_from_fp :

if all([citing_paper != focal_paper_id,
citing_paper not in ids]):
citing_paper_doc = self.citation_network[citing_paper]
if citing_paper_doc['year'] >= self.focal_year:
citers_refs = self.citation_network[citing_paper]['citations'][self.refs_list_variable]
citers_refs = self.citation_network[citing_paper][self.variable][self.refs_list_variable]
citing_ref_from_focal_paper.update({citing_paper: citers_refs})
ids.update([citing_paper])
except Exception as e:
Expand Down Expand Up @@ -307,16 +317,16 @@ def compute_scores_par(focal_paper_id,
client = pymongo.MongoClient(kwargs['client_name'])
db = client[kwargs['db_name']]
collection = db[kwargs['collection_name']]
focal_paper_id = int(focal_paper_id)
focal_paper_id = focal_paper_id

# papers that cites our focal paper

citing_focal_paper = dict()
ids = set()
for citer in focal_paper_cits:
doc = collection.find_one({id_variable:citer})
if 'citations' in doc:
citing_focal_paper.update({doc[id_variable]: doc['citations'][refs_list_variable]})
if self.variable in doc:
citing_focal_paper.update({doc[id_variable]: doc[self.variable][refs_list_variable]})
ids.update([citer])


Expand All @@ -326,17 +336,17 @@ def compute_scores_par(focal_paper_id,
ids = set()
for ref in focal_paper_refs:
doc = collection.find_one({id_variable:ref})
if 'citations' in doc.keys():
for citing_paper in doc['citations'][cits_list_variable]:
if self.variable in doc.keys():
for citing_paper in doc[self.variable][cits_list_variable]:
if all([citing_paper != focal_paper_id,
citing_paper not in ids,
doc['year'] >= focal_year]):

ref_citers = collection.find_one({id_variable:citing_paper})
if 'citations' in ref_citers.keys():
if self.variable in ref_citers.keys():
if ref_citers[id_variable] != focal_paper_id:
citing_ref_from_focal_paper.update({
ref_citers[id_variable]: ref_citers['citations'][refs_list_variable]
ref_citers[id_variable]: ref_citers[self.variable][refs_list_variable]
})
ids.update([ref_citers[id_variable]])

Expand Down Expand Up @@ -428,8 +438,8 @@ def compute_scores_par(focal_paper_id,
focal_paper_refs = self.papers_items[idx][self.refs_list_variable]
focal_paper_cits = self.papers_items[idx][self.cits_list_variable]
else:
focal_paper_refs = self.papers_items[idx]['citations'][self.refs_list_variable]
focal_paper_cits = self.papers_items[idx]['citations'][self.cits_list_variable]
focal_paper_refs = self.papers_items[idx][self.variable][self.refs_list_variable]
focal_paper_cits = self.papers_items[idx][self.variable][self.cits_list_variable]

paper_score = self.compute_scores(
focal_paper_id = idx,
Expand Down
26 changes: 17 additions & 9 deletions novelpy/utils/cooc_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import pymongo
import os
import re
import tqdm
import json
import pickle
import pymongo
import itertools
from scipy.sparse import lil_matrix, csr_matrix
import scipy.sparse as sp
from sklearn import preprocessing
import numpy as np
import pickle
import os
import json
from scipy.sparse import lil_matrix

class create_cooc:

Expand All @@ -16,7 +15,7 @@ def __init__(self,
sub_var,
year_var,
collection_name,
time_window,
time_window = None,
dtype = np.uint32,
weighted_network = False,
self_loop = False,
Expand Down Expand Up @@ -57,7 +56,6 @@ def __init__(self,
self.sub_var = sub_var
self.year_var = year_var
self.collection_name = collection_name
self.time_window = time_window
self.dtype = dtype
self.weighted_network = weighted_network
self.self_loop = self_loop
Expand All @@ -78,6 +76,16 @@ def __init__(self,
self.path_output = "Data/cooc/{}/{}_{}".format(var,type1,type2)
if not os.path.exists(self.path_output):
os.makedirs(self.path_output)

if time_window:
self.time_window = time_window
else:
if client_name:
self.time_window = self.db[collection_name].distinct(self.year_var)
else:
self.time_window = [int(re.sub('.json','',file)) for file in os.listdir("Data/docs/{}/".format(collection_name))]



def save_matrix(self,year):
'''
Expand Down
Loading

0 comments on commit aa91604

Please sign in to comment.