-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_clustering.py
79 lines (49 loc) · 2.33 KB
/
01_clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
import numpy as np
import json
import os
### Working directory and result containers for handles, journal names and metadata
# Directory that holds the JSON records; adjust to your local setup.
path = r'C:/Users/steve/PycharmProjects/pythonProject1/json'
# The records are later listed and opened by bare file name, so the process
# has to run from inside `path`.
os.chdir(path)
# Containers filled while scanning the records. `handles` and `journal_name`
# were announced by the original comment but never created, so the first
# handles.append(...) raised NameError.
handles = []
journal_name = []
metadata = []
abstracts = []
keywords = []
## Keyword lists used to select abstracts by their 'dc.subject.keyword' value.
# Keywords are compared with ==, i.e. exact and case-sensitive, which is why
# each term is listed in several capitalisations.
# The author recorded 193 abstracts for the climate list.
# NOTE(review): that count was taken while a missing comma fused 'Environment'
# and 'climate change' into the single entry 'Environmentclimate change';
# re-check the number now that both keywords are matched separately.
keyword_list_climate = [
    'Sustainability', 'sustainability',
    'sustainable development', 'Sustainable development', 'Sustainable Development',
    'globalization', 'Globalization',
    'environment', 'Environment',
    'climate change', 'Climate change', 'Climate Change',
    'energy', 'Energy',
    'sustainable development goals',
]
# Keywords related to management topics.
# Hit counts recorded by the author: 116 + 88 + 55 + 43 + 36 + 35 = 373
# (presumably one count per topic - TODO confirm).
keyword_list_management = [
    'innovation', 'Innovation',
    'entrepreneurship', 'Entrepreneurship',
    'human capital', 'Human capital',
    'Productivity', 'productivity',
    'performance', 'Performance',
    'efficiency', 'Efficiency',
]
def collect_abstracts(keyword_list, directory='.'):
    """Collect the abstracts of all records tagged with one of *keyword_list*.

    Every entry of *directory* is opened and parsed as JSON exactly once.
    A record is expected to provide 'handle', 'parentCollection' -> 'name'
    and 'metadata', a list of {'key': ..., 'value': ...} entries.

    A record's 'dc.description.abstract' values are collected once for each
    of its 'dc.subject.keyword' entries whose value equals a keyword
    (exact, case-sensitive comparison). A record carrying several matching
    keywords therefore contributes its abstract several times.

    Args:
        keyword_list: keywords to look for; the result is grouped in this order.
        directory: folder containing the JSON records (default: the current
            working directory).

    Returns:
        A tuple (abstracts, handles, journal_names). `abstracts` is ordered
        by keyword first and by directory listing order second; `handles`
        and `journal_names` hold one entry per record file.

    Raises:
        OSError: if an entry of *directory* cannot be opened.
        json.JSONDecodeError: if a file does not contain valid JSON.
        KeyError: if a record lacks one of the expected fields.
    """
    # One bucket per keyword: lets us read each file a single time while still
    # returning the abstracts grouped keyword by keyword.
    buckets = {keyword: [] for keyword in keyword_list}
    handles = []
    journal_names = []

    for file_name in os.listdir(directory):
        record_path = os.path.join(directory, file_name)
        with open(record_path, 'r', encoding='utf-8', errors='ignore') as record_file:
            data = json.load(record_file)

        handles.append(data['handle'])
        journal_names.append(data['parentCollection']['name'])

        entries = data['metadata']
        for entry in entries:
            if entry['key'] != 'dc.subject.keyword':
                continue
            value = entry['value']
            # isinstance guard: a non-string (possibly unhashable) value can
            # never equal a keyword and must not break the dict lookup.
            if isinstance(value, str) and value in buckets:
                buckets[value].extend(
                    other['value']
                    for other in entries
                    if other['key'] == 'dc.description.abstract'
                )

    # Flatten in keyword order; a keyword listed twice is emitted twice.
    found = []
    for keyword in keyword_list:
        found.extend(buckets[keyword])
    return found, handles, journal_names


if __name__ == '__main__':
    # Keep only the abstracts that fit a specific keyword list;
    # change keyword_list_climate to a different keyword_list if needed.
    found_abstracts, handles, journal_name = collect_abstracts(keyword_list_climate)
    abstracts.extend(found_abstracts)

    # Put the list into a DataFrame so the printed index shows how many
    # abstracts were collected.
    df = pd.DataFrame(abstracts)
    print(df)