-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathupdate_from_mongoDB.py
74 lines (59 loc) · 2.16 KB
/
update_from_mongoDB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pymongo, json, sys
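'''
Scan the MongoDB 'Issues' collection and refresh status.json: record the
highest issue number seen for each repository and, on request, rebuild the
package list and the repository -> packages mapping.
'''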
#progress bar
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='%'):
    """
    Call in a loop to draw/refresh a single-line terminal progress bar.

    The line is written with a leading and trailing carriage return so that
    each call overwrites the previous one. When ``iteration == total`` a
    newline is printed so that later output starts on a fresh line.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to do at all: show a finished bar instead of dividing by zero.
        fraction = 1.0
        filledLength = length
    # Keep the bar exactly `length` cells wide even if iteration overshoots
    # total (or is negative).
    filledLength = max(0, min(length, filledLength))

    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    # Print New Line on Complete
    if iteration == total:
        print()
# Ask up front whether the package list and the repo -> packages map should be
# rebuilt too; any answer other than 'y'/'Y' refreshes only the issue counts.
flag = input('Do You Want to Update the Whole status file? (y/*) :')
update_all = flag.lower() == 'y'

# Start from the previous status file when it can be read and parsed;
# otherwise fall back to an empty skeleton.
try:
    with open('status.json', 'r') as f:
        status = json.load(f)
except (OSError, ValueError):
    # OSError: file missing or unreadable. ValueError: not valid JSON
    # (json.JSONDecodeError is a ValueError subclass).
    status = {'pkglist': [], 'repodict': {}, 'issue_count': {}, 'deleted_pkgs': []}

client = pymongo.MongoClient(host="da1.eecs.utk.edu")
db = client['NPM_Popular_Package_Download']
coll = db['Issues']
# Collection.count() is deprecated and no longer exists in PyMongo 4;
# count_documents({}) is the supported way to count every document.
total = coll.count_documents({})

issue_dict = {}   # repo path -> highest issue number seen
repodict = {}     # repo path -> 'packages' value of the first document seen for it
pkg_set = set()   # every package name seen (only filled when update_all)

for j, r in enumerate(coll.find(), start=1):
    printProgressBar(j, total, prefix='Progress:', suffix='Complete', length=50, decimals=2)

    # Strip the GitHub API prefix so the key is just the remainder of the URL.
    url = r['issue']['repository_url'].replace('https://api.github.com/repos/', '')
    number = int(r['issue']['number'])
    issue_dict[url] = max(issue_dict.get(url, number), number)

    if update_all:
        pkg = r['packages']  # presumably a list of package names -- TODO confirm
        if url not in repodict:
            repodict[url] = pkg
        # NOTE(review): packages are merged for every document, not only the
        # first one per repository -- confirm this matches the intended nesting.
        pkg_set.update(pkg)

pkglist = list(pkg_set)

if update_all:
    status['pkglist'] = pkglist
    status['repodict'] = repodict
# NOTE(review): issue counts are refreshed regardless of the answer above --
# confirm this matches the intended nesting.
status['issue_count'] = issue_dict

with open('status.json', 'w') as f:
    json.dump(status, f)