-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsave_in_file.py
74 lines (55 loc) · 1.97 KB
/
save_in_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python
import mechanicalsoup
import json
import random
import time
import string
# NOTE(review): presumably meant as a pause between requests, but nothing in
# the visible code reads this value — confirm whether it is still needed.
delay_in_seconds= 0.5
# Single browser session reused for every page fetch in quoteDictGenerator.
browser = mechanicalsoup.StatefulBrowser()
# URL prefix for Goodreads tag pages; the tag name is appended directly to it.
baseGoodReadsURL = "https://www.goodreads.com/quotes/tag/"
# Example tag list kept for reference; main() reads the tags from user input.
# tagList = ['success','achievements','academic','sports','school','excellence']
def generateQuoteID(stringLength=8):
    """Return a random alphanumeric identifier.

    Args:
        stringLength: Number of characters to generate (default 8). Zero or
            a negative value yields an empty string.

    Returns:
        A string of ``stringLength`` characters drawn from ASCII letters and
        digits. The ``random`` module is used, so the result is not
        cryptographically secure, and uniqueness is not checked.
    """
    lettersAndDigits = string.ascii_letters + string.digits
    # One random.choice call per character; the loop index itself is unused.
    return ''.join(random.choice(lettersAndDigits) for _ in range(stringLength))
# Accumulates scraped quotes, filled in by quoteDictGenerator:
# tag -> list of {"quote_id", "content", "author"} dicts.
quoteDict = {
}
def quoteDictGenerator(tagRequested):
    """Fetch the Goodreads quotes page for a tag and store its short quotes.

    Opens ``baseGoodReadsURL + tagRequested`` with the module-level
    ``browser``, scans every ``div.quoteText`` element on the page, and
    appends one dict per quote shorter than 200 characters to
    ``quoteDict[tagRequested]``. Each dict has the keys ``quote_id``,
    ``content`` and ``author``. A summary line with the number of stored
    quotes is printed once per call.

    Args:
        tagRequested: Tag name; appended verbatim to the base URL and used
            as the key in ``quoteDict``.

    Returns:
        None. Results are accumulated in the module-level ``quoteDict``.
    """
    tag = tagRequested
    browser.open(baseGoodReadsURL + tag)
    page = browser.get_current_page()
    quoteListObject = page.find_all("div", attrs={"class": "quoteText"})

    cnt = 0
    for quote in quoteListObject:
        author = quote.find("span", attrs={"class": "authorOrTitle"})
        firstChild = quote.contents[0] if quote.contents else None
        # Skip malformed entries (no author span, or the first child is not
        # plain text) instead of aborting the whole tag with an exception.
        if author is None or not isinstance(firstChild, str):
            continue

        quoteText = firstChild.strip()
        # NOTE(review): the length filter is assumed to guard the counter,
        # the dict construction and the append — confirm against the
        # original file's indentation.
        if len(quoteText) >= 200:
            continue

        cnt += 1
        quoteobj = {
            # The ID is generated only for quotes that are actually stored.
            "quote_id": generateQuoteID(),
            "content": quoteText,
            "author": author.text.strip(),
        }
        # The tag's list is created lazily, on the first stored quote.
        quoteDict.setdefault(tag, []).append(quoteobj)

    print("No. of quotes fetched on {tag} = {count}".format(tag=tag,count=cnt))
def main():
    """Prompt for tags, scrape quotes for each one, and write quotes.json.

    Reads a comma-separated tag list from standard input, calls
    ``quoteDictGenerator`` for every non-empty tag, then serializes the
    module-level ``quoteDict`` to ``quotes.json`` in the current directory
    as a JSON object mapping each tag to its list of quotes.
    """
    tagListString = input("Enter your prefered tags (seperate them by ',' )")
    # Tolerate input such as "success, sports" or a trailing comma: strip
    # surrounding whitespace and ignore empty entries.
    tagList = [tag.strip() for tag in tagListString.split(',')]
    for tag in tagList:
        if tag:
            quoteDictGenerator(tag)

    with open('quotes.json', 'w') as jsonFile:
        # Dump the dict itself. Serializing to a string first and dumping
        # that string would store a single JSON string literal in the file
        # rather than a JSON object.
        json.dump(quoteDict, jsonFile)
    print("File successfully created with tags {}".format(tagListString))
if __name__ == '__main__':
    # Time the whole run with a monotonic, high-resolution clock so the
    # reported duration is unaffected by system clock adjustments.
    start_time = time.perf_counter()
    main()
    elapsed_time = time.perf_counter() - start_time
    print("elapsed_time =", elapsed_time)