-
Notifications
You must be signed in to change notification settings - Fork 0
/
podcast.py
98 lines (61 loc) · 2.87 KB
/
podcast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
'''Python program to scrape UC San Diego's podcasts for a quarter. Created by Aykan Fonseca.'''
# TODO:
# 1. Update to parse any quarter. Currently only parses the current quarter.
# Builtins
import time
import sys
# Pip install packages.
from bs4 import BeautifulSoup
from firebase import firebase
import requests
# Maps each quarter name to its two-character code.
quarter_mapping = {
    'Fall': 'FA',
    'Winter': 'WI',
    'Spring': 'SP',
    'Summer Med School': 'SU',
    'Summer Session 1': 'S1',
    'Summer Session 2': 'S2',
    'Summer Session 3': 'S3',
    'Summer': 'SA',
}

# Page that parse_data() downloads and scrapes.
podcast_url = "https://podcast.ucsd.edu"
def parse_data():
    """Scrape the podcast index page for the quarter it lists.

    Downloads ``podcast_url``, reads the heading of the ``div`` with class
    ``quarter`` to build the quarter code, and converts every table row
    inside that ``div`` into a dictionary.

    Returns:
        A ``(podcasts, quarter)`` tuple.

        ``podcasts`` is a list of dictionaries, one per table row, with the
        keys ``'class'`` (course name, text before the first ``-``),
        ``'professor'``, ``'authentication'`` (``True`` when the first cell
        contains a ``div``) and ``'link'``.

        ``quarter`` is the mapped quarter code followed by the last two
        characters of the year token (for example ``'FA17'``).

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out, or the server answers with an error status.
        KeyError: If the first word of the heading is not a key of
            ``quarter_mapping``.
    """
    print("Parsing data.")

    # A bounded timeout keeps the script from hanging forever on a stalled
    # connection; an error status is reported here rather than surfacing
    # later as a confusing parse failure.
    response = requests.get(podcast_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'lxml')

    # Look the quarter container up once; both the heading and the table
    # rows live inside it.
    quarter_div = soup.find('div', {'class': 'quarter'})

    # Keep every second word of the heading: the first kept word is looked
    # up in quarter_mapping, the second supplies the two-digit year suffix.
    # NOTE(review): only a single word is looked up, so the multi-word keys
    # of quarter_mapping (e.g. 'Summer Session 1') can never match here --
    # confirm the heading format for summer quarters.
    quarter_year = quarter_div.h2.span.text.split(' ')[::2]
    quarter = quarter_mapping[quarter_year[0]] + quarter_year[1][-2:]

    # A list of dictionaries, each containing: title of course, professor,
    # authentication flag, and podcast link.
    podcasts = []

    for row in quarter_div.findAll('tr'):
        cells = row.findAll('td')

        # Rows without the three expected cells (e.g. a header row) carry
        # no podcast; skip them instead of raising IndexError.
        if len(cells) < 3:
            continue

        course_cell, professor_cell, link_cell = cells[:3]

        podcasts.append({
            # Only the text before the first '-' is kept as the class name.
            'class': course_cell.text.strip().partition('-')[0].strip(),
            'professor': professor_cell.text.strip(),
            # A <div> inside the first cell marks the podcast as requiring
            # authentication.
            'authentication': course_cell.div is not None,
            # Drops the final four characters of the href (presumably a
            # file extension -- TODO confirm).
            'link': link_cell.findAll('a')[0]['href'][:-4],
        })

    return podcasts, quarter
def update_db(podcasts, quarter):
    """Attach podcast details to course nodes that already exist in Firebase.

    For every podcast the node ``/quarter/<quarter>/<class>/`` is fetched;
    when it exists, its ``podcast`` child is written with the podcast's
    ``authentication`` flag and ``link``. Missing nodes are left alone.

    Args:
        podcasts: List of dictionaries with at least the keys ``'class'``,
            ``'authentication'`` and ``'link'``. The ``'class'`` value is
            shortened in place to its first two space-separated words when
            it has more than two.
        quarter: Quarter code used as the second path segment.
    """
    print("Updating DB with data.")

    database = firebase.FirebaseApplication("https://schedule-of-classes-8b222.firebaseio.com/")

    for item in podcasts:
        # If the podcast is for two courses, just pick the first one. TODO: NEED TO UPDATE.
        # The word *count* must be compared with 2: comparing the list
        # itself to an int raises TypeError on Python 3.
        words = item['class'].split(' ')
        if len(words) > 2:
            item['class'] = ' '.join(words[:2])

        path = "/quarter/" + quarter + "/" + str(item['class']) + "/"

        # Updates node when node exists. If not, don't add because we won't use.
        if database.get(path, None) is not None:
            database.put(
                path,
                'podcast',
                {'authentication': item['authentication'], 'link': item['link']},
            )
def reset_db():
    """Delete the ``/quarter`` node from the Firebase database."""
    print("Wiping information in database.")

    firebase.FirebaseApplication(
        "https://schedule-of-classes-8b222.firebaseio.com/"
    ).delete('/quarter', None)
def main():
    """Print the interpreter version, then scrape the podcasts and upload them.

    The time spent scraping and uploading is printed at the end.
    """
    print(sys.version)

    # Hard-coded switch: set to True to delete the '/quarter' node first.
    wipe_first = False
    if wipe_first:
        reset_db()

    started_at = time.time()

    # parse_data() returns (podcasts, quarter), exactly what update_db takes.
    update_db(*parse_data())

    elapsed = time.time() - started_at
    print("\nTime taken: " + str(elapsed))
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()