-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_degree_courses.py
60 lines (49 loc) · 1.27 KB
/
get_degree_courses.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding: utf-8 -*-
import bs4
import urllib2
import urlparse
import string
import sys
from operator import itemgetter
def _force_http(address):
    """Return ``address`` stripped, with a leading ``https:`` turned into ``http:``.

    Only the scheme prefix is rewritten; an ``https`` substring appearing
    later in the address (path, query string) is left untouched.
    """
    address = address.strip()
    if address.startswith('https:'):
        return 'http:' + address[len('https:'):]
    return address


def getFinalCourseURL(url):
    """Follow a curriculum course link and return the course page URL.

    Downloads the page at ``url`` (forced to plain ``http``) and returns the
    ``href`` of the fifth anchor found in it, also forced to ``http`` and
    stripped of surrounding whitespace.

    Args:
        url: Address of the intermediate course page. Surrounding whitespace
            is ignored.

    Returns:
        The ``href`` of the fifth ``<a>`` element of the downloaded page.

    Raises:
        IndexError: If the page contains fewer than five anchors.

    Errors raised by ``urllib2.urlopen`` or by the ``href`` lookup propagate
    unchanged.
    """
    response = urllib2.urlopen(_force_http(url))
    try:
        # BeautifulSoup consumes the response while constructing the tree,
        # so the connection can be released right afterwards.
        soup = bs4.BeautifulSoup(response)
    finally:
        response.close()
    anchors = soup.find_all('a')
    # NOTE(review): the fixed position 4 presumably matches the layout of
    # the Fenix course page -- confirm it still points at the intended link.
    return _force_http(anchors[4]['href'])
# Curriculum page of a degree; %s is the degree identifier from the command line.
CURRICULUM_URL_TEMPLATE = "http://fenix.tecnico.ulisboa.pt/cursos/%s/curriculo"

# Table-row markup emitted before the <option> elements.
SELECT_HEADER = """\
<tr>
<td>
<h3>Escolher uma cadeira</h3>
</td>
<td>
<select class="form-control" id="courseURL">
"""

# Markup emitted after the <option> elements.
SELECT_FOOTER = """\
</select>
</td>
<td>
<button type="button" class="btn btn-primary" onclick="processCourseURL()"><span class="glyphicon glyphicon-plus"></span></button>
</td>
</tr>
"""


def collect_courses(anchors):
    """Extract the unique courses from the anchors of a curriculum page.

    Anchors whose markup contains ``disciplina-curricular`` are treated as
    course links. Scanning stops at the first non-course anchor that follows
    at least one course anchor. Each course name is resolved only once, since
    every resolution costs an extra HTTP request.

    Args:
        anchors: Iterable of bs4 anchor tags, in document order.

    Returns:
        A list of ``(course_url, course_name)`` tuples in document order.
    """
    courses = []
    seen_names = set()
    inside_course_run = False
    for anchor in anchors:
        if 'disciplina-curricular' not in str(anchor):
            if inside_course_run:
                # The run of course links has ended; later anchors are
                # not inspected.
                break
            continue
        inside_course_run = True
        name = anchor.get_text().strip()
        if name in seen_names:
            continue
        courses.append((getFinalCourseURL(anchor['href']), name))
        seen_names.add(name)
    return courses


def render_course_select(courses):
    """Render the course picker table row.

    Args:
        courses: Iterable of ``(course_url, course_name)`` pairs.

    Returns:
        The HTML fragment, with one ``<option>`` per course sorted by name.
        URLs and names are HTML-escaped (including both quote characters)
        so scraped text cannot break out of the single-quoted ``value``
        attribute or inject markup.
    """
    from xml.sax.saxutils import escape

    quote_entities = {'"': '&quot;', "'": '&#39;'}
    options = [
        "<option value='%s'>%s</option>"
        % (escape(href, quote_entities), escape(name, quote_entities))
        for href, name in sorted(courses, key=itemgetter(1))
    ]
    return SELECT_HEADER + "".join(options) + SELECT_FOOTER


def main(argv=None):
    """Print the course picker HTML for the degree named in ``argv[1]``.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the degree argument is
        missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s <degree>\n" % argv[0])
        return 2

    response = urllib2.urlopen(CURRICULUM_URL_TEMPLATE % argv[1])
    try:
        # The document is parsed while the response is still open; the
        # connection is released as soon as the tree is built.
        soup = bs4.BeautifulSoup(response)
    finally:
        response.close()

    courses = collect_courses(soup.find_all('a'))
    print(render_course_select(courses).encode('utf-8', 'ignore'))
    return 0


if __name__ == "__main__":
    sys.exit(main())