-
Notifications
You must be signed in to change notification settings - Fork 3
/
alllinks.py
40 lines (35 loc) · 1.05 KB
/
alllinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import urllib.request #importing the request library
"""
Creating a function to read the URL using the urllib.request library.
Also decoding the response so that it is easily usable.
"""
def get_page(url):
    """Fetch *url* and return the response body decoded to text.

    The charset advertised in the response's Content-Type header is used
    for decoding, falling back to UTF-8 when none is given or when the
    advertised name is not a codec Python knows. Bytes that are invalid in
    the chosen charset are replaced instead of aborting the fetch.

    Args:
        url: Address to open with ``urllib.request.urlopen``.

    Returns:
        The decoded body as ``str``. If the page cannot be fetched, the
        error is reported on stderr and ``""`` is returned, so callers can
        always treat the result as text.
    """
    import sys  # local import: only needed for the failure report

    try:
        with urllib.request.urlopen(url) as response:
            raw = response.read()
            charset = response.headers.get_content_charset() or "utf-8"
    except Exception as e:
        # Deliberately broad: this is a best-effort fetch, and urlopen/read
        # can fail with URLError, ValueError (bad URL), HTTP protocol
        # errors, timeouts, ... Report the problem and hand back empty text
        # rather than an exception object that string code would choke on.
        print("get_page: could not fetch {!r}: {}".format(url, e),
              file=sys.stderr)
        return ""

    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server advertised a charset name Python has no codec for.
        return raw.decode("utf-8", errors="replace")
"""
Creating a function to extract the links from the received data.
"""
def get_next_target(s):
    """Locate the first absolute link in the HTML text *s*.

    A link is recognised only in the literal form ``<a href="htt...``,
    i.e. an anchor tag whose first attribute is a double-quoted ``href``
    whose value starts with ``htt``.

    Args:
        s: Text to scan.

    Returns:
        ``(url, end_quote)``, where ``end_quote`` is the index in *s* of
        the quote that closes the href value. ``(None, 0)`` is returned
        when *s* contains no such link or the href value is never closed.
    """
    marker = '<a href="htt'
    start_link = s.find(marker)
    if start_link == -1:
        return None, 0

    # The opening quote sits at a fixed offset inside the marker, so there
    # is no need to search for it again.
    start_quote = start_link + marker.index('"')
    end_quote = s.find('"', start_quote + 1)
    if end_quote == -1:
        # Unterminated attribute (e.g. truncated HTML). Slicing with -1
        # would silently drop the last character and hand the caller a
        # negative position, so treat it as "no link".
        return None, 0

    return s[start_quote + 1:end_quote], end_quote
"""
Creating a function to print the extracted links from the requested web page.
"""
def print_all_links(page):
    """Print every link that ``get_next_target`` finds in *page*, one per line.

    The page is scanned by repeatedly asking ``get_next_target`` for the
    next link and then discarding the text that precedes the position it
    reported.

    Args:
        page: HTML text to scan. A value that is not a ``str`` (for
            example an exception object handed back by a failed download)
            is reported on stderr and produces no output on stdout.
    """
    if not isinstance(page, str):
        import sys  # local import: only needed on this error path

        print("print_all_links: no page text to scan: {!r}".format(page),
              file=sys.stderr)
        return

    while True:
        url, endpos = get_next_target(page)
        if not url:
            break
        print(url)
        # Resume the search at the position reported for the link just printed.
        page = page[endpos:]
# Script entry: download the Abraham Lincoln article and print its links.
_page_html = get_page('https://en.wikipedia.org/wiki/Abraham_Lincoln')
print_all_links(_page_html)