-
Notifications
You must be signed in to change notification settings - Fork 44
/
get_images.py
executable file
·75 lines (55 loc) · 1.76 KB
/
get_images.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3
"""
Extract image links from a web page
===================================
Author: Laszlo Szathmary, 2011 (jabba.laci@gmail.com)
GitHub: https://github.com/jabbalaci/Bash-Utils
Given a webpage, extract all image links.
Usage:
------
get_images.py URL [URL]... [options]
Options:
-l, --length Show lengths of images.
Last update: 2017-01-09 (yyyy-mm-dd)
"""
import sys
import urllib
from optparse import OptionParser
from urllib.parse import urljoin
import requests
from bs4 import BeautifulSoup
# HTTP headers passed to every requests.get() call in this script; the
# User-agent value is a desktop Firefox browser string.
user_agent = {'User-agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0'}
def get_content_length(url, timeout=10):
    """
    Return the Content-Length header reported for `url`.

    The request is streamed so the response body is not read: only the
    headers are needed, and the connection is released as soon as they
    have been inspected.

    Args:
        url: URL of the resource to probe.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The header value as a string, or "?" when the request fails or
        the server does not send a Content-Length header.
    """
    try:
        with requests.get(url, headers=user_agent, stream=True,
                          timeout=timeout) as response:
            return response.headers['content-length']
    except (requests.RequestException, KeyError):
        # Best effort: a network/URL problem or a missing header is reported
        # as unknown instead of aborting the listing. Ctrl-C still works
        # because KeyboardInterrupt is no longer swallowed.
        return "?"
def process(url, options, timeout=10):
    """
    Print the absolute URL of every <img> tag found on the page at `url`.

    One image is printed per line. When `options.length` is true, the
    image's Content-Length (or "?" if unknown) follows the URL, separated
    by a single space.

    Args:
        url: Address of the web page to scan.
        options: Parsed command-line options; only `length` is read.
        timeout: Seconds to wait for the page before giving up.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    response = requests.get(url, headers=user_agent, timeout=timeout)
    soup = BeautifulSoup(response.text, "lxml")

    # find_all is the current name; findAll is a deprecated alias.
    for tag in soup.find_all('img', src=True):
        # src may be relative, so resolve it against the page address.
        image_url = urljoin(url, tag['src'])
        if options.length:
            print(image_url, get_content_length(image_url))
        else:
            print(image_url)
def main():
    """
    Command-line entry point.

    Parses the arguments and lists the image links of every URL given.
    When no URL is supplied, the usage help is printed and the script
    exits with status 1.
    """
    cli = OptionParser(usage='%prog URL [URL]... [options]')
    cli.add_option(
        '-l', '--length',
        action='store_true',
        default=False,
        help='show lengths of images',
    )
    opts, urls = cli.parse_args()

    if urls:
        # At least one URL was passed: handle them in the order given.
        for page_url in urls:
            process(page_url, opts)
        return

    cli.print_help()
    sys.exit(1)
#############################################################################
# Run the command-line interface only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()