-
Notifications
You must be signed in to change notification settings - Fork 1
/
scraperImg.py
72 lines (62 loc) · 2.56 KB
/
scraperImg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 23 11:19:27 2017
Download every image whose URL is listed, one per line, in a text file.
@author: johnmanli
"""
#/Users/johnmanli/Documents/selenium_chromeDriver/imgUrl/imgUrl.txt
import csv
import os
import urllib2
import urlparse
def _file_extension(url):
    """Return the extension (leading dot included) of the URL's path, or ''.

    Only the path component is inspected, so query strings, fragments and
    dots in the host name never leak into the saved file name.
    """
    return os.path.splitext(urlparse.urlsplit(url).path)[1]


def _fetch(url, timeout):
    """Return the raw bytes served at *url*, always closing the connection."""
    response = urllib2.urlopen(url, timeout=timeout)
    try:
        return response.read()
    finally:
        response.close()


def _log_error(error_log_path, url, reason):
    """Append one ``url,reason`` row to the CSV error log."""
    # The csv writer quotes fields that contain commas (URLs and socket error
    # messages often do), so every row keeps exactly two columns.
    # Python 2's csv module expects the file to be opened in binary mode.
    with open(error_log_path, "ab") as error_log:
        csv.writer(error_log, lineterminator="\n").writerow([url, reason])


def main(url_list_path="/Users/johnmanli/Documents/selenium_chromeDriver/imgUrl/imgUrl.txt",
         error_log_path="/Users/johnmanli/Documents/selenium_chromeDriver/errorImgUrl/error.csv",
         output_dir="/Users/johnmanli/Documents/selenium_chromeDriver/img",
         start_index=5011,
         timeout=60):
    """Download every image listed in a URL file and save it to disk.

    The URL file is read line by line (one URL per line). Each image is saved
    as ``img<line index><extension>`` inside *output_dir*; URLs that cannot be
    downloaded are appended to the CSV error log and the run continues.

    Args:
        url_list_path: Text file holding one image URL per line.
        error_log_path: CSV file that receives a ``url,reason`` row for every
            failed download (opened in append mode).
        output_dir: Existing directory the images are written to.
        start_index: Zero-based line index to start from; earlier lines are
            skipped. The default of 5011 is the resume point the original
            script hard-coded after a previous run crashed.
        timeout: Seconds to wait on a blocking network operation before giving
            up on a URL; ``None`` waits indefinitely.

    Raises:
        IOError: If the URL list, the error log or an output file cannot be
            opened or written. Local disk problems are not swallowed.
    """
    with open(url_list_path, "r") as url_file:
        # Enumerate the raw file so idx always equals the line number, which
        # keeps output file names stable across resumed runs.
        for idx, line in enumerate(url_file):
            if idx < start_index:
                continue

            # strip() also removes a '\r' left by CRLF line endings.
            url = line.strip()
            if not url:
                continue

            # Single pre-formatted argument: prints the same text as the old
            # multi-item print statement.
            print("downloading img: [ %d /46500]\n url:  %s" % (idx, url))

            try:
                # The body is read inside the try so a connection dropped
                # mid-transfer is logged instead of aborting the whole run.
                image = _fetch(url, timeout)
            except urllib2.HTTPError as e:
                _log_error(error_log_path, url, str(e.code))
            except urllib2.URLError as e:
                _log_error(error_log_path, url, str(e.reason))
            except Exception as e:
                # Exception (not a bare except) lets Ctrl-C and SystemExit
                # stop the script; the repr records what actually went wrong.
                _log_error(error_log_path, url, "unknown exception: %r" % (e,))
            else:
                print("saving Imgs")
                target = os.path.join(
                    output_dir, "img%d%s" % (idx, _file_extension(url)))
                # Binary mode: image bytes must not go through newline
                # translation.
                with open(target, "wb") as output_img:
                    output_img.write(image)
# Script entry point: start the download run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()