forked from mokeyjay/Yandere-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Yandere.py
44 lines (37 loc) · 961 Bytes
/
Yandere.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import re
import Http
import Log
def get_html(page=1):
    """
    Fetch the HTML source of one yande.re post-list page.

    The process is terminated (``SystemExit``) when the fetch yields a falsy
    result or when the payload cannot be decoded as UTF-8; both cases are
    logged through ``Log.add`` first.

    :param page: page number of the post list
    :type page: int
    :return: the page's HTML, decoded as UTF-8
    :rtype: str
    """
    # Local import keeps this block self-contained. sys.exit is used instead
    # of the site-provided exit() helper, which is meant for the interactive
    # shell and is not guaranteed to exist (e.g. under ``python -S``).
    import sys

    url = 'https://yande.re/post?page=' + str(page)
    html = Http.get(url)  # presumably returns bytes, or a falsy value on failure
    if not html:
        Log.add('抓取 ' + url + ' 失败')
        # NOTE(review): exits with status 0 although the fetch failed; kept
        # as-is so existing wrappers see the same exit status.
        sys.exit()

    try:
        html = html.decode('utf-8')
    except (UnicodeDecodeError, AttributeError):
        # UnicodeDecodeError: payload is not valid UTF-8.
        # AttributeError: payload has no .decode (e.g. it is already a str).
        # Anything else (KeyboardInterrupt, SystemExit, ...) now propagates
        # instead of being swallowed by a bare ``except``.
        Log.add(url + ' 解码失败')
        sys.exit(500)
    return html
def get_li(html: str):
    """
    Extract the source of every post ``<li>`` element from a list page.

    A match starts at ``<li style="width: 160px;" id="p`` and runs lazily to
    the nearest following ``</li>``. No regex flags are set, so ``.`` does not
    cross line breaks and an element spanning several lines is not matched.

    :param html: HTML source of the list page
    :type html: str
    :return: list of matched ``<li>...</li>`` substrings, in document order
    """
    li_pattern = '<li style="width: 160px;" id="p.+?</li>'
    matches = re.findall(li_pattern, html)
    return matches
def get_info(li: str):
    """
    Extract post details from the source of one ``<li>`` element.

    :param li: source of a single ``<li>`` element
    :type li: str
    :return: list of ``(id, largeimg_url, width, height)`` tuples, one per
        match; every field is a string (the numeric ones are not converted).
        The list is empty when nothing matches.
    """
    # Raw string: the pattern contains regex escapes for digits, which are
    # invalid escape sequences in a normal string literal and trigger a
    # DeprecationWarning / SyntaxWarning on current Python versions.
    pattern = re.compile(
        r'id="p(\d+)" class=".+?img" href="(.+?)">.+?directlink-res">(\d+) x (\d+)</span>'
    )
    return pattern.findall(li)