-
Notifications
You must be signed in to change notification settings - Fork 1
/
titler.py
84 lines (73 loc) · 2.54 KB
/
titler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
import urllib2
import Alfred
import sys
import re
from bs4 import BeautifulSoup
from xml.sax.saxutils import escape
# query = sys.argv[1]
# Matches a leading URL scheme such as "http://" or "ftp://" (case-insensitive).
URL_SCHEME_RE = re.compile(r'^[a-z][a-z0-9.+-]*://', re.IGNORECASE)
# Matches "[title]url" and "- [title](url)": the leading "-", the single
# whitespace character after it, and the parentheses around the URL are each
# optional. Exposes the named groups "title" and "url".
# NOTE: "title" is greedy (it runs to the last "]"), and "url" stops at the
# first ")" it encounters.
MARKDOWN_LINK_RE = re.compile(r'-{0,1}\s{0,1}\[(?P<title>.*)\]\({0,1}(?P<url>[^\)]*)\){0,1}', re.IGNORECASE)
# Scheme prefix used for inputs that do not match URL_SCHEME_RE.
HTTP = 'http://'
def fixCoding():
    """Make UTF-8 the interpreter-wide default string encoding.

    Does nothing when the default encoding is already UTF-8. Otherwise
    reloads ``sys`` and calls ``sys.setdefaultencoding('UTF-8')``.
    """
    # Compare case-insensitively: the interpreter may report the encoding
    # as 'utf-8' rather than 'UTF-8', and a spurious mismatch would trigger
    # the reload hack below for no reason.
    if sys.getdefaultencoding().lower() in ('utf-8', 'utf8'):
        return
    # Python 2 idiom: reload(sys) is called first so that
    # sys.setdefaultencoding is available again before it is used.
    reload(sys)
    sys.setdefaultencoding('UTF-8')


# Applied once at import time, before any title formatting happens.
fixCoding()
def decodeUrl(q):
    """Return the URL embedded in a Markdown-style link.

    Recognised inputs (see ``MARKDOWN_LINK_RE``)::

        [{title}]{url}
        - [{title}]({url})

    Any input that does not start with such a link is returned unchanged.
    """
    found = MARKDOWN_LINK_RE.match(q)
    if found is None:
        # Not a link in one of the known formats: treat q as the URL itself.
        return q
    return found.group("url")
def getUrl(url):
    """Fetch a page and build Markdown link strings from its ``<title>``.

    ``url`` may be a plain URL or any input accepted by ``decodeUrl``;
    ``http://`` is prepended when it does not start with a URL scheme.

    Returns a list of three strings built from the page title and the URL
    reported by ``response.geturl()``::

        [title]url
        - [title](url)
        [title](url)

    Returns ``False`` when the page cannot be fetched or parsed, or when the
    resulting title is empty.
    """
    try:
        url = decodeUrl(url)
        if not URL_SCHEME_RE.match(url):
            url = HTTP + url
        req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
        response = urllib2.urlopen(req)
        try:
            html = response.read()
            # Read the URL once, while the response is still open.
            final_url = response.geturl()
        finally:
            # Release the connection even if reading the body fails.
            response.close()
        soup = BeautifulSoup(html)
        # A page without a <title> (or with an empty one) raises here and is
        # reported as a failure by the handler below.
        title = escape(soup.title.contents[0].strip())
        # Collapse newlines, tabs and repeated spaces inside the title.
        title = re.sub(r'\s+', ' ', title)
        if not title:
            return False
        return [
            "[{0}]{1}".format(title, final_url),
            "- [{0}]({1})".format(title, final_url),
            "[{0}]({1})".format(title, final_url),
        ]
    except Exception:
        # Best effort: any fetch/parse/format failure means "no result".
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False
# Script entry: resolve the query passed in by Alfred and emit result items.
handler = Alfred.Handler(args=sys.argv)
result = getUrl(handler.query)

# Manual test cases (uncomment one to try it):
# page without an html tag
# result = getUrl("http://learn.getchef.com/")
# title comes out garbled on a Big5-encoded page
# result = getUrl("http://www2.nsysu.edu.tw/csmlab/unix/vi_command.htm")
# page whose title needs escaping
# result = getUrl("http://jex.im/regulex/")
# parse the formats this script itself produces
# result = getUrl("[讓你快速搜尋 Facebook 塗鴉牆內容——QSearch 團隊專訪 - Inside 硬塞的網路趨勢觀察]http://www.inside.com.tw/2013/02/19/qsearch-interview")
# result = getUrl("- [讓你快速搜尋 Facebook 塗鴉牆內容——QSearch 團隊專訪 - Inside 硬塞的網路趨勢觀察](http://www.inside.com.tw/2013/02/19/qsearch-interview)")
# title containing \n and \t that must be replaced
# result = getUrl("https://www.youtube.com/playlist?list=PL5QDUc5gluoS5WglgXmZg6yX5p6X8OTrN")
# URL without a protocol
# result = getUrl("tw.yahoo.com")

if result:
    # One item per generated link format; the text is both title and argument.
    for entry in result:
        handler.add_new_item(title=entry, arg=entry, uid="#1")
else:
    handler.add_new_item(title="Page not found :(")
handler.push()