forked from tianxianbaobao/UCS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
login.py
executable file
·127 lines (106 loc) · 4.2 KB
/
login.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# -*- coding: utf-8 -*-
"""
Date : 2015/09/25 20:21:53
FileName : login.py
Author : septicmk
"""
from HttpClient import *
import re
import logging
import ConfigParser
import sys
# Python 2 only: sys.setdefaultencoding() is removed from the sys namespace
# during interpreter start-up, so the module has to be reloaded to reach it.
reload(sys)
# Switch the process-wide default string encoding to UTF-8.
sys.setdefaultencoding('utf-8')
# Route all records of level DEBUG and above to the file UCS.log (path is
# relative to the current working directory).
logging.basicConfig(
filename='UCS.log',
level=logging.DEBUG,
# Record layout: timestamp, source file name, line number, level, message.
format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
# Timestamp layout, e.g. "Fri, 25 Sep 2015 20:21:53".
datefmt='%a, %d %b %Y %H:%M:%S',
)
def check_existed(path):
    """Return True when *path* exists on the filesystem, False otherwise."""
    from os.path import exists
    return exists(path)
def get_revalue(html, rex, er, ex):
    """Return the first capture group of the first match of *rex* in *html*.

    html -- text to search.
    rex  -- regular expression with at least one capture group.
    er   -- message that is logged when nothing matches.
    ex   -- truthy: log *er* as an error and raise TypeError(er);
            falsy: log *er* as a warning and return ''.
    """
    match = re.search(rex, html)
    if match is not None:
        return match.group(1)
    if not ex:
        # Lenient mode: report the miss and hand back an empty string.
        logging.warning(er)
        return ''
    # Strict mode: report the miss and abort the caller.
    logging.error(er)
    raise TypeError(er)
class Course:
    """Log in to the UCAS SEP portal and download the resources of each course.

    The user name, password and download directory are read from the
    ``USER`` section of a config file (options ``usrname``, ``passwd`` and
    ``savedir``).
    """

    def __init__(self, config_path='./ucs.config'):
        """Create the HTTP client and load the user settings.

        config_path -- config file to read; defaults to './ucs.config'.
        """
        self.hosturl = 'http://sep.ucas.ac.cn'
        self.loginurl = 'http://sep.ucas.ac.cn/slogin'
        self.req = HttpClient()
        config = ConfigParser.ConfigParser()
        config.read(config_path)
        self.usrname = config.get('USER', 'usrname')
        self.passwd = config.get('USER', 'passwd')
        # Despite its name this holds the download root ("savedir").
        self.pwd = config.get('USER', 'savedir')

    def login(self):
        """Sign in, enter the course site and download every listed course.

        Raises TypeError (from get_revalue) when an expected token is
        missing from one of the fetched pages.
        """
        # The host page is fetched before the credentials are posted
        # (presumably to pick up session cookies -- TODO confirm in HttpClient).
        self.req.Get(self.hosturl)
        postData = {'userName': self.usrname,
                    'pwd': self.passwd,
                    'sb': 'sb'}
        self.req.Post(self.loginurl, postData)
        # NOTE(review): the login response is not inspected, so this line is
        # logged even when the credentials were rejected.
        logging.info('login success')

        course_index = 'http://sep.ucas.ac.cn/portal/site/16'
        html = self.req.Get(course_index)
        Identity = get_revalue(html, r'Identity=(.+?)"', 'get Identity error', 1)
        logging.debug("Identity=" + Identity)

        html = self.req.Get("http://course.ucas.ac.cn/portal/plogin?Identity={0}".format(Identity))
        session = get_revalue(html, r'session=(.+?)&', 'get session error', 1)
        _mid = get_revalue(html, r'_mid=(.+?)"', 'get mid error', 1)
        guid = get_revalue(html, r'guid=(.+?)"', 'get guid error', 1)
        logging.debug("session=" + session)
        logging.debug("_mid=" + _mid)
        logging.debug("guid=" + guid)

        html = self.req.Get("http://course.ucas.ac.cn/portal?sakai.session={0}&_mid={1}".format(session, _mid))
        course_urls = re.findall(r'http://course.ucas.ac.cn/portal/site/\d+', html)

        # findall() returns one entry per occurrence; visit each course only
        # once, in the order it first appears on the page.
        visited = set()
        for course_url in course_urls:
            if course_url in visited:
                continue
            visited.add(course_url)
            # NOTE(review): a TypeError raised for one course propagates and
            # stops the remaining courses from being processed.
            self.get_resource(course_url)

    def get_resource(self, url):
        """Find the resource listing of the course at *url* and download it.

        Raises TypeError (from get_revalue) when the resources link, the
        tool id or the title cannot be found.
        """
        import HTMLParser

        html = self.req.Get(url)
        res_url = get_revalue(html, r'class="icon-sakai-resources" href="(.+?)"', 'get resource error', 1)
        logging.debug("res_url= " + res_url)

        html = self.req.Get(res_url)
        tool_id = get_revalue(html, r'http://course.ucas.ac.cn/portal/tool-reset/(.+?)/', 'what the fuxk error', 1)
        html = self.req.Get("http://course.ucas.ac.cn/portal/tool-reset/{0}/?panel=Main".format(tool_id))
        # Decode HTML entities so the title and file URLs are plain text.
        html = HTMLParser.HTMLParser().unescape(html)

        title = get_revalue(html, r'<img src =.*?/>([\s\S]+?)</h3>', 'get title error', 1).strip().replace(' ', '_')
        logging.debug(title)

        # Drop duplicate links; sorting keeps the download and log order
        # stable between runs.
        res = sorted(set(re.findall(r'http://course.ucas.ac.cn/access/content/group/[^"]+', html)))
        self.download(title, res)
        logging.debug(str(res))

    def download(self, title, res):
        """Download every URL in *res* into the directory <savedir>/<title>.

        title -- course title, used as the sub-directory name.
        res   -- iterable of file URLs.

        Files that already exist locally and copyright-protected entries
        are skipped. Raises TypeError (from get_revalue) when a URL has no
        final path component.
        """
        import os

        print('linking... ' + title)
        logging.info('linking... ' + title)

        course_dir = os.path.join(self.pwd, title)
        # makedirs() also creates the save root when it is missing.
        if not os.path.exists(course_dir):
            os.makedirs(course_dir)

        for file_url in res:
            # The file name is the last path component of the URL.
            name = get_revalue(file_url, r'([^/]+?)$', 'get name error', 1).replace(' ', '_')
            if 'copyrightAlertWindow' in name:
                print('contents is protected by COPYRIGHT, failed to download')
                continue
            target = os.path.join(course_dir, name)
            if check_existed(target):
                logging.info(name + ' already exists, skip')
                continue
            print('downloading ' + name)
            logging.info('downloading ' + name)
            self.req.Download(file_url, target)
if __name__ == '__main__':
    # Script entry point: build the client and run the full
    # login-and-download pass.
    Course().login()