-
Notifications
You must be signed in to change notification settings - Fork 0
/
backend.py
executable file
·106 lines (94 loc) · 3.55 KB
/
backend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
import ast
import logging
import re
import sys
import tempfile
import urllib
from pprint import pprint

import redis
import simplejson
# Module-level Redis client used by store_song_data() and rep_sort().
# redis.Redis() is called with no arguments, so the library's default
# connection settings apply.
r = redis.Redis()
# NOTE(review): the return value is discarded -- presumably this is a
# fail-fast connectivity check that runs at import time; confirm before
# removing or moving it.
r.info()
def _parse_bump_data(page_html):
    """Extract one (song_id, rep, first_num, second_num, times) tuple per bump.

    All fields are strings captured from the markup.  ``times`` is the N of
    an optional ``<b>xN</b>`` marker found in the trailing part of the
    parenthesised section, and defaults to ``'1'`` when no marker is present.
    """
    # Strip every whitespace character first so the non-greedy '.' wildcards
    # can span what were separate lines in the original markup.
    compact = ''.join(page_html.split())
    matches = re.findall(
        r'song_metadata_(.*?)".*?bump_report.*?>.*?<b>\+(.*?)rep</b>'
        r'.*?\(.*?(\d+).*?(\d+)(.*?)\)',
        compact, re.MULTILINE | re.IGNORECASE)
    enriched = []
    for song_id, rep, first_num, second_num, tail in matches:
        times = re.search(r'<b>x(\d+)</b>', tail)
        enriched.append(
            (song_id, rep, first_num, second_num,
             times.group(1) if times else '1'))
    return enriched


def _parse_page_count(page_html):
    """Return the integer text of the last link before 'nextpage', else 1."""
    if 'nextpage' not in page_html:
        return 1
    # NOTE(review): the greedy leading '.*' selects the last <a>...</a> that
    # precedes the last 'nextpage' -- presumably the highest page number in
    # the pager; confirm against the live markup.
    match = re.search(r'.*<a[^>]+>(.*?)</a>.*nextpage',
                      ''.join(page_html.split('\n')))
    if not match:
        return 1
    try:
        return int(match.group(1))
    except ValueError:
        # The link text was not a number; fall back to a single page.
        return 1


def get_song_data(username, page):
    """Scrape one page of a user's bump statistics from thesixtyone.com.

    Args:
        username: account name interpolated into the stats URL.
        page: page number (int or str) interpolated into the stats URL.

    Returns:
        A 3-tuple ``(songs_by_id, bumps, numpages)``:
        ``songs_by_id`` is the ``'by_id'`` entry of the JSON object embedded
        on the ``t61.song.data`` line; ``bumps`` is a list of 5-tuples of
        strings (see ``_parse_bump_data``); ``numpages`` is an int that
        defaults to 1 when no page count can be read from the page.

    Raises:
        IndexError: if no line of the page contains ``t61.song.data``.
    """
    statspage = 'http://www.thesixtyone.com/%s/stats/bumps/%s/' % (username, page)
    response = urllib.urlopen(statspage)
    try:
        sp = response.read()
    finally:
        # Release the connection explicitly instead of waiting for GC.
        response.close()

    marker = 't61.song.data'
    for line in sp.split('\n'):
        if marker in line:
            songdata_raw = line
            break
    else:
        # IndexError is kept because that is what the original ``[...][0]``
        # lookup raised; it now carries a message.
        raise IndexError('no %s line found in %s' % (marker, statspage))

    # +16 skips the 13-character marker plus 3 more characters (presumably
    # ' = '), and the -1 drops the final character (presumably a trailing
    # ';') -- TODO confirm against the page source.
    songdata_json = songdata_raw[songdata_raw.find(marker) + 16:-1]
    songdata = simplejson.loads(songdata_json)

    enriched = _parse_bump_data(sp)
    logging.debug('bump data: %s', enriched)

    numpages = _parse_page_count(sp)
    logging.info('scrapped %s songs, %s bumpdata, %s numpages',
                 len(songdata['by_id']), len(enriched), numpages)
    return songdata['by_id'], enriched, numpages
def store_song_data(username, songdata, bumpdata):
    """Persist scraped songs and bump statistics in Redis.

    Keys written (all prefixed with ``username``):
        ``<username>.song.<id>``         repr() of the song's metadata
        ``<username>.song.ids``          set of every stored song id
        ``<username>.songs.reps.<id>``   integer rep value
        ``<username>.songs.stats.<id>``  repr() of a 3-tuple of strings

    Args:
        username: prefix for every key.
        songdata: mapping of song id to that song's metadata.
        bumpdata: iterable of sequences whose items 0..4 are the song id,
            the rep value (anything ``int()`` accepts), and three stats
            fields.

    Raises:
        ValueError: if a rep value cannot be converted with ``int()``.
    """
    for song_id, song in songdata.items():
        # Serialise explicitly: rep_sort() parses this text back as a Python
        # literal.  Redis clients that refuse non-scalar values would
        # otherwise reject the raw dict.
        r.set('%s.song.%s' % (username, song_id), repr(song))
        r.sadd('%s.song.ids' % username, song_id)
    for bump in bumpdata:
        song_id = bump[0]
        r.set('%s.songs.reps.%s' % (username, song_id), int(bump[1]))
        r.set('%s.songs.stats.%s' % (username, song_id),
              repr((bump[2], bump[3], bump[4])))
def rep_sort(username):
    """Return the user's stored songs ordered by rep, highest first.

    The ordering is done by Redis: the ``<username>.song.ids`` set is sorted
    by the ``<username>.songs.reps.*`` keys in descending order, and the
    matching ``<username>.song.*`` values are fetched.

    Args:
        username: key prefix the songs were stored under.

    Returns:
        A list of tuples ``(rep, name, artist, score, key, id,
        photo_base_url, artist_username, hearts)``.  ``rep`` is the value as
        returned by Redis (not converted), and ``hearts`` is the third item
        of the stored stats tuple.  Returns ``None`` if Redis answers with a
        ``ResponseError``.
    """
    res = []
    try:
        stored_songs = r.sort('%s.song.ids' % username,
                              by='%s.songs.reps.*' % username,
                              get='%s.song.*' % username,
                              desc=True)
        for raw_song in stored_songs:
            if raw_song is None:
                # An id is in the set but its song record is missing.
                logging.warning('missing song record for user %s', username)
                continue
            # SECURITY: these strings originate from scraped web content, so
            # they are parsed with ast.literal_eval (literals only) rather
            # than eval(), which would execute arbitrary expressions.
            song = ast.literal_eval(raw_song)
            song_id = song['id']
            rep = r.get('%s.songs.reps.%s' % (username, song_id))
            raw_stats = r.get('%s.songs.stats.%s' % (username, song_id))
            if raw_stats is None:
                # No bump stats were stored for this song; skip it rather
                # than fail on the missing value.
                logging.warning('no stats stored for song %s of user %s',
                                song_id, username)
                continue
            stats = ast.literal_eval(raw_stats)
            res.append((rep,
                        song['name'],
                        song['artist'],
                        song['score'],
                        song['key'],
                        song_id,
                        song['photo_base_url'],
                        song['artist_username'],
                        stats[2]))
    except redis.ResponseError:
        logging.error('no songs stored for user %s', username)
        return None
    return res
if __name__ == '__main__':
try:
page = sys.argv[1]
print '----------------'
print 'processing page ',page
print '----------------'
songdata,bumpdata,numpages = get_song_data('durdn',page=page)
store_song_data('durdn',songdata,bumpdata)
print rep_sort('durdn')
except IndexError:
print '* processing page ',1
songdata,bumpdata,numpages = get_song_data('durdn',page=1)
store_song_data('durdn',songdata,bumpdata)
for p in range(numpages-1):
print '* processing page ',p+2
songdata,bumpdata,numpages = get_song_data('durdn',page=p+2)
store_song_data('durdn',songdata,bumpdata)
print rep_sort('durdn')