-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprocess_jutge.py
executable file
·128 lines (93 loc) · 3.71 KB
/
process_jutge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/python3
from glob import glob
from os.path import basename,isdir,isfile
from os import mkdir
from shutil import copyfile,move
from tempfile import gettempdir
import argparse
# Command-line interface. Exactly one of --zip / --folder must be given.
parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group(required=True)
# The archive is only opened here so argparse validates that it exists; the
# rest of the script uses `args.zip.name`. Binary mode avoids wrapping a zip
# file in a text decoder.
group.add_argument('--zip', metavar='file.zip', type=argparse.FileType('rb'),
                   help='zip archive whose top-level folders are named by '
                        'problem code and contain *AC.<ext> sources')
group.add_argument('--folder', type=str, default='',
                   help='folder whose <code>.<ext> files are renamed to the '
                        'problem title')
parser.add_argument('--save-to', type=str, default='Problems_parsed',
                    help='directory that receives the sources copied from '
                         'the zip')
parser.add_argument('--delay', metavar='milliseconds', type=int, default=100,
                    help='pause inserted while processing the zip')
# store_true already defaults to False; no integer default is needed.
parser.add_argument('--no-download', action='store_true',
                    help='do not look up problem titles; name files by '
                         'problem code')
# NOTE: the misspelled option name is kept because it is the public interface.
parser.add_argument('--overwritte', action='store_true',
                    help='copy even if a file for the problem code already '
                         'exists in --save-to')
parser.add_argument('--cookie', type=str, default='',
                    help='PHPSESSID session cookie sent with title requests')
args = parser.parse_args()

# `sleep` is only needed when a delay was requested.
if args.delay > 0:
    from time import sleep

# The HTTP/HTML dependencies are only required when titles are downloaded.
if not args.no_download:
    from urllib.request import urlretrieve
    import httplib2
    from bs4 import BeautifulSoup, SoupStrainer
def getname(code):
    """Return the title of problem *code* from jutge.org as a file-name token.

    The problem page is fetched from ``https://jutge.org/problems/<code>``.
    When ``args.cookie`` is set it is sent as the ``PHPSESSID`` cookie.

    The page ``<title>`` is presumably of the form ``"<site> - <title>"``
    (TODO confirm): everything after the first ``-`` is kept, the single
    character following the dash is dropped, spaces become underscores and
    the first whitespace-delimited token is returned.

    Returns the string ``'Error'`` when the page has no ``<title>`` or
    nothing usable follows the dash; callers in this file compare the result
    against ``'Error'``. Network errors raised by ``httplib2`` propagate.
    """
    web = 'https://jutge.org/problems/{}'.format(code)
    http = httplib2.Http()

    # Only send custom headers when a session cookie was supplied.
    headers = None
    if args.cookie != '':
        headers = {"Cookie": "PHPSESSID={}".format(args.cookie),
                   "Accept": "text/plain"}
    # httplib2 returns (response metadata, body); only the body is parsed.
    _response, content = http.request(web, headers=headers)

    soup = BeautifulSoup(content, 'lxml')
    title_tag = soup.find('title')
    if title_tag is None:
        return 'Error'

    # Text after the first '-', minus the character right after the dash.
    after_dash = title_tag.text.partition('-')[2][1:]
    tokens = after_dash.replace(' ', '_').split()
    return tokens[0] if tokens else 'Error'
# --folder mode: rename every <code>.<ext> file in the folder to
# <problem title>.<ext>, then stop without running the zip processing below.
if args.folder != '':
    # Titles come from getname(), so this mode cannot run without downloads.
    # SystemExit with a message exits with status 1, as before, but says why.
    if args.no_download:
        raise SystemExit('--folder cannot be combined with --no-download')

    known_names = {}  # problem code -> title already fetched in this run
    cont = 0
    for prog in glob(args.folder + '/*.*'):
        try:
            # Names with more than one dot fail to unpack and are skipped.
            code, ext = basename(prog).split('.')
            # Only regular files shaped like a 9-character code plus a
            # 1-3 character extension are considered.
            if not (isfile(prog) and len(code) == 9 and 1 <= len(ext) <= 3):
                continue

            name = known_names.get(code)
            if name is None:
                name = getname(code)
            # 'Error' is what getname() yields for an unknown problem:
            # leave the file untouched and do not cache the failure.
            if name == 'Error':
                continue
            known_names[code] = name

            file_name = '{}/{}.{}'.format(args.folder, name, ext)
            # Never overwrite an existing file with the target name.
            if not isfile(file_name):
                print('Moving {} to {} ...'.format(prog, file_name))
                move(prog, file_name)
                cont += 1
        except Exception:
            # Best effort: one bad file must not stop the run. Ctrl-C and
            # SystemExit are deliberately not caught.
            print("Skipping {}...".format(prog))
    print("FINISHED, {} files processed".format(cont))
    raise SystemExit(0)
# --zip mode: extract the archive, then copy the last accepted ("AC") source
# of every problem folder into args.save_to.
from tempfile import TemporaryDirectory
from zipfile import ZipFile

# argparse opened the archive only to validate it; just its path is needed.
zip_path = args.zip.name
args.zip.close()

extensions = ['cc', 'c', 'hs', 'php', 'bf', 'py']
count = 0

# A fresh, self-cleaning temp directory: a fixed directory that is never
# removed makes mkdir() fail on every run after the first.
with TemporaryDirectory(prefix='process_jutge_') as extract_to:
    with ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_to)

    if not isdir(args.save_to):
        mkdir(args.save_to)

    # Each top-level entry of the archive is treated as one problem, named
    # by its problem code.
    for folder in glob(extract_to + '/*'):
        try:
            code = basename(folder)
            for ext in extensions:
                # glob order is not guaranteed, so sort before taking the
                # last match (presumably the most recent accepted
                # submission - TODO confirm the naming scheme).
                accepted = sorted(glob('{}/*AC.{}'.format(folder, ext)))
                if not accepted:
                    continue
                source = accepted[-1]

                # Use cpp over cc for c++ files
                out_ext = 'cpp' if ext == 'cc' else ext

                # NOTE(review): this check looks for files starting with the
                # problem code, but files saved under a downloaded title do
                # not match it unless the title starts with the code.
                already_saved = glob(
                    '{}/{}*.{}'.format(args.save_to, code, out_ext))
                if already_saved and not args.overwritte:
                    continue

                # Fall back to the code when the title is unavailable, to
                # avoid name collisions.
                name = code
                if not args.no_download:
                    fetched = getname(code)
                    if fetched != 'Error':
                        name = fetched

                file_name = '{}/{}.{}'.format(args.save_to, name, out_ext)
                print('Copying {} to {} ...'.format(source, file_name))
                copyfile(source, file_name)
                count += 1
                if args.delay > 0:
                    sleep(args.delay / 1000.0)
        except Exception:
            # Best effort: skip the problem folder that failed. Ctrl-C and
            # SystemExit are deliberately not caught.
            print('Skipping {}'.format(folder))

print('FINISHED; Added {} programs'.format(count))