-
Notifications
You must be signed in to change notification settings - Fork 0
/
extract_zylabs.py
115 lines (95 loc) · 4.63 KB
/
extract_zylabs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
#
# 1. Download all submissions from the ZyLab. Don't extract. Rename as `lab3A.zip` or whatever.
# 2. Download an Assignment Report from ZyBooks (it doesn't matter which), save as report.csv. This is used only for school emails.
# 3. Run this script from the directory that includes the zipfile. It extracts the zip files and names them based on all partners.
# 4. See credit_partners.py if you want to automatically update the report spreadsheets to credit partners in pair programming.
import zipfile
import pathlib
import io
# All paths below are resolved relative to the directory the script is run from.
working_dir = pathlib.Path('.')

# Load the roster of student ids (the part of each school email before the '@')
# from the assignment report.  This is deliberately best-effort: if pandas or
# report.csv is unavailable the script still extracts submissions, it just
# cannot detect credited partners.
try:
    import pandas as pd
    report = pd.read_csv('report.csv')
    # Drop rows without an email (NaN would otherwise end up in the id list) and
    # lowercase, because ids are later matched against lowercased text.
    school_emails = report['School email'].dropna().str.lower()
    all_student_ids = list(school_emails.str.split('@', expand=True).iloc[:, 0])
except Exception:
    # `Exception` rather than a bare `except:` so that Ctrl-C and SystemExit
    # still propagate instead of being reported as a missing report.
    all_student_ids = []
    print("Warning: failed to load school emails from report.csv; partner-crediting functionality is disabled.")
    print()
def get_submissions_for_assignment(zip_filename):
    """Parse every submission contained in one assignment zipfile.

    Args:
        zip_filename: Path of the assignment zipfile to open.

    Returns:
        A list with one ``get_data_for_submission`` result per entry of the
        zipfile, in ``namelist()`` order.
    """
    # The solutions zipfile contains zipfiles for each student submission.
    # The names are based on the student who *submitted*, but they may also have had a *partner*.
    # The `with` block closes the archive once every entry has been read.
    with zipfile.ZipFile(zip_filename) as all_students_zipfile:
        return [
            get_data_for_submission(all_students_zipfile, student_zipfile_name)
            for student_zipfile_name in all_students_zipfile.namelist()
        ]
def get_data_for_submission(all_students_zipfile, student_zipfile_name):
    """Read one student's submission out of the assignment zipfile.

    Args:
        all_students_zipfile: Open ``zipfile.ZipFile`` holding one nested
            zipfile per submission.
        student_zipfile_name: Name of the nested zipfile inside it; it must
            split into ``<name>_<email>_<date>_<time>`` plus a four-character
            extension (presumably ``.zip``).

    Returns:
        A dict with the keys ``name``, ``student_id``, ``date``, ``time``,
        ``files`` (member name -> text decoded as UTF-8) and ``other_authors``
        (set of roster ids found in the files; the submitter's own id is
        removed when a roster is loaded).

    Raises:
        ValueError: If the entry name does not split into four parts.
        AssertionError: If the derived student id is 10 characters or longer.
        UnicodeDecodeError: If a submitted file is not valid UTF-8.
    """
    # Names may have multiple parts, so split from the end.
    name, email, date, time = student_zipfile_name[:-4].rsplit('_', 3)

    # Try to normalize the email address into a user id.
    email = email.lower()
    if email.endswith('calvin.edu'):
        # Also drop an '@' separator if the filename kept one, so the id has
        # the same form as the roster ids (the text before the '@').
        email = email[:-len('calvin.edu')].rstrip('@')
    student_id = email
    assert len(student_id) < 10, (
        f"Unexpected student id {student_id!r} parsed from {student_zipfile_name!r}"
    )

    # Open this student's zipfile.  It is nested inside the outer archive, so
    # read it fully into memory and reopen it from a buffer.
    with all_students_zipfile.open(student_zipfile_name) as nested_file:
        this_student_zipfile_data = nested_file.read()

    submission_files = {}
    with zipfile.ZipFile(io.BytesIO(this_student_zipfile_data)) as this_student_zipfile:
        for name_in_user_file in this_student_zipfile.namelist():
            contents = this_student_zipfile.read(name_in_user_file).decode('utf8')
            submission_files[name_in_user_file] = contents

    # Every roster id mentioned in any submitted file (initially including the
    # submitter, if they credited themselves).
    other_authors = {
        author
        for content in submission_files.values()
        for author in get_credited_authors(content)
    }
    if all_student_ids:
        if student_id in other_authors:
            other_authors.remove(student_id)
        else:
            print("Submitting student didn't include their id:", student_id)
        if other_authors:
            # Sorted so the message is stable from run to run.
            print(f"{student_id} also credited {', '.join(sorted(other_authors))}")

    return dict(
        name=name.replace('_', ' '),
        student_id=student_id,
        date=date,
        time=time,
        files=submission_files,
        other_authors=other_authors,
    )
def get_credited_authors(contents, be_conservative=False):
    """Find which roster ids are mentioned in the text of a submitted file.

    An id counts as mentioned only when it appears as a whole token, i.e. not
    directly preceded or followed by a letter or digit.  This keeps a short id
    such as ``jd2`` from being credited because ``jd23`` appears in the file.
    Matching is done against the lowercased text.

    Args:
        contents: Text of one submitted file.
        be_conservative: When true, only trust the result if the number of ids
            found equals the number of ``author:`` markers in the text.

    Returns:
        A list of matching ids in roster order.  In conservative mode, ``None``
        when no id was found or the count does not match the markers.
    """
    import re  # local import, matching the file's other in-place imports

    lower_contents = contents.lower()
    authors = []
    for student_id in all_student_ids:
        # Skip roster entries that are not usable ids (e.g. a missing email
        # loaded as NaN, or an empty string).
        if not isinstance(student_id, str) or not student_id:
            continue
        pattern = '(?<![a-z0-9])' + re.escape(student_id) + '(?![a-z0-9])'
        if re.search(pattern, lower_contents):
            authors.append(student_id)

    if not be_conservative:
        return authors
    if authors and len(authors) == lower_contents.count('author:'):
        return authors
    # Otherwise be conservative; we don't know the authors.
    return None
# For each lab solutions zipfile downloaded from ZyLabs...
all_zipfiles = list(working_dir.glob('*.zip'))
assert len(all_zipfiles) < 10, "Lots of zipfiles, are you sure this is the right directory?"
if not all_zipfiles:
    print("No zipfiles found!")

# Maps each assignment (zipfile stem) to a list of author groups, one group
# per submission: the submitter's id followed by the sorted credited partners.
partners = {}
for zip_filename in sorted(all_zipfiles):
    stem = zip_filename.stem
    print("\nExtracting Assignment:", stem)
    submissions = get_submissions_for_assignment(zip_filename)

    # Write 'lab1_0.zip' contents to the 'lab1_0' directory, etc.
    out_dir = working_dir / stem
    out_dir.mkdir(exist_ok=True)

    partners[stem] = partners_for_this_part = []
    all_students_seen = set()
    for submission in submissions:
        all_authors = [submission['student_id']] + sorted(submission['other_authors'])
        partners_for_this_part.append(all_authors)
        all_students_seen.update(all_authors)

        # One directory per submission, named after everyone credited on it.
        submission_dirname = '_'.join(all_authors)
        submission_dir = out_dir / submission_dirname
        submission_dir.mkdir(exist_ok=True, parents=True)
        submission_root = submission_dir.resolve()
        for name_in_user_file, contents in submission['files'].items():
            # Directory entries carry no data; their files are listed separately.
            if name_in_user_file.endswith('/'):
                continue
            # Member names come from student-supplied zipfiles, so refuse any
            # path (absolute, or using '..') that would land outside this
            # submission's directory.
            target = (submission_root / name_in_user_file).resolve()
            if submission_root not in target.parents:
                print("Skipping file with unsafe path:", name_in_user_file)
                continue
            # Members may live in subdirectories that do not exist yet.
            target.parent.mkdir(exist_ok=True, parents=True)
            target.write_text(contents, encoding="utf-8")

    # Report roster students who neither submitted nor were credited.
    missing_students = [student_id for student_id in all_student_ids if student_id not in all_students_seen]
    if missing_students:
        print(f"{stem}: Didn't see anything from students: ", ', '.join(missing_students))

# Save the author groups of every assignment; the `with` block makes sure the
# file is flushed and closed.
import json
with open('partners.json', 'w') as partners_file:
    json.dump(partners, partners_file)