Skip to content

Commit

Permalink
[Bugfix:Plagiarism] Fix all versions bug (#72)
Browse files Browse the repository at this point in the history
* Fix multiple versions bug

* add config.json

* fix course

* Add placeholder file

* Fix tests(?)
  • Loading branch information
williamjallen authored Jan 29, 2022
1 parent a45457e commit 1fe7878
Show file tree
Hide file tree
Showing 31 changed files with 1,705 additions and 40 deletions.
8 changes: 5 additions & 3 deletions bin/process_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,11 @@ mkdir -p "${BASEPATH}/users"

############################################################################
# Run Lichen
./tokenize_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
./hash_all.py "$tmp_location" || { rm -rf "$tmp_location"; exit 1; }
./compare_hashes.out "$tmp_location" || { rm -rf "$tmp_location"; echo "${KILL_ERROR_MESSAGE}"; exit 1; }
{ # We still want to unzip files if an error occurs when running Lichen here
./tokenize_all.py "$tmp_location" &&
./hash_all.py "$tmp_location" &&
./compare_hashes.out "$tmp_location" || echo "${KILL_ERROR_MESSAGE}";
}

############################################################################
# Zip the results back up and send them back to the course's lichen directory
Expand Down
50 changes: 27 additions & 23 deletions compare_hashes/compare_hashes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ int main(int argc, char* argv[]) {
std::unordered_set<hash> provided_code;
// stores all hashes from other gradeables
std::unordered_map<hash, std::unordered_map<user_id, std::vector<HashLocation>>> other_gradeables;
// stores the highest match for every student, used later for generating overall_rankings.txt
std::unordered_map<user_id, std::pair<int, Score>> highest_matches;
// stores the matches for every student, used later for generating overall_rankings.txt
std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>> highest_matches;
// keeps track of max matching hashes across all submissions, used for calculation of ranking score
unsigned int max_hashes_matched = 0;

Expand Down Expand Up @@ -283,7 +283,7 @@ int main(int argc, char* argv[]) {
}
}

// if the hash doesn't match any of the provided code's hashes, try to find matched between other students
// if the hash doesn't match any of the provided code's hashes, try to find matches between other students
if (!provided_match_found) {
// look up that hash in the all_hashes table, loop over all other students that have the same hash
std::unordered_map<std::string, std::vector<HashLocation>> occurences = all_hashes[hash_itr->first];
Expand Down Expand Up @@ -333,23 +333,6 @@ int main(int argc, char* argv[]) {
continue;
}

// Save this submissions highest percent match for later when we generate overall_rankings.txt
float percentMatch = (*submission_itr)->getPercentage();
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
Score submission_score(totalMatchingHashes, percentMatch);
if (max_hashes_matched < totalMatchingHashes) {
max_hashes_matched = totalMatchingHashes;
}

std::unordered_map<user_id, std::pair<int, Score> >::iterator highest_matches_itr = highest_matches.find((*submission_itr)->student());
std::pair<int, Score> new_pair = {(*submission_itr)->version(), submission_score};
if (highest_matches_itr == highest_matches.end()) {
highest_matches.insert({(*submission_itr)->student(), new_pair});
}
else if (submission_score > highest_matches_itr->second.second) {
highest_matches_itr->second = new_pair;
}

// =========================================================================
// Write matches.json file

Expand Down Expand Up @@ -563,6 +546,19 @@ int main(int argc, char* argv[]) {
}
}

// =========================================================================
// Record this submission's match score; the best version per student is picked later when we generate overall_rankings.txt
float percentMatch = (*submission_itr)->getPercentage();
unsigned int totalMatchingHashes = (*submission_itr)->getMatchCount();
Score submission_score(totalMatchingHashes, percentMatch);
if (max_hashes_matched < totalMatchingHashes) {
max_hashes_matched = totalMatchingHashes;
}

std::pair<version_number, Score> new_pair = {(*submission_itr)->version(), submission_score};
highest_matches[(*submission_itr)->student()].push_back(new_pair);
// =========================================================================

std::sort(student_ranking.begin(), student_ranking.end(), ranking_sorter);

// create the directory and a file to write into
Expand Down Expand Up @@ -609,10 +605,18 @@ int main(int argc, char* argv[]) {
// take the map of highest matches and convert it to a vector so we can sort it
// by percent match and then save it to a file
std::vector<StudentRanking> ranking;
for (std::unordered_map<user_id, std::pair<int, Score> >::iterator itr
for (std::unordered_map<user_id, std::vector<std::pair<version_number, Score>>>::iterator itr
= highest_matches.begin(); itr != highest_matches.end(); ++itr) {
ranking.push_back(StudentRanking(itr->first, itr->second.first, "", itr->second.second));
ranking[ranking.size()-1].score.calculateScore(max_hashes_matched);

std::pair<version_number, Score> best_score = itr->second.front();
best_score.second.calculateScore(max_hashes_matched);
for (unsigned int i=0; i < itr->second.size(); i++) {
itr->second[i].second.calculateScore(max_hashes_matched);
if (itr->second[i].second > best_score.second) {
best_score = itr->second[i];
}
}
ranking.push_back(StudentRanking(itr->first, best_score.first, "", best_score.second));
}

std::sort(ranking.begin(), ranking.end(), ranking_sorter);
Expand Down
1 change: 1 addition & 0 deletions compare_hashes/score.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define SCORE_H

#include <cassert>
#include <string>

typedef int location_in_submission;
typedef unsigned int hash;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"semester": "f21",
"course": "plagiarism",
"gradeable": "multiple_versions",
"config_id": 1,
"version": "all_versions",
"regex": [
""
],
"regex_dirs": [
"submissions"
],
"language": "plaintext",
"threshold": 10,
"hash_size": 4,
"other_gradeables": [],
"ignore_submissions": []
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Beginning Lichen run: 2021-12-21 17:20:31
CONCATENATE ALL...done in 0 seconds, 949 Bytes concatenated
TOKENIZE ALL...done in 0 seconds
HASH ALL...done in 0 seconds
COMPARE HASHES...finished loading in 0 seconds
hash walk: 33% complete
hash walk: 66% complete
hash walk: 100% complete
finished walking in 0 seconds
COMPARE HASHES done in 0 seconds
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
aphacker 2 81.4% 35
bitdiddle 1 81.4% 35
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
ccbfc51b
46d2e902
0a1bc040
6d14f9b3
a5d513dd
5e030a24
c715d526
fd3fa0fe
b1917b6c
ccbfc51b
fcf8964c
6afa4117
25a42a47
2ac066f5
c6097572
6011cbf5
adefe73d
36182b9f
36d719a0
fe129c06
e44ef48d
6bb90c04
083a9efd
93d49734
0f905a05
8bfb058d
06410254
61b171ee
6c920afa
05660ab4
30a548ac
b38f50f3
2997d7c5
297c601f
e8ccd482
ae6d442f
4de258e3
fae8aa98
24ac3d5d
fbdad65f
fc98ba6b
44bbaa49
83df01b7
964fade5
2ea0ba40
5494f32a
e248b1d9
528feb65
27d1db1f
c552988d
cf65191e
eff2064e
0847585b
c64da9e5
7b3dc1c1
045fe7d1
50ac87da
f5f088e7
ecb2eef0
7d75f52c
8576ec09
497a431b
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
[
{
"end": 4,
"others": [
{
"matchingpositions": [
{
"end": 4,
"start": 1
},
{
"end": 13,
"start": 10
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 1,
"type": "match"
},
{
"end": 6,
"others": [
{
"matchingpositions": [
{
"end": 6,
"start": 2
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 2,
"type": "match"
},
{
"end": 12,
"others": [
{
"matchingpositions": [
{
"end": 12,
"start": 8
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 8,
"type": "match"
},
{
"end": 13,
"others": [
{
"matchingpositions": [
{
"end": 4,
"start": 1
},
{
"end": 13,
"start": 10
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 10,
"type": "match"
},
{
"end": 14,
"others": [
{
"matchingpositions": [
{
"end": 14,
"start": 11
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 11,
"type": "match"
},
{
"end": 20,
"others": [
{
"matchingpositions": [
{
"end": 32,
"start": 26
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 14,
"type": "match"
},
{
"end": 34,
"others": [
{
"matchingpositions": [
{
"end": 46,
"start": 34
}
],
"source_gradeable": "f21__plagiarism__multiple_versions",
"username": "bitdiddle",
"version": 1
}
],
"start": 22,
"type": "match"
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bitdiddle 1 f21__plagiarism__multiple_versions 32.79%
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
==== submission_1.txt ====
This file is meant to represent the first submission of three in a small test involving users with multiple submissions. This submission is a little longer than the other two submissions because we want to test that the system still works when the highest matching version is the second version.
Loading

0 comments on commit 1fe7878

Please sign in to comment.