Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CONCD-824 error in counting #2475

Merged
merged 7 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 68 additions & 60 deletions concordia/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@

import pytesseract
from django.conf import settings
from django.contrib.auth.models import User
from django.contrib.auth.models import BaseUserManager, User
from django.core import signing
from django.core.cache import cache
from django.core.exceptions import ValidationError
from django.core.validators import RegexValidator
from django.db import connection, models
from django.db import models
from django.db.models import Count, ExpressionWrapper, F, JSONField, Q
from django.db.models.functions import Round
from django.db.models.signals import post_save
Expand All @@ -27,8 +27,10 @@

# Overwrite the private ``_unique`` flag stored on the built-in User model's
# ``email`` field. NOTE(review): presumably this makes Django treat e-mail
# addresses as unique without a custom user model -- confirm.
User._meta.get_field("email").__dict__["_unique"] = True

# Fixed time spans shared by the code below.
ONE_MINUTE = datetime.timedelta(minutes=1)
ONE_DAY = datetime.timedelta(days=1)
# NOTE(review): this is evaluated once, when the module is imported, so it is
# a fixed instant rather than a rolling "24 hours before now". Anything that
# uses it as a default argument keeps that import-time value for the life of
# the process.
ONE_DAY_AGO = timezone.now() - ONE_DAY
# Default ``threshold`` for the incident counters: the number of actions
# falling inside one 60-second window that registers as an incident.
THRESHOLD = 3


def resource_file_upload_path(instance, filename):
Expand All @@ -38,6 +40,28 @@ def resource_file_upload_path(instance, filename):
return time.strftime(path)


class ConcordiaUserManager(BaseUserManager):
    """Manager that collects per-user incident counts for regular users."""

    def review_incidents(self):
        """Return ``{user id: review incident count}`` for flagged users.

        Every user that is neither staff nor a superuser is asked for its
        own ``review_incidents()`` total; users whose total is zero are
        left out of the result.
        """
        regular_users = self.get_queryset().filter(
            is_superuser=False, is_staff=False
        )
        tallies = (
            (member.id, member.review_incidents()) for member in regular_users
        )
        return {user_id: total for user_id, total in tallies if total > 0}

    def transcribe_incidents(self):
        """Return ``{user id: transcribe incident count}`` for flagged users.

        Every user that is neither staff nor a superuser is asked for its
        own ``transcribe_incidents()`` total; users whose total is zero are
        left out of the result.
        """
        regular_users = self.get_queryset().filter(
            is_superuser=False, is_staff=False
        )
        tallies = (
            (member.id, member.transcribe_incidents())
            for member in regular_users
        )
        return {user_id: total for user_id, total in tallies if total > 0}


class ConcordiaUser(User):
# This class is a simple proxy model to add
# additional user functionality to, without changing
Expand Down Expand Up @@ -72,6 +96,48 @@ def get_email_reconfirmation_key(self):
def validate_reconfirmation_email(self, email):
    """Return whether ``email`` equals the address awaiting reconfirmation.

    The reference address is whatever ``get_email_for_reconfirmation()``
    returns for this user.
    """
    pending_address = self.get_email_for_reconfirmation()
    return email == pending_address

def review_incidents(self, start=None, threshold=THRESHOLD):
    """Count bursts of rapid reviewing performed by this user.

    The accept and reject timestamps of every transcription this user
    reviewed since ``start`` are merged and sorted. Each timestamp is then
    treated as the opening of a window: if ``threshold`` review actions
    (the opening one included) fall within 60 seconds of it, that window
    counts as one incident. Windows opened by different timestamps may
    overlap, so a long burst can register several incidents.

    Args:
        start: Earliest timestamp to consider. Defaults to 24 hours
            before the moment of the call.
        threshold: Number of actions inside one window that makes it an
            incident.

    Returns:
        The number of incidents found, as an int.
    """
    if start is None:
        # Resolve the default per call. A module-level "one day ago" is
        # computed once at import and goes stale in a long-lived process.
        start = timezone.now() - ONE_DAY

    recent_accepts = Transcription.objects.filter(
        accepted__gte=start, reviewed_by=self
    ).values_list("accepted", flat=True)
    recent_rejects = Transcription.objects.filter(
        rejected__gte=start, reviewed_by=self
    ).values_list("rejected", flat=True)
    timestamps = sorted(list(recent_accepts) + list(recent_rejects))

    incidents = 0
    total = len(timestamps)
    for i in range(total):
        count = 1
        for j in range(i + 1, total):
            # total_seconds() covers the whole gap; ``.seconds`` drops the
            # days component, so gaps over a day could look shorter than
            # a minute.
            if (timestamps[j] - timestamps[i]).total_seconds() > 60:
                # Sorted input: every later timestamp is further away.
                break
            count += 1
            if count == threshold:
                incidents += 1
                break
    return incidents

def transcribe_incidents(self, start=None, threshold=THRESHOLD):
    """Count bursts of rapid transcription submission by this user.

    The ``submitted`` timestamps of this user's transcriptions since
    ``start`` are read in ascending order. Each timestamp is then treated
    as the opening of a window: if ``threshold`` submissions (the opening
    one included) fall within 60 seconds of it, that window counts as one
    incident. Windows opened by different timestamps may overlap, so a
    long burst can register several incidents.

    Args:
        start: Earliest timestamp to consider. Defaults to 24 hours
            before the moment of the call.
        threshold: Number of submissions inside one window that makes it
            an incident.

    Returns:
        The number of incidents found, as an int.
    """
    if start is None:
        # Resolve the default per call. A module-level "one day ago" is
        # computed once at import and goes stale in a long-lived process.
        start = timezone.now() - ONE_DAY

    # Materialise once so the index lookups below work on a plain list
    # rather than on a queryset.
    timestamps = list(
        Transcription.objects.filter(submitted__gte=start, user=self)
        .order_by("submitted")
        .values_list("submitted", flat=True)
    )

    incidents = 0
    total = len(timestamps)
    for i in range(total):
        count = 1
        for j in range(i + 1, total):
            # total_seconds() covers the whole gap; ``.seconds`` drops the
            # days component, so gaps over a day could look shorter than
            # a minute.
            if (timestamps[j] - timestamps[i]).total_seconds() > 60:
                # Sorted input: every later timestamp is further away.
                break
            count += 1
            if count == threshold:
                incidents += 1
                break
    return incidents

objects = ConcordiaUserManager()


class UserProfile(MetricsModelMixin("userprofile"), models.Model):
user = models.OneToOneField(User, on_delete=models.CASCADE, related_name="profile")
Expand Down Expand Up @@ -798,64 +864,6 @@ def recent_review_actions(self, days=1):
START = timezone.now() - datetime.timedelta(days=days)
return self.review_actions(START)

def reviewing_too_quickly(self, start=None):
    """Return reviewers with three reviews that landed within a minute.

    Runs raw SQL that self-joins ``concordia_transcription`` three times
    (``t1.id < t2.id < t3.id``), keeping triples that share a reviewer,
    were all accepted at or after ``start``, and whose ``updated_on``
    values are pairwise less than 60 seconds apart. Staff and superusers
    are excluded.

    Args:
        start: Earliest ``accepted`` timestamp to consider. Defaults to
            24 hours before the moment of the call.

    Returns:
        A list of ``(user id, username, count)`` rows, where ``count`` is
        the number of matching triples for that reviewer.
    """
    if start is None:
        # Resolve the default per call. A module-level "one day ago" is
        # computed once at import and goes stale in a long-lived process.
        start = timezone.now() - ONE_DAY

    # ``start`` is bound by the database driver instead of being
    # formatted into the SQL text.
    sql = """SELECT u.id, u.username, COUNT(*)
        FROM concordia_transcription t1
        JOIN concordia_transcription t2
            ON t1.id < t2.id
        JOIN concordia_transcription t3
            ON t2.id < t3.id
            AND t1.reviewed_by_id = t2.reviewed_by_id
            AND t2.reviewed_by_id = t3.reviewed_by_id
            AND t1.accepted >= %s
            AND t2.accepted >= %s
            AND t3.accepted >= %s
            AND ABS(
                EXTRACT(EPOCH FROM (t1.updated_on - t2.updated_on))
            ) < 60
            AND ABS(
                EXTRACT(EPOCH FROM (t1.updated_on - t3.updated_on))
            ) < 60
            AND ABS(EXTRACT(
                EPOCH FROM (t2.updated_on - t3.updated_on))
            ) < 60
        JOIN auth_user u on t1.reviewed_by_id = u.id
        WHERE u.is_superuser = FALSE and u.is_staff = False
        GROUP BY u.id, u.username"""
    with connection.cursor() as cursor:
        cursor.execute(sql, [start, start, start])
        return cursor.fetchall()

def transcribing_too_quickly(self, start=None):
    """Return users with three transcriptions created within a minute.

    Runs raw SQL that self-joins ``concordia_transcription`` three times
    (``t1.id < t2.id < t3.id``), keeping triples that share a user, were
    all submitted at or after ``start``, and whose ``created_on`` values
    are pairwise less than 60 seconds apart. Staff and superusers are
    excluded.

    Args:
        start: Earliest ``submitted`` timestamp to consider. Defaults to
            24 hours before the moment of the call.

    Returns:
        A list of ``(user id, username, count)`` rows, where ``count`` is
        the number of matching triples for that user.
    """
    if start is None:
        # Resolve the default per call. A module-level "one day ago" is
        # computed once at import and goes stale in a long-lived process.
        start = timezone.now() - ONE_DAY

    # ``start`` is bound by the database driver instead of being
    # formatted into the SQL text.
    sql = """SELECT u.id, u.username, COUNT(*)
        FROM concordia_transcription t1
        JOIN concordia_transcription t2
            ON t1.id < t2.id
        JOIN concordia_transcription t3
            ON t2.id < t3.id
            AND t1.user_id = t2.user_id
            AND t2.user_id = t3.user_id
            AND t1.submitted >= %s
            AND t2.submitted >= %s
            AND t3.submitted >= %s
            AND ABS(
                EXTRACT(EPOCH FROM (t1.created_on - t2.created_on))
            ) < 60
            AND ABS(
                EXTRACT(EPOCH FROM (t1.created_on - t3.created_on))
            ) < 60
            AND ABS(
                EXTRACT(EPOCH FROM (t2.created_on - t3.created_on))
            ) < 60
        JOIN auth_user u on t1.user_id = u.id
        WHERE u.is_superuser = FALSE and u.is_staff = False
        GROUP BY u.id, u.username"""
    with connection.cursor() as cursor:
        cursor.execute(sql, [start, start, start])
        return cursor.fetchall()


class Transcription(MetricsModelMixin("transcription"), models.Model):
asset = models.ForeignKey(Asset, on_delete=models.CASCADE)
Expand Down
5 changes: 3 additions & 2 deletions concordia/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
AssetTranscriptionReservation,
Campaign,
CampaignRetirementProgress,
ConcordiaUser,
Item,
Project,
ResourceFile,
Expand Down Expand Up @@ -1067,8 +1068,8 @@ def unusual_activity():
"title": "Unusual User Activity Report for "
+ timezone.now().strftime("%b %d %Y, %I:%M %p"),
"domain": "https://" + site.domain,
"transcriptions": transcribing_too_quickly(),
"reviews": reviewing_too_quickly(),
"transcriptions": ConcordiaUser.objects.transcribe_incidents(),
"reviews": ConcordiaUser.objects.review_incidents(),
}

text_body_template = loader.get_template("emails/unusual_activity.txt")
Expand Down
134 changes: 96 additions & 38 deletions concordia/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
AssetTranscriptionReservation,
Campaign,
CardFamily,
ConcordiaUser,
Resource,
Transcription,
TranscriptionStatus,
Expand Down Expand Up @@ -40,6 +41,101 @@
)


class ConcordiaUserTestCase(CreateTestUsers, TestCase):
    """Tests for the incident counters reached via ``ConcordiaUser.objects``."""

    def setUp(self):
        # transcription1 belongs to "tester1" and carries a rejection dated
        # two days ago; transcription2 is an anonymous transcription of the
        # same asset.
        self.transcription1 = create_transcription(
            user=self.create_user(username="tester1"),
            rejected=timezone.now() - timedelta(days=2),
        )
        self.transcription2 = create_transcription(
            asset=self.transcription1.asset, user=get_anonymous_user()
        )

    def test_review_incidents(self):
        """Three accepts inside a minute by one reviewer form one incident."""
        # Two accepts by "tester2" at +0s and +29s: below the threshold.
        self.transcription1.accepted = timezone.now()
        self.transcription1.reviewed_by = self.create_user(username="tester2")
        self.transcription1.save()
        self.transcription2.accepted = self.transcription1.accepted + timedelta(
            seconds=29
        )
        self.transcription2.reviewed_by = self.transcription1.reviewed_by
        self.transcription2.save()
        users = ConcordiaUser.objects.review_incidents()
        # Check the reviewer: the transcriber's id could never appear in the
        # review results, so asserting on it would pass vacuously.
        self.assertEqual(len(users), 0)
        self.assertNotIn(self.transcription1.reviewed_by.id, users)

        # A third accept at +58s completes the first window (+0, +29, +58).
        transcription3 = create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            reviewed_by=self.transcription1.reviewed_by,
            accepted=self.transcription1.accepted + timedelta(seconds=58),
        )
        # A fourth accept at +119s is 61 seconds after the third, so it is
        # outside every earlier window.
        transcription4 = create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            reviewed_by=self.transcription1.reviewed_by,
            accepted=transcription3.accepted + timedelta(minutes=1, seconds=1),
        )
        users = ConcordiaUser.objects.review_incidents()
        self.assertEqual(len(users), 1)
        self.assertEqual(users[self.transcription1.reviewed_by.id], 1)

        # Accepts at +148s and +177s join the fourth one to form a second
        # window (+119, +148, +177).
        create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            reviewed_by=self.transcription1.reviewed_by,
            accepted=transcription4.accepted + timedelta(seconds=29),
        )
        create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            reviewed_by=self.transcription1.reviewed_by,
            accepted=transcription4.accepted + timedelta(seconds=58),
        )
        users = ConcordiaUser.objects.review_incidents()
        self.assertEqual(len(users), 1)
        self.assertEqual(users[self.transcription1.reviewed_by.id], 2)

    def test_transcribe_incidents(self):
        """Three submissions inside a minute by one user form one incident."""
        # Two submissions by the same user at +0s and +29s: below threshold.
        self.transcription1.submitted = timezone.now()
        self.transcription1.save()
        self.transcription2.submitted = self.transcription1.submitted + timedelta(
            seconds=29
        )
        self.transcription2.user = self.transcription1.user
        self.transcription2.save()
        users = ConcordiaUser.objects.transcribe_incidents()
        self.assertEqual(len(users), 0)
        self.assertNotIn(self.transcription1.user.id, users)

        # +58s completes the first window (+0, +29, +58).
        transcription3 = create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            submitted=self.transcription1.submitted + timedelta(seconds=58),
        )
        # +119s and +178s are only two submissions within a minute of each
        # other, so they do not add an incident yet.
        transcription4 = create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            submitted=transcription3.submitted + timedelta(minutes=1, seconds=1),
        )
        create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            submitted=transcription4.submitted + timedelta(seconds=59),
        )
        users = ConcordiaUser.objects.transcribe_incidents()
        self.assertEqual(len(users), 1)
        self.assertEqual(users[self.transcription1.user.id], 1)

        # A second submission at +119s makes (+119, +119, +178) a second
        # window.
        create_transcription(
            asset=self.transcription1.asset,
            user=self.transcription1.user,
            submitted=self.transcription1.submitted + timedelta(minutes=1, seconds=59),
        )
        users = ConcordiaUser.objects.transcribe_incidents()
        self.assertEqual(len(users), 1)
        self.assertEqual(users[self.transcription1.user.id], 2)


class AssetTestCase(CreateTestUsers, TestCase):
def setUp(self):
self.asset = create_asset()
Expand Down Expand Up @@ -176,44 +272,6 @@ def test_status(self):
TranscriptionStatus.CHOICE_MAP[TranscriptionStatus.COMPLETED],
)

def test_reviewing_too_quickly(self):
    """Two simultaneous accepts report nothing; a third reports the reviewer."""
    first = self.transcription1
    second = self.transcription2

    # Same reviewer accepts both existing transcriptions at one instant.
    first.accepted = timezone.now()
    first.reviewed_by = self.create_user(username="tester2")
    first.save()
    second.accepted = first.accepted
    second.reviewed_by = first.reviewed_by
    second.save()
    flagged = Transcription.objects.reviewing_too_quickly()
    self.assertEqual(len(flagged), 0)

    # A third accept by that reviewer at the same instant.
    third = create_transcription(
        asset=first.asset,
        user=first.user,
        reviewed_by=first.reviewed_by,
        accepted=first.accepted,
    )
    flagged = Transcription.objects.reviewing_too_quickly()
    self.assertEqual(len(flagged), 1)
    # The first column of the row is the reviewer's user id.
    self.assertEqual(flagged[0][0], third.reviewed_by.id)

def test_transcribing_too_quickly(self):
    """Two simultaneous submissions report nothing; a third reports the user."""
    first = self.transcription1
    second = self.transcription2

    # Same user submits both existing transcriptions at one instant.
    first.submitted = timezone.now()
    first.save()
    second.submitted = first.submitted
    second.user = first.user
    second.save()
    flagged = Transcription.objects.transcribing_too_quickly()
    self.assertEqual(len(flagged), 0)

    # A third submission by that user at the same instant.
    third = create_transcription(
        asset=first.asset,
        user=first.user,
        submitted=first.submitted,
    )
    flagged = Transcription.objects.transcribing_too_quickly()
    self.assertEqual(len(flagged), 1)
    # The first column of the row is the transcriber's user id.
    self.assertEqual(flagged[0][0], third.user.id)


class AssetTranscriptionReservationTest(CreateTestUsers, TestCase):
def setUp(self):
Expand Down