diff --git a/concordia/models.py b/concordia/models.py
index 8f71982ab..580d286d1 100644
--- a/concordia/models.py
+++ b/concordia/models.py
@@ -6,12 +6,12 @@
 
 import pytesseract
 from django.conf import settings
-from django.contrib.auth.models import User
+from django.contrib.auth.models import BaseUserManager, User
 from django.core import signing
 from django.core.cache import cache
 from django.core.exceptions import ValidationError
 from django.core.validators import RegexValidator
-from django.db import connection, models
+from django.db import models
 from django.db.models import Count, ExpressionWrapper, F, JSONField, Q
 from django.db.models.functions import Round
 from django.db.models.signals import post_save
@@ -27,8 +27,10 @@
 
 User._meta.get_field("email").__dict__["_unique"] = True
 
+ONE_MINUTE = datetime.timedelta(minutes=1)
 ONE_DAY = datetime.timedelta(days=1)
 ONE_DAY_AGO = timezone.now() - ONE_DAY
+THRESHOLD = 3
 
 
 def resource_file_upload_path(instance, filename):
@@ -38,6 +40,33 @@ def resource_file_upload_path(instance, filename):
     return time.strftime(path)
 
 
+class ConcordiaUserManager(BaseUserManager):
+    """Manager reporting bursts of activity by non-staff users."""
+
+    def _incident_counts(self, method_name):
+        """Map user id to incident count, omitting users with no incidents.
+
+        ``method_name`` names the per-user ``ConcordiaUser`` method to call.
+        Superusers and staff are skipped. Each user triggers its own
+        Transcription queries, so cost grows with the number of users.
+        """
+        users = self.get_queryset().filter(is_superuser=False, is_staff=False)
+        counts = {}
+        for user in users:
+            incident_count = getattr(user, method_name)()
+            if incident_count > 0:
+                counts[user.id] = incident_count
+        return counts
+
+    def review_incidents(self):
+        """Return ``{user_id: count}`` of review incidents per user."""
+        return self._incident_counts("review_incidents")
+
+    def transcribe_incidents(self):
+        """Return ``{user_id: count}`` of transcription incidents per user."""
+        return self._incident_counts("transcribe_incidents")
+
+
 class ConcordiaUser(User):
     # This class is a simple proxy model to add
     # additional user functionality to, without changing
@@ -72,6 +101,63 @@ def get_email_reconfirmation_key(self):
     def validate_reconfirmation_email(self, email):
         return email == self.get_email_for_reconfirmation()
 
+    @staticmethod
+    def _count_incidents(timestamps, threshold):
+        """Count runs of ``threshold`` timestamps packed within ONE_MINUTE.
+
+        ``timestamps`` must be a list sorted in ascending order. Every
+        timestamp is tried as the start of a run; it is one incident when the
+        timestamp ``threshold - 1`` places later is at most ONE_MINUTE after
+        it. Overlapping runs are each counted.
+        """
+        # Compare whole timedeltas: ``timedelta.seconds`` drops the days part,
+        # so gaps of a day plus a few seconds would look like a few seconds.
+        span = max(threshold - 1, 0)
+        return sum(
+            1
+            for first, last in zip(timestamps, timestamps[span:])
+            if last - first <= ONE_MINUTE
+        )
+
+    def review_incidents(self, start=None, threshold=THRESHOLD):
+        """Count incidents of this user reviewing unusually quickly.
+
+        Accept and reject times of transcriptions reviewed by this user are
+        pooled. ``start`` is the earliest time considered and defaults to one
+        day before the call; ``threshold`` is the number of reviews within a
+        minute that makes an incident.
+        """
+        if start is None:
+            # Resolve at call time; a module-level default is frozen at import.
+            start = timezone.now() - ONE_DAY
+        reviewed = Transcription.objects.filter(reviewed_by=self)
+        accepted = reviewed.filter(accepted__gte=start).values_list(
+            "accepted", flat=True
+        )
+        rejected = reviewed.filter(rejected__gte=start).values_list(
+            "rejected", flat=True
+        )
+        timestamps = sorted([*accepted, *rejected])
+        return self._count_incidents(timestamps, threshold)
+
+    def transcribe_incidents(self, start=None, threshold=THRESHOLD):
+        """Count incidents of this user transcribing unusually quickly.
+
+        Uses the ``submitted`` times of this user's transcriptions. ``start``
+        and ``threshold`` behave as in ``review_incidents``.
+        """
+        if start is None:
+            # Resolve at call time; a module-level default is frozen at import.
+            start = timezone.now() - ONE_DAY
+        timestamps = list(
+            Transcription.objects.filter(submitted__gte=start, user=self)
+            .order_by("submitted")
+            .values_list("submitted", flat=True)
+        )
+        return self._count_incidents(timestamps, threshold)
+
+    objects = ConcordiaUserManager()
+
 
 class UserProfile(MetricsModelMixin("userprofile"), models.Model):
     user = models.OneToOneField(User, on_delete=models.CASCADE, related_name="profile")
@@ -798,64 +884,6 @@ def recent_review_actions(self, days=1):
         START = timezone.now() - datetime.timedelta(days=days)
         return self.review_actions(START)
 
-    def reviewing_too_quickly(self, start=ONE_DAY_AGO):
-        with connection.cursor() as cursor:
-            cursor.execute(
-                f"""SELECT u.id, u.username, COUNT(*)
-                    FROM concordia_transcription t1
-                    JOIN concordia_transcription t2
-                    ON t1.id < t2.id
-                    JOIN concordia_transcription t3
-                    ON t2.id < t3.id
-                    AND t1.reviewed_by_id = t2.reviewed_by_id
-                    AND t2.reviewed_by_id = t3.reviewed_by_id
-                    AND t1.accepted >= '{start}'
-                    AND t2.accepted >= '{start}'
-                    AND t3.accepted >= '{start}'
-                    AND ABS(
-                        EXTRACT(EPOCH FROM (t1.updated_on - t2.updated_on))
-                    ) < 60
-                    AND ABS(
-                        EXTRACT(EPOCH FROM (t1.updated_on - t3.updated_on))
-                    ) < 60
-                    AND ABS(EXTRACT(
-                        EPOCH FROM (t2.updated_on - t3.updated_on))
-                    ) < 60
-                    JOIN auth_user u on t1.reviewed_by_id = u.id
-                    WHERE u.is_superuser = FALSE and u.is_staff = False
-                    GROUP BY u.id, u.username"""  # nosec B608
-            )
-            return cursor.fetchall()
-
-    def transcribing_too_quickly(self, start=ONE_DAY_AGO):
-        with connection.cursor() as cursor:
-            cursor.execute(
-                f"""SELECT u.id, u.username, COUNT(*)
-                    FROM concordia_transcription t1
-                    JOIN concordia_transcription t2
-                    ON t1.id < t2.id
-                    JOIN concordia_transcription t3
-                    ON t2.id < t3.id
-                    AND t1.user_id = t2.user_id
-                    AND t2.user_id = t3.user_id
-                    AND t1.submitted >= '{start}'
-                    AND t2.submitted >= '{start}'
-                    AND t3.submitted >= '{start}'
-                    AND ABS(
-                        EXTRACT(EPOCH FROM (t1.created_on - t2.created_on))
-                    ) < 60
-                    AND ABS(
-                        EXTRACT(EPOCH FROM (t1.created_on - t3.created_on))
-                    ) < 60
-                    AND ABS(
-                        EXTRACT(EPOCH FROM (t2.created_on - t3.created_on))
-                    ) < 60
-                    JOIN auth_user u on t1.user_id = u.id
-                    WHERE u.is_superuser = FALSE and u.is_staff = False
-                    GROUP BY u.id, u.username"""  # nosec B608
-            )
-            return cursor.fetchall()
-
 
 class Transcription(MetricsModelMixin("transcription"), models.Model):
     asset = models.ForeignKey(Asset, on_delete=models.CASCADE)
diff --git a/concordia/tasks.py b/concordia/tasks.py
index bb3054dad..120ad3152 100644
--- a/concordia/tasks.py
+++ b/concordia/tasks.py
@@ -24,6 +24,7 @@
     AssetTranscriptionReservation,
     Campaign,
     CampaignRetirementProgress,
+    ConcordiaUser,
     Item,
     Project,
     ResourceFile,
@@ -1067,8 +1068,8 @@ def unusual_activity():
         "title": "Unusual User Activity Report for "
         + timezone.now().strftime("%b %d %Y, %I:%M %p"),
         "domain": "https://" + site.domain,
-        "transcriptions": transcribing_too_quickly(),
-        "reviews": reviewing_too_quickly(),
+        "transcriptions": ConcordiaUser.objects.transcribe_incidents(),
+        "reviews": ConcordiaUser.objects.review_incidents(),
     }
 
     text_body_template = loader.get_template("emails/unusual_activity.txt")
diff --git a/concordia/tests/test_models.py b/concordia/tests/test_models.py
index bfc4dec81..8a106095a 100644
--- a/concordia/tests/test_models.py
+++ b/concordia/tests/test_models.py
@@ -11,6 +11,7 @@
     AssetTranscriptionReservation,
     Campaign,
     CardFamily,
+    ConcordiaUser,
     Resource,
     Transcription,
     TranscriptionStatus,
@@ -40,6 +41,101 @@
 )
 
 
+class ConcordiaUserTestCase(CreateTestUsers, TestCase):
+    def setUp(self):
+        self.transcription1 = create_transcription(
+            user=self.create_user(username="tester1"),
+            rejected=timezone.now() - timedelta(days=2),
+        )
+        self.transcription2 = create_transcription(
+            asset=self.transcription1.asset, user=get_anonymous_user()
+        )
+
+    def test_review_incidents(self):
+        self.transcription1.accepted = timezone.now()
+        self.transcription1.reviewed_by = self.create_user(username="tester2")
+        self.transcription1.save()
+        self.transcription2.accepted = self.transcription1.accepted + timedelta(
+            seconds=29
+        )
+        self.transcription2.reviewed_by = self.transcription1.reviewed_by
+        self.transcription2.save()
+        users = ConcordiaUser.objects.review_incidents()
+        self.assertNotIn(self.transcription1.user.id, users)
+
+        transcription3 = create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            reviewed_by=self.transcription1.reviewed_by,
+            accepted=self.transcription1.accepted + timedelta(seconds=58),
+        )
+        transcription4 = create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            reviewed_by=self.transcription1.reviewed_by,
+            accepted=transcription3.accepted + timedelta(minutes=1, seconds=1),
+        )
+        users = ConcordiaUser.objects.review_incidents()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[self.transcription1.reviewed_by.id], 1)
+
+        create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            reviewed_by=self.transcription1.reviewed_by,
+            accepted=transcription4.accepted + timedelta(seconds=29),
+        )
+        create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            reviewed_by=self.transcription1.reviewed_by,
+            accepted=transcription4.accepted + timedelta(seconds=58),
+        )
+        users = ConcordiaUser.objects.review_incidents()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[self.transcription1.reviewed_by.id], 2)
+
+    def test_transcribe_incidents(self):
+        self.transcription1.submitted = timezone.now()
+        self.transcription1.save()
+        self.transcription2.submitted = self.transcription1.submitted + timedelta(
+            seconds=29
+        )
+        self.transcription2.user = self.transcription1.user
+        self.transcription2.save()
+        users = ConcordiaUser.objects.transcribe_incidents()
+        self.assertEqual(len(users), 0)
+        self.assertNotIn(self.transcription1.user.id, users)
+
+        transcription3 = create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            submitted=self.transcription1.submitted + timedelta(seconds=58),
+        )
+        transcription4 = create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            submitted=transcription3.submitted + timedelta(minutes=1, seconds=1),
+        )
+        create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            submitted=transcription4.submitted + timedelta(seconds=59),
+        )
+        users = ConcordiaUser.objects.transcribe_incidents()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[self.transcription1.user.id], 1)
+
+        create_transcription(
+            asset=self.transcription1.asset,
+            user=self.transcription1.user,
+            submitted=self.transcription1.submitted + timedelta(minutes=1, seconds=59),
+        )
+        users = ConcordiaUser.objects.transcribe_incidents()
+        self.assertEqual(len(users), 1)
+        self.assertEqual(users[self.transcription1.user.id], 2)
+
+
 class AssetTestCase(CreateTestUsers, TestCase):
     def setUp(self):
         self.asset = create_asset()
@@ -176,44 +272,6 @@ def test_status(self):
             TranscriptionStatus.CHOICE_MAP[TranscriptionStatus.COMPLETED],
         )
 
-    def test_reviewing_too_quickly(self):
-        self.transcription1.accepted = timezone.now()
-        self.transcription1.reviewed_by = self.create_user(username="tester2")
-        self.transcription1.save()
-        self.transcription2.accepted = self.transcription1.accepted
-        self.transcription2.reviewed_by = self.transcription1.reviewed_by
-        self.transcription2.save()
-        transcriptions = Transcription.objects.reviewing_too_quickly()
-        self.assertEqual(len(transcriptions), 0)
-
-        transcription3 = create_transcription(
-            asset=self.transcription1.asset,
-            user=self.transcription1.user,
-            reviewed_by=self.transcription1.reviewed_by,
-            accepted=self.transcription1.accepted,
-        )
-        transcriptions = Transcription.objects.reviewing_too_quickly()
-        self.assertEqual(len(transcriptions), 1)
-        self.assertEqual(transcriptions[0][0], transcription3.reviewed_by.id)
-
-    def test_transcribing_too_quickly(self):
-        self.transcription1.submitted = timezone.now()
-        self.transcription1.save()
-        self.transcription2.submitted = self.transcription1.submitted
-        self.transcription2.user = self.transcription1.user
-        self.transcription2.save()
-        transcriptions = Transcription.objects.transcribing_too_quickly()
-        self.assertEqual(len(transcriptions), 0)
-
-        transcription3 = create_transcription(
-            asset=self.transcription1.asset,
-            user=self.transcription1.user,
-            submitted=self.transcription1.submitted,
-        )
-        transcriptions = Transcription.objects.transcribing_too_quickly()
-        self.assertEqual(len(transcriptions), 1)
-        self.assertEqual(transcriptions[0][0], transcription3.user.id)
-
 
 class AssetTranscriptionReservationTest(CreateTestUsers, TestCase):
     def setUp(self):