Legend
diff --git a/pipeline/mooclink/urls.py b/pipeline/mooclink/urls.py
index 5ae9c82..ef35615 100644
--- a/pipeline/mooclink/urls.py
+++ b/pipeline/mooclink/urls.py
@@ -2,7 +2,7 @@
from mooclink.views.course import CourseOverView, CourseSettingsView, CourseAddLanguageView, RemoveUserAssignment
from mooclink.views.couse_video_bulkaction import CourseBulkActionConfirmation, CourseDoBulkAction, CourseSubscribe
-from mooclink.views.main import MainView, MainViewCourse, RedirectByItemId
+from mooclink.views.main import MainView, MainViewCourse, RedirectByItemId, JobAdminView, JobAdminResetView
from mooclink.views.service_provider_usage import ServiceProviderUsage, TodoView, ServiceProviderUsageCSV, \
ServiceProviderUsageQuarter
from mooclink.views.subtitle_action import SubtitleToAction
@@ -37,4 +37,7 @@
# Endpoint for redirecting to course / video by primary-key. Allowed only for admins
path('debug/get_item/', RedirectByItemId.as_view()),
+
+ path('jobs/', JobAdminView.as_view(), name="mooclink.jobs.index"),
+ path('jobs/reset/', JobAdminResetView.as_view(), name="mooclink.jobs.reset"),
]
diff --git a/pipeline/mooclink/views/couse_video_bulkaction.py b/pipeline/mooclink/views/couse_video_bulkaction.py
index edd5a86..8ea8273 100644
--- a/pipeline/mooclink/views/couse_video_bulkaction.py
+++ b/pipeline/mooclink/views/couse_video_bulkaction.py
@@ -1,7 +1,9 @@
+import io
import itertools
-import json
+import zipfile
from datetime import datetime, timedelta
from operator import itemgetter
+from zipfile import ZipFile
from django import views
from django.conf import settings
@@ -9,16 +11,18 @@
from django.contrib.auth.mixins import PermissionRequiredMixin, LoginRequiredMixin
from django.core.exceptions import PermissionDenied
from django.db import transaction
+from django.http import HttpResponse
from django.shortcuts import render, redirect
from django.template.loader import render_to_string
from django.urls import reverse
from django.utils import timezone
-from django_celery_beat.models import IntervalSchedule, PeriodicTask
+from django.utils.text import slugify
+from django_celery_beat.models import IntervalSchedule
-from core.exceptions import SecretNotFound
from core.models import Tenant, TranspipeUser
from mooclink.services.aws_translation_service import AwsTranslationService
from mooclink.services.deepl_translation_service import DeeplTranslationService
+from mooclink.services.periodic_task_service import PeriodicTaskService
from subtitles.api.xikolo_api import publish_subtitle_to_xikolo
from subtitles.models import Course, Video, IsoLanguage, Subtitle, SubtitleAssignment, AssignedLanguage
from subtitles.models.translation_service import TranslationService
@@ -274,19 +278,27 @@ def post(self, request, course_id, tenant_slug=None):
period=IntervalSchedule.MINUTES,
)
- periodic = PeriodicTask.objects.create(
- interval=schedule,
- name=f'Check AWS StandaloneTranslation for video={video.pk}',
- task='core.tasks.task_update_aws_standalone_translation_status',
+ # periodic = PeriodicTask.objects.create(
+ # interval=schedule,
+ # name=f'Check AWS StandaloneTranslation for video={video.pk}',
+ # task='core.tasks.task_update_aws_standalone_translation_status',
+ # start_time=datetime.now() + timedelta(minutes=15),
+ # kwargs=json.dumps({
+ # 'tenant_id': video.tenant_id,
+ # 'video_id': video.pk,
+ # })
+ # )
+
+ periodic = PeriodicTaskService.create_periodic_task(
+ service_type="core.tasks.task_update_aws_standalone_translation_status",
+ task="core.tasks.task_update_aws_standalone_translation_status",
+ video=video,
start_time=datetime.now() + timedelta(minutes=15),
- kwargs=json.dumps({
- 'tenant_id': video.tenant_id,
- 'video_id': video.pk,
- })
)
video.workflow_data['type'] = 'AWS_TRANSLATION_S_v1'
- video.workflow_data['periodic_task_id'] = periodic.pk
+ if periodic:
+ video.workflow_data['periodic_task_id'] = periodic.pk
video.workflow_data['initiated'] = str(datetime.utcnow())
video.save()
@@ -385,6 +397,32 @@ def post(self, request, course_id, tenant_slug=None):
SubtitleAssignment.objects.filter(subtitle=subtitle).update(deleted=timezone.now())
number_of_affected += 1
+ elif action == "download-vtt-files":
+ buffer = io.BytesIO()
+ zip_file = ZipFile(buffer, mode="w", compression=zipfile.ZIP_LZMA)
+
+ for video in videos_to_transcript:
+ with zip_file.open(f"{video.index:02d}_{slugify(video.title)}_{video.original_language.iso_code}.vtt", "w") as f:
+ if video.current_transcript:
+ f.write(video.current_transcript.latest_content.encode("utf-8"))
+
+ for (language, video) in videos_to_translate:
+ subtitle = video.subtitle_set.filter(language=language).first()
+
+ if not subtitle:
+ continue
+
+ with zip_file.open(f"{video.index:02d}_{slugify(video.title)}_{subtitle.language.iso_code}.vtt", "w") as f:
+ if video.current_transcript:
+ f.write(subtitle.latest_content.encode("utf-8"))
+
+ zip_file.close()
+
+ response = HttpResponse(buffer.getvalue())
+ response['Content-Type'] = 'application/x-zip-compressed'
+ response['Content-Disposition'] = f'attachment; filename=subtitles_{slugify(course.title)}.zip'
+
+ return response
else:
messages.error(request, f"Action {action} not available")
diff --git a/pipeline/mooclink/views/main.py b/pipeline/mooclink/views/main.py
index f4a81f4..59722c8 100644
--- a/pipeline/mooclink/views/main.py
+++ b/pipeline/mooclink/views/main.py
@@ -1,9 +1,13 @@
+import json
+
from django import views
from django.contrib import messages
from django.contrib.auth.mixins import PermissionRequiredMixin, LoginRequiredMixin
from django.core.exceptions import SuspiciousOperation
-from django.http import HttpRequest
+from django.db import transaction
+from django.http import HttpRequest, HttpResponseForbidden
from django.shortcuts import render, redirect
+from django_celery_beat.models import PeriodicTask
from core.models import Tenant
from subtitles.api.xikolo_api import get_xikolo_course_sections_and_videos, update_video_detail, get_xikolo_course
@@ -29,7 +33,7 @@ def get(self, request, tenant_slug, course_id, video_id):
video = None
- course = Course.objects.filter(tenant=tenant, ext_id=course_id).exclude(sync_status=SyncStatusChoices.SKELETON)\
+ course = Course.objects.filter(tenant=tenant, ext_id=course_id).exclude(sync_status=SyncStatusChoices.SKELETON) \
.first()
if not course:
@@ -86,10 +90,9 @@ def get(self, request, tenant_slug, course_id):
tenant = Tenant.objects.get(slug=tenant_slug)
course = None
- course = Course.objects.filter(tenant=tenant, ext_id=course_id).exclude(sync_status=SyncStatusChoices.SKELETON)\
+ course = Course.objects.filter(tenant=tenant, ext_id=course_id).exclude(sync_status=SyncStatusChoices.SKELETON) \
.first()
-
if course:
return redirect('mooclink.course.overview', tenant_slug, course.ext_id)
else:
@@ -136,9 +139,87 @@ def get(self, request: HttpRequest):
if 'video_id' in request.GET:
video = Video.objects.get(pk=request.GET['video_id'])
- return redirect('mooclink.video.index', video.tenant.slug, video.ext_id)
+ return redirect('mooclink.video.index', video.tenant.slug, video.course_section.course.ext_id, video.ext_id)
if 'course_id' in request.GET:
course = Course.objects.get(pk=request.GET['course_id'])
return redirect('mooclink.course.overview', course.tenant.slug, course.ext_id)
+
+
+class JobAdminView(LoginRequiredMixin, views.View):
+    """Superuser-only overview of running video workflows and leftover periodic tasks."""
+
+    def get(self, request: HttpRequest):
+        """Render the job administration page.
+
+        The page receives two collections:
+
+        * ``pending_videos`` - every video whose ``workflow_status`` is neither
+          NULL nor the empty string, newest first.
+        * ``orphaned_tasks`` - every periodic task whose task path starts with
+          ``core.tasks`` and whose ``video_id`` kwarg does not belong to one of
+          the pending videos, paired with the referenced video (or ``None``).
+
+        Users that are not superusers get a 403 response.
+        """
+        if not request.user.is_superuser:
+            return HttpResponseForbidden("Only superusers may see this page.")
+
+        # ``exclude(workflow_status=None)`` and ``__isnull=True`` are the same
+        # lookup, so a single NULL exclusion is sufficient.
+        pending_videos = (
+            Video.objects.exclude(workflow_status__isnull=True)
+            .exclude(workflow_status="")
+            .order_by("-id")
+        )
+
+        # Only the primary keys are needed for the membership test below, so
+        # avoid loading full model instances here.
+        pending_ids = set(pending_videos.values_list("id", flat=True))
+
+        core_tasks = (
+            PeriodicTask.objects
+            .filter(task__startswith="core.tasks")
+            .order_by("-id")
+        )
+
+        orphaned_tasks = []
+
+        for periodic_task in core_tasks:
+            # A task with unreadable kwargs must not break the whole admin
+            # page; treat it as a task without a video so it is still listed.
+            try:
+                task_kwargs = json.loads(periodic_task.kwargs)
+            except (TypeError, ValueError):
+                task_kwargs = {}
+            if not isinstance(task_kwargs, dict):
+                task_kwargs = {}
+
+            video_id = task_kwargs.get("video_id", -1)
+            if video_id in pending_ids:
+                continue
+
+            orphaned_tasks.append({
+                "task": periodic_task,
+                "video": Video.objects.filter(pk=video_id).first(),
+            })
+
+        return render(request, "mooclink/admin/job_view.html", {
+            "pending_videos": pending_videos,
+            "orphaned_tasks": orphaned_tasks,
+        })
+
+
+class JobAdminResetView(LoginRequiredMixin, views.View):
+    """Superuser-only endpoint to reset a video workflow or delete one periodic task."""
+
+    def post(self, request: HttpRequest):
+        """Handle a reset request and redirect back to the job overview.
+
+        Recognised POST fields (the first one present wins):
+
+        * ``video_id`` - clears the video's ``workflow_status``, moves the old
+          ``workflow_data`` under the ``_old`` key and deletes the periodic
+          task referenced by it, plus any periodic task whose name ends with
+          ``for video=<pk>``.
+        * ``periodic_task_id`` - deletes that periodic task.
+
+        Users that are not superusers get a 403 response. An unknown or
+        malformed ``video_id`` is reported through the messages framework
+        instead of raising.
+        """
+        if not request.user.is_superuser:
+            return HttpResponseForbidden("Only superusers may see this page.")
+
+        if "video_id" in request.POST:
+            requested_id = request.POST["video_id"]
+
+            # A stale form or a hand-crafted request must not end in a 500.
+            try:
+                video = Video.objects.get(pk=requested_id)
+            except (Video.DoesNotExist, ValueError):
+                messages.error(request, f"Video {requested_id} not found.")
+                return redirect("mooclink.jobs.index")
+
+            with transaction.atomic():
+                previous_workflow = video.workflow_data
+
+                video.workflow_status = ""
+                video.workflow_data = {
+                    "_old": previous_workflow,
+                }
+                video.save()
+
+                # NOTE(review): workflow_data is not guaranteed to be a dict
+                # from what is visible here; fall back to an empty mapping so
+                # the reset still completes.
+                if periodic_task_id := (previous_workflow or {}).get("periodic_task_id"):
+                    PeriodicTask.objects.filter(pk=periodic_task_id).delete()
+
+                leftover_tasks = PeriodicTask.objects.filter(name__endswith=f"for video={video.pk}")
+
+                # Fetch the primary keys once instead of a count() plus a
+                # second query for the message.
+                leftover_pks = list(leftover_tasks.values_list("pk", flat=True))
+                if leftover_pks:
+                    messages.info(request, f"Found orphaned periodic tasks {leftover_pks}")
+
+                leftover_tasks.delete()
+
+            messages.success(request, f"Video {video.title} reset.")
+
+        elif "periodic_task_id" in request.POST:
+            periodic_task_id = request.POST["periodic_task_id"]
+            PeriodicTask.objects.filter(pk=periodic_task_id).delete()
+
+            messages.success(request, f"Task {periodic_task_id} deleted.")
+
+        return redirect("mooclink.jobs.index")
diff --git a/pipeline/mooclink/views/service_provider_usage.py b/pipeline/mooclink/views/service_provider_usage.py
index 926f95d..142b61e 100644
--- a/pipeline/mooclink/views/service_provider_usage.py
+++ b/pipeline/mooclink/views/service_provider_usage.py
@@ -52,6 +52,8 @@ def get(self, request: HttpRequest):
i.billed_minutes for i in l if i.service_provider == ServiceProviderUse.ServiceProvider.MLLP),
ServiceProviderUse.ServiceProvider.AWS_TRANSLATION: sum(i.billed_characters for i in l if
i.service_provider == ServiceProviderUse.ServiceProvider.AWS_TRANSLATION),
+ ServiceProviderUse.ServiceProvider.DEEPL: sum(i.billed_characters for i in l if
+ i.service_provider == ServiceProviderUse.ServiceProvider.DEEPL),
}
data[f"{year}Q{quarter}"]["list"].append(
@@ -72,11 +74,13 @@ def get(self, request: HttpRequest):
ServiceProviderUse.ServiceProvider.AWS_TRANSCRIPTION,
ServiceProviderUse.ServiceProvider.AWS_TRANSLATION,
ServiceProviderUse.ServiceProvider.MLLP,
+ ServiceProviderUse.ServiceProvider.DEEPL,
],
"service_provider_units": {
ServiceProviderUse.ServiceProvider.AWS_TRANSCRIPTION: "minutes",
ServiceProviderUse.ServiceProvider.AWS_TRANSLATION: "characters",
ServiceProviderUse.ServiceProvider.MLLP: "minutes",
+ ServiceProviderUse.ServiceProvider.DEEPL: "characters",
},
'tenants': request.user.tenants.all(),
"selectable_quarter": [f"{q['year']}Q{q['quarter']}" for q in selectable_quarter],
diff --git a/pipeline/mooclink/views/subtitle_action.py b/pipeline/mooclink/views/subtitle_action.py
index bbd5cc6..9638ee0 100644
--- a/pipeline/mooclink/views/subtitle_action.py
+++ b/pipeline/mooclink/views/subtitle_action.py
@@ -9,6 +9,7 @@
from django.http import JsonResponse, HttpResponse
from django.shortcuts import redirect
from django.urls import reverse
+from django.utils.text import slugify
from django_celery_beat.models import IntervalSchedule, PeriodicTask
from core.models import Tenant
@@ -142,6 +143,12 @@ def post(self, request, tenant_slug, course_id, video_id):
subtitle.save()
elif action == 'publish':
publish_subtitle_to_xikolo(request, subtitle.id)
+ elif action == "download_vtt":
+ safe_title = slugify(video.title)
+ response = HttpResponse(subtitle.latest_content, content_type="text/vtt")
+ response["Content-Disposition"] = f"attachment; filename=subtitle_{subtitle.id}_{safe_title}_{subtitle.language.iso_code}.vtt"
+
+ return response
elif action == 'restart_workflow':
if subtitle and not subtitle.is_transcript:
assigned_language = course.assignedlanguage_set.filter(iso_language__iso_code=request.POST['language'])\
diff --git a/pipeline/mooclink/views/video.py b/pipeline/mooclink/views/video.py
index 7cd107f..ac50b5d 100644
--- a/pipeline/mooclink/views/video.py
+++ b/pipeline/mooclink/views/video.py
@@ -305,9 +305,10 @@ def post(self, request, tenant_slug, course_id, video_id):
can_user_view_video(request, video, raise_ex=True)
is_transcript = video.original_language == language
- try:
- subtitle = video.subtitle_set.filter(language=language).order_by('-last_update').first()
- except Subtitle.DoesNotExist:
+
+ subtitle = video.subtitle_set.filter(language=language).order_by('-last_update').first()
+
+ if subtitle is None:
subtitle = Subtitle(
status=Subtitle.SubtitleStatus.AUTO_GENERATED,
origin=Subtitle.Origin.MOOC,
@@ -343,7 +344,7 @@ def post(self, request, tenant_slug, video_id):
video.workflow_status = None
if periodic_task_id := video.workflow_data.get('periodic_task_id'):
- PeriodicTask.objects.get(pk=periodic_task_id).delete()
+ PeriodicTask.objects.filter(pk=periodic_task_id).delete()
video.workflow_data['cleared'] = True
video.workflow_data['cleared_reason'] = "Manual Cancellation"
diff --git a/pipeline/subtitles/api/aws_api.py b/pipeline/subtitles/api/aws_api.py
index d7f5c58..b364edd 100644
--- a/pipeline/subtitles/api/aws_api.py
+++ b/pipeline/subtitles/api/aws_api.py
@@ -323,7 +323,7 @@ def aws_update_transcription_only(video):
video.workflow_status = None
if periodic_task_id := video.workflow_data.get('periodic_task_id'):
- PeriodicTask.objects.get(pk=periodic_task_id).delete()
+ PeriodicTask.objects.filter(pk=periodic_task_id).delete()
video.workflow_data['cleared'] = True
video.save()
@@ -400,7 +400,7 @@ def aws_update_transcription_only(video):
video.workflow_data['finished'] = str(timezone.now())
if periodic_task_id := video.workflow_data.get('periodic_task_id'):
- PeriodicTask.objects.get(pk=periodic_task_id).delete()
+ PeriodicTask.objects.filter(pk=periodic_task_id).delete()
video.workflow_data['cleared'] = True
video.save()
@@ -544,7 +544,7 @@ def aws_update_video_status(video, video_id=None):
with transaction.atomic():
video.workflow_status = None
if periodic_task_id := video.workflow_data.get('periodic_task_id'):
- PeriodicTask.objects.get(pk=periodic_task_id).delete()
+ PeriodicTask.objects.filter(pk=periodic_task_id).delete()
video.workflow_data['cleared'] = True
video.save()
diff --git a/pipeline/subtitles/api/mllp_api.py b/pipeline/subtitles/api/mllp_api.py
index 3c4933d..c754a31 100644
--- a/pipeline/subtitles/api/mllp_api.py
+++ b/pipeline/subtitles/api/mllp_api.py
@@ -246,7 +246,7 @@ def mllp_download_subtitle_file(request, subtitle, tenant=None, user=None):
with transaction.atomic():
if periodic_task_id := video.workflow_data.get('periodic_task_id'):
- PeriodicTask.objects.get(pk=periodic_task_id).delete()
+ PeriodicTask.objects.filter(pk=periodic_task_id).delete()
video.workflow_data['cleared'] = True
video.save()
diff --git a/pipeline/subtitles/migrations/0041_auto_20231222_1235.py b/pipeline/subtitles/migrations/0041_auto_20231222_1235.py
new file mode 100644
index 0000000..256d1b1
--- /dev/null
+++ b/pipeline/subtitles/migrations/0041_auto_20231222_1235.py
@@ -0,0 +1,31 @@
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Re-declare the ``choices`` of four service/origin fields.
+
+    Touches ``AssignedLanguage.translation_service``,
+    ``Course.transcription_service``, ``ServiceProviderUse.service_provider``
+    and ``Subtitle.origin``. Only field metadata is altered.
+    """
+
+    # Choices shared by the translation and transcription service fields.
+    _service_choices = (
+        ('MLLP', 'MLLP'),
+        ('AWS', 'AWS'),
+        ('MANUAL', 'Manual'),
+        ('DEEPL', 'DEEPL'),
+    )
+
+    dependencies = [('subtitles', '0040_auto_20220617_1310')]
+
+    operations = [
+        migrations.AlterField(
+            model_name='assignedlanguage',
+            name='translation_service',
+            field=models.CharField(
+                choices=list(_service_choices),
+                default='MLLP',
+                max_length=255,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='course',
+            name='transcription_service',
+            field=models.CharField(
+                choices=list(_service_choices),
+                default='MANUAL',
+                max_length=255,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='serviceprovideruse',
+            name='service_provider',
+            field=models.CharField(
+                choices=[
+                    ('MLLP', 'MLLP'),
+                    ('AWS_TRANSCRIPTION', 'AWS Transcription'),
+                    ('AWS_TRANSLATION', 'AWS Translation'),
+                    ('DEEPL', 'DEEPL'),
+                    ('AUDESCRIBE_TRANSCRIPTION', 'Audescribe Transcription'),
+                    ('OTHER', 'Other'),
+                ],
+                db_index=True,
+                default='OTHER',
+                max_length=128,
+            ),
+        ),
+        migrations.AlterField(
+            model_name='subtitle',
+            name='origin',
+            field=models.CharField(
+                choices=[
+                    ('MLLP', 'MLLP'),
+                    ('AWS', 'AWS'),
+                    ('DEEPL', 'DEEPL'),
+                    ('AUDESCR', 'AUDESCRIBE'),
+                    ('MANU', 'Manual upload'),
+                    ('MOOC', 'Downloaded from MOOC platform'),
+                ],
+                default='MANU',
+                max_length=8,
+            ),
+        ),
+    ]
diff --git a/pipeline/subtitles/models/service_provider_use.py b/pipeline/subtitles/models/service_provider_use.py
index 6730802..1704fd2 100644
--- a/pipeline/subtitles/models/service_provider_use.py
+++ b/pipeline/subtitles/models/service_provider_use.py
@@ -20,6 +20,8 @@ class ServiceProvider(models.TextChoices):
AWS_TRANSLATION = "AWS_TRANSLATION", _("AWS Translation")
DEEPL = "DEEPL", _("DEEPL")
+ AUDESCRIBE_TRANSCRIPTION = "AUDESCRIBE_TRANSCRIPTION", _("Audescribe Transcription")
+
OTHER = "OTHER", _("Other")
service_provider = models.CharField(max_length=128, db_index=True, choices=ServiceProvider.choices,
diff --git a/pipeline/subtitles/models/subtitle.py b/pipeline/subtitles/models/subtitle.py
index e9c375f..498cfaa 100644
--- a/pipeline/subtitles/models/subtitle.py
+++ b/pipeline/subtitles/models/subtitle.py
@@ -31,6 +31,7 @@ class Origin(models.TextChoices):
MLLP = "MLLP", _("MLLP")
AWS = "AWS", _("AWS")
DEEPL = "DEEPL", _("DEEPL")
+ AUDESCRIBE = "AUDESCR", _("AUDESCRIBE")
MANUAL_UPLOAD = "MANU", _("Manual upload")
MOOC = "MOOC", _("Downloaded from MOOC platform")
diff --git a/pipeline/subtitles/models/video.py b/pipeline/subtitles/models/video.py
index 8779a21..66b1d08 100644
--- a/pipeline/subtitles/models/video.py
+++ b/pipeline/subtitles/models/video.py
@@ -2,6 +2,7 @@
from datetime import datetime, timedelta
import celery
+from dateutil.parser import parse
from django.db import models
from django.db.models import Q
from django.utils.functional import cached_property
@@ -188,5 +189,12 @@ def video_url(self):
def is_workflow_in_progress(self):
return self.workflow_status in {'AWS_INITIATED', 'MLLP_INITIATED'}
+    @property
+    def job_initiated(self):
+        """Return the parsed ``workflow_data["initiated"]`` value, or ``"n/a"``.
+
+        Any failure while reading or parsing the stored value (missing key,
+        unparsable text, ...) yields the placeholder string instead of raising.
+        """
+        try:
+            initiated_at = parse(self.workflow_data["initiated"])
+        except Exception:
+            # Best-effort accessor: never raise, fall back to a placeholder.
+            return "n/a"
+        return initiated_at
+
def __str__(self):
return self.title
diff --git a/pipeline/subtitles/templates/subtitles/base.html b/pipeline/subtitles/templates/subtitles/base.html
index 401ada8..5eb96b8 100644
--- a/pipeline/subtitles/templates/subtitles/base.html
+++ b/pipeline/subtitles/templates/subtitles/base.html
@@ -89,6 +89,11 @@
Tasks
{% endif %}
+ {% if user.is_superuser %}
+
+ Jobs
+
+ {% endif %}
{% if request.user.is_authenticated %}