Merge pull request #67 from Racix/last-minute-fixes

last minute fixes (translation, frontend, diarization and summary)
Racix · Dec 18, 2023 · aa78af8 · aa78af8
2 parents 2898fdf + f137ad4
commit aa78af8
Show file tree

Hide file tree

Showing 5 changed files with 21 additions and 9 deletions.
diff --git a/backend/api/app/main.py b/backend/api/app/main.py
@@ -330,7 +330,7 @@ async def start_media_summary(media_id: str, background_tasks: BackgroundTasks):
 
 
 async def do_summary(file_path: str, media_id: str):
-    timeout_seconds = 300
+    timeout_seconds = 600
     session_timeout = aiohttp.ClientTimeout(total=timeout_seconds)
     summarize_url = f"http://{os.environ['SUMMARIZATION_ADDRESS']}:{os.environ['API_PORT_GUEST']}/summarize"
     summarize = {}
@@ -347,7 +347,7 @@ async def do_summary(file_path: str, media_id: str):
                 form_new.add_field('json_data', json.dumps(analysis_info), content_type='application/json')
                 form_new.add_field('file', file)
 
-                async with aiohttp.request('POST', summarize_url, data=form_new) as response:
+                async with session.request('POST', summarize_url, data=form_new) as response:
                     if response.status == status.HTTP_201_CREATED:
                         summarize = await response.json()
                         status_data = {"status": status.HTTP_200_OK, "message": "Summarization done."}

diff --git a/backend/diarization/app/diarize.py b/backend/diarization/app/diarize.py
@@ -39,7 +39,7 @@ def configurations(wav_path: str, domain: str, rttm: str | None, speakers: int =
     pretrained_msdd = "models/diar_msdd_telephonic.nemo"
     config.diarizer.manifest_filepath = input_manifest_path
     # config.device = device
-    config.batch_size = 1
+    config.batch_size = 8
     config.diarizer.out_dir = ut.OUTPUT_DIR # Directory to store intermediate files and prediction outputs
     config.diarizer.speaker_embeddings.model_path = pretrained_speaker_model
     config.diarizer.msdd_model.model_path = pretrained_msdd  
@@ -67,6 +67,15 @@ def msdd_diarization(config: OmegaConf):
 
 
 def create_diarization(file_path: str, rttm: str | None, speakers: int = None):
-    config = configurations(file_path, "telephonic", rttm, speakers)
+    domain = ""
+    if speakers is not None:
+        if speakers > 2:
+            domain = "meeting" # used when there are 3 or more people in the audio file
+        else:
+            domain = "telephonic" # used when there are at most 2 people in the audio file
+    else:
+        domain = "telephonic"
+
+    config = configurations(file_path, domain, rttm, speakers)
     #cluster_diarization(config)
     msdd_diarization(config)
diff --git a/backend/summarization/app/summarize.py b/backend/summarization/app/summarize.py
@@ -1,10 +1,12 @@
-from llama_index import ListIndex, SimpleDirectoryReader, ServiceContext
+from llama_index import ListIndex, SimpleDirectoryReader, ServiceContext, set_global_tokenizer
 from llama_index.llms import LlamaCPP
 from llama_index.llms.llama_utils import (
     completion_to_prompt,
 )
 import json, tempfile, os
-
+set_global_tokenizer(
+    AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1").encode
+)
 def load_data(file_path: str):
     with open(file_path, 'r') as file:
         json_data = file.read()

diff --git a/backend/translation/app/translate.py b/backend/translation/app/translate.py
@@ -28,7 +28,7 @@ def translate_to_lang(data: dict, from_language: str, to_language: str) -> dict:
     if from_language not in language_codes:
         translation =  "Not a viable language"
         return translation
-    elif from_language != "en":
+    elif from_language != "en" and to_language != "en":
         install_language(from_language, "en")
         whole_text_translated = translate_json(data, from_language, "en")
         install_language("en", to_language)

diff --git a/frontend/src/TranscriptionDisplay.js b/frontend/src/TranscriptionDisplay.js
@@ -71,7 +71,7 @@ function TranscriptionDisplay() {
       }
 
       let response = await fetch(endpoint);
-      if (!response.ok && selectedLanguage!==originalLanguage) {
+      if (!response.ok && selectedLanguage && selectedLanguage!==originalLanguage) {
         const userResponse = window.confirm("No translation found of , do you want to start a translation? (this might take some time)");
 
         if (userResponse) {
@@ -81,6 +81,7 @@ function TranscriptionDisplay() {
         } else {
           // User clicked "Cancel"
           console.log("User clicked Cancel");
+          setSelectedLanguage("")
           return;
         }
         response = await fetch(endpoint,
@@ -324,7 +325,7 @@ function TranscriptionDisplay() {
                           Summarize
                         </button>
                       ) : (
-                        data && <div className="message-field">{data.message}</div>
+                        data && <div className="message-field summary-button blue-button">{data.message}</div>
                       )
                     )
                 )