Set diarization device manually

m-bain · May 4, 2023 · d8f0ef4
1 parent 2d59eb9
commit d8f0ef4
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 2 deletions.
diff --git a/whisperx/diarize.py b/whisperx/diarize.py
@@ -1,14 +1,19 @@
 import numpy as np
 import pandas as pd
 from pyannote.audio import Pipeline
+from typing import Optional, Union
+import torch
 
 class DiarizationPipeline:
     def __init__(
         self,
         model_name="pyannote/speaker-diarization@2.1",
         use_auth_token=None,
+        device: Optional[Union[str, torch.device]] = "cpu",
     ):
-        self.model = Pipeline.from_pretrained(model_name, use_auth_token=use_auth_token)
+        if isinstance(device, str):
+            device = torch.device(device)
+        self.model = Pipeline.from_pretrained(model_name, use_auth_token=use_auth_token).to(device)
 
     def __call__(self, audio, min_speakers=None, max_speakers=None):
         segments = self.model(audio, min_speakers=min_speakers, max_speakers=max_speakers)

diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
@@ -193,8 +193,9 @@ def cli():
         if hf_token is None:
             print("Warning, no --hf_token used, needs to be saved in environment variable, otherwise will throw error loading diarization model...")
         tmp_results = results
+        print(">>Performing diarization...")
         results = []
-        diarize_model = DiarizationPipeline(use_auth_token=hf_token)
+        diarize_model = DiarizationPipeline(use_auth_token=hf_token, device=device)
         for result, input_audio_path in tmp_results:
             diarize_segments = diarize_model(input_audio_path, min_speakers=min_speakers, max_speakers=max_speakers)
             results_segments, word_segments = assign_word_speakers(diarize_segments, result["segments"])