Running Finetuned Model Error #821

Open
0xm00n opened this issue Jun 19, 2024 · 0 comments

0xm00n commented Jun 19, 2024

Hi,

I fine-tuned the base whisper-large-v3 model, converted it to CTranslate2, and passed the converted model to WhisperX. However, I get the following error:

Traceback (most recent call last):
  File "/usr/local/bin/whisperx", line 8, in <module>
    sys.exit(cli())
  File "/usr/local/lib/python3.10/dist-packages/whisperx/transcribe.py", line 176, in cli
    result = model.transcribe(audio, batch_size=batch_size, chunk_size=chunk_size, print_progress=print_progress)
  File "/usr/local/lib/python3.10/dist-packages/whisperx/asr.py", line 194, in transcribe
    language = language or self.detect_language(audio)
  File "/usr/local/lib/python3.10/dist-packages/whisperx/asr.py", line 252, in detect_language
    encoder_output = self.model.encode(segment)
  File "/usr/local/lib/python3.10/dist-packages/whisperx/asr.py", line 86, in encode
    return self.model.encode(features, to_cpu=to_cpu)
ValueError: Invalid input features shape: expected an input with shape (1, 128, 3000), but got an input with shape (1, 80, 3000) instead
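
My guess from the shapes: the converted model expects 128 mel bins (the feature resolution of whisper-large-v3), while WhisperX is computing the 80-bin features used by earlier Whisper checkpoints. A quick sanity check of what the fine-tuned checkpoint expects (assuming preprocessor_config.json is still present in the checkpoint directory):

from transformers import WhisperFeatureExtractor

# Reads preprocessor_config.json from the checkpoint directory.
fe = WhisperFeatureExtractor.from_pretrained("/path/to/checkpoint/checkpoint-5000")
print(fe.feature_size)  # 128 for large-v3, 80 for earlier Whisper models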

Am I missing a step? I followed this guide to fine-tune Whisper, https://huggingface.co/blog/fine-tune-whisper, except that I did not push to the Hub:

trainer.train()

model.save_pretrained(training_args.output_dir)
processor.save_pretrained(training_args.output_dir)
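
If I understand the guide correctly, processor.save_pretrained is the step that writes preprocessor_config.json, which records the 128-mel feature size of large-v3, so the file should end up in training_args.output_dir:

import os

# Quick check that the feature-extractor config was saved with the checkpoint
# (training_args as in the fine-tuning guide).
print("preprocessor_config.json" in os.listdir(training_args.output_dir))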



Here is my code. I am using Google Colab with an L4 GPU:

import ctranslate2

# converting model to CTranslate2

model_path = "/path/to/checkpoint/checkpoint-5000"
output_dir = "/path/to/checkpoint/model/ctranslate2_model"

converter = ctranslate2.converters.TransformersConverter(
    model_name_or_path=model_path,
    load_as_float16=True
)

converter.convert(output_dir=output_dir, quantization="float16", force=True)
print(f"Model successfully converted to CTranslate2 format at {output_dir}")
import whisperx
import json
import os
import re
from google.colab import userdata

# running inference

HF_TOKEN = userdata.get('HF_TOKEN')

os.environ["TOKENIZERS_PARALLELISM"] = "false"

print(os.environ["TOKENIZERS_PARALLELISM"])

device = "cuda"
batch_size = 16 
compute_type = "float16" 

model = whisperx.load_model(output_dir, device, compute_type=compute_type)

audio_folder = "/path/to/audio/folder/"
output_folder = "/path/to/base_results/"

os.makedirs(output_folder, exist_ok=True)

# iterate over all WAV files in the audio folder
for filename in os.listdir(audio_folder):
    if filename.endswith(".wav"):
        audio_file = os.path.join(audio_folder, filename)
        print(audio_file)

        # extract the substring number from the audio file name
        match = re.search(r'(\d+) - ', filename)
        if match:
            number = match.group(1)
            output_file = os.path.join(output_folder, f"{number}_result.txt")
        else:
            # if the substring number is not found, use the original filename
            output_file = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}_result.txt")

        audio = whisperx.load_audio(audio_file)
        result = model.transcribe(audio, batch_size=batch_size)
        print(result["segments"])  # before alignment

        # align whisper output
        model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
        result = whisperx.align(result["segments"], model_a, metadata, audio, device, return_char_alignments=False)
        print(result["segments"])  # after alignment

        # assign speaker labels (note: this reloads the diarization pipeline for every file)
        diarize_model = whisperx.DiarizationPipeline(use_auth_token=HF_TOKEN, device=device)
        diarize_segments = diarize_model(audio)
        result = whisperx.assign_word_speakers(diarize_segments, result)
        print(diarize_segments)
        print(result["segments"])  # segments are now assigned speaker IDs

        # write the segments as JSON to the output file
        with open(output_file, "w") as file:
            json.dump(result["segments"], file, indent=4)

        print(f"Result saved to: {output_file}")