generated from Tauffer-Consulting/domino_pieces_repository_template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'dev' of https://github.com/Tauffer-Consulting/openai_domino_pieces into dev
- Loading branch information
Showing
11 changed files
with
186 additions
and
46 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
FROM ghcr.io/tauffer-consulting/domino-base-piece:gpu

# Install system requirements.
ENV TZ=UTC
ARG DEBIAN_FRONTEND=noninteractive
# Update and install in a single layer so a cached, stale package index is
# never reused by a later install, then drop the index to keep the image small.
RUN apt-get update \
    && apt-get install -y ffmpeg git \
    && rm -rf /var/lib/apt/lists/*

# Install specific python dependencies
RUN pip install --no-cache-dir -U openai-whisper==20231106

# Download the Whisper model at build time so the default ("tiny") size is
# already cached in the image when the piece runs.
RUN python3 -c "import whisper; whisper.load_model('tiny')"

# Copy the pieces source code last: it changes most often, and keeping it
# after the dependency layers lets those layers stay cached between builds.
COPY config.toml domino/pieces_repository/
COPY pieces domino/pieces_repository/pieces
COPY .domino domino/pieces_repository/.domino
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"name": "AudioTranscriptionLocalPiece", | ||
"description": "Runs transcription locally using Whisper, a general-purpose speech recognition model. Ref: https://github.com/openai/whisper", | ||
"dependency": { | ||
"dockerfile": "Dockerfile_whisper" | ||
}, | ||
"container_resources": { | ||
"use_gpu": true, | ||
"requests": { | ||
"cpu": "1000m", | ||
"memory": "3Gi" | ||
}, | ||
"limits": { | ||
"cpu": "5000m", | ||
"memory": "15Gi" | ||
} | ||
}, | ||
"style": { | ||
"node_label": "Audio Transcription Local", | ||
"icon_class_name": "fa-solid:comment-dots" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
from pydantic import BaseModel, Field, FilePath | ||
from typing import Union | ||
from enum import Enum | ||
|
||
|
||
class ModelSizeType(str, Enum):
    # Whisper model sizes selectable for the piece. Members are also plain
    # strings (str mixin), so they compare equal to their literal values.
    tiny = "tiny"
    base = "base"
    small = "small"
    medium = "medium"
    large = "large"
|
||
|
||
class OutputTypeType(str, Enum):
    # How the transcription is handed back: as a string, as a text file,
    # or as both. Members are also plain strings (str mixin).
    string = "string"
    file = "file"
    both = "both"
|
||
|
||
class InputModel(BaseModel):
    # Input schema of the piece. Plain `#` comments are used on purpose:
    # a class docstring would be emitted into the generated JSON schema.

    # Path of the audio file to transcribe.
    # NOTE(review): "from_upstream": "always" presumably marks the value as
    # always supplied by an upstream piece - confirm against Domino docs.
    audio_file_path: str = Field(
        json_schema_extra={"from_upstream": "always"},
        description="The path to the audio file to process.",
    )
    # Selects string output, file output, or both; defaults to string.
    output_type: OutputTypeType = Field(
        description=(
            "The type of output for the result text. "
            "Options are `string`, `file` or `both`. Default is `string`."
        ),
        default=OutputTypeType.string,
    )
    # Whisper model size to load; defaults to the smallest one.
    model_size: ModelSizeType = Field(
        default=ModelSizeType.tiny,
        description="The size of the model to use. Default is tiny.",
    )
|
||
|
||
class OutputModel(BaseModel):
    # Output schema of the piece. Plain `#` comments are used on purpose:
    # a class docstring would be emitted into the generated JSON schema.

    # Transcribed text; defaults to an empty string.
    transcription_result: str = Field(
        description="The result transcription text as a string.",
        default="",
    )
    # Location of the written text file; defaults to an empty string, which
    # is why a plain `str` is accepted next to `FilePath`.
    file_path_transcription_result: Union[FilePath, str] = Field(
        description="The path to the text file with the transcription result.",
        default="",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from domino.base_piece import BasePiece | ||
from .models import InputModel, OutputModel | ||
import whisper | ||
|
||
|
||
class AudioTranscriptionLocalPiece(BasePiece):
    """Piece that transcribes an audio file locally with a Whisper model."""

    # Name of the text file written when a file output is requested.
    _TRANSCRIPTION_FILE_NAME = "transcription_result.txt"

    def piece_function(self, input_data: InputModel) -> OutputModel:
        """Transcribe the input audio file and return the requested outputs.

        Args:
            input_data: Validated input carrying the audio file path, the
                Whisper model size and the desired output type.

        Returns:
            An ``OutputModel`` whose ``transcription_result`` holds the text
            (empty when only a file was requested) and whose
            ``file_path_transcription_result`` holds the path of the written
            text file (empty when only a string was requested).
        """
        self.logger.info("Loading model...")
        # Pass the plain string value, as format_display_result already does,
        # rather than the enum member itself.
        model = whisper.load_model(input_data.model_size.value)

        self.logger.info("Transcribing audio file...")
        result = model.transcribe(str(input_data.audio_file_path))["text"]

        # output_type is a str-based enum, so comparing with literals works.
        output_type = input_data.output_type
        if output_type == "string":
            returned_as = "string"
        elif output_type == "file":
            returned_as = "file"
        else:
            returned_as = "string and file"

        # Everything except "file" returns the text; everything except
        # "string" writes the text file.
        transcription_result = result if output_type != "file" else ""
        output_file_path = ""
        if output_type != "string":
            output_file_path = self._write_transcription_file(result)

        self.logger.info(
            f"Transcription complete successfully. Result returned as {returned_as}."
        )

        # Display result in the Domino GUI
        self.format_display_result(input_data=input_data, string_transcription_result=result)

        return OutputModel(
            transcription_result=transcription_result,
            file_path_transcription_result=output_file_path
        )

    def _write_transcription_file(self, text: str) -> str:
        """Write *text* to the transcription file and return the file's path."""
        # Write next to the display result, inside the piece's results path,
        # instead of whatever the current working directory happens to be.
        output_file_path = f"{self.results_path}/{self._TRANSCRIPTION_FILE_NAME}"
        # Explicit UTF-8: transcripts may contain non-ASCII text and the
        # platform default encoding is not guaranteed to handle it.
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(text)
        return output_file_path

    def format_display_result(self, input_data: InputModel, string_transcription_result: str):
        """Write a markdown summary of the run and register it for display.

        Args:
            input_data: Input of the run; only ``model_size`` is reported.
            string_transcription_result: Transcribed text to show.
        """
        md_text = f"""## Audio Transcription Result \n
{string_transcription_result} \n
## Args
**model_size**: {input_data.model_size.value}\n
"""
        file_path = f"{self.results_path}/display_result.md"
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(md_text)
        self.display_result = {
            "file_type": "md",
            "file_path": file_path
        }
Binary file not shown.
20 changes: 20 additions & 0 deletions
20
pieces/AudioTranscriptionLocalPiece/test_localtranscription_piece.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from domino.testing import piece_dry_run | ||
from pathlib import Path | ||
|
||
|
||
# Sample recording stored next to this test module.
test_file = str(Path(__file__).with_name("test-audio-to-transcribe.mp3"))


def test_whisper_piece():
    """Dry-run the piece on the sample audio and check both outputs are set."""
    piece_output = piece_dry_run(
        piece_name="AudioTranscriptionLocalPiece",
        input_data={
            "audio_file_path": test_file,
            "model_size": "tiny",
            "output_type": "both",
        },
    )

    # With output_type "both", the text and the file path must be non-empty.
    assert piece_output["transcription_result"]
    assert piece_output["file_path_transcription_result"]

    # The transcript is expected to contain the word "audio".
    transcription = piece_output.get("transcription_result", "")
    assert "audio" in transcription.lower()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters