Commit 47cf83c

vinicvaz committed Nov 27, 2023
2 parents 305593e + 4705a31
Showing 11 changed files with 186 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests-dev.yml
@@ -38,7 +38,7 @@ jobs:

- name: Run organize and build images
run: |
-        domino piece organize --build-images --source-url=https://github.com/${{github.repository}} --tag-overwrite=dev
+        domino piece organize --build-images --source-url=https://github.com/${{github.repository}} --tag-overwrite=development
- name: Install Tests Dependencies
run: pip install -r requirements-tests.txt
20 changes: 20 additions & 0 deletions dependencies/Dockerfile_whisper
@@ -0,0 +1,20 @@
FROM ghcr.io/tauffer-consulting/domino-base-piece:gpu


# Install specific requirements
ENV TZ=UTC
ARG DEBIAN_FRONTEND=noninteractive
RUN apt update
RUN apt install ffmpeg -y
RUN apt install git -y

# Copy the pieces repository source code into the image
COPY config.toml domino/pieces_repository/
COPY pieces domino/pieces_repository/pieces
COPY .domino domino/pieces_repository/.domino

# Install specific python dependencies
RUN pip install -U openai-whisper==20231106

# Download the Whisper "tiny" model at build time so it is cached in the image
RUN python3 -c "import whisper; whisper.load_model('tiny')"
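
Note: preloading the model in the final RUN layer caches the checkpoint inside the image, so the piece does not have to download weights at container start. For reference, the openai-whisper calls the new piece relies on reduce to roughly the following sketch (audio.mp3 is a hypothetical input file):

import whisper

model = whisper.load_model("tiny")        # loads the checkpoint cached at build time
result = model.transcribe("audio.mp3")    # returns a dict with "text", "segments", "language", ...
print(result["text"])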
22 changes: 22 additions & 0 deletions pieces/AudioTranscriptionLocalPiece/metadata.json
@@ -0,0 +1,22 @@
{
"name": "AudioTranscriptionLocalPiece",
"description": "Runs transcription locally using Whisper, a general-purpose speech recognition model. Ref: https://github.com/openai/whisper",
"dependency": {
"dockerfile": "Dockerfile_whisper"
},
"container_resources": {
"use_gpu": true,
"requests": {
"cpu": "1000m",
"memory": "3Gi"
},
"limits": {
"cpu": "5000m",
"memory": "15Gi"
}
},
"style": {
"node_label": "Audio Transcription Local",
"icon_class_name": "fa-solid:comment-dots"
}
}
45 changes: 45 additions & 0 deletions pieces/AudioTranscriptionLocalPiece/models.py
@@ -0,0 +1,45 @@
from pydantic import BaseModel, Field, FilePath
from typing import Union
from enum import Enum


class ModelSizeType(str, Enum):
tiny = "tiny"
base = "base"
small = "small"
medium = "medium"
large = "large"


class OutputTypeType(str, Enum):
string = "string"
file = "file"
both = "both"


class InputModel(BaseModel):
audio_file_path: str = Field(
description='The path to the audio file to process.',
json_schema_extra={
"from_upstream": "always"
}
)
output_type: OutputTypeType = Field(
default=OutputTypeType.string,
description='The type of output for the result text. Options are `string`, `file` or `both`. Default is `string`.',
)
model_size: ModelSizeType = Field(
description='The size of the model to use. Default is tiny.',
default=ModelSizeType.tiny
)


class OutputModel(BaseModel):
transcription_result: str = Field(
default="",
description="The result transcription text as a string."
)
file_path_transcription_result: Union[FilePath, str] = Field(
default="",
description="The path to the text file with the transcription result."
)
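
Note: because both enums subclass str, pydantic coerces plain strings into the enum members, which is what the dry-run test further below passes. A quick illustration with hypothetical values:

m = InputModel(audio_file_path="meeting.mp3", output_type="both", model_size="small")
assert m.model_size is ModelSizeType.small
assert m.output_type == "both"    # str-based enums also compare equal to their values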
54 changes: 54 additions & 0 deletions pieces/AudioTranscriptionLocalPiece/piece.py
@@ -0,0 +1,54 @@
from domino.base_piece import BasePiece
from .models import InputModel, OutputModel
import whisper


class AudioTranscriptionLocalPiece(BasePiece):

def piece_function(self, input_data: InputModel):

self.logger.info("Loading model...")
model = whisper.load_model(input_data.model_size)

self.logger.info("Transcribing audio file...")
result = model.transcribe(str(input_data.audio_file_path))["text"]

if input_data.output_type == "string":
self.logger.info("Transcription complete successfully. Result returned as string.")
transcription_result = result
output_file_path = ""
elif input_data.output_type == "file":
self.logger.info("Transcription complete successfully. Result returned as file.")
transcription_result = ""
output_file_path = "transcription_result.txt"
with open(output_file_path, "w") as f:
f.write(result)
else:
self.logger.info("Transcription complete successfully. Result returned as string and file.")
transcription_result = result
output_file_path = "transcription_result.txt"
with open(output_file_path, "w") as f:
f.write(result)

# Display result in the Domino GUI
self.format_display_result(input_data=input_data, string_transcription_result=result)

return OutputModel(
transcription_result=transcription_result,
file_path_transcription_result=output_file_path
)

def format_display_result(self, input_data: InputModel, string_transcription_result: str):
md_text = f"""## Audio Transcription Result \n
{string_transcription_result} \n
## Args
**model_size**: {input_data.model_size.value}\n
"""
file_path = f"{self.results_path}/display_result.md"
with open(file_path, "w") as f:
f.write(md_text)
self.display_result = {
"file_type": "md",
"file_path": file_path
}
Binary file not shown.
@@ -0,0 +1,20 @@
from domino.testing import piece_dry_run
from pathlib import Path


test_file = str(Path(__file__).parent / "test-audio-to-transcribe.mp3")


def test_whisper_piece():
input_data = {
"audio_file_path": test_file,
"model_size": "tiny",
"output_type": "both"
}
piece_output = piece_dry_run(
piece_name="AudioTranscriptionLocalPiece",
input_data=input_data,
)
assert piece_output["transcription_result"]
assert piece_output["file_path_transcription_result"]
assert "audio" in piece_output.get("transcription_result", "").lower()
2 changes: 1 addition & 1 deletion pieces/AudioTranscriptionPiece/metadata.json
@@ -19,6 +19,6 @@
],
"style": {
"node_label": "OpenAI Audio Transcript",
"icon_class_name": "fa-solid:headset"
"icon_class_name": "fa-solid:comment-dots"
}
}
38 changes: 11 additions & 27 deletions pieces/AudioTranscriptionPiece/models.py
@@ -2,60 +2,44 @@
from enum import Enum



class OutputTypeType(str, Enum):
-    """
-    Output type for the result text
-    """
    file = "file"
    string = "string"
-    file_and_string = "file_and_string"
+    both = "both"


class InputModel(BaseModel):
-    """
-    AudioTranscriptPiece input model
-    """
    audio_file_path: str = Field(
-        ...,
        description='The path to the audio file to process.',
        json_schema_extra={
            "from_upstream": "always"
        }
    )
    output_type: OutputTypeType = Field(
        default=OutputTypeType.string,
-        description='The type of output for the result text',
+        description='The type of output for the result text. Options are `string`, `file` or `both`. Default is `string`.',
    )
    temperature: float = Field(
        description="What sampling temperature to use, between 0 and 1",
        default=0.1,
        gt=0.,
        le=1,
    )


class OutputModel(BaseModel):
-    """
-    AudioTranscriptPiece output model
-    """
-    message: str = Field(
-        description="Output message to log"
-    )
-    string_transcription_result: str = Field(
-        default=None,
+    transcription_result: str = Field(
+        default="",
        description="The result transcription text as a string."
    )
    file_path_transcription_result: str = Field(
-        default=None,
-        description="The result transcription text as a file path."
+        default="",
+        description="The path to the text file with the transcription result."
    )


class SecretsModel(BaseModel):
-    """
-    AudioTranscriptPiece secret model
-    """
    OPENAI_API_KEY: str = Field(
        description="OpenAI API key"
-    )
\ No newline at end of file
+    )
26 changes: 11 additions & 15 deletions pieces/AudioTranscriptionPiece/piece.py
@@ -16,7 +16,7 @@ def piece_function(self, input_data: InputModel, secrets_data: SecretsModel):

# Input arguments are retrieved from the Input model object
file_path = input_data.audio_file_path

print("Making OpenAI audio transcription request...")
try:
full_audio = AudioSegment.from_mp3(file_path)
@@ -31,9 +31,9 @@ def piece_function(self, input_data: InputModel, secrets_data: SecretsModel):
endpoint = min((i+1)*ten_minutes, total_time-1)
minutes = full_audio[i*ten_minutes:endpoint]
minutes.export(f"audio_piece_{i}.mp3", format="mp3")
-            audio_file= open(f"audio_piece_{i}.mp3", "rb")
+            audio_file = open(f"audio_piece_{i}.mp3", "rb")
            transcript = client.audio.transcriptions.create(
-                model="whisper-1", 
+                model="whisper-1",
file=audio_file,
temperature=input_data.temperature
)
@@ -48,35 +48,31 @@ def piece_function(self, input_data: InputModel, secrets_data: SecretsModel):

# Display result in the Domino GUI
self.format_display_result(input_data=input_data, string_transcription_result=full_transcript)

if input_data.output_type == "string":
self.logger.info("Transcription complete successfully. Result returned as string.")
msg = f"Transcription complete successfully. Result returned as string."
return OutputModel(
message=msg,
string_transcription_result=full_transcript
transcription_result=full_transcript,
file_path_transcription_result=""
)

output_file_path = f"{self.results_path}/audio_transcription_result.txt"
with open(output_file_path, "w") as f:
f.write(full_transcript)

if input_data.output_type == "file":
self.logger.info(f"Transcription complete successfully. Result returned as file in {output_file_path}")
msg = f"Transcription complete successfully. Result returned as file."
return OutputModel(
-                message=msg,
+                transcription_result="",
file_path_transcription_result=output_file_path
)

self.logger.info(f"Transcription complete successfully. Result returned as string and file in {output_file_path}")
msg = f"Transcription complete successfully. Result returned as string and file."
return OutputModel(
-            message=msg,
-            string_transcription_result=full_transcript,
+            transcription_result=full_transcript,
file_path_transcription_result=output_file_path
)

def format_display_result(self, input_data: InputModel, string_transcription_result: str):
md_text = f"""
## Generated transcription: \n
@@ -91,4 +87,4 @@ def format_display_result(self, input_data: InputModel, string_transcription_result: str):
self.display_result = {
"file_type": "md",
"file_path": file_path
-        }
\ No newline at end of file
+        }
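
Note: the unchanged part of this piece splits long recordings into ten-minute chunks before uploading, since the OpenAI transcription endpoint caps upload size. A self-contained sketch of that pattern with pydub, where input.mp3 and the chunk length are illustrative:

import math
from pydub import AudioSegment

full_audio = AudioSegment.from_mp3("input.mp3")
ten_minutes = 10 * 60 * 1000                  # pydub indexes audio in milliseconds
total_time = len(full_audio)

for i in range(math.ceil(total_time / ten_minutes)):
    endpoint = min((i + 1) * ten_minutes, total_time - 1)
    chunk = full_audio[i * ten_minutes:endpoint]
    chunk.export(f"audio_piece_{i}.mp3", format="mp3")    # each chunk is transcribed separately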
3 changes: 1 addition & 2 deletions pieces/ImageGeneratorPiece/models.py
@@ -34,7 +34,6 @@ class InputModel(BaseModel):
ImageGeneratorPiece input model
"""
prompt: str = Field(
-        ...,
description="A text description of the desired image",
)
size: ImageSize = Field(
Expand All @@ -43,7 +42,7 @@ class InputModel(BaseModel):
)
image_format: ImageFormat = Field(
default=ImageFormat.url,
description="The format in which the generated image is returned",
description="The format in which the generated image is returned",
)
output_type: OutputTypeType = Field(
default=OutputTypeType.string,
