Skip to content

Commit

Permalink
add blip for captioning
Browse files Browse the repository at this point in the history
  • Loading branch information
HRashidi committed Nov 28, 2024
1 parent a337473 commit 76f6f28
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 105 deletions.
19 changes: 8 additions & 11 deletions aana_chat_with_video/configs/deployments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from aana.core.models.sampling import SamplingParams
from aana.core.models.types import Dtype
from aana.deployments.vad_deployment import VadConfig, VadDeployment
+ from aana.deployments.hf_blip2_deployment import HFBlip2Config, HFBlip2Deployment
from aana.deployments.vllm_deployment import VLLMConfig, VLLMDeployment
from aana.deployments.whisper_deployment import (
WhisperComputeType,
Expand Down Expand Up @@ -40,19 +41,15 @@
},
{
"name": "captioning_deployment",
- "instance": VLLMDeployment.options(
+ "instance": HFBlip2Deployment.options(
num_replicas=1,
max_ongoing_requests=1000,
ray_actor_options={"num_gpus": 0.25},
- user_config=VLLMConfig(
- model="Qwen/Qwen2-VL-2B-Instruct",
- dtype=Dtype.AUTO,
- gpu_memory_reserved=12000,
- max_model_len=32768,
- enforce_eager=True,
- default_sampling_params=SamplingParams(
- temperature=0.0, top_p=1.0, top_k=-1, max_tokens=512
- ),
- engine_args={"trust_remote_code": True},
+ user_config=HFBlip2Config(
+ model="Salesforce/blip2-opt-2.7b",
+ dtype=Dtype.FLOAT16,
+ batch_size=2,
+ num_processing_threads=2,
).model_dump(mode="json"),
),
},
Expand Down
2 changes: 1 addition & 1 deletion aana_chat_with_video/configs/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class Settings(AanaSettings):
"""A pydantic model for App settings."""

asr_model_name: str = "whisper_medium"
- captioning_model_name: str = "qwen2-vl-2b-instruct"
+ captioning_model_name: str = "hf_blip2_opt_2_7b"
max_video_len: int = 60 * 20 # 20 minutes


Expand Down
16 changes: 5 additions & 11 deletions aana_chat_with_video/endpoints/index_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,13 @@ async def run( # noqa: C901

timestamps.extend(frames_dict["timestamps"])
frame_ids.extend(frames_dict["frame_ids"])
- chat_prompt = "Describe the content of the following image in a single sentence:"
- dialogs = [
- ImageChatDialog.from_prompt(prompt=chat_prompt, images=[frame]) for frame in frames_dict["frames"]
- ]

- # Collect the tasks to run concurrently and wait for them to finish
- tasks = [self.captioning_handle.chat(dialog) for dialog in dialogs]
- captioning_output = await asyncio.gather(*tasks)
- captioning_output = [caption["message"].content for caption in captioning_output]
- captions.extend(captioning_output)
+ captioning_output = await self.captioning_handle.generate_batch(
+ images=frames_dict["frames"]
+ )
+ captions.extend(captioning_output["captions"])

yield {
- "captions": captioning_output,
+ "captions": captioning_output["captions"],
"timestamps": frames_dict["timestamps"],
}

Expand Down
Loading

0 comments on commit 76f6f28

Please sign in to comment.