mobiusml · HRashidi · Nov 11, 2024 · Nov 15, 2024 · Nov 27, 2024 · movchan74
diff --git a/aana_chat_with_video/configs/deployments.py b/aana_chat_with_video/configs/deployments.py
@@ -43,10 +43,11 @@
         "instance": VLLMDeployment.options(
             num_replicas=1,
             ray_actor_options={"num_gpus": 0.25},
+            max_ongoing_requests=500,
             user_config=VLLMConfig(
                 model="Qwen/Qwen2-VL-2B-Instruct",
                 dtype=Dtype.AUTO,
-                gpu_memory_reserved=12000,
+                gpu_memory_reserved=15000,
                 max_model_len=32768,
                 enforce_eager=True,
                 default_sampling_params=SamplingParams(

diff --git a/aana_chat_with_video/endpoints/index_video.py b/aana_chat_with_video/endpoints/index_video.py
@@ -162,7 +162,7 @@ async def run(  # noqa: C901
 
                 timestamps.extend(frames_dict["timestamps"])
                 frame_ids.extend(frames_dict["frame_ids"])
-                chat_prompt = "Describe the content of the following image in a single sentence:"
+                chat_prompt = "Caption the content of the following image in a single and concise sentence"
                 dialogs = [
                     ImageChatDialog.from_prompt(prompt=chat_prompt, images=[frame]) for frame in frames_dict["frames"]
                 ]