From 7501a355b5d4cd9fdad9b97a3d28203cf376ba91 Mon Sep 17 00:00:00 2001
From: vikramxd
Date: Sat, 9 Nov 2024 10:54:18 +0000
Subject: [PATCH] Update dependencies in requirements.txt

---
 logs/api.log                   |  5 +++++
 requirements.txt               |  3 +++
 src/api/mochi_serve.py         |  8 ++++---
 src/configs/mochi_settings.py  | 11 +++++-----
 src/scripts/mochi_diffusers.py | 38 +++++++++++++++++++++++++++++++++-
 5 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 logs/api.log

diff --git a/logs/api.log b/logs/api.log
new file mode 100644
index 0000000..bff60c5
--- /dev/null
+++ b/logs/api.log
@@ -0,0 +1,5 @@
+2024-11-09 07:49:24.302 | INFO | __main__:<module>:271 - Starting server on port 8000
+2024-11-09 08:14:16.781 | INFO | __main__:<module>:271 - Starting server on port 8000
+2024-11-09 08:19:05.939 | INFO | __main__:<module>:271 - Starting server on port 8000
+2024-11-09 08:31:58.329 | INFO | __main__:<module>:271 - Starting server on port 8000
+2024-11-09 08:41:50.867 | INFO | __main__:<module>:271 - Starting server on port 8000
diff --git a/requirements.txt b/requirements.txt
index 70c9f1f..2f07b14 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,3 +7,6 @@ wandb
 setuptools
 pydantic-settings
 boto3
+prometheus-client
+loguru
+bitsandbytes
diff --git a/src/api/mochi_serve.py b/src/api/mochi_serve.py
index b19619b..079ad89 100644
--- a/src/api/mochi_serve.py
+++ b/src/api/mochi_serve.py
@@ -160,7 +160,7 @@ def predict(self, inputs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
 
             # Upload to S3
             with open(temp_video_path, "rb") as video_file:
-                s3_response = mp4_to_s3_json(video_file, "video.mp4")
+                s3_response = mp4_to_s3_json(video_file, f"mochi_{int(time.time())}.mp4")
 
             result = {
                 "status": "success",
@@ -241,7 +241,7 @@ def encode_response(self, output: Union[Dict[str, Any], List[Any]]) -> Dict[str,
 if __name__ == "__main__":
     import sys
     prometheus_logger = PrometheusLogger()
-    prometheus_logger.mount(path="/metrics", app=make_asgi_app(registry=registry))
+    prometheus_logger.mount(path="/api/v1/metrics", app=make_asgi_app(registry=registry))
     # Configure logging
     logger.remove()
     logger.add(
@@ -266,7 +266,9 @@ def encode_response(self, output: Union[Dict[str, Any], List[Any]]) -> Dict[str,
         devices="auto",
         max_batch_size=1,
         track_requests=True,
-        loggers=prometheus_logger
+        loggers=prometheus_logger,
+        generate_client_file=False
+
     )
     logger.info("Starting server on port 8000")
     server.run(port=8000)
diff --git a/src/configs/mochi_settings.py b/src/configs/mochi_settings.py
index c67b0c9..24570f0 100644
--- a/src/configs/mochi_settings.py
+++ b/src/configs/mochi_settings.py
@@ -38,21 +38,20 @@ class MochiSettings(BaseSettings):
     model_name: str = 'Genmo-Mochi'
     transformer_path: str = "imnotednamode/mochi-1-preview-mix-nf4"
     pipeline_path: str = "VikramSingh178/mochi-diffuser-bf16"
-    dtype: torch.dtype = torch.bfloat16
+    dtype: torch.dtype = torch.float16
     device: str = "cuda"
 
     # Optimization Settings
     enable_vae_tiling: bool = True
     enable_model_cpu_offload: bool = True
-    enable_attention_slicing: bool = False
-    attention_slice_size: Optional[int] = None
+
     # Video Generation Settings
     num_inference_steps: int = 20
    guidance_scale: float = 7.5
-    height: int = 480
-    width: int = 848
-    num_frames: int = 150
+    height: int = 640
+    width: int = 480
+    num_frames: int = 60
     fps: int = 10
 
     class Config:
diff --git a/src/scripts/mochi_diffusers.py b/src/scripts/mochi_diffusers.py
index e7c8797..d3bc232 100644
--- a/src/scripts/mochi_diffusers.py
+++ b/src/scripts/mochi_diffusers.py
@@ -170,4 +170,40 @@ def clear_memory(self) -> None:
         if torch.cuda.is_available():
             logger.info("Clearing CUDA memory cache")
             torch.cuda.empty_cache()
-            torch.cuda.reset_peak_memory_stats()
\ No newline at end of file
+            torch.cuda.reset_peak_memory_stats()
+
+if __name__ == "__main__":
+    from configs.mochi_settings import MochiSettings
+
+    settings = MochiSettings()
+
+    # Initialize inference class
+    mochi_inference = MochiInference(settings)
+
+    # Define prompt and output path
+    prompt = "Close-up of a chameleon's eye, with its scaly skin changing color. Ultra high resolution 4k."
+    output_path = "/home/ubuntu/Minimochi/outputs/output.mp4"
+
+    # Generate video
+    try:
+        video_path = mochi_inference.generate(
+            prompt=prompt,
+            negative_prompt='((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), out of frame, extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))), out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))',
+            output_path=output_path,
+            num_inference_steps=30,
+            guidance_scale=3.5,
+            height=480,
+            width=848,
+            num_frames=150,
+            fps=30,
+        )
+        print(f"Video saved to: {video_path}")
+    except RuntimeError as e:
+        print(f"Failed to generate video: {e}")
+
+    # Display GPU memory usage for debugging
+    allocated, max_allocated = mochi_inference.get_memory_usage()
+    print(f"Memory usage: {allocated:.2f}GB (peak: {max_allocated:.2f}GB)")
+
+    # Clear memory cache after inference
+    mochi_inference.clear_memory()
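
Reviewer note: with make_asgi_app now mounted at /api/v1/metrics, anything still scraping the old /metrics path will get a 404 after this patch. A minimal smoke test against a locally running server, as a sketch: it assumes only the port-8000 default from server.run() above and uses the stdlib, nothing from this repo.

    # Fetch the relocated Prometheus endpoint and show the first few lines.
    # Assumes the LitServe app from src/api/mochi_serve.py is running locally
    # on port 8000 (hypothetical local setup, not part of this patch).
    import urllib.request

    with urllib.request.urlopen("http://localhost:8000/api/v1/metrics") as resp:
        body = resp.read().decode("utf-8")

    # Prometheus exposition format is line-oriented: "# HELP" / "# TYPE"
    # headers followed by one sample per line.
    for line in body.splitlines()[:10]:
        print(line)

If Prometheus itself scrapes this service, its scrape config needs a matching metrics_path: /api/v1/metrics, since the scraper's default path is /metrics.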