
Commit 1b16921
remove streamlit demo since outdated
zRzRzRzRzRzRzR committed Sep 16, 2024
1 parent 0a558e0 commit 1b16921
Showing 9 changed files with 29 additions and 297 deletions.
4 changes: 2 additions & 2 deletions inference/cli_demo_quantization.py
@@ -84,8 +84,8 @@ def generate_video(
     # Using with compile will run faster. First time infer will cost ~30min to compile.
     # pipe.transformer.to(memory_format=torch.channels_last)

-    # for FP8 should remove pipe.enable_sequential_cpu_offload()
-    pipe.enable_sequential_cpu_offload()
+    # for FP8 should remove pipe.enable_model_cpu_offload()
+    pipe.enable_model_cpu_offload()

     # This is not for FP8 and INT8 and should remove this line
     # pipe.enable_sequential_cpu_offload()
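Note: a minimal sketch of the trade-off behind this change, assuming a CogVideoXPipeline and an illustrative `use_fp8` flag (the real script decides this from its quantization setting). `enable_model_cpu_offload()` shuttles whole sub-models to the GPU only while they run; `enable_sequential_cpu_offload()` offloads layer by layer for an even smaller footprint but much slower inference, and FP8 runs should skip offloading so the quantized weights stay on the GPU.

```python
import torch
from diffusers import CogVideoXPipeline

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)

use_fp8 = False  # illustrative flag; the real script derives this from its quantization option
if use_fp8:
    # FP8 weights are expected to stay on the GPU, so no CPU offload.
    pipe.to("cuda")
else:
    # Move each whole sub-model (text encoder, transformer, VAE) to the GPU
    # only for the duration of its forward pass.
    pipe.enable_model_cpu_offload()
    # Lower-VRAM alternative, much slower:
    # pipe.enable_sequential_cpu_offload()
```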
12 changes: 6 additions & 6 deletions inference/gradio_composite_demo/app.py
@@ -55,7 +55,7 @@
 pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
     "THUDM/CogVideoX-5b",
     transformer=CogVideoXTransformer3DModel.from_pretrained(
-        "THUDM/CogVideoX-5b-I2V", subfolder="transformers", torch_dtype=torch.bfloat16
+        "THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
     ),
     vae=pipe.vae,
     scheduler=pipe.scheduler,
@@ -65,10 +65,10 @@
 ).to(device)


-pipe.transformer.to(memory_format=torch.channels_last)
-pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
-pipe_image.transformer.to(memory_format=torch.channels_last)
-pipe_image.transformer = torch.compile(pipe_image.transformer, mode="max-autotune", fullgraph=True)
+# pipe.transformer.to(memory_format=torch.channels_last)
+# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
+# pipe_image.transformer.to(memory_format=torch.channels_last)
+# pipe_image.transformer = torch.compile(pipe_image.transformer, mode="max-autotune", fullgraph=True)

 os.makedirs("./output", exist_ok=True)
 os.makedirs("./gradio_tmp", exist_ok=True)
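Note: the torch.compile path is now opt-in. If the one-off compile cost (roughly 30 minutes on first inference, per the comment in cli_demo_quantization.py) is acceptable, the commented-out lines can be restored verbatim; a sketch, assuming `pipe` and `pipe_image` are constructed as above:

```python
import torch

# Optional speed-up: channels-last memory layout plus torch.compile.
# The first call triggers compilation; later calls reuse the compiled graph.
pipe.transformer.to(memory_format=torch.channels_last)
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
pipe_image.transformer.to(memory_format=torch.channels_last)
pipe_image.transformer = torch.compile(pipe_image.transformer, mode="max-autotune", fullgraph=True)
```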
@@ -241,7 +241,7 @@ def infer(
                 generator=torch.Generator(device="cpu").manual_seed(seed),
             ).frames
     elif image_input is not None:
-        image_input = Image.fromarray(image_input)  # Change to PIL
+        image_input = Image.fromarray(image_input).resize(size=(720, 480))  # Convert to PIL
         image = load_image(image_input)
         video_pt = pipe_image(
             image=image,
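Note: a hedged sketch of the new preprocessing step. Gradio's image component hands over a NumPy array, and the I2V pipeline expects 720x480 frames; the helper name below is illustrative, not from the demo:

```python
import numpy as np
from PIL import Image

def to_pipeline_image(image_input: np.ndarray) -> Image.Image:
    # Convert the Gradio NumPy array to PIL and force the 720x480
    # resolution CogVideoX-5b-I2V expects. A plain resize stretches
    # inputs whose aspect ratio is not 3:2; crop or pad first to avoid that.
    return Image.fromarray(image_input).resize((720, 480))
```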
66 changes: 10 additions & 56 deletions inference/gradio_web_demo.py
@@ -2,6 +2,10 @@
 This is the main file for the gradio web demo. It uses the CogVideoX-2B model to generate videos.
 Set the environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.
 This demo only supports the text-to-video generation model.
+If you wish to use the image-to-video or video-to-video generation models,
+please use the gradio_composite_demo to implement the full GUI functionality.
+
 Usage:
     OpenAI_API_KEY=your_openai_api_key OpenAI_BASE_URL=https://api.openai.com/v1 python inference/gradio_web_demo.py
 """
@@ -18,12 +22,8 @@
 from openai import OpenAI
 import moviepy.editor as mp

-dtype = torch.bfloat16
-device = "cuda"  # Need to use cuda
+pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to("cuda")

-pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=dtype).to(device)
-pipe.enable_model_cpu_offload()
+pipe.enable_sequential_cpu_offload()
 pipe.vae.enable_slicing()
 pipe.vae.enable_tiling()
@@ -47,6 +47,7 @@
 def convert_prompt(prompt: str, retry_times: int = 3) -> str:
     if not os.environ.get("OPENAI_API_KEY"):
         return prompt
+
     client = OpenAI()
     text = prompt.strip()
@@ -83,7 +84,7 @@ def convert_prompt(prompt: str, retry_times: int = 3) -> str:
             "content": f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{text}"',
         },
     ],
-    model="glm-4-0520",
+    model="glm-4-plus",
     temperature=0.01,
     top_p=0.7,
     stream=False,
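Note: the prompt enhancer simply calls an OpenAI-compatible endpoint, so the model id must be one the configured service offers (here glm-4-plus). A condensed, illustrative sketch; `enhance_prompt` and its system prompt are not the demo's exact code:

```python
import os
from openai import OpenAI

def enhance_prompt(text: str) -> str:
    # Mirror convert_prompt's fallback: no API key, no enhancement.
    if not os.environ.get("OPENAI_API_KEY"):
        return text
    client = OpenAI()  # reads OPENAI_API_KEY / OPENAI_BASE_URL from the environment
    response = client.chat.completions.create(
        model="glm-4-plus",  # must be served by the configured endpoint
        temperature=0.01,
        top_p=0.7,
        messages=[
            {"role": "system", "content": "Rewrite the user's input as a rich English video caption."},
            {"role": "user", "content": text.strip()},
        ],
    )
    return response.choices[0].message.content
```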
@@ -145,19 +146,9 @@ def delete_old_files():
 with gr.Blocks() as demo:
     gr.Markdown("""
        <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
-           CogVideoX-2B Huggingface Space🤗
-       </div>
-       <div style="text-align: center;">
-           <a href="https://huggingface.co/THUDM/CogVideoX-2B">🤗 2B Model Hub</a> |
-           <a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
-           <a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
-       </div>
-       <div style="text-align: center; font-size: 15px; font-weight: bold; color: red; margin-bottom: 20px;">
-           ⚠️ This demo is for academic research and experiential use only.
-           Users should strictly adhere to local laws and ethics.
-       </div>
-       """)
+           CogVideoX Gradio Simple Space🤗
+       """)

     with gr.Row():
         with gr.Column():
             prompt = gr.Textbox(label="Prompt (Less than 200 Words)", placeholder="Enter your prompt here", lines=5)
@@ -173,7 +164,6 @@ def delete_old_files():
                 "**Optional Parameters** (default values are recommended)<br>"
                 "Increasing the number of inference steps will produce more detailed videos, but it will slow down the process.<br>"
                 "50 steps are recommended for most cases.<br>"
-                "For the 5B model, 50 steps will take approximately 350 seconds."
             )
             with gr.Row():
                 num_inference_steps = gr.Number(label="Inference Steps", value=50)
@@ -186,42 +176,6 @@ def delete_old_files():
             download_video_button = gr.File(label="📥 Download Video", visible=False)
             download_gif_button = gr.File(label="📥 Download GIF", visible=False)

-    gr.Markdown("""
-    <table border="0" style="width: 100%; text-align: left; margin-top: 20px;">
-        <div style="text-align: center; font-size: 24px; font-weight: bold; margin-bottom: 20px;">
-            Demo Videos with 50 Inference Steps and 6.0 Guidance Scale.
-        </div>
-        <tr>
-            <td style="width: 25%; vertical-align: top; font-size: 0.8em;">
-                <p>A detailed wooden toy ship with intricately carved masts and sails is seen gliding smoothly over a plush, blue carpet that mimics the waves of the sea. The ship's hull is painted a rich brown, with tiny windows. The carpet, soft and textured, provides a perfect backdrop, resembling an oceanic expanse. Surrounding the ship are various other toys and children's items, hinting at a playful environment. The scene captures the innocence and imagination of childhood, with the toy ship's journey symbolizing endless adventures in a whimsical, indoor setting.</p>
-            </td>
-            <td style="width: 25%; vertical-align: top;">
-                <video src="https://github.com/user-attachments/assets/ea3af39a-3160-4999-90ec-2f7863c5b0e9" width="100%" controls autoplay></video>
-            </td>
-            <td style="width: 25%; vertical-align: top; font-size: 0.8em;">
-                <p>The camera follows behind a white vintage SUV with a black roof rack as it speeds up a steep dirt road surrounded by pine trees on a steep mountain slope, dust kicks up from its tires, the sunlight shines on the SUV as it speeds along the dirt road, casting a warm glow over the scene. The dirt road curves gently into the distance, with no other cars or vehicles in sight. The trees on either side of the road are redwoods, with patches of greenery scattered throughout. The car is seen from the rear following the curve with ease, making it seem as if it is on a rugged drive through the rugged terrain. The dirt road itself is surrounded by steep hills and mountains, with a clear blue sky above with wispy clouds.</p>
-            </td>
-            <td style="width: 25%; vertical-align: top;">
-                <video src="https://github.com/user-attachments/assets/9de41efd-d4d1-4095-aeda-246dd834e91d" width="100%" controls autoplay></video>
-            </td>
-        </tr>
-        <tr>
-            <td style="width: 25%; vertical-align: top; font-size: 0.8em;">
-                <p>A street artist, clad in a worn-out denim jacket and a colorful bandana, stands before a vast concrete wall in the heart, holding a can of spray paint, spray-painting a colorful bird on a mottled wall.</p>
-            </td>
-            <td style="width: 25%; vertical-align: top;">
-                <video src="https://github.com/user-attachments/assets/941d6661-6a8d-4a1b-b912-59606f0b2841" width="100%" controls autoplay></video>
-            </td>
-            <td style="width: 25%; vertical-align: top; font-size: 0.8em;">
-                <p>In the haunting backdrop of a war-torn city, where ruins and crumbled walls tell a story of devastation, a poignant close-up frames a young girl. Her face is smudged with ash, a silent testament to the chaos around her. Her eyes glistening with a mix of sorrow and resilience, capturing the raw emotion of a world that has lost its innocence to the ravages of conflict.</p>
-            </td>
-            <td style="width: 25%; vertical-align: top;">
-                <video src="https://github.com/user-attachments/assets/938529c4-91ae-4f60-b96b-3c3947fa63cb" width="100%" controls autoplay></video>
-            </td>
-        </tr>
-    </table>
-    """)
-
 def generate(prompt, num_inference_steps, guidance_scale, model_choice, progress=gr.Progress(track_tqdm=True)):
     tensor = infer(prompt, num_inference_steps, guidance_scale, progress=progress)
     video_path = save_video(tensor)
219 changes: 0 additions & 219 deletions inference/streamlit_web_demo.py

This file was deleted.

1 change: 0 additions & 1 deletion requirements.txt
@@ -7,7 +7,6 @@ torchvision==0.19.0
 sentencepiece==0.2.0
 SwissArmyTransformer>=0.4.12
 gradio>=4.44.0
-streamlit>=1.38.0
 imageio>=2.35.1
 imageio-ffmpeg>=0.5.1
 openai>=1.45.0