forked from ai-forever/KandinskyVideo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict.py
75 lines (69 loc) · 2.64 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Prediction interface for Cog ⚙️
# https://github.com/replicate/cog/blob/main/docs/python.md
import imageio
import numpy as np
from cog import BasePredictor, Input, Path
from video_kandinsky3 import *
class Predictor(BasePredictor):
    """Cog predictor wrapping the KandinskyVideo text-to-video pipeline."""

    def setup(self) -> None:
        """Load the model into memory to make running multiple predictions efficient"""
        weights_root = "model_weights"
        checkpoint_dir = f"{weights_root}/weights"
        # Checkpoints are expected to be pre-downloaded under `model_weights/`;
        # the FLAN-UL2 text encoder lives in its own local directory.
        self.t2v_pipe = get_T2V_pipeline(
            "cuda:0",
            fp16=True,
            cache_dir=weights_root,
            unet_path=f"{checkpoint_dir}/kandinsky_video.pt",
            interpolation_unet_path=f"{checkpoint_dir}/kandinsky_video_interpolation.pt",
            movq_path=f"{checkpoint_dir}/movq.pt",
            text_encode_path="google_flan_ul2_weights",  # pre-loaded from google/flan-ul2
        )

    def predict(
        self,
        prompt: str = Input(
            description="Input prompt.",
            default="a red car is drifting on the mountain road, close view, fast movement",
        ),
        negative_prompt: str = Input(
            description="Specify things to not see in the output.",
            default=None,
        ),
        width: int = Input(
            description="Width of output video. Lower the setting if out of memory.",
            default=640,
        ),
        height: int = Input(
            description="Height of output video. Lower the setting if out of memory.",
            default=384,
        ),
        num_inference_steps: int = Input(
            description="Number of denoising steps", default=50
        ),
        guidance_scale: float = Input(
            description="Scale for classifier-free guidance", default=5.0
        ),
        interpolation_guidance_scale: float = Input(
            description="Scale for interpolation guidance", default=0.25
        ),
        interpolation_level: str = Input(
            choices=["low", "medium", "high"],
            default="low",
        ),
        fps: int = Input(
            description="fps for the output video.",
            default=10,
        ),
    ) -> Path:
        """Run a single prediction on the model"""
        # NOTE(review): the pipeline's `fps` kwarg is fed the interpolation
        # level ("low"/"medium"/"high"), matching the upstream KandinskyVideo
        # API despite the name — confirm against the pipeline signature. The
        # numeric `fps` input only affects encoding of the output file below.
        frames = self.t2v_pipe(
            text=prompt,
            negative_text=negative_prompt,
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            interpolation_guidance_scale=interpolation_guidance_scale,
            steps=num_inference_steps,
            fps=interpolation_level,
        )
        out_file = "/tmp/output.mp4"
        # Convert each frame (PIL image) to an array and encode as mp4.
        imageio.mimsave(out_file, [np.array(frame) for frame in frames], fps=fps)
        return Path(out_file)