Skip to content

Commit

Permalink
fix examples
Browse files (browse the repository at this point in the history)
  • Loading branch information
chengzeyi committed Dec 25, 2024
1 parent e8e13b3 commit 7615556
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 44 deletions.
24 changes: 12 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ parallelize_pipe(
)
parallelize_vae(pipe.vae, mesh=mesh._flatten())

# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())

# torch._inductor.config.reorder_for_compute_comm_overlap = True
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs")
Expand Down Expand Up @@ -178,12 +178,12 @@ pipe = HunyuanVideoPipeline.from_pretrained(

pipe.vae.enable_tiling(
# Make it runnable on GPUs with 48GB memory
tile_sample_min_height=128,
tile_sample_stride_height=96,
tile_sample_min_width=128,
tile_sample_stride_width=96,
tile_sample_min_num_frames=32,
tile_sample_stride_num_frames=24,
# tile_sample_min_height=128,
# tile_sample_stride_height=96,
# tile_sample_min_width=128,
# tile_sample_stride_width=96,
# tile_sample_min_num_frames=32,
# tile_sample_stride_num_frames=24,
)

from para_attn.context_parallel import init_context_parallel_mesh
Expand All @@ -199,16 +199,16 @@ parallelize_pipe(
)
parallelize_vae(pipe.vae, mesh=mesh._flatten())

# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())

# torch._inductor.config.reorder_for_compute_comm_overlap = True
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs")

output = pipe(
prompt="A cat walks on the grass, realistic",
height=320,
width=512,
num_frames=61,
height=720,
width=1280,
num_frames=129,
num_inference_steps=30,
output_type="pil" if dist.get_rank() == 0 else "pt",
).frames[0]
Expand Down Expand Up @@ -254,7 +254,7 @@ parallelize_pipe(
)

# Enable memory savings
# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())
pipe.enable_vae_tiling()

# torch._inductor.config.reorder_for_compute_comm_overlap = True
Expand Down
18 changes: 9 additions & 9 deletions focus_attn_examples/run_hunyuan_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@

pipe.vae.enable_tiling(
# Make it runnable on GPUs with 48GB memory
tile_sample_min_height=128,
tile_sample_stride_height=96,
tile_sample_min_width=128,
tile_sample_stride_width=96,
tile_sample_min_num_frames=32,
tile_sample_stride_num_frames=24,
# tile_sample_min_height=128,
# tile_sample_stride_height=96,
# tile_sample_min_width=128,
# tile_sample_stride_width=96,
# tile_sample_min_num_frames=32,
# tile_sample_stride_num_frames=24,
)

from para_attn.focus_attn.diffusers_adapters import apply_focus_attn_on_pipe
Expand All @@ -36,9 +36,9 @@

output = pipe(
prompt="A cat walks on the grass, realistic",
height=320,
width=512,
num_frames=61,
height=720,
width=1280,
num_frames=129,
num_inference_steps=30,
).frames[0]

Expand Down
4 changes: 2 additions & 2 deletions parallel_examples/run_cogvideox.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
),
)

# pipe.enable_model_cpu_offload()
# pipe.enable_sequential_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())
# pipe.enable_sequential_cpu_offload(gpu_id=dist.get_rank())
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

Expand Down
2 changes: 1 addition & 1 deletion parallel_examples/run_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)
parallelize_vae(pipe.vae, mesh=mesh._flatten())

# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())

# torch._inductor.config.reorder_for_compute_comm_overlap = True
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs")
Expand Down
20 changes: 10 additions & 10 deletions parallel_examples/run_hunyuan_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@

pipe.vae.enable_tiling(
# Make it runnable on GPUs with 48GB memory
tile_sample_min_height=128,
tile_sample_stride_height=96,
tile_sample_min_width=128,
tile_sample_stride_width=96,
tile_sample_min_num_frames=32,
tile_sample_stride_num_frames=24,
# tile_sample_min_height=128,
# tile_sample_stride_height=96,
# tile_sample_min_width=128,
# tile_sample_stride_width=96,
# tile_sample_min_num_frames=32,
# tile_sample_stride_num_frames=24,
)

from para_attn.context_parallel import init_context_parallel_mesh
Expand All @@ -45,16 +45,16 @@
)
parallelize_vae(pipe.vae, mesh=mesh._flatten())

# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())

# torch._inductor.config.reorder_for_compute_comm_overlap = True
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs")

output = pipe(
prompt="A cat walks on the grass, realistic",
height=320,
width=512,
num_frames=61,
height=720,
width=1280,
num_frames=129,
num_inference_steps=30,
output_type="pil" if dist.get_rank() == 0 else "pt",
).frames[0]
Expand Down
2 changes: 1 addition & 1 deletion parallel_examples/run_mochi.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
)

# Enable memory savings
# pipe.enable_model_cpu_offload()
# pipe.enable_model_cpu_offload(gpu_id=dist.get_rank())
pipe.enable_vae_tiling()

# torch._inductor.config.reorder_for_compute_comm_overlap = True
Expand Down
18 changes: 9 additions & 9 deletions tests/context_parallel/test_diffusers_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,12 @@ def new_pipe(self, dtype, device):

pipe.vae.enable_tiling(
# Make it runnable on GPUs with 48GB memory
tile_sample_min_height=128,
tile_sample_stride_height=96,
tile_sample_min_width=128,
tile_sample_stride_width=96,
tile_sample_min_num_frames=32,
tile_sample_stride_num_frames=24,
# tile_sample_min_height=128,
# tile_sample_stride_height=96,
# tile_sample_min_width=128,
# tile_sample_stride_width=96,
# tile_sample_min_num_frames=32,
# tile_sample_stride_num_frames=24,
)

# Fix OOM because of awful inductor lowering of attn_bias of _scaled_dot_product_efficient_attention
Expand All @@ -228,9 +228,9 @@ def new_pipe(self, dtype, device):
def call_pipe(self, pipe, *args, **kwargs):
return pipe(
prompt="A cat walks on the grass, realistic",
height=320,
width=512,
num_frames=61,
height=720,
width=1280,
num_frames=129,
num_inference_steps=30,
output_type="pil" if self.rank == 0 else "pt",
)
Expand Down

0 comments on commit 7615556

Please sign in to comment.