improve: update to diffusers 0.4.2
samedii committed Oct 13, 2022
1 parent 04a558b commit 3763564
Showing 5 changed files with 40 additions and 28 deletions.
docs/source/conf.py (2 changes: 1 addition & 1 deletion)

@@ -16,7 +16,7 @@
 project = "perceptor"
 copyright = "2022, Richard Löwenström"
 author = "Richard Löwenström"
-release = "v0.6.2"
+release = "v0.6.3"

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
perceptor/models/stable_diffusion/stable_diffusion.py (31 changes: 20 additions & 11 deletions)

@@ -16,7 +16,7 @@
 # @cache
 class StableDiffusion(torch.nn.Module):
     def __init__(
-        self, name="CompVis/stable-diffusion-v1-4", fp16=False, auth_token=True
+        self, name="CompVis/stable-diffusion-v1-4", fp16=True, auth_token=True
     ):
         """
         Args:
@@ -37,12 +37,14 @@ def __init__(
             name,
             scheduler=scheduler,
             use_auth_token=auth_token,
-            **dict(
-                revision="fp16",
-                torch_dtype=torch.float16,
-            )
-            if fp16
-            else dict(),
+            **(
+                dict(
+                    revision="fp16",
+                    torch_dtype=torch.float16,
+                )
+                if fp16
+                else dict()
+            ),
         )

         self.vae = pipeline.vae
@@ -124,13 +126,15 @@ def encode(
             raise Exception(f"Width must be divisible by 32, got {w}")
         return (
             0.18215
-            * self.vae.encode(diffusion_space.encode(images.to(self.device))).mode()
+            * self.vae.encode(
+                diffusion_space.encode(images.to(self.device))
+            ).latent_dist.mode()
         )

     def decode(
         self, latents: lantern.Tensor.dims("NCHW").float()
     ) -> lantern.Tensor.dims("NCHW"):
-        return diffusion_space.decode(self.vae.decode(latents / 0.18215))
+        return diffusion_space.decode(self.vae.decode(latents / 0.18215).sample)

     @contextmanager
     def finetuneable_vae(self):
@@ -167,7 +171,10 @@ def random_diffused_latents(self, shape) -> lantern.Tensor:
             raise ValueError("Height must be divisible by 32")
         if w % 8 != 0:
             raise ValueError("Width must be divisible by 32")
-        return torch.randn((n, self.unet.in_channels, h // 8, w // 8)).to(self.device)
+        return (
+            torch.randn((n, self.unet.in_channels, h // 8, w // 8)).to(self.device)
+            * self.scheduler.init_noise_sigma
+        )

     def indices(self, indices) -> lantern.Tensor:
         if isinstance(indices, float) or isinstance(indices, int):
@@ -281,7 +288,9 @@ def test_stable_diffusion_step():

     # compare with diffusers
     scheduler = DDIMScheduler(
-        beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear"
+        beta_start=0.00085,
+        beta_end=0.012,
+        beta_schedule="scaled_linear",
     )
     scheduler.set_timesteps(1000)
     pipeline = StableDiffusionPipeline.from_pretrained(
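The stable_diffusion.py changes above track API changes in diffusers 0.4.x: vae.encode() and vae.decode() now return output objects (latents live under .latent_dist, decoded images under .sample), and schedulers expose init_noise_sigma for scaling the initial noise. Below is a minimal sketch of those calls, not part of the commit; the model name, the 0.18215 latent scaling, and the DDIM betas are taken from the diff, everything else is illustrative.

import torch
from diffusers import DDIMScheduler, StableDiffusionPipeline

scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
)
# Passing revision="fp16" and torch_dtype=torch.float16 here would load the
# half precision weights, which the wrapper now requests by default (fp16=True).
pipeline = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    scheduler=scheduler,
    use_auth_token=True,
)
vae = pipeline.vae

images = torch.rand(1, 3, 512, 512) * 2 - 1  # images scaled to [-1, 1]

with torch.no_grad():
    # vae.encode() returns an output object; the latents come from its
    # .latent_dist distribution via .mode() (or .sample()).
    latents = 0.18215 * vae.encode(images).latent_dist.mode()

    # vae.decode() likewise wraps its result; the decoded tensor is in .sample.
    decoded = vae.decode(latents / 0.18215).sample

# Schedulers now expose init_noise_sigma (1.0 for DDIM), so freshly drawn
# latent noise is multiplied by it before the denoising loop.
noise = torch.randn_like(latents) * scheduler.init_noise_sigma

The left-hand side of the diff shows the older 0.2.x behaviour, where encode() returned the latent distribution directly, decode() returned a tensor, and fresh latents were not scaled by a scheduler sigma.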
perceptor/models/velocity_diffusion/velocity_diffusion.py (4 changes: 3 additions & 1 deletion)

@@ -222,7 +222,9 @@ def test_conditioned_velocity_diffusion():
 def test_convert_sigma_ts():
     diffusion = VelocityDiffusion("cc12m_1_cfg")
     from_ts = 0.3
-    assert from_ts == diffusion.sigmas_to_ts(diffusion.sigmas(from_ts))
+    assert (
+        from_ts - diffusion.sigmas_to_ts(diffusion.sigmas(from_ts)).squeeze()
+    ).abs() <= 1e-5


 def test_schedule_ts():
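The test change above swaps exact equality for an absolute tolerance, since the sigma-to-t conversion only round-trips to floating point accuracy. A tiny stand-alone illustration of the same check; the conversion expressions here are stand-ins, not the library's own functions.

import torch

from_ts = 0.3
sigmas = torch.sin(torch.tensor(from_ts) * torch.pi / 2)  # stand-in for diffusion.sigmas(from_ts)
round_tripped = torch.asin(sigmas) * 2 / torch.pi         # stand-in for diffusion.sigmas_to_ts(sigmas)

# Bit-exact equality can fail after the float round trip; compare with a tolerance.
assert (from_ts - round_tripped.squeeze()).abs() <= 1e-5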
poetry.lock (27 changes: 14 additions & 13 deletions)

Some generated files are not rendered by default.

pyproject.toml (4 changes: 2 additions & 2 deletions)

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "perceptor"
-version = "0.6.2"
+version = "0.6.3"
 description = "Modular image generation library"
 authors = ["Richard Löwenström <samedii@gmail.com>", "dribnet"]
 readme = "README.md"

@@ -31,7 +31,7 @@ ninja = "^1.10.2"
 lpips = "^0.1.4"
 pytorch-lantern = "^0.12.0"
 taming-transformers-rom1504 = "^0.0.6"
-diffusers = "^0.2.4"
+diffusers = "^0.4.2"
 open-clip-torch = "^2.0.2"
 pytorch-zero-lit = "^0.2.2"
