brycedrennan · brycedrennan · Jan 20, 2024 · Jan 20, 2024 · Jan 20, 2024
diff --git a/Makefile b/Makefile
@@ -210,7 +210,7 @@ vendorize_normal_map:
 
 
 vendorize_refiners:
-	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=ce3035923ba71bcb5044708d2f1c37fd1d6722e9 && \
+	export REPO=git@github.com:finegrain-ai/refiners.git PKG=refiners COMMIT=91aea9b7ff63ddf93f99e2ce6a4452bd658b1948 && \
 	make download_repo REPO=$$REPO PKG=$$PKG COMMIT=$$COMMIT && \
 	mkdir -p ./imaginairy/vendored/$$PKG && \
 	rm -rf ./imaginairy/vendored/$$PKG/* && \

diff --git a/imaginairy/api/generate_refiners.py b/imaginairy/api/generate_refiners.py
@@ -289,7 +289,7 @@ def latent_logger(latents):
             msg = f"Unknown solver type: {prompt.solver_type}"
             raise ValueError(msg)
         sd.scheduler.to(device=sd.unet.device, dtype=sd.unet.dtype)
-        sd.set_num_inference_steps(prompt.steps)
+        sd.set_inference_steps(prompt.steps, first_step=first_step)
 
         if hasattr(sd, "mask_latents") and mask_image is not None:
             sd.set_inpainting_conditions(
@@ -306,11 +306,11 @@ def latent_logger(latents):
 
         if init_latent is not None:
             noise_step = noise_step if noise_step is not None else first_step
-            if first_step >= len(sd.steps):
+            if first_step >= len(sd.scheduler.all_steps):
                 noised_latent = init_latent
             else:
                 noised_latent = sd.scheduler.add_noise(
-                    x=init_latent, noise=noise, step=sd.steps[noise_step]
+                    x=init_latent, noise=noise, step=sd.scheduler.all_steps[noise_step]
                 )
 
         with lc.timing("text-conditioning"):
@@ -330,7 +330,7 @@ def latent_logger(latents):
 
         with lc.timing("unet"):
             for step in tqdm(
-                sd.steps[first_step:], bar_format="    {l_bar}{bar}{r_bar}", leave=False
+                sd.steps, bar_format="    {l_bar}{bar}{r_bar}", leave=False
             ):
                 log_latent(x, "noisy_latent")
                 x = sd(

diff --git a/imaginairy/utils/model_manager.py b/imaginairy/utils/model_manager.py
@@ -603,8 +603,8 @@ def load_sdxl_pipeline_from_diffusers_weights(
     text_encoder.load_state_dict(text_encoder_weights, assign=True)
     del text_encoder_weights
     lda = lda.to(device=device, dtype=torch.float32)
-    unet = unet.to(device=device)
-    text_encoder = text_encoder.to(device=device)
+    unet = unet.to(device=device, dtype=dtype)
+    text_encoder = text_encoder.to(device=device, dtype=dtype)
     if for_inpainting:
         StableDiffusionCls = StableDiffusion_XL_Inpainting
     else:

diff --git a/imaginairy/vendored/refiners/fluxion/utils.py b/imaginairy/vendored/refiners/fluxion/utils.py
@@ -193,19 +193,23 @@ def summarize_tensor(tensor: torch.Tensor, /) -> str:
         f"dtype={str(object=tensor.dtype).removeprefix('torch.')}",
         f"device={tensor.device}",
     ]
-    if not tensor.is_complex():
-        info_list.extend(
-            [
-                f"min={tensor.min():.2f}",  # type: ignore
-                f"max={tensor.max():.2f}",  # type: ignore
-            ]
-        )
+    if tensor.is_complex():
+        tensor_f = tensor.real.float()
+    else:
+        if tensor.numel() > 0:
+            info_list.extend(
+                [
+                    f"min={tensor.min():.2f}",  # type: ignore
+                    f"max={tensor.max():.2f}",  # type: ignore
+                ]
+            )
+        tensor_f = tensor.float()
 
     info_list.extend(
         [
-            f"mean={tensor.float().mean():.2f}",
-            f"std={tensor.float().std():.2f}",
-            f"norm={norm(x=tensor.float()):.2f}",
+            f"mean={tensor_f.mean():.2f}",
+            f"std={tensor_f.std():.2f}",
+            f"norm={norm(x=tensor_f):.2f}",
             f"grad={tensor.requires_grad}",
         ]
     )

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/model.py
@@ -32,21 +32,21 @@ def __init__(
         self.clip_text_encoder = clip_text_encoder.to(device=self.device, dtype=self.dtype)
         self.scheduler = scheduler.to(device=self.device, dtype=self.dtype)
 
-    def set_num_inference_steps(self, num_inference_steps: int) -> None:
+    def set_inference_steps(self, num_steps: int, first_step: int = 0) -> None:
         initial_diffusion_rate = self.scheduler.initial_diffusion_rate
         final_diffusion_rate = self.scheduler.final_diffusion_rate
         device, dtype = self.scheduler.device, self.scheduler.dtype
         self.scheduler = self.scheduler.__class__(
-            num_inference_steps,
+            num_inference_steps=num_steps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_step,
         ).to(device=device, dtype=dtype)
 
     def init_latents(
         self,
         size: tuple[int, int],
         init_image: Image.Image | None = None,
-        first_step: int = 0,
         noise: Tensor | None = None,
     ) -> Tensor:
         height, width = size
@@ -59,11 +59,15 @@ def init_latents(
         if init_image is None:
             return noise
         encoded_image = self.lda.encode_image(image=init_image.resize(size=(width, height)))
-        return self.scheduler.add_noise(x=encoded_image, noise=noise, step=self.steps[first_step])
+        return self.scheduler.add_noise(
+            x=encoded_image,
+            noise=noise,
+            step=self.scheduler.first_inference_step,
+        )
 
     @property
     def steps(self) -> list[int]:
-        return self.scheduler.steps
+        return self.scheduler.inference_steps
 
     @abstractmethod
     def set_unet_context(self, *, timestep: Tensor, clip_text_embedding: Tensor, **_: Tensor) -> None:

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/range_adapter.py
@@ -24,23 +24,23 @@ def compute_sinusoidal_embedding(
 class RangeEncoder(fl.Chain):
     def __init__(
         self,
-        sinuosidal_embedding_dim: int,
+        sinusoidal_embedding_dim: int,
         embedding_dim: int,
         device: Device | str | None = None,
         dtype: DType | None = None,
     ) -> None:
-        self.sinuosidal_embedding_dim = sinuosidal_embedding_dim
+        self.sinusoidal_embedding_dim = sinusoidal_embedding_dim
         self.embedding_dim = embedding_dim
         super().__init__(
-            fl.Lambda(self.compute_sinuosoidal_embedding),
+            fl.Lambda(self.compute_sinusoidal_embedding),
             fl.Converter(set_device=False, set_dtype=True),
-            fl.Linear(in_features=sinuosidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
+            fl.Linear(in_features=sinusoidal_embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
             fl.SiLU(),
             fl.Linear(in_features=embedding_dim, out_features=embedding_dim, device=device, dtype=dtype),
         )
 
-    def compute_sinuosoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
-        return compute_sinusoidal_embedding(x, embedding_dim=self.sinuosidal_embedding_dim)
+    def compute_sinusoidal_embedding(self, x: Int[Tensor, "*batch 1"]) -> Float[Tensor, "*batch 1 embedding_dim"]:
+        return compute_sinusoidal_embedding(x, embedding_dim=self.sinusoidal_embedding_dim)
 
 
 class RangeAdapter2d(fl.Sum, Adapter[fl.Conv2d]):

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddim.py
@@ -11,6 +11,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ) -> None:
@@ -20,6 +21,7 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -35,6 +37,8 @@ def _generate_timesteps(self) -> Tensor:
         return timesteps.flip(0)
 
     def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | None = None) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         timestep, previous_timestep = (
             self.timesteps[step],
             (

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/ddpm.py
@@ -5,8 +5,9 @@
 
 class DDPM(Scheduler):
     """
-    The Denoising Diffusion Probabilistic Models (DDPM) is a specific type of diffusion model,
-    which uses a specific strategy to generate the timesteps and applies the diffusion process in a specific way.
+    Denoising Diffusion Probabilistic Model
+
+    Only used for training Latent Diffusion models. Cannot be called.
     """
 
     def __init__(
@@ -15,13 +16,15 @@ def __init__(
         num_train_timesteps: int = 1_000,
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
     ) -> None:
         super().__init__(
             num_inference_steps=num_inference_steps,
             num_train_timesteps=num_train_timesteps,
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
+            first_inference_step=first_inference_step,
             device=device,
         )
 

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/dpm_solver.py
@@ -24,6 +24,7 @@ def __init__(
         final_diffusion_rate: float = 1.2e-2,
         last_step_first_order: bool = False,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -33,12 +34,12 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
         self.estimated_data = deque([tensor([])] * 2, maxlen=2)
         self.last_step_first_order = last_step_first_order
-        self._first_step_has_been_run = False
 
     def _generate_timesteps(self) -> Tensor:
         # We need to use numpy here because:
@@ -81,6 +82,7 @@ def multistep_dpm_solver_second_order_update(self, x: Tensor, step: int) -> Tens
         previous_scale_factor = self.cumulative_scale_factors[previous_timestep]
         previous_noise_std = self.noise_std[previous_timestep]
         current_noise_std = self.noise_std[current_timestep]
+
         estimation_delta = (current_data_estimation - next_data_estimation) / (
             (current_ratio - next_ratio) / (previous_ratio - current_ratio)
         )
@@ -100,13 +102,14 @@ def __call__(self, x: Tensor, noise: Tensor, step: int, generator: Generator | N
         backward Euler update, which is a numerical method commonly used to solve ordinary differential equations
         (ODEs).
         """
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         current_timestep = self.timesteps[step]
         scale_factor, noise_ratio = self.cumulative_scale_factors[current_timestep], self.noise_std[current_timestep]
         estimated_denoised_data = (x - noise_ratio * noise) / scale_factor
         self.estimated_data.append(estimated_denoised_data)
 
-        if step == 0 or (self.last_step_first_order and step == self.num_inference_steps - 1) or not self._first_step_has_been_run:
-            self._first_step_has_been_run = True
+        if step == self.first_inference_step or (self.last_step_first_order and step == self.num_inference_steps - 1):
             return self.dpm_solver_first_order_update(x=x, noise=estimated_denoised_data, step=step)
 
         return self.multistep_dpm_solver_second_order_update(x=x, step=step)
diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/euler.py
@@ -13,6 +13,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: Dtype = float32,
     ):
@@ -24,6 +25,7 @@ def __init__(
             initial_diffusion_rate=initial_diffusion_rate,
             final_diffusion_rate=final_diffusion_rate,
             noise_schedule=noise_schedule,
+            first_inference_step=first_inference_step,
             device=device,
             dtype=dtype,
         )
@@ -64,6 +66,8 @@ def __call__(
         s_tmax: float = float("inf"),
         s_noise: float = 1.0,
     ) -> Tensor:
+        assert self.first_inference_step <= step < self.num_inference_steps, f"invalid step {step}"
+
         sigma = self.sigmas[step]
 
         gamma = min(s_churn / (len(self.sigmas) - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/schedulers/scheduler.py
@@ -33,6 +33,7 @@ def __init__(
         initial_diffusion_rate: float = 8.5e-4,
         final_diffusion_rate: float = 1.2e-2,
         noise_schedule: NoiseSchedule = NoiseSchedule.QUADRATIC,
+        first_inference_step: int = 0,
         device: Device | str = "cpu",
         dtype: DType = float32,
     ):
@@ -43,6 +44,7 @@ def __init__(
         self.initial_diffusion_rate = initial_diffusion_rate
         self.final_diffusion_rate = final_diffusion_rate
         self.noise_schedule = noise_schedule
+        self.first_inference_step = first_inference_step
         self.scale_factors = self.sample_noise_schedule()
         self.cumulative_scale_factors = sqrt(self.scale_factors.cumprod(dim=0))
         self.noise_std = sqrt(1.0 - self.scale_factors.cumprod(dim=0))
@@ -68,9 +70,13 @@ def _generate_timesteps(self) -> Tensor:
         ...
 
     @property
-    def steps(self) -> list[int]:
+    def all_steps(self) -> list[int]:
         return list(range(self.num_inference_steps))
 
+    @property
+    def inference_steps(self) -> list[int]:
+        return self.all_steps[self.first_inference_step :]
+
     def scale_model_input(self, x: Tensor, step: int) -> Tensor:
         """
         For compatibility with schedulers that need to scale the input according to the current timestep.

diff --git a/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py b/imaginairy/vendored/refiners/foundationals/latent_diffusion/stable_diffusion_xl/unet.py
@@ -28,7 +28,7 @@ def __init__(self, device: Device | str | None = None, dtype: DType | None = Non
                 fl.Chain(
                     fl.UseContext(context="diffusion", key="time_ids"),
                     fl.Unsqueeze(dim=-1),
-                    fl.Lambda(func=self.compute_sinuosoidal_embedding),
+                    fl.Lambda(func=self.compute_sinusoidal_embedding),
                     fl.Reshape(-1),
                 ),
                 dim=1,
@@ -49,7 +49,7 @@ def __init__(self, device: Device | str | None = None, dtype: DType | None = Non
             ),
         )
 
-    def compute_sinuosoidal_embedding(self, x: Tensor) -> Tensor:
+    def compute_sinusoidal_embedding(self, x: Tensor) -> Tensor:
         return compute_sinusoidal_embedding(x=x, embedding_dim=self.time_ids_embedding_dim)
 
 
@@ -61,7 +61,7 @@ def __init__(self, device: Device | str | None = None, dtype: DType | None = Non
                 fl.Chain(
                     fl.UseContext(context="diffusion", key="timestep"),
                     RangeEncoder(
-                        sinuosidal_embedding_dim=320,
+                        sinusoidal_embedding_dim=320,
                         embedding_dim=self.timestep_embedding_dim,
                         device=device,
                         dtype=dtype,

diff --git a/imaginairy/vendored/refiners/readme.txt b/imaginairy/vendored/refiners/readme.txt
@@ -1 +1 @@
-vendored from git@github.com:finegrain-ai/refiners.git @ ce3035923ba71bcb5044708d2f1c37fd1d6722e9
+vendored from git@github.com:finegrain-ai/refiners.git @ 91aea9b7ff63ddf93f99e2ce6a4452bd658b1948
diff --git a/tests/expected_output/test_large_image_.png b/tests/expected_output/test_large_image_.png