From 81fedeca045bdc935ca94c65ed8c709aab086a1d Mon Sep 17 00:00:00 2001
From: Michael
Date: Thu, 17 Oct 2024 21:54:29 +0300
Subject: [PATCH 1/2] FIXES2

---
 Install.bat                        |   1 +
 Install.sh                         |   1 +
 LaunchFile/app.py                  | 252 ++++++++++++++++-------
 RequirementsFiles/requirements.txt |  25 ++-
 Update.bat                         |   1 +
 Update.sh                          |   1 +
 6 files changed, 171 insertions(+), 110 deletions(-)

diff --git a/Install.bat b/Install.bat
index 29357762..7e669b3a 100644
--- a/Install.bat
+++ b/Install.bat
@@ -36,6 +36,7 @@ pip install --no-build-isolation -e git+https://github.com/turboderp/exllamav2.g
 pip install git+https://github.com/tencent-ailab/IP-Adapter.git 2>> %ERROR_LOG%
 pip install git+https://github.com/vork/PyNanoInstantMeshes.git 2>> %ERROR_LOG%
 pip install git+https://github.com/openai/CLIP.git 2>> %ERROR_LOG%
+pip install git+https://github.com/xhinker/sd_embed.git@main 2>> %ERROR_LOG%
 
 timeout /t 3 /nobreak >nul
 cls
diff --git a/Install.sh b/Install.sh
index 638a12d5..53d2503a 100644
--- a/Install.sh
+++ b/Install.sh
@@ -36,6 +36,7 @@ pip install --no-build-isolation -e git+https://github.com/turboderp/exllamav2.g
 pip install git+https://github.com/tencent-ailab/IP-Adapter.git 2>> "$ERROR_LOG"
 pip install git+https://github.com/vork/PyNanoInstantMeshes.git 2>> "$ERROR_LOG"
 pip install git+https://github.com/openai/CLIP.git 2>> "$ERROR_LOG"
+pip install git+https://github.com/xhinker/sd_embed.git@main 2>> "$ERROR_LOG"
 
 sleep 3
 clear
diff --git a/LaunchFile/app.py b/LaunchFile/app.py
index 336090c8..11875377 100644
--- a/LaunchFile/app.py
+++ b/LaunchFile/app.py
@@ -41,6 +41,7 @@
 from datetime import datetime
 from diffusers.utils import load_image, load_video, export_to_video, export_to_gif, export_to_ply, pt_to_pil
 from compel import Compel, ReturnedEmbeddingsType
+from sd_embed.embedding_funcs import get_weighted_text_embeddings_sd3, get_weighted_text_embeddings_flux1
 import trimesh
 from git import Repo
 import numpy as np
@@ -2529,11 +2530,11 @@ def generate_image_img2img(prompt, negative_prompt, init_image, strength, stable
 
     if not stable_diffusion_model_name:
         gr.Info("Please, select a StableDiffusion model!")
-        return None, None, None
+        return None, None
 
     if not init_image:
         gr.Info("Please, upload an initial image!")
-        return None, None, None
+        return None, None
 
     if enable_quantize:
         try:
@@ -2563,7 +2564,7 @@ def generate_image_img2img(prompt, negative_prompt, init_image, strength, stable
 
             if result.returncode != 0:
                 gr.Info(f"Error in sd-img2img-quantize.py: {result.stderr}")
-                return None, None, None
+                return None, None
 
             image_paths = []
             output = result.stdout.strip()
@@ -2573,11 +2574,11 @@ def generate_image_img2img(prompt, negative_prompt, init_image, strength, stable
                 if os.path.exists(image_path):
                     image_paths.append(image_path)
 
-            return image_paths, None, f"Images generated successfully using quantized model. Seed used: {seed}"
+            return image_paths, f"Images generated successfully using quantized model. 
Seed used: {seed}" except Exception as e: gr.Error(f"An error occurred: {str(e)}") - return None, None, None + return None, None else: try: @@ -2586,7 +2587,7 @@ def generate_image_img2img(prompt, negative_prompt, init_image, strength, stable if not os.path.exists(stable_diffusion_model_path): gr.Info(f"StableDiffusion model not found: {stable_diffusion_model_path}") - return None, None, None + return None, None if stable_diffusion_model_type == "SD": stable_diffusion_model = StableDiffusionImg2ImgPipeline().StableDiffusionImg2ImgPipeline.from_single_file( @@ -2601,10 +2602,10 @@ def generate_image_img2img(prompt, negative_prompt, init_image, strength, stable stable_diffusion_model_path, use_safetensors=True, device_map="auto", attention_slice=1, torch_dtype=torch.float16, variant="fp16") else: - return None, None, "Invalid StableDiffusion model type!" + return None, "Invalid StableDiffusion model type!" except (ValueError, KeyError): gr.Error("The selected model is not compatible with the chosen model type") - return None, None, None + return None, None device = "cuda" if torch.cuda.is_available() else "cpu" @@ -2766,35 +2767,12 @@ def process_prompt_with_ti(input_prompt, textual_inversion_model_names): processed_prompt = process_prompt_with_ti(prompt, textual_inversion_model_names) processed_negative_prompt = process_prompt_with_ti(negative_prompt, textual_inversion_model_names) - def latents_to_rgb(latents): - weights = ( - (60, -60, 25, -70), - (60, -5, 15, -50), - (60, 10, -5, -35) - ) - - weights_tensor = torch.t(torch.tensor(weights, dtype=latents.dtype).to(latents.device)) - biases_tensor = torch.tensor((150, 140, 130), dtype=latents.dtype).to(latents.device) - rgb_tensor = torch.einsum("...lxy,lr -> ...rxy", latents, weights_tensor) + biases_tensor.unsqueeze( - -1).unsqueeze(-1) - image_array = rgb_tensor.clamp(0, 255)[0].byte().cpu().numpy() - image_array = image_array.transpose(1, 2, 0) - - return Image.fromarray(image_array) - - def decode_tensors(stable_diffusion_model, i, t, callback_kwargs): - latents = callback_kwargs["latents"] - image = latents_to_rgb(latents) - image.save(f"temp/{i}.png") - return callback_kwargs - def combined_callback(stable_diffusion_model, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i if i == stop_idx: stable_diffusion_model._interrupt = True - callback_kwargs = decode_tensors(stable_diffusion_model, i, t, callback_kwargs) progress((i + 1) / stable_diffusion_steps, f"Step {i + 1}/{stable_diffusion_steps}") @@ -2836,7 +2814,6 @@ def combined_callback(stable_diffusion_model, i, t, callback_kwargs): callback_on_step_end_tensor_inputs=["latents"]).images image_paths = [] - gif_images = [] for i, image in enumerate(images): today = datetime.now().date() image_dir = os.path.join('outputs', f"StableDiffusion_{today.strftime('%Y%m%d')}") @@ -2865,26 +2842,11 @@ def combined_callback(stable_diffusion_model, i, t, callback_kwargs): add_metadata_to_file(image_path, metadata) image_paths.append(image_path) - for i in range(stable_diffusion_steps): - if os.path.exists(f"temp/{i}.png"): - gif_images.append(imageio.imread(f"temp/{i}.png")) - - if gif_images: - gif_filename = f"img2img_process_{datetime.now().strftime('%Y%m%d_%H%M%S')}.gif" - gif_path = os.path.join(image_dir, gif_filename) - imageio.mimsave(gif_path, gif_images, duration=0.1) - else: - gif_path = None - - for i in range(stable_diffusion_steps): - if os.path.exists(f"temp/{i}.png"): - os.remove(f"temp/{i}.png") - - return image_paths, [gif_path] if 
gif_path else [], f"Images generated successfully. Seed used: {seed}" + return image_paths, f"Images generated successfully. Seed used: {seed}" except Exception as e: gr.Error(f"An error occurred: {str(e)}") - return None, None, None + return None, None finally: if 'stable_diffusion_model' in locals(): @@ -3287,7 +3249,7 @@ def generate_image_controlnet(prompt, negative_prompt, init_image, sd_version, s prompt_embeds = compel_proc(prompt) negative_prompt_embeds = compel_proc(negative_prompt) - def combined_callback_sd(i, t, callback_kwargs): + def combined_callback_sd(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -3429,7 +3391,7 @@ def get_depth_map(image): ) prompt_embeds, pooled_prompt_embeds = compel(prompt) - def combined_callback_sdxl(i, t, callback_kwargs): + def combined_callback_sdxl(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -3858,7 +3820,7 @@ def generate_image_inpaint(prompt, negative_prompt, init_image, mask_image, blur seed = int(seed) generator = torch.Generator(device).manual_seed(seed) - def combined_callback(i, t, callback_kwargs): + def combined_callback(stable_diffusion_model, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -4837,7 +4799,7 @@ def generate_image_ldm3d(prompt, negative_prompt, seed, width, height, num_infer flush() -def generate_image_sd3_txt2img(prompt, negative_prompt, quantize_sd3_model_name, enable_quantize, seed, stop_button, vae_model_name, lora_model_names, lora_scales, num_inference_steps, guidance_scale, width, height, max_sequence_length, clip_skip, num_images_per_prompt, enable_taesd, output_format, progress=gr.Progress()): +def generate_image_sd3_txt2img(prompt, negative_prompt, model_type, quantize_sd3_model_name, enable_quantize, seed, stop_button, vae_model_name, lora_model_names, lora_scales, num_inference_steps, guidance_scale, width, height, max_sequence_length, clip_skip, num_images_per_prompt, enable_taesd, output_format, progress=gr.Progress()): global stop_signal stop_signal = False stop_idx = None @@ -4899,9 +4861,31 @@ def generate_image_sd3_txt2img(prompt, negative_prompt, quantize_sd3_model_name, return None, None stable_diffusion_model_path = os.path.join("inputs", "image", "sd_models", quantize_sd3_model_name) - pipe = StableDiffusion3Pipeline().StableDiffusion3Pipeline.from_single_file(stable_diffusion_model_path, - device_map=device, - torch_dtype=torch.float16) + + sd3_model_path = os.path.join("inputs", "image", "sd_models", "sd3") + if not os.path.exists(sd3_model_path): + gr.Info("Downloading Stable Diffusion 3 model...") + os.makedirs(sd3_model_path, exist_ok=True) + Repo.clone_from("https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers", + sd3_model_path) + gr.Info("Stable Diffusion 3 model downloaded") + + if model_type == "Diffusers": + quantization_config = BitsAndBytesConfig().BitsAndBytesConfig(load_in_8bit=True) + + text_encoder = T5EncoderModel().T5EncoderModel.from_pretrained( + sd3_model_path, + subfolder="text_encoder_3", + quantization_config=quantization_config, + ) + pipe = StableDiffusion3Pipeline().StableDiffusion3Pipeline.from_pretrained(sd3_model_path, + device_map="balanced", + text_encoder_3=text_encoder, + torch_dtype=torch.float16) + else: + pipe = StableDiffusion3Pipeline().StableDiffusion3Pipeline.from_single_file(stable_diffusion_model_path, + device_map=device, + torch_dtype=torch.float16) if vae_model_name is not None: 
vae_model_path = os.path.join("inputs", "image", "sd_models", "vae", f"{vae_model_name}") @@ -4961,9 +4945,13 @@ def combined_callback(pipe, i, t, callback_kwargs): return callback_kwargs + prompt_embeds, prompt_neg_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = get_weighted_text_embeddings_sd3(pipe=pipe, prompt=prompt, neg_prompt=negative_prompt) + images = pipe( - prompt=prompt, - negative_prompt=negative_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=prompt_neg_embeds, + pooled_prompt_embeds=pooled_prompt_embeds, + negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, width=width, @@ -5011,6 +4999,10 @@ def combined_callback(pipe, i, t, callback_kwargs): finally: if 'pipe' in locals(): del pipe + if 'text_encoder' in locals(): + del text_encoder + if 'get_weighted_text_embeddings_sd3' in locals(): + del get_weighted_text_embeddings_sd3 flush() @@ -5107,7 +5099,7 @@ def generate_image_sd3_img2img(prompt, negative_prompt, init_image, strength, qu seed = int(seed) generator = torch.Generator(device).manual_seed(seed) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5217,7 +5209,7 @@ def generate_image_sd3_controlnet(prompt, negative_prompt, init_image, controlne seed = int(seed) generator = torch.Generator(device).manual_seed(seed) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5317,7 +5309,7 @@ def generate_image_sd3_inpaint(prompt, negative_prompt, init_image, mask_image, seed = int(seed) generator = torch.Generator(device).manual_seed(seed) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5397,7 +5389,7 @@ def generate_image_cascade(prompt, negative_prompt, seed, stop_button, width, he seed = int(seed) generator = torch.Generator(device).manual_seed(seed) - def combined_callback_prior(i, t, callback_kwargs): + def combined_callback_prior(prior, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5406,7 +5398,7 @@ def combined_callback_prior(i, t, callback_kwargs): progress((i + 1) / prior_steps, f"Step {i + 1}/{prior_steps}") return callback_kwargs - def combined_callback_decoder(i, t, callback_kwargs): + def combined_callback_decoder(decoder, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5723,7 +5715,7 @@ def generate_riffusion_text2image(prompt, negative_prompt, seed, stop_button, nu pipe = StableDiffusionPipeline().StableDiffusionPipeline.from_pretrained(riffusion_model_path, torch_dtype=torch.float16) pipe = pipe.to(device) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5870,7 +5862,7 @@ def generate_image_kandinsky_txt2img(prompt, negative_prompt, version, seed, sto pipe = KandinskyPipeline().KandinskyPipeline.from_pretrained(os.path.join(kandinsky_model_path, "2-1")) pipe.to(device) - def combined_callback_2_1(i, t, callback_kwargs): + def combined_callback_2_1(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5903,7 +5895,7 @@ def 
combined_callback_2_1(i, t, callback_kwargs): pipe = KandinskyV22Pipeline().KandinskyV22Pipeline.from_pretrained(os.path.join(kandinsky_model_path, "2-2-decoder")) pipe.to(device) - def combined_callback_2_2(i, t, callback_kwargs): + def combined_callback_2_2(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -5933,7 +5925,7 @@ def combined_callback_2_2(i, t, callback_kwargs): pipe.to(device) pipe.enable_model_cpu_offload() - def combined_callback_3_0(i, t, callback_kwargs): + def combined_callback_3_0(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6042,7 +6034,7 @@ def generate_image_kandinsky_img2img(prompt, negative_prompt, init_image, versio init_image = Image.open(init_image).convert("RGB") init_image = init_image.resize((width, height)) - def combined_callback_2_1(i, t, callback_kwargs): + def combined_callback_2_1(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6076,7 +6068,7 @@ def combined_callback_2_1(i, t, callback_kwargs): init_image = Image.open(init_image).convert("RGB") init_image = init_image.resize((width, height)) - def combined_callback_2_2(i, t, callback_kwargs): + def combined_callback_2_2(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6107,7 +6099,7 @@ def combined_callback_2_2(i, t, callback_kwargs): init_image = Image.open(init_image).convert("RGB") init_image = init_image.resize((width, height)) - def combined_callback_3_0(i, t, callback_kwargs): + def combined_callback_3_0(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6199,7 +6191,7 @@ def generate_image_kandinsky_inpaint(prompt, negative_prompt, init_image, mask_i mask_image = Image.open(mask_image).convert("L") mask_image = mask_image.resize((width, height)) - def combined_callback_2_1_and_2_2(i, t, callback_kwargs): + def combined_callback_2_1_and_2_2(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6319,7 +6311,6 @@ def generate_image_flux_txt2img(prompt, model_name, quantize_model_name, enable_ pipe.vae = vae.to(device) pipe.enable_model_cpu_offload() - pipe.enable_sequential_cpu_offload() pipe.vae.enable_slicing() pipe.vae.enable_tiling() pipe.to(torch.float16) @@ -6363,8 +6354,11 @@ def combined_callback(pipe, i, t, callback_kwargs): return callback_kwargs + prompt_embeds, pooled_prompt_embeds = get_weighted_text_embeddings_flux1(pipe=pipe, prompt=prompt) + output = pipe( - prompt=prompt, + prompt_embeds=prompt_embeds, + pooled_prompt_embeds=pooled_prompt_embeds, guidance_scale=guidance_scale, height=height, width=width, @@ -6409,10 +6403,12 @@ def combined_callback(pipe, i, t, callback_kwargs): else: if 'pipe' in locals(): del pipe + if 'get_weighted_text_embeddings_flux1' in locals(): + del get_weighted_text_embeddings_flux1 flush() -def generate_image_flux_img2img(prompt, init_image, model_name, quantize_model_name, enable_quantize, seed, stop_button, num_inference_steps, strength, guidance_scale, width, height, max_sequence_length, output_format, progress=gr.Progress()): +def generate_image_flux_img2img(prompt, init_image, model_name, quantize_model_name, enable_quantize, seed, stop_button, vae_model_name, lora_model_names, lora_scales, num_inference_steps, strength, guidance_scale, width, height, max_sequence_length, output_format, progress=gr.Progress()): global stop_signal 
stop_signal = False stop_idx = None @@ -6432,11 +6428,11 @@ def generate_image_flux_img2img(prompt, init_image, model_name, quantize_model_n flux_model_path = os.path.join("inputs", "image", "flux", model_name) quantize_model_path = os.path.join("inputs", "image", "flux", "quantize-flux", quantize_model_name) - if enable_quantize: - if not quantize_model_name: - gr.Info("Please select a GGUF model!") - return None, None + if not quantize_model_name: + gr.Info("Please select a Flux safetensors/GGUF model!") + return None, None + if enable_quantize: params = { 'prompt': prompt, 'guidance_scale': guidance_scale, @@ -6452,7 +6448,7 @@ def generate_image_flux_img2img(prompt, init_image, model_name, quantize_model_n env = os.environ.copy() env['PYTHONPATH'] = os.pathsep.join(sys.path) - result = subprocess.run([sys.executable, 'inputs/image/quantize-flux/flux/flux-img2img-quantize.py', json.dumps(params)], + result = subprocess.run([sys.executable, 'inputs/image/flux/quantize-flux/flux-img2img-quantize.py', json.dumps(params)], capture_output=True, text=True) if result.returncode != 0: @@ -6481,14 +6477,46 @@ def generate_image_flux_img2img(prompt, init_image, model_name, quantize_model_n gr.Info(f"Flux {model_name} model downloaded") try: - pipe = FluxImg2ImgPipeline().FluxImg2ImgPipeline.from_pretrained(flux_model_path, - torch_dtype=torch.bfloat16) - pipe = pipe.to(device) + pipe = FluxImg2ImgPipeline().FluxImg2ImgPipeline.from_pretrained(flux_model_path, torch_dtype=torch.bfloat16) + + if vae_model_name is not None: + vae_model_path = os.path.join("inputs", "image", "flux", "flux-vae", f"{vae_model_name}") + if os.path.exists(vae_model_path): + vae = AutoencoderKL().AutoencoderKL.from_single_file(vae_model_path, torch_dtype=torch.bfloat16) + pipe.vae = vae.to(device) init_image = Image.open(init_image).convert("RGB") init_image = init_image.resize((width, height)) - def combined_callback(i, t, callback_kwargs): + if isinstance(lora_scales, str): + lora_scales = [float(scale.strip()) for scale in lora_scales.split(',') if scale.strip()] + elif isinstance(lora_scales, (int, float)): + lora_scales = [float(lora_scales)] + + lora_loaded = False + if lora_model_names and lora_scales: + if len(lora_model_names) != len(lora_scales): + gr.Warning( + f"Number of LoRA models ({len(lora_model_names)}) does not match number of scales ({len(lora_scales)}). 
Using available scales.", duration=5) + + for i, lora_model_name in enumerate(lora_model_names): + if i < len(lora_scales): + lora_scale = lora_scales[i] + else: + lora_scale = 1.0 + + lora_model_path = os.path.join("inputs", "image", "flux", "flux-lora", lora_model_name) + if os.path.exists(lora_model_path): + adapter_name = os.path.splitext(os.path.basename(lora_model_name))[0] + try: + pipe.load_lora_weights(lora_model_path, adapter_name=adapter_name) + pipe.fuse_lora(lora_scale=lora_scale) + lora_loaded = True + gr.Info(f"Loaded LoRA {lora_model_name} with scale {lora_scale}") + except Exception as e: + gr.Warning(f"Error loading LoRA {lora_model_name}: {str(e)}", duration=5) + + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6573,7 +6601,7 @@ def generate_image_flux_inpaint(prompt, init_image, mask_image, model_name, seed init_image = Image.open(init_image).convert("RGB") mask_image = Image.open(mask_image).convert("L") - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6667,7 +6695,7 @@ def generate_image_flux_controlnet(prompt, init_image, base_model_name, seed, st init_image = Image.open(init_image).convert("RGB") - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6751,7 +6779,7 @@ def generate_image_hunyuandit_txt2img(prompt, negative_prompt, seed, stop_button pipe.to(device) pipe.transformer.enable_forward_chunking(chunk_size=1, dim=1) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -6842,7 +6870,7 @@ def generate_image_hunyuandit_controlnet(prompt, negative_prompt, init_image, co init_image = Image.open(init_image).convert("RGB") init_image = init_image.resize((width, height)) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7328,7 +7356,7 @@ def generate_image_wurstchen(prompt, negative_prompt, seed, stop_button, width, prior_pipeline.prior = torch.compile(prior_pipeline.prior, mode="reduce-overhead", fullgraph=True) decoder_pipeline.decoder = torch.compile(decoder_pipeline.decoder, mode="reduce-overhead", fullgraph=True) - def combined_callback_prior(i, t, callback_kwargs): + def combined_callback_prior(prior_pipeline, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7337,7 +7365,7 @@ def combined_callback_prior(i, t, callback_kwargs): progress((i + 1) / prior_steps, f"Step {i + 1}/{prior_steps}") return callback_kwargs - def combined_callback_decoder(i, t, callback_kwargs): + def combined_callback_decoder(decoder_pipeline, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7451,7 +7479,7 @@ def generate_image_deepfloyd_txt2img(prompt, negative_prompt, seed, stop_button, pipe_i.text_encoder = torch.compile(pipe_i.text_encoder, mode="reduce-overhead", fullgraph=True) pipe_i.unet = torch.compile(pipe_i.unet, mode="reduce-overhead", fullgraph=True) - def combined_callback_pipe_i(i, t, callback_kwargs): + def combined_callback_pipe_i(pipe_i, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx 
= i @@ -7481,7 +7509,7 @@ def combined_callback_pipe_i(i, t, callback_kwargs): pipe_ii.enable_model_cpu_offload() pipe_ii.enable_sequential_cpu_offload() - def combined_callback_pipe_ii(i, t, callback_kwargs): + def combined_callback_pipe_ii(pipe_ii, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7644,7 +7672,7 @@ def generate_image_deepfloyd_img2img(prompt, negative_prompt, init_image, seed, original_image = Image.open(init_image).convert("RGB") original_image = original_image.resize((width, height)) - def combined_callback_stage_1(i, t, callback_kwargs): + def combined_callback_stage_1(stage_1, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7653,7 +7681,7 @@ def combined_callback_stage_1(i, t, callback_kwargs): progress((i + 1) / num_inference_steps, f"Step {i + 1}/{num_inference_steps}") return callback_kwargs - def combined_callback_stage_2(i, t, callback_kwargs): + def combined_callback_stage_2(stage_2, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7807,7 +7835,7 @@ def generate_image_deepfloyd_inpaint(prompt, negative_prompt, init_image, mask_i original_image = Image.open(init_image).convert("RGB") mask_image = Image.open(mask_image).convert("RGB") - def combined_callback_stage_1(i, t, callback_kwargs): + def combined_callback_stage_1(stage_1, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7816,7 +7844,7 @@ def combined_callback_stage_1(i, t, callback_kwargs): progress((i + 1) / num_inference_steps, f"Step {i + 1}/{num_inference_steps}") return callback_kwargs - def combined_callback_stage_2(i, t, callback_kwargs): + def combined_callback_stage_2(stage_2, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -7955,7 +7983,7 @@ def generate_image_pixart(prompt, negative_prompt, version, seed, stop_button, n pipe.enable_model_cpu_offload() - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8212,7 +8240,7 @@ def generate_video_modelscope(prompt, negative_prompt, seed, stop_button, num_in pipe.enable_model_cpu_offload() pipe.enable_vae_slicing() - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8322,7 +8350,7 @@ def generate_video_zeroscope2(prompt, video_to_enhance, seed, stop_button, stren frames.append(frame) cap.release() - def combined_callback_enhance(i, t, callback_kwargs): + def combined_callback_enhance(enhance_pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8388,7 +8416,7 @@ def combined_callback_enhance(i, t, callback_kwargs): base_pipe.enable_vae_slicing() base_pipe.unet.enable_forward_chunking(chunk_size=1, dim=1) - def combined_callback_base(i, t, callback_kwargs): + def combined_callback_base(base_pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8457,7 +8485,7 @@ def generate_video_cogvideox_text2video(prompt, negative_prompt, cogvideox_versi pipe.vae.enable_slicing() pipe.vae.enable_tiling() - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8543,7 +8571,7 @@ def 
generate_video_cogvideox_image2video(prompt, negative_prompt, init_image, se image = load_image(init_image) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8607,7 +8635,7 @@ def generate_video_cogvideox_video2video(prompt, negative_prompt, init_video, co input_video = load_video(init_video) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -8672,7 +8700,7 @@ def generate_video_latte(prompt, negative_prompt, seed, stop_button, num_inferen pipe = LattePipeline().LattePipeline.from_pretrained(latte_model_path, torch_dtype=torch.float16).to(device) pipe.enable_model_cpu_offload() - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -9004,7 +9032,7 @@ def generate_stableaudio(prompt, negative_prompt, seed, stop_button, num_inferen pipe = StableAudioPipeline().StableAudioPipeline.from_pretrained(sa_model_path, torch_dtype=torch.float16) pipe = pipe.to(device) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -9240,7 +9268,7 @@ def generate_audio_audioldm2(prompt, negative_prompt, model_name, seed, stop_but pipe = AudioLDM2Pipeline().AudioLDM2Pipeline.from_pretrained(model_path, torch_dtype=torch.float16) pipe = pipe.to(device) - def combined_callback(i, t, callback_kwargs): + def combined_callback(pipe, i, t, callback_kwargs): nonlocal stop_idx if stop_signal and stop_idx is None: stop_idx = i @@ -10597,7 +10625,6 @@ def create_footer(): additional_inputs_accordion=gr.Accordion(label=_("StableDiffusion Settings", lang), open=False), outputs=[ gr.Gallery(label=_("Generated images", lang), elem_id="gallery", columns=[2], rows=[2], object_fit="contain", height="auto"), - gr.Gallery(label=_("Generation process", lang), elem_id="process_gallery", columns=[2], rows=[2], object_fit="contain", height="auto"), gr.Textbox(label=_("Message", lang), type="text") ], title=_("NeuroSandboxWebUI - StableDiffusion (img2img)", lang), @@ -11114,6 +11141,7 @@ def create_footer(): inputs=[ gr.Textbox(label=_("Enter your prompt", lang)), gr.Textbox(label=_("Enter your negative prompt", lang), value=""), + gr.Radio(choices=["Diffusers", "Safetensors"], label=_("Select model type", lang), value="Diffusers"), gr.Dropdown(choices=stable_diffusion_models_list, label=_("Select StableDiffusion model", lang), value=None), gr.Checkbox(label=_("Enable Quantize", lang), value=False), gr.Textbox(label=_("Seed (optional)", lang), value=""), @@ -11529,7 +11557,7 @@ def create_footer(): fn=generate_image_flux_txt2img, inputs=[ gr.Textbox(label=_("Enter your prompt", lang)), - gr.Dropdown(choices=["FLUX.1-schnell", "FLUX.1-dev"], label=_("Select Flux model", lang), value="FLUX.1-schnell"), + gr.Radio(choices=["FLUX.1-schnell", "FLUX.1-dev"], label=_("Select model type", lang), value="FLUX.1-schnell"), gr.Dropdown(choices=quantized_flux_models_list, label=_("Select safetensors Flux model (GGUF if enabled quantize)", lang), value=None), gr.Checkbox(label=_("Enable Quantize", lang), value=False), gr.Textbox(label=_("Seed (optional)", lang), value=""), @@ -11576,6 +11604,10 @@ def create_footer(): gr.Button(value=_("Stop generation", lang), 
interactive=True, variant="stop")
         ],
         additional_inputs=[
+            gr.Dropdown(choices=vae_models_list, label=_("Select VAE model (optional)", lang), value=None),
+            gr.Dropdown(choices=flux_lora_models_list, label=_("Select LORA models (optional)", lang), value=None, multiselect=True),
+            gr.Textbox(label=_("LoRA Scales", lang)),
             gr.Slider(minimum=1, maximum=100, value=4, step=1, label=_("Steps", lang)),
             gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.01, label=_("Strength", lang)),
             gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, label=_("Guidance Scale", lang)),
@@ -12984,7 +13016,7 @@ def create_footer():
     inpaint_interface.input_components[10].click(stop_generation, [], [], queue=False)
     cascade_interface.input_components[3].click(stop_generation, [], [], queue=False)
     riffusion_text2image_interface.input_components[3].click(stop_generation, [], [], queue=False)
-    sd3_txt2img_interface.input_components[5].click(stop_generation, [], [], queue=False)
+    sd3_txt2img_interface.input_components[6].click(stop_generation, [], [], queue=False)
     sd3_img2img_interface.input_components[7].click(stop_generation, [], [], queue=False)
     sd3_controlnet_interface.input_components[5].click(stop_generation, [], [], queue=False)
     sd3_inpaint_interface.input_components[5].click(stop_generation, [], [], queue=False)
@@ -13039,12 +13071,14 @@ def create_footer():
         sd3_txt2img_interface.input_components[7],
         sd3_txt2img_interface.input_components[8],
         sd3_img2img_interface.input_components[5],
-        flux_img2img_interface.input_components[3],
         t2i_ip_adapter_interface.input_components[4],
         ip_adapter_faceid_interface.input_components[5],
         flux_txt2img_interface.input_components[2],
         flux_txt2img_interface.input_components[6],
         flux_txt2img_interface.input_components[7],
+        flux_img2img_interface.input_components[3],
+        flux_img2img_interface.input_components[7],
+        flux_img2img_interface.input_components[8],
         auraflow_interface.input_components[3],
         kolors_txt2img_interface.input_components[3],
         rvc_interface.input_components[1],
diff --git a/RequirementsFiles/requirements.txt b/RequirementsFiles/requirements.txt
index 6edfc44d..fdf9d500 100644
--- a/RequirementsFiles/requirements.txt
+++ b/RequirementsFiles/requirements.txt
@@ -389,4 +389,27 @@ gekko==1.2.1
 rouge==1.0.1
 cramjam==2.8.4
 fastparquet==2024.5.0
-optimum==1.22.0
\ No newline at end of file
+optimum==1.22.0
+asttokens==2.4.1
+comm==0.2.2
+debugpy==1.8.7
+executing==2.1.0
+ipykernel==6.29.5
+ipython==8.28.0
+ipywidgets==8.1.5
+jedi==0.19.1
+jupyter-client==8.6.3
+jupyter-core==5.7.2
+jupyterlab-widgets==3.0.13
+lark==1.2.2
+matplotlib-inline==0.1.7
+nest-asyncio==1.6.0
+optimum-quanto==0.2.5
+parso==0.8.4
+prompt-toolkit==3.0.48
+pure-eval==0.2.3
+pyzmq==26.2.0
+stack-data==0.6.3
+tornado==6.4.1
+traitlets==5.14.3
+widgetsnbextension==4.0.13
\ No newline at end of file
diff --git a/Update.bat b/Update.bat
index 292871e2..7e133d6d 100644
--- a/Update.bat
+++ b/Update.bat
@@ -34,6 +34,7 @@ pip install --no-build-isolation -e git+https://github.com/turboderp/exllamav2.g
 pip install git+https://github.com/tencent-ailab/IP-Adapter.git 2>> %ERROR_LOG%
 pip install git+https://github.com/vork/PyNanoInstantMeshes.git 2>> %ERROR_LOG%
 pip install git+https://github.com/openai/CLIP.git 2>> %ERROR_LOG%
+pip install git+https://github.com/xhinker/sd_embed.git@main 2>> %ERROR_LOG%
 
 timeout /t 3 /nobreak >nul
 cls
diff --git a/Update.sh b/Update.sh
index 0fa00bd9..def4e0d0 100644
--- a/Update.sh
+++ b/Update.sh
@@ -33,6 +33,7 @@ pip install --no-build-isolation -e git+https://github.com/turboderp/exllamav2.g
 pip install git+https://github.com/tencent-ailab/IP-Adapter.git 2>> "$ERROR_LOG"
 pip install git+https://github.com/vork/PyNanoInstantMeshes.git 2>> "$ERROR_LOG"
 pip install git+https://github.com/openai/CLIP.git 2>> "$ERROR_LOG"
+pip install git+https://github.com/xhinker/sd_embed.git@main 2>> "$ERROR_LOG"
 
 sleep 3
 clear

From 5022dedbbac9c04980700edbade1ee4eaafb4a10 Mon Sep 17 00:00:00 2001
From: Michael
Date: Thu, 17 Oct 2024 21:59:12 +0300
Subject: [PATCH 2/2] Update app.py

---
 LaunchFile/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LaunchFile/app.py b/LaunchFile/app.py
index 11875377..1aa32165 100644
--- a/LaunchFile/app.py
+++ b/LaunchFile/app.py
@@ -1351,7 +1351,7 @@ def image_to_base64_data_uri(image_path):
 
     else:
         if llm_model_type == "Llama":
-            tokenizer, llm_model = load_model(llm_model_name, llm_model_type, n_ctx, n_batch, n_ubatch, freq_base, freq_scale)
+            tokenizer, llm_model, * _ = load_model(llm_model_name, llm_model_type, n_ctx, n_batch, n_ubatch, freq_base, freq_scale)
         elif llm_model_type == "ExLlamaV2":
             cache, tokenizer, llm_model, * _ = load_model(llm_model_name, llm_model_type, n_ctx=None, n_batch=None, n_ubatch=None, freq_base=None, freq_scale=None)
         else: