
Commit

Merge pull request #204 from Dartvauder/dev
Dev
Dartvauder authored Sep 17, 2024
2 parents 33c302e + ab08868 commit a32162d
Showing 3 changed files with 99 additions and 10 deletions.
82 changes: 80 additions & 2 deletions LaunchFiles/appEN.py
@@ -12,6 +12,9 @@
cache_dir = os.path.join("cache")
os.makedirs(cache_dir, exist_ok=True)
os.environ["XDG_CACHE_HOME"] = cache_dir
temp_dir = os.path.join("temp")
os.makedirs(temp_dir, exist_ok=True)
os.environ["TMPDIR"] = temp_dir
import gradio as gr
import langdetect
from datasets import load_dataset, Audio
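The added temp_dir lines mirror the existing cache_dir handling: both environment variables are set before the heavy imports so that libraries resolving their cache or temp location pick up the local folders. A minimal sketch of the intended effect (illustrative only, not part of the commit):

```python
import os

# Point the process-wide temp directory at a local folder before anything
# else creates temporary files. Python's tempfile consults TMPDIR first.
temp_dir = os.path.join("temp")
os.makedirs(temp_dir, exist_ok=True)
os.environ["TMPDIR"] = temp_dir

import tempfile  # imported after the override on purpose

print(tempfile.gettempdir())  # expected to resolve to the local "temp" folder
```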
@@ -102,6 +105,7 @@ def wrapper():

# Diffusers import
diffusers = lazy_import('diffusers', '')
BlipDiffusionPipeline = lazy_import('diffusers.pipelines', 'BlipDiffusionPipeline')
StableDiffusionPipeline = lazy_import('diffusers', 'StableDiffusionPipeline')
StableDiffusion3Pipeline = lazy_import('diffusers', 'StableDiffusion3Pipeline')
StableDiffusionXLPipeline = lazy_import('diffusers', 'StableDiffusionXLPipeline')
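These call sites rely on the lazy_import helper defined earlier in appEN.py (only its def wrapper(): line is visible in this diff). A plausible shape for that helper, shown here as an assumption to explain the BlipDiffusionPipeline().BlipDiffusionPipeline.from_pretrained(...) pattern used further down (the fromlist argument is ignored in this simplified sketch):

```python
import importlib

def lazy_import(module_name, fromlist):
    # Assumed sketch: defer the import until the wrapper is first called,
    # then cache the module for subsequent calls.
    module = None

    def wrapper():
        nonlocal module
        if module is None:
            module = importlib.import_module(module_name)
        return module

    return wrapper

# With this shape, BlipDiffusionPipeline() returns the diffusers.pipelines
# module, and the class is reached as an attribute on it:
# BlipDiffusionPipeline().BlipDiffusionPipeline.from_pretrained(...)
```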
@@ -3380,6 +3384,54 @@ def generate_image_diffedit(source_prompt, source_negative_prompt, target_prompt
flush()


def generate_image_blip_diffusion(text_prompt_input, negative_prompt, cond_image, cond_subject, tgt_subject,
                                  num_inference_steps, guidance_scale, height, width, output_format):
    blip_diffusion_path = os.path.join("inputs", "image", "sd_models", "blip-diff")

    if not os.path.exists(blip_diffusion_path):
        print("Downloading BlipDiffusion model...")
        os.makedirs(blip_diffusion_path, exist_ok=True)
        Repo.clone_from("https://huggingface.co/Salesforce/blipdiffusion", blip_diffusion_path)
        print("BlipDiffusion model downloaded")

    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        blip_diffusion_pipe = BlipDiffusionPipeline().BlipDiffusionPipeline.from_pretrained(
            blip_diffusion_path, torch_dtype=torch.float16
        ).to(device)

        cond_image = Image.open(cond_image).convert("RGB")

        output = blip_diffusion_pipe(
            text_prompt_input,
            cond_image,
            cond_subject,
            tgt_subject,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
        ).images

        today = datetime.now().date()
        image_dir = os.path.join('outputs', f"BlipDiffusion_{today.strftime('%Y%m%d')}")
        os.makedirs(image_dir, exist_ok=True)
        image_filename = f"blip_diffusion_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{output_format}"
        image_path = os.path.join(image_dir, image_filename)

        output[0].save(image_path, format=output_format.upper())

        return image_path, "Image generated successfully."

    except Exception as e:
        return None, str(e)

    finally:
        del blip_diffusion_pipe
        flush()
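For reference, a hypothetical standalone call of the new function outside the Gradio UI (not part of the commit; the conditioning-image path is made up for illustration, and the model weights are downloaded on first use as shown above):

```python
image_path, message = generate_image_blip_diffusion(
    text_prompt_input="swimming underwater",
    negative_prompt="",
    cond_image="inputs/image/example_dog.png",  # hypothetical path
    cond_subject="dog",
    tgt_subject="dog",
    num_inference_steps=30,
    guidance_scale=8.0,
    height=512,
    width=512,
    output_format="png",
)
print(message, image_path)
```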


def generate_image_animatediff(prompt, negative_prompt, input_video, strength, model_type, stable_diffusion_model_name, seed, motion_lora_name, num_frames, num_inference_steps,
                               guidance_scale, width, height, clip_skip):

@@ -8550,6 +8602,32 @@ def reload_interface():
submit_btn="Generate"
)

blip_diffusion_interface = gr.Interface(
fn=generate_image_blip_diffusion,
inputs=[
gr.Textbox(label="Prompt"),
gr.Textbox(label="Negative Prompt", value=""),
gr.Image(label="Conditioning Image", type="filepath"),
gr.Textbox(label="Conditioning Subject"),
gr.Textbox(label="Target Subject"),
gr.Slider(minimum=1, maximum=100, value=30, step=1, label="Inference Steps"),
gr.Slider(minimum=0.1, maximum=30.0, value=8, step=0.1, label="Guidance Scale"),
gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Height"),
gr.Slider(minimum=64, maximum=2048, value=512, step=64, label="Width"),
gr.Radio(choices=["png", "jpeg"], label="Output Format", value="png")
],
outputs=[
gr.Image(type="filepath", label="Generated Image"),
gr.Textbox(label="Message")
],
title="NeuroSandboxWebUI - BlipDiffusion",
description="This interface allows you to generate images using BlipDiffusion. Upload a conditioning image, provide text prompts and subjects, and customize generation parameters.",
allow_flagging="never",
clear_btn=None,
stop_btn="Stop",
submit_btn="Generate"
)

animatediff_interface = gr.Interface(
fn=generate_image_animatediff,
inputs=[
@@ -10301,11 +10379,11 @@ def reload_interface():
gr.TabbedInterface(
    [
        gr.TabbedInterface(
            [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface,
            [txt2img_interface, img2img_interface, depth2img_interface, marigold_interface, pix2pix_interface, controlnet_interface, latent_upscale_interface, supir_upscale_interface, sdxl_refiner_interface, inpaint_interface, outpaint_interface, gligen_interface, diffedit_interface, blip_diffusion_interface, animatediff_interface, hotshotxl_interface, video_interface, ldm3d_interface,
             gr.TabbedInterface([sd3_txt2img_interface, sd3_img2img_interface, sd3_controlnet_interface, sd3_inpaint_interface],
                                tab_names=["txt2img", "img2img", "controlnet", "inpaint"]),
             cascade_interface, t2i_ip_adapter_interface, ip_adapter_faceid_interface, riffusion_interface],
            tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"]
            tab_names=["txt2img", "img2img", "depth2img", "marigold", "pix2pix", "controlnet", "upscale(latent)", "upscale(SUPIR)", "refiner", "inpaint", "outpaint", "gligen", "diffedit", "blip-diffusion", "animatediff", "hotshotxl", "video", "ldm3d", "sd3", "cascade", "t2i-ip-adapter", "ip-adapter-faceid", "riffusion"]
        ),
        kandinsky_interface, flux_interface, hunyuandit_interface, lumina_interface, kolors_interface, auraflow_interface, wurstchen_interface, deepfloyd_if_interface, pixart_interface, playgroundv2_interface
    ],
7 changes: 4 additions & 3 deletions README.md
@@ -12,7 +12,7 @@ The goal of the project - to create the easiest possible application to use neur

### Text: <img width="1118" alt="1" src="https://github.com/user-attachments/assets/d0947d54-eb8b-4f20-986b-579f9652ff95">

### Image: <img width="1118" alt="2" src="https://github.com/user-attachments/assets/39506653-23e1-432b-b250-362146a693a5">
### Image: <img width="1112" alt="2" src="https://github.com/user-attachments/assets/02085575-1ae3-4e71-93eb-499c3103623a">

### Video: <img width="1115" alt="3" src="https://github.com/user-attachments/assets/032b248e-1ea8-4661-8a96-267e4a9ef01c">

@@ -31,7 +31,7 @@ The goal of the project - to create the easiest possible application to use neur
* Flexible and optimized interface (By Gradio)
* Debug logging to logs from `Install` and `Update` files
* Support for Transformers and llama.cpp models (LLM)
* Support for diffusers and safetensors models (StableDiffusion) - txt2img, img2img, depth2img, marigold, pix2pix, controlnet, upscale (latent), upscale (SUPIR), refiner, inpaint, outpaint, gligen, diffedit, animatediff, hotshot-xl, video, ldm3d, sd3, cascade, t2i-ip-adapter, ip-adapter-faceid and riffusion tabs
* Support for diffusers and safetensors models (StableDiffusion) - txt2img, img2img, depth2img, marigold, pix2pix, controlnet, upscale (latent), upscale (SUPIR), refiner, inpaint, outpaint, gligen, diffedit, blip-diffusion, animatediff, hotshot-xl, video, ldm3d, sd3, cascade, t2i-ip-adapter, ip-adapter-faceid and riffusion tabs
* Support for stable-diffusion-cpp models for FLUX
* Support of additional models for image generation: Kandinsky (txt2img, img2img, inpaint), Flux (with LoRA support), HunyuanDiT (txt2img, controlnet), Lumina-T2X, Kolors (txt2img with LoRA support, img2img, ip-adapter-plus), AuraFlow (with LoRA and AuraSR support), Würstchen, DeepFloydIF (txt2img, img2img, inpaint), PixArt and PlaygroundV2.5
* Support Extras with Rembg, CodeFormer, PixelOE, DDColor, DownScale, Format changer, FaceSwap (Roop) and Upscale (Real-ESRGAN) models for image, video and audio
@@ -40,7 +40,7 @@ The goal of the project - to create the easiest possible application to use neur
* Support AudioLDM 2 (Models: audio and music)
* Supports TTS and Whisper models (For LLM and TTS-STT)
* Support MMS for text-to-speech and speech-to-text
* Supports Lora, Textual inversion (embedding), Vae, MagicPrompt, Img2img, Depth, Marigold, Pix2Pix, Controlnet, Upscalers (latent and SUPIR), Refiner, Inpaint, Outpaint, GLIGEN, DiffEdit, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID and Riffusion models (For StableDiffusion)
* Supports Lora, Textual inversion (embedding), Vae, MagicPrompt, Img2img, Depth, Marigold, Pix2Pix, Controlnet, Upscalers (latent and SUPIR), Refiner, Inpaint, Outpaint, GLIGEN, DiffEdit, BLIP-Diffusion, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID and Riffusion models (For StableDiffusion)
* Support Multiband Diffusion model (For AudioCraft)
* Support LibreTranslate (Local API) and SeamlessM4Tv2 for language translations
* Support ModelScope, ZeroScope 2, CogVideoX and Latte for video generation
@@ -213,6 +213,7 @@ First of all, I want to thank the developers of [PyCharm](https://www.jetbrains.
* [SUPIR](https://github.com/Fanghua-Yu/SUPIR/blob/master/LICENSE)
* [MagicPrompt](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/mit.md)
* [Marigold](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)
* [BLIP-Diffusion](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)

#### These third-party repository codes are also used in my project:

20 changes: 15 additions & 5 deletions Wikies/WikiEN.md
@@ -50,7 +50,7 @@

# Image:

### StableDiffusion - has twenty three sub-tabs:
### StableDiffusion - has twenty four sub-tabs:

#### txt2img:

@@ -162,9 +162,19 @@

#### diffedit:

1) Enter your Source Prompt and Source Negative Prompt for image masking (+ and - for prompt weighting)
2) Enter your Target Prompt and Target Negative Prompt for image diffediting (+ and - for prompt weighting)
3) Click the `Submit` button to get the generated image
1) Enter your Source Prompt and Source Negative Prompt for image masking
2) Enter your Target Prompt and Target Negative Prompt for image diff-editing
3) Upload the initial image
4) Set up the model according to the parameters you need
5) Click the `Submit` button to get the generated image
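
Under the hood, the steps above map to diffusers' DiffEdit support. A minimal sketch with StableDiffusionDiffEditPipeline (illustrative only; the model ID, image path and prompts are placeholders, and the app's own implementation in appEN.py may differ):

```python
import torch
from diffusers import StableDiffusionDiffEditPipeline, DDIMScheduler, DDIMInverseScheduler
from diffusers.utils import load_image

pipe = StableDiffusionDiffEditPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-1", torch_dtype=torch.float16
).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
pipe.inverse_scheduler = DDIMInverseScheduler.from_config(pipe.scheduler.config)

init_image = load_image("fruit_bowl.png").resize((768, 768))  # placeholder image

# 1) Mask from the source/target prompts, 2) invert the image, 3) regenerate the masked region.
mask = pipe.generate_mask(image=init_image, source_prompt="a bowl of fruits",
                          target_prompt="a bowl of pears")
inv_latents = pipe.invert(prompt="a bowl of fruits", image=init_image).latents
edited = pipe(prompt="a bowl of pears", mask_image=mask,
              image_latents=inv_latents).images[0]
edited.save("edited.png")
```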

#### blip-diffusion:

1) Enter your Prompt
2) Upload the initial image
3) Enter your Conditioning and Target Subjects
4) Set up the model according to the parameters you need
5) Click the `Submit` button to get the generated image
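
For context, this tab wraps diffusers' BlipDiffusionPipeline for subject-driven generation, roughly as in the minimal sketch below (illustrative only; the image path and subjects are placeholders, and the app downloads the Salesforce/blipdiffusion weights into its own inputs folder):

```python
import torch
from diffusers.pipelines import BlipDiffusionPipeline
from diffusers.utils import load_image

pipe = BlipDiffusionPipeline.from_pretrained(
    "Salesforce/blipdiffusion", torch_dtype=torch.float16
).to("cuda")

cond_image = load_image("dog.png")  # placeholder conditioning image

images = pipe(
    "swimming underwater",  # prompt
    cond_image,             # conditioning image
    "dog",                  # conditioning subject
    "dog",                  # target subject
    guidance_scale=7.5,
    num_inference_steps=25,
    height=512,
    width=512,
).images
images[0].save("blip_diffusion_result.png")
```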

#### animatediff:

@@ -478,7 +488,7 @@
* LLM models can be taken from [HuggingFace](https://huggingface.co/models) or from ModelDownloader inside interface
* StableDiffusion, vae, inpaint, embedding and lora models can be taken from [CivitAI](https://civitai.com/models) or from ModelDownloader inside interface
* RVC models can be taken from [VoiceModels](https://voice-models.com)
* StableAudio, AudioCraft, AudioLDM 2, TTS, Whisper, MMS, SeamlessM4Tv2, Wav2Lip, LivePortrait, SunoBark, MoonDream2, Upscalers (Latent and Real-ESRGAN), Refiner, GLIGEN, DiffEdit, Depth, Marigold, Pix2Pix, Controlnet, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID, Riffusion, Rembg, Roop, CodeFormer, DDColor, PixelOE, Real-ESRGAN, StableFast3D, Shap-E, SV34D, Zero123Plus, UVR, Demucs, Kandinsky, Flux, HunyuanDiT, Lumina-T2X, Kolors, AuraFlow, AuraSR, Würstchen, DeepFloydIF, PixArt, PlaygroundV2.5, ModelScope, ZeroScope 2, CogVideoX, MagicPrompt, Latte and Multiband diffusion models are downloaded automatically into the *inputs* folder when they are used
* StableAudio, AudioCraft, AudioLDM 2, TTS, Whisper, MMS, SeamlessM4Tv2, Wav2Lip, LivePortrait, SunoBark, MoonDream2, Upscalers (Latent and Real-ESRGAN), Refiner, GLIGEN, DiffEdit, BLIP-Diffusion, Depth, Marigold, Pix2Pix, Controlnet, AnimateDiff, HotShot-XL, Videos, LDM3D, SD3, Cascade, T2I-IP-ADAPTER, IP-Adapter-FaceID, Riffusion, Rembg, Roop, CodeFormer, DDColor, PixelOE, Real-ESRGAN, StableFast3D, Shap-E, SV34D, Zero123Plus, UVR, Demucs, Kandinsky, Flux, HunyuanDiT, Lumina-T2X, Kolors, AuraFlow, AuraSR, Würstchen, DeepFloydIF, PixArt, PlaygroundV2.5, ModelScope, ZeroScope 2, CogVideoX, MagicPrompt, Latte and Multiband diffusion models are downloaded automatically into the *inputs* folder when they are used
* You can take voices anywhere. Record yours or take a recording from the Internet. Or just use those that are already in the project. The main thing is that it is pre-processed!

## Known Bugs:
