How can I get correct IP-Adapter image embeds? I got 4D tensors and I cannot use them. #7160
-
Reproducible sample script:
import torch
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from diffusers.utils import load_image
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16,
variant="fp16"
)
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
pipeline.load_ip_adapter(
"h94/IP-Adapter",
subfolder="sdxl_models",
weight_name=[
"ip-adapter-plus_sdxl_vit-h.safetensors",
"ip-adapter-plus-face_sdxl_vit-h.safetensors"
] ,
image_encoder_folder="models/image_encoder"
)
pipeline.set_ip_adapter_scale([0.7, 0.3])
pipeline.enable_model_cpu_offload()
face_image = load_image("https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/women_input.png")
style_folder = "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/style_ziggy"
style_images = [load_image(f"{style_folder}/img{i}.png") for i in range(10)]
image_embeds = pipeline.prepare_ip_adapter_image_embeds(
ip_adapter_image=[style_images, face_image],
ip_adapter_image_embeds=None,
device="cuda",
num_images_per_prompt=1,
do_classifier_free_guidance=True
)
torch.save(image_embeds, "image_embeds.ipadpt")
print(f"type: {type(image_embeds)}")
print(f"len: {len(image_embeds)}")
for embeds in image_embeds:
    print(f"shape: {embeds.shape}")
The printed shapes show that 4D tensors are returned, although 3D tensors are expected, so I cannot use the saved embeds in the following script:
import torch
from diffusers import AutoPipelineForText2Image, DDIMScheduler
pipeline = AutoPipelineForText2Image.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16,
variant="fp16"
)
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
pipeline.load_ip_adapter(
"h94/IP-Adapter",
subfolder="sdxl_models",
weight_name=[
"ip-adapter-plus_sdxl_vit-h.safetensors",
"ip-adapter-plus-face_sdxl_vit-h.safetensors"
],
image_encoder_folder=None
)
pipeline.set_ip_adapter_scale([0.7, 0.8])
pipeline.to("cuda")
image_embeds_fromfile = torch.load("image_embeds.ipadpt")
generator = torch.Generator(device="cpu").manual_seed(2024)
image = pipeline(
prompt="a woman",
ip_adapter_image_embeds=image_embeds_fromfile,
negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
num_inference_steps=50,
guidance_scale = 0,
num_images_per_prompt=1,
generator=generator,
).images[0]
image.save("result_from_image_embeds.png")
Running this script, I got this error:
|
Beta Was this translation helpful? Give feedback.
Replies: 4 comments 2 replies
-
Cc: @yiyixuxu. Are you using a particular branch of `diffusers`? |
Beta Was this translation helpful? Give feedback.
-
#7016 was merged, so I am using the main branch.
The output of `diffusers-cli env` is:
|
Beta Was this translation helpful? Give feedback.
-
I'm sorry. I don't understand what you are saying. Do you mean creating a new issue? |
Beta Was this translation helpful? Give feedback.
-
should be fixed with #7189 now! |
Beta Was this translation helpful? Give feedback.
should be fixed with #7189 now!