
Commit e3d4a6b

naykun authored and sayakpaul committed
Performance Improve for Qwen Image Edit (#12190)
* fix(qwen-image-edit): update condition reshaping logic to improve editing performance
* fix(qwen-image-edit): remove _auto_resize
1 parent ad00c56 commit e3d4a6b

File tree

1 file changed: +3 -35 lines


src/diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py

Lines changed: 3 additions & 35 deletions
@@ -62,25 +62,6 @@
         >>> image.save("qwenimage_edit.png")
         ```
 """
-PREFERRED_QWENIMAGE_RESOLUTIONS = [
-    (672, 1568),
-    (688, 1504),
-    (720, 1456),
-    (752, 1392),
-    (800, 1328),
-    (832, 1248),
-    (880, 1184),
-    (944, 1104),
-    (1024, 1024),
-    (1104, 944),
-    (1184, 880),
-    (1248, 832),
-    (1328, 800),
-    (1392, 752),
-    (1456, 720),
-    (1504, 688),
-    (1568, 672),
-]
 
 
 # Copied from diffusers.pipelines.qwenimage.pipeline_qwenimage.calculate_shift
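For context, the deleted PREFERRED_QWENIMAGE_RESOLUTIONS table fed the _auto_resize path removed further down: it picked the bucket whose aspect ratio was closest to the input image's, then snapped both sides down to a multiple of multiple_of. A minimal standalone sketch of that removed logic (abbreviated bucket list; multiple_of = 16, i.e. vae_scale_factor * 2, is an assumption here):

# Sketch of the removed _auto_resize bucketing; not part of this commit.
PREFERRED_QWENIMAGE_RESOLUTIONS = [(672, 1568), (1024, 1024), (1568, 672)]  # abbreviated

def old_auto_resize(image_width, image_height, multiple_of=16):
    aspect_ratio = image_width / image_height
    # Pick the bucket with the closest aspect ratio, exactly as the deleted code did.
    _, image_width, image_height = min(
        (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS
    )
    # Snap both sides down to the nearest multiple.
    image_width = image_width // multiple_of * multiple_of
    image_height = image_height // multiple_of * multiple_of
    return image_width, image_height

print(old_auto_resize(1920, 1080))  # (1568, 672) with the abbreviated list above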
@@ -565,7 +546,6 @@ def __call__(
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
         max_sequence_length: int = 512,
-        _auto_resize: bool = True,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
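Callers that still pass the removed (private, underscore-prefixed) keyword will now get a TypeError. A hedged call-site sketch, with pipe assumed to be an already loaded QwenImageEditPipeline:

# pipe(image=init_image, prompt=prompt, _auto_resize=False)  # now raises TypeError: unexpected keyword argument
out = pipe(image=init_image, prompt=prompt)  # resizing always targets the calculated dimensions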
@@ -684,18 +664,9 @@ def __call__(
         device = self._execution_device
         # 3. Preprocess image
         if image is not None and not (isinstance(image, torch.Tensor) and image.size(1) == self.latent_channels):
-            img = image[0] if isinstance(image, list) else image
-            image_height, image_width = self.image_processor.get_default_height_width(img)
-            aspect_ratio = image_width / image_height
-            if _auto_resize:
-                _, image_width, image_height = min(
-                    (abs(aspect_ratio - w / h), w, h) for w, h in PREFERRED_QWENIMAGE_RESOLUTIONS
-                )
-            image_width = image_width // multiple_of * multiple_of
-            image_height = image_height // multiple_of * multiple_of
-            image = self.image_processor.resize(image, image_height, image_width)
+            image = self.image_processor.resize(image, calculated_height, calculated_width)
             prompt_image = image
-            image = self.image_processor.preprocess(image, image_height, image_width)
+            image = self.image_processor.preprocess(image, calculated_height, calculated_width)
             image = image.unsqueeze(2)
 
         has_neg_prompt = negative_prompt is not None or (
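The new code resizes straight to calculated_height and calculated_width. Those values are not computed in this hunk; they come from earlier in __call__, derived from the input image's aspect ratio against a fixed target area. A hedged sketch of that kind of helper (the function name, target area, and rounding multiple are assumptions, not shown in this diff):

import math

def calculate_dimensions(target_area, ratio, multiple_of=16):
    # Keep the input aspect ratio while targeting roughly `target_area` pixels,
    # then round both sides to a multiple the VAE/transformer can consume.
    width = math.sqrt(target_area * ratio)
    height = width / ratio
    width = round(width / multiple_of) * multiple_of
    height = round(height / multiple_of) * multiple_of
    return width, height

calculated_width, calculated_height = calculate_dimensions(1024 * 1024, 1920 / 1080)
print(calculated_width, calculated_height)  # 1360 768 for a 16:9 input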
@@ -712,9 +683,6 @@ def __call__(
             max_sequence_length=max_sequence_length,
         )
         if do_true_cfg:
-            # negative image is the same size as the original image, but all pixels are white
-            # negative_image = Image.new("RGB", (image.width, image.height), (255, 255, 255))
-
             negative_prompt_embeds, negative_prompt_embeds_mask = self.encode_prompt(
                 image=prompt_image,
                 prompt=negative_prompt,
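The deleted comments recorded an abandoned idea: pairing the negative prompt with an all-white image of the conditioning image's size. For the record, the commented-out line corresponds to this PIL call (a sketch; it was never active code):

from PIL import Image

image = Image.new("RGB", (1024, 1024))  # stand-in for the real conditioning image
# All-white negative image of the same size, per the deleted comment:
negative_image = Image.new("RGB", (image.width, image.height), (255, 255, 255))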
@@ -741,7 +709,7 @@ def __call__(
         img_shapes = [
             [
                 (1, height // self.vae_scale_factor // 2, width // self.vae_scale_factor // 2),
-                (1, image_height // self.vae_scale_factor // 2, image_width // self.vae_scale_factor // 2),
+                (1, calculated_height // self.vae_scale_factor // 2, calculated_width // self.vae_scale_factor // 2),
             ]
         ] * batch_size
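Each entry in img_shapes is a packed latent grid: spatial sides divided by the VAE scale factor and again by the 2x2 patchification, for both the output latents and the (now calculated_*-sized) condition image. A quick numeric sketch, assuming vae_scale_factor = 8:

vae_scale_factor = 8
batch_size = 2
height, width = 1024, 1024                       # output latents target
calculated_height, calculated_width = 768, 1360  # resized condition image (example)

img_shapes = [
    [
        (1, height // vae_scale_factor // 2, width // vae_scale_factor // 2),
        (1, calculated_height // vae_scale_factor // 2, calculated_width // vae_scale_factor // 2),
    ]
] * batch_size
print(img_shapes[0])  # [(1, 64, 64), (1, 48, 85)]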
