Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

import PIL.Image
import torch

from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
QwenImageControlNetBeforeDenoiserStep,
QwenImageCreateMaskLatentsStep,
Expand Down Expand Up @@ -394,6 +399,14 @@ def description(self):
+ " - for text-to-image generation, all you need to provide is prompt embeddings"
)

@property
def outputs(self):
    """Outputs advertised for this block: a single `latents` tensor.

    Returns:
        A one-element list holding the `OutputParam` that describes the latents
        generated by the denoising step.
    """
    denoised_latents = OutputParam(
        name="latents",
        type_hint=torch.Tensor,
        description="The latents generated by the denoising step",
    )
    return [denoised_latents]


# ====================
# 3. DECODE
Expand Down Expand Up @@ -467,3 +480,9 @@ def description(self):
+ "- to run the controlnet workflow, you need to provide `control_image`\n"
+ "- for text-to-image generation, all you need to provide is `prompt`"
)

@property
def outputs(self):
    """Outputs advertised for this block: the generated `images`.

    Returns:
        A one-element list holding the `OutputParam` that describes the images.
    """
    return [
        # Carry a description, like the other `images` outputs declared for the
        # sibling QwenImage block presets, so the entry is not rendered blank.
        OutputParam(name="images", type_hint=List[List[PIL.Image.Image]], description="The generated images"),
    ]
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional
from typing import List, Optional

import PIL.Image
import torch

from ...utils import logging
from ..modular_pipeline import AutoPipelineBlocks, ConditionalPipelineBlocks, SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
QwenImageCreateMaskLatentsStep,
QwenImageEditRoPEInputsStep,
Expand Down Expand Up @@ -307,6 +310,14 @@ def description(self):
" - `QwenImageEditDecodeStep` (edit) is used when `mask` is not provided.\n"
)

@property
def outputs(self):
Copy link
Collaborator Author

@yiyixuxu yiyixuxu Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `outputs` property is only used for automatic docstring rendering.
By default, `outputs` is the same as `intermediate_outputs`: https://github.com/huggingface/diffusers/blob/main/src/diffusers/modular_pipelines/modular_pipeline.py#L298
We can override it here to get a better docstring.

For example, to print the docstring of the denoise block for the Qwen-Image-Layered model:

from diffusers import ModularPipeline
pipe = ModularPipeline.from_pretrained("Qwen/Qwen-Image-Layered")
print(pipe.blocks.sub_blocks["denoise"].doc)

On `main`, it shows:

class QwenImageLayeredCoreDenoiseStep

  Core denoising workflow for QwenImage-Layered img2img task.

  Components:

      pachifier (`QwenImageLayeredPachifier`) [subfolder=]

      scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]

      guider (`ClassifierFreeGuidance`) [subfolder=]

      transformer (`QwenImageTransformer2DModel`) [subfolder=]

  Inputs:

      num_images_per_prompt (`None`, *optional*, defaults to 1):

      prompt_embeds (`None`):

      prompt_embeds_mask (`None`):

      negative_prompt_embeds (`None`, *optional*):

      negative_prompt_embeds_mask (`None`, *optional*):

      image_latents (`None`, *optional*):

      latents (`None`, *optional*):

      layers (`None`, *optional*, defaults to 4):

      generator (`None`, *optional*):

      num_inference_steps (`int`, *optional*, defaults to 50):

      sigmas (`List`, *optional*):

      attention_kwargs (`None`, *optional*):

      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:

      batch_size (`int`):
          Number of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt

      dtype (`dtype`):
          Data type of model tensor inputs (determined by `prompt_embeds`)

      image_height (`int`):
          The image height calculated from the image latents dimension

      image_width (`int`):
          The image width calculated from the image latents dimension

      height (`int`):
          The height of the image output

      width (`int`):
          The width of the image output

      latents (`Tensor`):
          The initial latents to use for the denoising process

      timesteps (`Tensor`):

      img_shapes (`List`):
          The shapes of the image latents, used for RoPE calculation

      txt_seq_lens (`List`):
          The sequence lengths of the prompt embeds, used for RoPE calculation

      negative_txt_seq_lens (`List`):
          The sequence lengths of the negative prompt embeds, used for RoPE calculation

      additional_t_cond (`Tensor`):
          The additional t cond, used for RoPE calculation

With this change, on this PR branch, it shows:

class QwenImageLayeredCoreDenoiseStep

  Core denoising workflow for QwenImage-Layered img2img task.

  Components:

      pachifier (`QwenImageLayeredPachifier`) [subfolder=]

      scheduler (`FlowMatchEulerDiscreteScheduler`) [subfolder=]

      guider (`ClassifierFreeGuidance`) [subfolder=]

      transformer (`QwenImageTransformer2DModel`) [subfolder=]

  Inputs:

      num_images_per_prompt (`None`, *optional*, defaults to 1):

      prompt_embeds (`None`):

      prompt_embeds_mask (`None`):

      negative_prompt_embeds (`None`, *optional*):

      negative_prompt_embeds_mask (`None`, *optional*):

      image_latents (`None`, *optional*):

      latents (`None`, *optional*):

      layers (`None`, *optional*, defaults to 4):

      generator (`None`, *optional*):

      num_inference_steps (`int`, *optional*, defaults to 50):

      sigmas (`List`, *optional*):

      attention_kwargs (`None`, *optional*):

      **denoiser_input_fields (`None`, *optional*):
          conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.

  Outputs:

      latents (`Tensor`):
          The latents generated by the denoising step

return [
OutputParam(
name="latents", type_hint=torch.Tensor, description="The latents generated by the denoising step"
),
]


# ====================
# 5. AUTO BLOCKS & PRESETS
Expand Down Expand Up @@ -334,3 +345,9 @@ def description(self):
"- for edit (img2img) generation, you need to provide `image`\n"
"- for edit inpainting, you need to provide `mask_image` and `image`, optionally you can provide `padding_mask_crop`\n"
)

@property
def outputs(self):
    """Outputs advertised for this block: the generated `images`.

    Returns:
        A one-element list holding the `OutputParam` that describes the images.
    """
    images_param = OutputParam(
        name="images",
        type_hint=List[List[PIL.Image.Image]],
        description="The generated images",
    )
    return [images_param]
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List

import PIL.Image
import torch

from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
QwenImageEditPlusRoPEInputsStep,
QwenImagePrepareLatentsStep,
Expand Down Expand Up @@ -136,6 +141,14 @@ class QwenImageEditPlusCoreDenoiseStep(SequentialPipelineBlocks):
def description(self):
return "Core denoising workflow for QwenImage-Edit Plus edit (img2img) task."

@property
def outputs(self):
    """Outputs advertised for this denoise workflow: only the `latents` tensor.

    Returns:
        A one-element list holding the `OutputParam` for the latents generated
        by the denoising step.
    """
    latents_param = OutputParam(
        name="latents",
        type_hint=torch.Tensor,
        description="The latents generated by the denoising step",
    )
    return [latents_param]


# ====================
# 4. DECODE
Expand Down Expand Up @@ -179,3 +192,9 @@ def description(self):
"- Each image is resized independently based on its own aspect ratio.\n"
"- VL encoder uses 384x384 target area, VAE encoder uses 1024x1024 target area."
)

@property
def outputs(self):
    """Outputs advertised for this block: the generated `images`.

    Returns:
        A one-element list holding the `OutputParam` that describes the images.
    """
    generated_images = OutputParam(
        name="images",
        type_hint=List[List[PIL.Image.Image]],
        description="The generated images",
    )
    return [generated_images]
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@
# limitations under the License.


from typing import List

import PIL.Image
import torch

from ...utils import logging
from ..modular_pipeline import SequentialPipelineBlocks
from ..modular_pipeline_utils import InsertableDict
from ..modular_pipeline_utils import InsertableDict, OutputParam
from .before_denoise import (
QwenImageLayeredPrepareLatentsStep,
QwenImageLayeredRoPEInputsStep,
Expand Down Expand Up @@ -134,6 +139,14 @@ class QwenImageLayeredCoreDenoiseStep(SequentialPipelineBlocks):
def description(self):
return "Core denoising workflow for QwenImage-Layered img2img task."

@property
def outputs(self):
    """Outputs advertised for this denoise workflow: only the `latents` tensor.

    Returns:
        A one-element list holding the `OutputParam` for the latents generated
        by the denoising step.
    """
    final_latents = OutputParam(
        name="latents",
        type_hint=torch.Tensor,
        description="The latents generated by the denoising step",
    )
    return [final_latents]


# ====================
# 4. AUTO BLOCKS & PRESETS
Expand All @@ -157,3 +170,9 @@ class QwenImageLayeredAutoBlocks(SequentialPipelineBlocks):
@property
def description(self):
    """Return the one-line summary text for these blocks."""
    summary = "Auto Modular pipeline for layered denoising tasks using QwenImage-Layered."
    return summary

@property
def outputs(self):
    """Outputs advertised for this block: the generated `images`.

    Returns:
        A one-element list holding the `OutputParam` that describes the images.
    """
    images_output = OutputParam(
        name="images",
        type_hint=List[List[PIL.Image.Image]],
        description="The generated images",
    )
    return [images_output]
Loading