Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 0 additions & 169 deletions USER_ISOLATION_IMPLEMENTATION.md

This file was deleted.

4 changes: 3 additions & 1 deletion docs/src/content/docs/start-here/system-requirements.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
title: Hardware Requirements
sidebar:
order: 1
lastUpdated: 2026-02-18
lastUpdated: 2026-05-11
---

import { Tabs, TabItem, Steps } from '@astrojs/starlight/components'
Expand All @@ -28,6 +28,8 @@ The requirements below are rough guidelines for best performance. GPUs with less
| FLUX.2 Klein 4B | 1024x1024 | Nvidia 30xx+ | 12GB | 16GB | FP8 works with 8GB+; Diffusers + encoder |
| FLUX.2 Klein 9B | 1024x1024 | Nvidia 40xx | 24GB | 32GB | FP8 works with 12GB+; Diffusers + encoder |
| Z-Image Turbo | 1024x1024 | Nvidia 20xx+ | 8GB | 16GB | Q4_K 8GB; Q8/BF16 16GB+ |
| Wan 2.2 A14B (T2V/I2V) | 1280x720 | Nvidia 30xx+ | 12GB | 32GB | Dual-expert MoE; Q4_K_M 12GB; Q8 18GB+; Diffusers requires 32GB+ |
| Wan 2.2 TI2V-5B | 1280x720 | Nvidia 20xx+ | 8GB | 16GB | Single transformer; Q4_K_M 6GB+; Q8 8GB+; Diffusers 12GB+ |

:::tip[`tmpfs` on Linux]
If your temporary directory is mounted as a `tmpfs`, ensure it has sufficient space.
Expand Down
2 changes: 2 additions & 0 deletions invokeai/app/api/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
QwenImageConditioningInfo,
SD3ConditioningInfo,
SDXLConditioningInfo,
WanConditioningInfo,
ZImageConditioningInfo,
)
from invokeai.backend.util.logging import InvokeAILogger
Expand Down Expand Up @@ -152,6 +153,7 @@ def initialize(
ZImageConditioningInfo,
QwenImageConditioningInfo,
AnimaConditioningInfo,
WanConditioningInfo,
],
ephemeral=True,
),
Expand Down
30 changes: 30 additions & 0 deletions invokeai/app/invocations/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ class FieldDescriptions:
z_image_model = "Z-Image model (Transformer) to load"
qwen_image_model = "Qwen Image Edit model (Transformer) to load"
qwen_vl_encoder = "Qwen2.5-VL tokenizer, processor and text/vision encoder"
wan_model = "Wan 2.2 model (Transformer) to load"
wan_t5_encoder = "UMT5-XXL tokenizer and text encoder for Wan 2.2"
wan_ref_image = "Reference-image (VAE-latent) conditioning for Wan 2.2 I2V."
sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load"
sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load"
onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load"
Expand Down Expand Up @@ -364,6 +367,33 @@ class AnimaConditioningField(BaseModel):
)


class WanConditioningField(BaseModel):
    """A Wan 2.2 conditioning tensor primitive value.

    Wan conditioning is the UMT5-XXL hidden state for the prompt plus an attention
    mask marking valid (non-padding) tokens.
    """

    # This model holds only a string name for the conditioning tensor,
    # not the tensor data itself.
    conditioning_name: str = Field(
        description="The name of conditioning tensor",
    )


class WanRefImageConditioningField(BaseModel):
    """Reference-image conditioning for Wan 2.2 I2V.

    Carries the 20-channel VAE-latent condition tensor (4-channel first-frame
    mask + 16-channel ref-image latents). The denoise loop concatenates this
    to the 16-channel noise latents along the channel dim each step, producing
    the 36-channel input the I2V-A14B transformer expects.

    Also carries the spatial dims used to encode the image, so the denoise
    node can sanity-check that the user's width/height match.
    """

    # String name of the saved condition tensor (the tensor is not embedded here).
    condition_tensor_name: str = Field(
        description="Name of the saved [1, 20, 1, H/8, W/8] condition tensor.",
    )

    # Spatial dimensions recorded at encode time. No range constraint is
    # declared on either value in this model.
    width: int = Field(
        description="Image width used during VAE encoding (matches denoise width).",
    )
    height: int = Field(
        description="Image height used during VAE encoding (matches denoise height).",
    )


class ConditioningField(BaseModel):
"""A conditioning tensor primitive value"""

Expand Down
5 changes: 5 additions & 0 deletions invokeai/app/invocations/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,11 @@ def invoke(self, context: InvocationContext) -> MetadataOutput:
"anima_img2img",
"anima_inpaint",
"anima_outpaint",
"wan_txt2img",
"wan_img2img",
"wan_inpaint",
"wan_outpaint",
"wan_i2v",
]


Expand Down
48 changes: 48 additions & 0 deletions invokeai/app/invocations/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ class Qwen3EncoderField(BaseModel):
loras: List[LoRAField] = Field(default_factory=list, description="LoRAs to apply on model loading")


class WanT5EncoderField(BaseModel):
    """Field for the UMT5-XXL text encoder used by Wan 2.2 models."""

    # Identifiers for the two submodels that make up the encoder.
    tokenizer: ModelIdentifierField = Field(
        description="Info to load tokenizer submodel",
    )
    text_encoder: ModelIdentifierField = Field(
        description="Info to load text_encoder submodel",
    )

    # Optional; ``default_factory=list`` supplies an empty list when omitted.
    loras: List[LoRAField] = Field(
        description="LoRAs to apply on model loading",
        default_factory=list,
    )


class VAEField(BaseModel):
vae: ModelIdentifierField = Field(description="Info to load vae submodel")
seamless_axes: List[str] = Field(default_factory=list, description='Axes("x" and "y") to which apply seamless')
Expand All @@ -101,6 +109,46 @@ class TransformerField(BaseModel):
loras: List[LoRAField] = Field(description="LoRAs to apply on model loading")


class WanTransformerField(BaseModel):
    """Transformer field for Wan 2.2 models.

    Wan 2.2 A14B is a Mixture-of-Experts model with two transformer experts:
    a high-noise expert (active at large timesteps) and a low-noise expert
    (active at small timesteps). TI2V-5B is a single-transformer model and only
    populates ``transformer``.

    ``boundary_ratio`` matches Diffusers' ``WanPipeline`` semantics: it's the
    boundary timestep as a fraction of ``num_train_timesteps`` (typically 1000),
    so ``boundary_ratio=0.875`` means the high-noise expert handles t >= 875 and
    the low-noise expert handles t < 875.
    """

    # --- Transformer submodels -------------------------------------------
    # Required: the only transformer for TI2V-5B, the high-noise expert for A14B.
    transformer: ModelIdentifierField = Field(
        description="Primary transformer submodel. For A14B this is the high-noise expert.",
    )
    # Optional second expert; stays None unless explicitly provided.
    transformer_low_noise: ModelIdentifierField | None = Field(
        description="Low-noise transformer expert (Wan 2.2 A14B only). None for TI2V-5B.",
        default=None,
    )

    # --- LoRA lists (both default to an empty list) ------------------------
    loras: List[LoRAField] = Field(
        description="LoRAs to apply to the primary transformer. For A14B applied to the high-noise expert.",
        default_factory=list,
    )
    # The fallback to ``loras`` described below is not implemented in this
    # model; it is left to whatever consumes the field.
    loras_low_noise: List[LoRAField] = Field(
        description=(
            "Optional separate LoRAs for the low-noise expert (Wan 2.2 A14B). "
            "If empty and transformer_low_noise is set, the primary 'loras' list is reused."
        ),
        default_factory=list,
    )

    # --- Expert switch point -----------------------------------------------
    # Validated to lie within the closed interval [0.0, 1.0].
    boundary_ratio: float = Field(
        description=(
            "Boundary timestep as a fraction of num_train_timesteps (Wan 2.2 A14B only). "
            "High-noise expert: t >= boundary_ratio * num_train_timesteps. Low-noise expert: t below. "
            "Ignored for TI2V-5B."
        ),
        default=0.875,
        ge=0.0,
        le=1.0,
    )


@invocation_output("unet_output")
class UNetOutput(BaseInvocationOutput):
"""Base class for invocations that output a UNet field."""
Expand Down
31 changes: 31 additions & 0 deletions invokeai/app/invocations/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
SD3ConditioningField,
TensorField,
UIComponent,
WanConditioningField,
WanRefImageConditioningField,
ZImageConditioningField,
)
from invokeai.app.services.images.images_common import ImageDTO
Expand Down Expand Up @@ -497,6 +499,35 @@ def build(cls, conditioning_name: str) -> "AnimaConditioningOutput":
return cls(conditioning=AnimaConditioningField(conditioning_name=conditioning_name))


@invocation_output("wan_conditioning_output")
class WanConditioningOutput(BaseInvocationOutput):
    """Base class for nodes that output a Wan 2.2 text conditioning tensor."""

    conditioning: WanConditioningField = OutputField(
        description=FieldDescriptions.cond,
    )

    @classmethod
    def build(cls, conditioning_name: str) -> "WanConditioningOutput":
        """Create an output whose ``conditioning`` field refers to ``conditioning_name``.

        Args:
            conditioning_name: Name of the conditioning tensor to reference.

        Returns:
            A new instance of ``cls`` wrapping a ``WanConditioningField``.
        """
        wrapped = WanConditioningField(conditioning_name=conditioning_name)
        return cls(conditioning=wrapped)


@invocation_output("wan_ref_image_output")
class WanRefImageOutput(BaseInvocationOutput):
    """Output of a Wan 2.2 reference-image VAE-encoder."""

    ref_image: WanRefImageConditioningField = OutputField(
        title="Reference Image",
        description="VAE-latent reference-image conditioning for Wan 2.2 I2V.",
    )

    @classmethod
    def build(cls, condition_tensor_name: str, width: int, height: int) -> "WanRefImageOutput":
        """Create an output from a saved condition tensor name and its encode dimensions.

        Args:
            condition_tensor_name: Name of the saved condition tensor.
            width: Image width recorded for the encoded reference image.
            height: Image height recorded for the encoded reference image.

        Returns:
            A new instance of ``cls`` wrapping a ``WanRefImageConditioningField``.
        """
        conditioning = WanRefImageConditioningField(
            condition_tensor_name=condition_tensor_name,
            width=width,
            height=height,
        )
        return cls(ref_image=conditioning)


@invocation_output("conditioning_output")
class ConditioningOutput(BaseInvocationOutput):
"""Base class for nodes that output a single conditioning tensor"""
Expand Down
Loading
Loading