From 5725f7a1a3567aee3b0af4c2bde4730e9ff1b014 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 9 May 2026 10:33:30 -0400 Subject: [PATCH 01/12] feat(model): add Wan 2.2 image generation support (Phases 0-2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foundation + TI2V-5B MVP + A14B dual-expert MoE for Wan 2.2 image generation. Wan was trained on video but is competitive with leading open-source image models when run at num_frames=1; this commit wires that path into InvokeAI. Phase 0 — Foundation: - BaseModelType.Wan + WanVariantType {T2V_A14B, TI2V_5B} - SubModelType.Transformer2 for the dual-expert MoE - MainModelDefaultSettings per variant - step_callback Wan branch (16-channel preview; 48-channel TI2V-5B falls back to slicing first 16 channels until proper factors land) - Frontend enums + node colour Phase 1 — TI2V-5B Diffusers MVP: - Main_Diffusers_Wan_Config probe (variant from transformer_2/ + vae/config.json::z_dim, with filename heuristic fallback) - WanDiffusersModel loader (subclasses GenericDiffusersLoader) - WanT5EncoderField, WanTransformerField (with dual-expert slots), WanConditioningField, WanConditioningInfo - New invocations: wan_model_loader, wan_text_encoder, wan_denoise, wan_image_to_latents, wan_latents_to_image - FlowMatchEulerDiscreteScheduler integration with on-disk config load - RectifiedFlowInpaintExtension reused for inpaint - 5D <-> 4D shape juggling: latents stay 4D in InvokeAI's pipeline, re-add T=1 only inside the transformer call / VAE encode-decode Phase 2 — A14B dual-expert MoE: - Probe reads boundary_ratio from model_index.json - Loader emits both transformer (high-noise) and transformer_low_noise (low-noise expert at transformer_2/) for A14B - _ExpertSwapper in wan_denoise drives GPU residency between experts: high-noise for t >= boundary_ratio * num_train_timesteps, low-noise below. 
Only one expert locked at a time so the cache can evict the other - relies on existing CachedModelWithPartialLoad to handle oversized models on lower-VRAM GPUs. - guidance_scale_low_noise field for separate low-noise CFG override Tests: - 24 passing tests covering probe variant detection, default settings, noise sampling, end-to-end denoise on a synthetic transformer (CPU), dual-expert boundary swap, CFG branch - 1 heavy-test placeholder gated by INVOKEAI_HEAVY_TESTS=1 for the real-weights smoke test Phase 3+ deferred: standalone VAE/encoder configs, GGUF, LoRA, ControlNet, ref image, inpaint UI, frontend wiring, starter models. Co-Authored-By: Claude Opus 4.7 (1M context) --- WAN_2_2_IMPLEMENTATION.md | 561 ++++++++++++++++++ invokeai/app/api/dependencies.py | 2 + invokeai/app/invocations/fields.py | 12 + invokeai/app/invocations/model.py | 48 ++ invokeai/app/invocations/primitives.py | 12 + invokeai/app/invocations/wan_denoise.py | 437 ++++++++++++++ .../app/invocations/wan_image_to_latents.py | 106 ++++ .../app/invocations/wan_latents_to_image.py | 95 +++ invokeai/app/invocations/wan_model_loader.py | 125 ++++ invokeai/app/invocations/wan_text_encoder.py | 111 ++++ invokeai/app/util/step_callback.py | 32 + .../backend/model_manager/configs/factory.py | 2 + .../backend/model_manager/configs/main.py | 114 +++- .../model_manager/load/model_loaders/wan.py | 82 +++ invokeai/backend/model_manager/taxonomy.py | 22 + .../diffusion/conditioning_data.py | 22 + invokeai/backend/wan/__init__.py | 0 invokeai/backend/wan/sampling_utils.py | 62 ++ .../web/src/features/nodes/types/common.ts | 5 + .../web/src/features/nodes/types/constants.ts | 1 + .../features/parameters/types/constants.ts | 4 + tests/app/invocations/test_wan_denoise.py | 482 +++++++++++++++ .../configs/test_wan_main_config.py | 110 ++++ .../test_wan_default_settings.py | 25 + tests/backend/wan/__init__.py | 0 tests/backend/wan/test_sampling_utils.py | 79 +++ 26 files changed, 2550 insertions(+), 1 deletion(-) 
create mode 100644 WAN_2_2_IMPLEMENTATION.md create mode 100644 invokeai/app/invocations/wan_denoise.py create mode 100644 invokeai/app/invocations/wan_image_to_latents.py create mode 100644 invokeai/app/invocations/wan_latents_to_image.py create mode 100644 invokeai/app/invocations/wan_model_loader.py create mode 100644 invokeai/app/invocations/wan_text_encoder.py create mode 100644 invokeai/backend/model_manager/load/model_loaders/wan.py create mode 100644 invokeai/backend/wan/__init__.py create mode 100644 invokeai/backend/wan/sampling_utils.py create mode 100644 tests/app/invocations/test_wan_denoise.py create mode 100644 tests/backend/model_manager/configs/test_wan_main_config.py create mode 100644 tests/backend/model_manager/test_wan_default_settings.py create mode 100644 tests/backend/wan/__init__.py create mode 100644 tests/backend/wan/test_sampling_utils.py diff --git a/WAN_2_2_IMPLEMENTATION.md b/WAN_2_2_IMPLEMENTATION.md new file mode 100644 index 00000000000..2899865d77f --- /dev/null +++ b/WAN_2_2_IMPLEMENTATION.md @@ -0,0 +1,561 @@ +# Wan 2.2 Image Generation — Implementation Plan + +**Branch:** `lstein/feature/wan-image-2-2` +**Status:** Planning +**Owner:** Lincoln Stein + +## 0. Naming and Ground Rules + +- New base: `BaseModelType.Wan = "wan"` (single base for both A14B and TI2V-5B; variants distinguish them). +- Backend module path: `invokeai/backend/wan/` (mirrors `invokeai/backend/anima/`, `invokeai/backend/flux/`). +- Invocations: prefix `wan_*` (e.g. `wan_model_loader`, `wan_text_encoder`, `wan_denoise`, `wan_lora_loader`, `wan_image_to_latents`, `wan_latents_to_image`, `wan_controlnet`, `wan_ref_image`). +- Submodel layout (per Diffusers `WanPipeline` / `WanImageToVideoPipeline`): `transformer/` (A14B has both `transformer/` and `transformer_2/`), `text_encoder/` (UMT5-XXL), `tokenizer/`, `vae/`, `scheduler/`. 
+- Diffusers 0.37.0 already in `pyproject.toml` and exposes `WanPipeline`, `WanImageToVideoPipeline`, `WanTransformer3DModel`, `AutoencoderKLWan`. **No diffusers bump required.** + +## 1. Model Architecture Reality Check (verified against Diffusers 0.37.0) + +These shape and signature facts drive every later design decision: + +- `WanTransformer3DModel.__init__` defaults: `patch_size=(1,2,2)`, `text_dim=4096` (UMT5-XXL hidden), `in_channels=16`, `num_layers=40`, `num_attention_heads=40`, `attention_head_dim=128`. So a `text_dim` of 4096 is the strongest UMT5-XXL fingerprint. +- `WanTransformer3DModel.forward(hidden_states, timestep, encoder_hidden_states, encoder_hidden_states_image=None, ...)` — text via `encoder_hidden_states`, optional CLIP image embedding via `encoder_hidden_states_image` (this is the I2V path; we will not feed it for pure T2I but **will** for "reference image at frame 1"). +- `WanPipeline.__call__(prompt, ..., num_frames, guidance_scale, guidance_scale_2, ...)` — Diffusers already handles the two-expert swap when `transformer_2` is loaded; `guidance_scale` is for the high-noise expert and `guidance_scale_2` is for the low-noise expert. +- `AutoencoderKLWan.__init__` default: `z_dim=16`, `scale_factor_temporal=4`, `scale_factor_spatial=8`. **Standard Wan VAE used by A14B.** +- TI2V-5B uses a larger Wan2.2-VAE with `z_dim=48`. Latent channels are the strongest discriminator on disk. +- For `num_frames=1`, the temporal patch dimension collapses, but Wan still expects `[B, C, T=1, H, W]` 5D tensors. Latents-to-image will need to squeeze T just like Anima already does. +- A14B carries **two transformers** (high-noise + low-noise expert), shipped as separate `transformer/` and `transformer_2/` subfolders. Each is ~14B params, which drives every VRAM and quantization decision. + +## 2. Phasing Summary + +| Phase | Goal | Independent? 
| +|---|---|---| +| 0 | Probe + taxonomy + base type | foundational (gate for all others) | +| 1 | Diffusers-format MVP T2I (TI2V-5B first) | depends on 0 | +| 2 | A14B dual-expert loader + denoise hooks + **Low VRAM mode** | depends on 1 | +| 3 | Standalone VAE + UMT5-XXL encoder configs | depends on 0; can run parallel to 1/2 | +| 4 | GGUF transformer (single-file) — both experts | depends on 2, 3 | +| 5 | LoRA (single + dual-expert pairing) | depends on 2 | +| 6 | ControlNet | depends on 2 | +| 7 | Reference image (frame-1 I2V conditioning) | depends on 2 | +| 8 | Inpaint | depends on 2 (uses `RectifiedFlowInpaintExtension`) | +| 9 | Frontend wiring (model picker, params slice, graph builder) | depends on 1 minimum | +| 10 | Starter models, docs | last | + +Phases 5–8 can all run in parallel after Phase 2 lands. Phase 4 is the largest single unit of work. + +--- + +## VRAM Targets and the Low VRAM Mode + +Dev hardware: 16 GB VRAM card. Most InvokeAI users are at 16 GB or below, so the low-VRAM path is mandatory regardless. + +| Config | Active VRAM (transformer only) | Verdict | +|---|---|---| +| TI2V-5B @ bf16 | ~10 GB | Comfortable native fit | +| A14B @ bf16 (one expert resident) | ~28 GB per expert | Won't fit; needs CPU offload | +| A14B @ Q8 GGUF (one expert) | ~14 GB | Tight; possible with offload of encoder/VAE | +| A14B @ Q4_K_M GGUF (one expert) | ~7 GB | Comfortable; realistic 16 GB path | + +UMT5-XXL is ~5B params (~10 GB bf16) but only encodes once before denoise — it gets moved off GPU before the transformer runs. + +**Low VRAM mode** (revised in Phase 2 implementation): InvokeAI's model cache already exposes partial loading via `InvokeAIAppConfig.enable_partial_loading` (default `True`). When a model exceeds the VRAM budget the cache loads what fits and streams the rest from RAM per forward pass. 
Combined with `_ExpertSwapper` (which keeps only one expert locked at a time, freeing the other for cache eviction), the A14B-at-bf16-on-16-GB scenario is **already handled by existing infrastructure** — no `low_vram` field on `wan_denoise` is required. Users with less VRAM than the model size get the slow-but-functional path automatically. + +--- + +## Phase 0 — Foundation (taxonomy, base type, FE enum, probe scaffolding) + +### Backend changes + +- `invokeai/backend/model_manager/taxonomy.py` + - Add `Wan = "wan"` to `BaseModelType`. + - Add `class WanVariantType(str, Enum)` with `T2V_A14B = "t2v_a14b"` and `TI2V_5B = "ti2v_5b"`. + - Add `WanVariantType` to the `AnyVariant` union and to `variant_type_adapter`. +- `invokeai/backend/model_manager/configs/main.py` + - Add `MainModelDefaultSettings.from_base(BaseModelType.Wan, variant=...)`: A14B → `cls(steps=40, cfg_scale=4.0, width=1024, height=1024)`; TI2V-5B → `cls(steps=30, cfg_scale=5.0, width=1024, height=1024)`. Tune later. +- `invokeai/app/util/step_callback.py` + - Add `BaseModelType.Wan` branch. 16-channel projection matrix for A14B; TI2V-5B's 48-channel preview is a TODO (non-blocking). +- `invokeai/app/services/shared/sqlite_migrator/migrations/migration_NN.py` + - New migration to widen any base-model enum constraint (mirror Anima's `migration_26.py`). Bump `model_records_schema_version`. + +### Frontend changes + +- `invokeai/frontend/web/src/features/nodes/types/common.ts` — add `'wan'` to `zBaseModelType`/`zMainModelBase`; add `zWanVariantType`; include in `zAnyModelVariant`. +- `invokeai/frontend/web/src/features/parameters/types/constants.ts` — add `wan` to `CLIP_SKIP_MAP` with `maxClip=0`. +- `invokeai/frontend/web/src/features/nodes/types/constants.ts` — add `WanMainModelField` colour entry. + +### Decisions + +- **One base for both, or split?** One base (`Wan`) with two variants. They share text encoder (UMT5-XXL) and pipeline ergonomics. Splitting would double FE selectors for marginal gain. 
+- **Naming**: prefer `wan` over `wan-image` — Wan 2.3 is coming. + +### Test surface + +- `tests/backend/model_manager/configs/test_main_config.py` — migration adds new enum, existing rows still validate. + +--- + +## Phase 1 — Diffusers Pipeline MVP (TI2V-5B, T2I single-frame) + +Start with TI2V-5B because it's smaller (single transformer ~5B, fits ~16 GB), avoids the dual-expert complication, and validates the encoder/VAE/denoise path before adding the MoE layer. + +### Probe / config + +- `invokeai/backend/model_manager/configs/main.py` + - Add `Main_Diffusers_Wan_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base)`: + - `base: Literal[BaseModelType.Wan]`, `variant: WanVariantType`. + - `from_model_on_disk` accepts class names `{"WanPipeline", "WanImageToVideoPipeline", "WanTransformer3DModel"}`. + - Variant detection: load `transformer/config.json`; if `in_channels` indicates 48-ch latents → TI2V-5B; if 16-ch and a sibling `transformer_2/` exists → A14B. Filename heuristic fallback. + - `has_dual_expert: bool` field set at probe time. +- `invokeai/backend/model_manager/configs/factory.py` — add `Main_Diffusers_Wan_Config` to `AnyModelConfig` union. + +### Loader + +- `invokeai/backend/model_manager/load/model_loaders/wan.py` (new). Mirror `qwen_image.py`. Initial scope: TI2V-5B only. + - Transformer: `WanTransformer3DModel.from_pretrained(model_path / "transformer", torch_dtype=bfloat16)`. + - VAE: `AutoencoderKLWan.from_pretrained(model_path / "vae", torch_dtype=bfloat16)`. + - Text encoder: standard `T5EncoderModel` / `T5TokenizerFast` from `text_encoder/` and `tokenizer/`. **Verify `model_type` in config.json — if `umt5`, use `UMT5EncoderModel` from transformers.** + +### Invocation nodes (TI2V-5B only) + +- `wan_model_loader.py` — outputs `transformer: TransformerField`, `vae: VAEField`, `text_encoder: WanTextEncoderField`. +- `model.py` — add `class WanTextEncoderField(BaseModel)` with `tokenizer`, `text_encoder`, `loras`. 
+- `wan_text_encoder.py` — runs UMT5-XXL, returns `WanConditioningField`. Output `WanConditioning` dataclass: `prompt_embeds: [seq_len, 4096]` + `prompt_attention_mask`. Add `WanConditioningInfo` to `invokeai/backend/stable_diffusion/diffusion/conditioning_data.py`. +- `fields.py` — add `WanConditioningField` and `wan_model` field-description string. +- `wan_image_to_latents.py` — VAE encode, mirroring `qwen_image_image_to_latents.py`. Wan VAE expects 5D `[B,3,1,H,W]`. +- `wan_latents_to_image.py` — VAE decode, squeeze T. +- `wan_denoise.py` — heart of the work for this phase. + +### Denoise loop design + +**Decision: bypass `WanPipeline.__call__` and drive the loop ourselves (Option A).** Same as every other InvokeAI backend — keeps LoRA / ControlNet / inpaint plumbing consistent. + +For Phase 1 (single transformer): +- `invokeai/backend/wan/sampling_utils.py` — `get_noise(...)` returning 5D `[1, z_dim, 1, H/8, W/8]`; a `WanScheduler` (start with `FlowMatchEulerDiscreteScheduler` from Diffusers). +- Pseudocode: + ```python + latents = get_noise(...) or noised init + for t in timesteps: + noise_pred_cond = transformer(latents, t, prompt_embeds, ...) + if cfg: + noise_pred_uncond = transformer(latents, t, neg_embeds, ...) + noise_pred = noise_pred_uncond + scale * (cond - uncond) + latents = scheduler.step(noise_pred, t, latents) + step_callback(...) + return latents + ``` +- Reuse `RectifiedFlowInpaintExtension` from `invokeai.backend.rectified_flow.rectified_flow_inpaint_extension`. + +### Open questions + +- Does `WanPipeline` use `FlowMatchEulerDiscreteScheduler`? Confirm against `Wan-AI/Wan2.2-TI2V-5B/scheduler/scheduler_config.json`. +- New `WanT5EncoderConfig` rather than reuse of `T5Encoder_T5Encoder_Config`? **Yes** — UMT5-XXL is not bit-compatible with T5-XXL. See Phase 3. +- Does `WanTransformer3DModel` accept attention mask through `attention_kwargs`? + +### Test surface + +- `tests/app/invocations/test_wan_text_encoder.py` — output shape sanity. 
+- `tests/app/invocations/test_wan_denoise.py` (slow, gated by `INVOKEAI_HEAVY_TESTS=1`) — 4-step denoise on TI2V-5B at 256x256, assert non-NaN. +- `tests/backend/model_manager/configs/test_wan_main_config.py` — variant detection. + +### Files touched in Phase 1 + +- `invokeai/backend/model_manager/taxonomy.py` +- `invokeai/backend/model_manager/configs/main.py` +- `invokeai/backend/model_manager/configs/factory.py` +- `invokeai/backend/model_manager/load/model_loaders/wan.py` (new) +- `invokeai/app/invocations/wan_model_loader.py` (new) +- `invokeai/app/invocations/wan_text_encoder.py` (new) +- `invokeai/app/invocations/wan_denoise.py` (new) +- `invokeai/app/invocations/wan_image_to_latents.py`, `wan_latents_to_image.py` (new) +- `invokeai/backend/wan/__init__.py`, `sampling_utils.py` (new); `invokeai/backend/stable_diffusion/diffusion/conditioning_data.py` (existing file, extended with `WanConditioningInfo`) + +--- + +## Phase 2 — Dual-Expert MoE (Wan2.2-T2V-A14B) + Low VRAM Mode + +### MoE detail + +Wan 2.2 A14B runs two `WanTransformer3DModel` instances. `WanPipeline` swaps based on a noise threshold. `boundary_ratio` (default 0.875) is read from the model's on-disk config (the Phase 2 probe reads it from `model_index.json`) — the high-noise expert handles timesteps `t >= boundary_ratio * num_train_timesteps` (roughly the noisiest 12.5% of the timestep range, i.e. the start of denoising), and the low-noise expert handles the rest. + +### Loader changes + +- Extend `SubModelType` with `Transformer2 = "transformer_2"`. Cleanest path: each expert is its own cacheable entity, `apply_smart_model_patches` LoRAs each independently, matches Diffusers folder layout. Mirror in FE `common.ts`. + +### TransformerField split + +- New `WanTransformerField`: + ```python + class WanTransformerField(BaseModel): + transformer_high: ModelIdentifierField + transformer_low: ModelIdentifierField | None + loras_high: List[LoRAField] = [] + loras_low: List[LoRAField] = [] + boundary_ratio: float = 0.875 + ``` + In `invokeai/app/invocations/model.py`. Single explicit place where MoE-ness is encoded. +- `wan_model_loader.py` populates both. TI2V-5B leaves `transformer_low` as `None`. 
+ +### Denoise loop changes + +- `wan_denoise.py`: + ```python + with ExitStack() as exit_stack: + _, transformer_high = exit_stack.enter_context(context.models.load(field.transformer_high).model_on_device()) + transformer_low = None + if field.transformer_low is not None: + _, transformer_low = exit_stack.enter_context(context.models.load(field.transformer_low).model_on_device()) + apply_loras(transformer_high, field.loras_high) + if transformer_low: apply_loras(transformer_low, field.loras_low) + + for i, t in enumerate(timesteps): + model = transformer_high + if transformer_low is not None and (t / t_max) < boundary_ratio: + model = transformer_low + noise_pred = model(...) + # ... + ``` + +### VRAM strategy (default mode) + +- Both experts in **system RAM** (~28 GB per expert at bf16, so ~56 GB for both; cheap in 2026). +- Only the active expert on **GPU**. Boundary crossing once per denoise → ~2s CPU↔GPU transfer overhead. +- Implementation: re-enter `model_on_device()` for the other expert after boundary crossing. + +### Low VRAM mode (new — needed for 16 GB dev card and most users) + +- New `low_vram: bool` field on `wan_denoise` (also a global setting). This was the original plan; per the revised note under "VRAM Targets and the Low VRAM Mode", the model cache's partial loading plus `_ExpertSwapper` already covers this case, so the field is not required. +- Mode A (default): RAM-resident, GPU-juggle on boundary as above. +- Mode B (low VRAM): wrap each transformer with `enable_sequential_cpu_offload()`-style semantics — model stays on CPU, individual layers move to GPU on forward call. Slow (~minutes/step at bf16, but seconds/step at Q4 GGUF). Lets users render even when the full active expert won't fit. +- Mode B is also useful for keeping the text encoder CPU-resident the whole time on tight VRAM. + +### Dual-expert LoRA pairing + +- Community releases ship paired files: `xxx_high_noise.safetensors` + `xxx_low_noise.safetensors`. +- New `wan_lora_loader` accepts either single LoRA (auto-applied to both, with quality warning) or explicit `lora_high` + `lora_low` pair. 
+- Probe identifies each as `LoRA_LyCORIS_Wan_Config(base=Wan)` with optional `expert: Literal["high","low"] | None` from filename heuristic (`"high_noise"`/`"low_noise"` substring). + +### Files touched in Phase 2 + +- `invokeai/backend/model_manager/taxonomy.py` (add `Transformer2`) +- `invokeai/backend/model_manager/load/model_loaders/wan.py` +- `invokeai/app/invocations/model.py` (add `WanTransformerField`) +- `invokeai/app/invocations/wan_model_loader.py` (extend for dual) +- `invokeai/app/invocations/wan_denoise.py` (MoE swap + low VRAM mode) +- `invokeai/app/invocations/wan_lora_loader.py` +- `invokeai/frontend/web/src/features/nodes/types/common.ts` (Transformer2) + +### Open questions + +- `boundary_ratio` units in actual `scheduler_config.json` — timestep fraction vs sigma threshold? Read source of truth from disk. +- Expose `boundary_ratio` as advanced UI control? **Yes** — useful for experimentation, default from config. +- Expose `cfg_scale_low_noise` separately from `cfg_scale`? **Yes** as advanced override; default both to same value. + +### Test surface + +- Mock dual-expert load + boundary crossing: ensure correct expert called at each step. Fake transformer that records calls. +- Low VRAM mode smoke test against TI2V-5B (single-expert), confirm output matches default mode. + +--- + +## Phase 3 — Standalone VAE + UMT5-XXL Encoder Configs + +Makes the GGUF flow possible by allowing users to install only encoder + VAE + quantized transformer. + +### VAE configs + +- `invokeai/backend/model_manager/configs/vae.py` + - `_is_wan_vae(state_dict)`: 5D conv weights and `decoder.conv_in.weight.shape[1] in {16, 48}`. + - `VAE_Checkpoint_Wan_Config(Checkpoint_Config_Base, Config_Base)` with `base=Wan`, `latent_channels: Literal[16, 48]`. Detect via `state_dict["decoder.conv_in.weight"].shape[1]`. + - Update `_validate_looks_like_vae` to exclude Wan VAEs (mirror Qwen Image / FLUX.2 exclusion at lines 113-118). 
+ - `VAE_Diffusers_Wan_Config` for diffusers-format Wan VAE (`AutoencoderKLWan`). +- `factory.py` — add both new VAE configs to `AnyModelConfig`. + +### VAE loader + +- `wan.py` — register `(base=Wan, type=VAE, format=Checkpoint)` and `format=Diffusers`. +- For checkpoint: build `AutoencoderKLWan(z_dim=...)` based on detected latent channels, then `model.load_state_dict(sd, assign=True)`. **VAE in fp16 is broken — use bf16** (FluxVAELoader pattern). + +### UMT5-XXL encoder + +- `invokeai/backend/model_manager/configs/wan_t5_encoder.py` (new) — `WanT5Encoder_Diffusers_Config` and `WanT5Encoder_Checkpoint_Config`. +- New config class **rather than reuse** of `T5Encoder_T5Encoder_Config`: + - UMT5-XXL has `model_type: "umt5"` in transformers. + - Different vocabulary — InvokeAI shouldn't let users wire a FLUX T5 into the Wan slot. +- New `ModelType.WanT5Encoder = "wan_t5_encoder"` and `ModelFormat.WanT5Encoder = "wan_t5_encoder"`. Add to taxonomy + FE enum. + +### Standalone-encoder loader + +- New class in `wan.py`: `(base=Any, type=WanT5Encoder, format=...)`. Loads `UMT5EncoderModel` for TextEncoder, `T5TokenizerFast` for Tokenizer. Mirror `T5EncoderLoader` in `flux.py:426-505`. + +### Component-source loader pattern + +- `wan_model_loader.py` follows `qwen_image_model_loader.py` pattern: optional standalone `vae_model` and `wan_t5_encoder_model` inputs override main model's submodels. Required when main model is single-file GGUF. + +### Files touched in Phase 3 + +- `invokeai/backend/model_manager/configs/vae.py` +- `invokeai/backend/model_manager/configs/wan_t5_encoder.py` (new) +- `invokeai/backend/model_manager/configs/factory.py` +- `invokeai/backend/model_manager/load/model_loaders/wan.py` +- `invokeai/backend/model_manager/taxonomy.py` +- FE: `isWanVAEModelConfig`, `isWanT5EncoderModelConfig` type guards in `services/api/types.ts`; `useWanVAEModels`, `useWanT5EncoderModels` hooks in `services/api/hooks/modelsByType.ts`. 
+ +### Open questions + +- A14B and TI2V-5B ship the same UMT5-XXL `text_encoder/`? Verify; if yes, one encoder config covers both. + +--- + +## Phase 4 — GGUF Quantization for Both Experts + +Highest user impact: brings Wan 2.2 A14B onto consumer hardware. + +### Probe / config + +- `invokeai/backend/model_manager/configs/main.py` + - `Main_GGUF_Wan_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base)` with `base=Wan`, `format=GGUFQuantized`, `variant: WanVariantType`, `expert: Literal["high","low","none"] = "none"`. + - Detection: GGML tensors + Wan-specific keys (`blocks.0.attn1.to_q.weight`, `attn2.to_k.weight` shape `[head_dim*heads, 4096]` for UMT5 cross-attn). + - Expert from filename: `"high_noise"` / `"low_noise"` substring; fall back to `"none"`. **User must confirm** when ambiguous. + +### Loader + +- `wan.py` — `(base=Wan, type=Main, format=GGUFQuantized)`. Mirror `QwenImageGGUFCheckpointModel`: + 1. `gguf_sd_loader(model_path, compute_dtype=bfloat16)` + 2. Strip ComfyUI prefixes (`model.diffusion_model.`, `diffusion_model.`). + 3. Auto-detect arch (count `blocks.X.` keys → `num_layers`; `attn1.to_q.weight.shape[0]` → hidden dim). + 4. `with accelerate.init_empty_weights(): model = WanTransformer3DModel(**inferred_config)` + 5. `model.load_state_dict(sd, strict=False, assign=True)`. +- A14B's two GGUFs: same registration handles both — file alone is the unit, called twice by `wan_model_loader` invocation. + +### Pairing in the model loader invocation + +- UI sketch: + ``` + Transformer (High Noise) [GGUF or Diffusers] + Transformer (Low Noise) [GGUF or Diffusers, optional — empty for TI2V-5B] + Component Source [Diffusers, optional — for VAE/encoder] + Standalone VAE [optional] + Standalone Wan T5 Encoder [optional] + Low VRAM mode [bool] + ``` +- Low Noise field hidden on FE when High Noise variant is TI2V-5B. 
+ +### Files touched in Phase 4 + +- `invokeai/backend/model_manager/configs/main.py` +- `invokeai/backend/model_manager/configs/factory.py` +- `invokeai/backend/model_manager/load/model_loaders/wan.py` +- `invokeai/app/invocations/wan_model_loader.py` (extend pickers) + +### Open questions + +- Reference GGUFs: `city96/Wan2.2-T2V-A14B-gguf`, `QuantStack/Wan2.2-TI2V-5B-GGUF`. Verify key naming matches Diffusers' `WanTransformer3DModel` exactly. +- If only one of the two A14B experts is GGUF'd, fall back to bf16 for the other (mixed quant within one denoise loop). Loader supports this — each transformer slot has independent format. + +--- + +## Phase 5 — LoRA + +### Probe / config + +- `invokeai/backend/model_manager/configs/lora.py` + - `_is_wan_lora(state_dict)`: keys like `blocks.0.attn1.to_q.lora_A.weight` / `lora_unet_blocks_0_attn1_to_q.lora_down.weight` / `transformer.blocks.0.attn1.to_q.lora_A.weight`. Exclude clashes with Anima (`cross_attn`/`self_attn`) and FLUX (`double_blocks`, `single_blocks`). + - `LoRA_LyCORIS_Wan_Config(LoRA_LyCORIS_Config_Base, Config_Base)` with `base=Wan`, optional `expert: Literal["high","low"] | None`. + - Register in `factory.py`. + +### LoRA conversion + +- `invokeai/backend/patches/lora_conversions/wan_lora_constants.py` (new) — `WAN_LORA_TRANSFORMER_PREFIX = "lora_transformer-"`. +- `invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py` (new) — handle three formats: + - **Kohya**: `lora_unet_blocks_X_...` → diffusers `blocks.X....` + - **Diffusers PEFT**: `transformer.blocks.X.attn1.to_q.lora_A.weight` → strip `transformer.` prefix. + - **Native diffusion_model**: `diffusion_model.blocks.X....` → strip prefix. +- Start from `qwen_image_lora_conversion_utils.py` and adjust prefixes/key-renaming. + +### Loader integration + +- `invokeai/backend/model_manager/load/model_loaders/lora.py` — add `BaseModelType.Wan` branch calling `lora_model_from_wan_state_dict(state_dict, alpha=None)`. 
+ +### Invocation node + +- `invokeai/app/invocations/wan_lora_loader.py`: + - Single LoRA mode (default): one picker, auto-applied to both experts. + - Dual LoRA mode: two pickers (high / low). Validates bases are both Wan and at most one of each `expert`. + - Mirrors `AnimaLoRALoaderInvocation` + `AnimaLoRACollectionLoader`. +- Output: `WanLoRALoaderOutput` containing the `WanTransformerField` with updated `loras_high` / `loras_low`. + +### Denoise integration + +- `wan_denoise.py` — when entering each transformer's `model_on_device()` context, apply `LayerPatcher.apply_smart_model_patches(model=transformer_high, patches=loras_high_iter, prefix=WAN_LORA_TRANSFORMER_PREFIX, ...)`. Pattern from `flux_denoise.py:434-443`. + +### Files touched in Phase 5 + +- `invokeai/backend/model_manager/configs/lora.py` +- `invokeai/backend/model_manager/configs/factory.py` +- `invokeai/backend/model_manager/load/model_loaders/lora.py` +- `invokeai/backend/patches/lora_conversions/wan_lora_constants.py` (new) +- `invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py` (new) +- `invokeai/app/invocations/wan_lora_loader.py` (new) +- `invokeai/app/invocations/wan_denoise.py` + +--- + +## Phase 6 — ControlNet + +Wan ControlNet ecosystem **less mature** than FLUX. Common community models target Wan2.1, with Wan2.2 ports trickling out. Treat with thrash risk. + +### Approach + +- `invokeai/backend/wan/controlnet/` mirroring `invokeai/backend/flux/controlnet/`. Two state-dict identifiers initially: + - **InstantX-style**: `controlnet_x_embedder.` / `controlnet_blocks.` + `blocks.X.attn1.*` transformer keys. + - **Diffusers Wan ControlNet** (if/when one exists): `WanControlNetModel`-style. +- Configs: `ControlNet_Checkpoint_Wan_Config`, `ControlNet_Diffusers_Wan_Config` in `invokeai/backend/model_manager/configs/controlnet.py`. +- Loader: extend `wan.py`. 
+- Extension: `invokeai/backend/wan/extensions/wan_controlnet_extension.py` — callable taking control-image, returning per-block residuals. Pattern from `flux/extensions/instantx_controlnet_extension.py`. +- Invocation: `invokeai/app/invocations/wan_controlnet.py` — defines `WanControlNetField` and picker node. +- Denoise: `wan_denoise.py` accepts `control: WanControlNetField | list[WanControlNetField] | None`. + +### Risks + +- If community ControlNet weights only target one expert, need conditional injection. Defer until reference model in hand. +- ControlNet may want a separate VAE-encoded conditioning image (FLUX denoise pattern). +- **Gate on ecosystem maturity**: ship v1 without ControlNet if Wan2.2-native models aren't ready; add as v2. + +--- + +## Phase 7 — Reference Image (Frame-1 I2V Conditioning) + +Wan 2.2's I2V variant takes an image and produces a video starting from it. At `num_frames=1`, becomes a reference image — analogous to FLUX Kontext. + +### Decision: Path B — CLIP-vision conditioning via `encoder_hidden_states_image` + +`WanTransformer3DModel.forward` accepts `encoder_hidden_states_image: Optional[Tensor]`. I2V pipeline preprocesses the ref image through CLIP-vision and feeds those features. We do the same with stock `CLIPVisionModelWithProjection` (already in `invokeai/backend/model_manager/load/model_loaders/clip_vision.py`). + +Treats ref-image as conditioning rather than a different model. Simpler UI, no extra 30 GB checkpoint to install. Same approach as FLUX Kontext (`invokeai/backend/flux/extensions/kontext_extension.py`). + +### Implementation + +- `invokeai/backend/wan/extensions/wan_ref_image_extension.py` — encodes via CLIP vision, produces `image_embeds` for `encoder_hidden_states_image`. +- `wan_denoise.py` accepts `ref_image: WanRefImageConditioningField | None`. + +### Open questions + +- Wan2.2-T2V-A14B's `transformer/config.json` likely has `image_dim=None` (text-only); I2V variant has `image_dim != None`. 
**Ref-image path only works on I2V variants.** Either ship I2V as separate variant or detect and reject gracefully. Add `WanVariantType.I2V_A14B = "i2v_a14b"` if shipping. Probe via `transformer/config.json::image_dim`. + +--- + +## Phase 8 — Inpaint + +Inpaint = image-to-image with denoise mask. `RectifiedFlowInpaintExtension` already handles this for Anima and FLUX. Wan's flow-matching scheduler is mathematically identical; reuse should be straightforward. + +### Implementation + +- `wan_denoise.py` accepts `denoise_mask: DenoiseMaskField | None`. +- Reuse `RectifiedFlowInpaintExtension` from `invokeai.backend.rectified_flow.rectified_flow_inpaint_extension`. Anima needed `AnimaInpaintExtension` for shifted timesteps; for Wan, check if the scheduler shift introduces the same issue. If yes, subclass. + +### Files touched in Phase 8 + +- `invokeai/app/invocations/wan_denoise.py` (mask branch) +- Possibly `invokeai/backend/wan/wan_inpaint_extension.py` + +--- + +## Phase 9 — Frontend Wiring + +### Type definitions + +- `invokeai/frontend/web/src/services/api/types.ts` — `isWanMainModelConfig`, `isWanLoRAModelConfig`, `isWanVAEModelConfig`, `isWanT5EncoderModelConfig`, `isWanControlNetModelConfig`. Mirror Anima/Qwen Image at lines 286-322. +- `invokeai/frontend/web/src/services/api/hooks/modelsByType.ts` — `useWanMainModels`, `useWanVAEModels`, `useWanT5EncoderModels`, `useWanLoRAModels`, `useWanControlNetModels`. Mirror lines 105-113. + +### Params slice + +- `invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts` + - Selectors: `selectWanVaeModel`, `selectWanT5EncoderModel`, `selectWanScheduler`, `selectWanBoundaryRatio`, `selectWanLowVramMode`. Anima sets the precedent. + - State: `wanVaeModel`, `wanT5EncoderModel`, etc. + +### Graph builder + +- `invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts` (new). Mirror `buildAnimaGraph.ts`. Differences: + - Two transformer pickers when variant is A14B. 
+ - Dual-expert LoRA collection node. + - Separate VAE / WanT5Encoder pickers (GGUF requires them). + - Low VRAM toggle. +- `invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts` (new). +- `invokeai/frontend/web/src/features/nodes/util/graph/types.ts` — register Wan in `GraphBuilderArg`. +- Graph dispatcher (`buildGenerationTabGraph.ts`) — add `case 'wan'`. + +### UI + +- ModelPicker, ControlLayer toolbox iterate over `BaseModelType` so adding `'wan'` should propagate. Audit `ModelPicker.tsx` for hardcoded base lists. + +--- + +## Phase 10 — Starter Models, Migration, Docs + +### Starter models + +- `invokeai/backend/model_manager/starter_models.py` — append `# region Wan` block: + ```python + wan_t5_encoder = StarterModel(name="Wan T5 Encoder (UMT5-XXL)", + base=BaseModelType.Any, source="Wan-AI/Wan2.2-T2V-A14B::text_encoder+tokenizer", + type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder, ...) + wan_vae = StarterModel(name="Wan VAE", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B::vae/diffusion_pytorch_model.safetensors", + type=ModelType.VAE, format=ModelFormat.Checkpoint, ...) + wan_vae_2_2 = StarterModel(name="Wan2.2 VAE", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B::vae/...", + type=ModelType.VAE, ...) + wan_t2v_a14b = StarterModel(name="Wan 2.2 T2V A14B", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B", + type=ModelType.Main, variant=WanVariantType.T2V_A14B, ...) + wan_t2v_a14b_high_q4 = StarterModel(name="Wan 2.2 T2V A14B High Noise (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/city96/Wan2.2-T2V-A14B-gguf/resolve/main/wan2.2-t2v-a14b-high-noise-Q4_K_M.gguf", + ..., dependencies=[wan_t5_encoder, wan_vae]) + wan_t2v_a14b_low_q4 = ... + wan_ti2v_5b = StarterModel(name="Wan 2.2 TI2V 5B", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B", + variant=WanVariantType.TI2V_5B, ...) + ``` +- Verify each `source` URL exists before merge. 
+ +### DB migration + +- New `migration_NN.py` only if `BaseModelType` Enum constraint rejects unknown values. Inspect `migration_26.py` (Anima) for pattern. + +### Docs + +- Update `docs/` (model support tables, getting-started for Wan). + +--- + +## Risk Register + +| # | Risk / Unknown | Mitigation | +|---|---|---| +| 1 | `WanPipeline` Option A bypass — enough hooks? | Source confirms `WanTransformer3DModel.forward` is callable directly. Low risk. | +| 2 | Dual-expert VRAM blowup | Default RAM-resident + GPU-juggle on boundary. Low VRAM mode covers tighter cases. GGUF Q4 → ~7 GB/expert. Document expectations. | +| 3 | GGUF availability for both A14B experts | `city96/Wan2.2-T2V-A14B-gguf` advertises both. Verify before Phase 4. Mixed-quant denoise as fallback. | +| 4 | UMT5-XXL vs T5-XXL distinction | Strict probe via `model_type`. Separate `WanT5Encoder` type prevents cross-wiring. | +| 5 | Wan ControlNet ecosystem maturity | Phase 6 may slip — ship v1 without if Wan2.2-native models not ready, ControlNet as v2. | +| 6 | Single-frame inference is OOD | Empirically fine. Document as known property. | +| 7 | Boundary ratio variability | Read from `scheduler/scheduler_config.json::boundary_ratio` per-model. Default 0.875. | +| 8 | TI2V-5B's 48-channel VAE | Probe both 16/48 in `_is_wan_vae`. Denoise loop reads `z_dim` from VAE config, doesn't hardcode. | +| 9 | DB enum widening | Standard migration template (Anima's `migration_26.py`). Low risk. | +| 10 | Diffusers' modular `Wan22Blocks`/`WanModularPipeline` — use it? | No. Modular = extra moving part. Stick to `WanPipeline`/`WanTransformer3DModel`. | +| 11 | FE vitest tests for new base type | Mostly automatic via zod enum; audit `*.test.ts` mentioning `'anima'`. | +| 12 | Step preview latents for Wan | Reuse FLUX 16-channel matrix for A14B. TI2V-5B's 48-channel: degraded preview (slice 16) until proper RGB factors generated via `scripts/generate_vae_linear_approximation.py`. 
| + +--- + +## Recommended Working Cadence + +1. Phases 0 + 1 (TI2V-5B Diffusers MVP) — one PR, foundational, no user-visible features but unblocks everything. +2. Phase 2 (A14B dual-expert + Low VRAM mode) — second PR, first user-visible feature. +3. Phase 3 (standalone components) — third PR, parallelizable with Phase 2. +4. Phase 4 (GGUF) — fourth PR, the big VRAM win. +5. Phase 5 (LoRA) — fifth PR. +6. Phases 6, 7, 8 in parallel — small targeted PRs. +7. Phase 9 (FE) tracks each backend phase. +8. Phase 10 (starters) gates final release. + +Total: ~4–6 weeks focused work. Schedule risk concentrated on Phase 6 (ControlNet) and Phase 4 (GGUF arch verification). diff --git a/invokeai/app/api/dependencies.py b/invokeai/app/api/dependencies.py index e7468c1bca4..07713c61976 100644 --- a/invokeai/app/api/dependencies.py +++ b/invokeai/app/api/dependencies.py @@ -62,6 +62,7 @@ QwenImageConditioningInfo, SD3ConditioningInfo, SDXLConditioningInfo, + WanConditioningInfo, ZImageConditioningInfo, ) from invokeai.backend.util.logging import InvokeAILogger @@ -152,6 +153,7 @@ def initialize( ZImageConditioningInfo, QwenImageConditioningInfo, AnimaConditioningInfo, + WanConditioningInfo, ], ephemeral=True, ), diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index e53aeb417b2..f0ec3c9b5b4 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -173,6 +173,8 @@ class FieldDescriptions: z_image_model = "Z-Image model (Transformer) to load" qwen_image_model = "Qwen Image Edit model (Transformer) to load" qwen_vl_encoder = "Qwen2.5-VL tokenizer, processor and text/vision encoder" + wan_model = "Wan 2.2 model (Transformer) to load" + wan_t5_encoder = "UMT5-XXL tokenizer and text encoder for Wan 2.2" sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load" sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load" onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load" @@ -364,6 
class WanTransformerField(BaseModel):
    """Transformer field for Wan 2.2 models.

    Wan 2.2 A14B is a Mixture-of-Experts model with two transformer experts:
    a high-noise expert (active at large timesteps) and a low-noise expert
    (active at small timesteps). TI2V-5B is a single-transformer model and only
    populates ``transformer``.

    ``boundary_ratio`` matches Diffusers' ``WanPipeline`` semantics: it's the
    boundary timestep as a fraction of ``num_train_timesteps`` (typically 1000),
    so ``boundary_ratio=0.875`` means the high-noise expert handles t >= 875 and
    the low-noise expert handles t < 875.
    """

    # Always present: the only transformer for TI2V-5B, the high-noise expert for A14B.
    transformer: ModelIdentifierField = Field(
        description="Primary transformer submodel. For A14B this is the high-noise expert.",
    )

    # Second expert; stays None for single-transformer variants.
    transformer_low_noise: ModelIdentifierField | None = Field(
        description="Low-noise transformer expert (Wan 2.2 A14B only). None for TI2V-5B.",
        default=None,
    )

    # LoRA lists are kept per expert so the two experts can be patched independently.
    loras: List[LoRAField] = Field(
        description="LoRAs to apply to the primary transformer. For A14B applied to the high-noise expert.",
        default_factory=list,
    )
    loras_low_noise: List[LoRAField] = Field(
        description=(
            "Optional separate LoRAs for the low-noise expert (Wan 2.2 A14B). "
            "If empty and transformer_low_noise is set, the primary 'loras' list is reused."
        ),
        default_factory=list,
    )

    # Fraction in [0, 1]; validated by pydantic via ge/le.
    boundary_ratio: float = Field(
        description=(
            "Boundary timestep as a fraction of num_train_timesteps (Wan 2.2 A14B only). "
            "High-noise expert: t >= boundary_ratio * num_train_timesteps. Low-noise expert: t below. "
            "Ignored for TI2V-5B."
        ),
        default=0.875,
        ge=0.0,
        le=1.0,
    )
+ +To keep VRAM usage manageable both experts are pinned in the model cache +(system RAM) but only one is GPU-resident at a time. The boundary is normally +crossed once per denoise, so the swap incurs a single CPU→GPU transfer. + +Phase 8 will add inpaint via :class:`RectifiedFlowInpaintExtension`. + +The transformer call signature mirrors Diffusers' ``WanPipeline``: + + transformer( + hidden_states=latents_5d, # [B, C, 1, H/s, W/s] + timestep=t.expand(B), # scheduler-time + encoder_hidden_states=prompt_embeds, # [B, seq_len, 4096] + attention_kwargs=None, + return_dict=False, + )[0] +""" + +from __future__ import annotations + +from contextlib import ExitStack +from pathlib import Path +from typing import Callable, Iterator, Optional + +import torch +import torchvision.transforms as tv_transforms +from torchvision.transforms.functional import resize as tv_resize +from tqdm import tqdm + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + DenoiseMaskField, + FieldDescriptions, + Input, + InputField, + LatentsField, + WanConditioningField, +) +from invokeai.app.invocations.model import WanTransformerField +from invokeai.app.invocations.primitives import LatentsOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.taxonomy import BaseModelType, WanVariantType +from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension +from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import WanConditioningInfo +from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.wan.sampling_utils import get_spatial_scale_factor, make_noise + + +def _resolve_variant(context: InvocationContext, transformer_field: WanTransformerField) -> WanVariantType: + """Look 
# ``Any`` is referenced by the annotations below but is missing from the module's
# ``typing`` import; importing it here keeps this block self-contained.
from typing import Any  # noqa: E402


class _ExpertSwapper:
    """Track which Wan transformer expert currently has its ``model_on_device`` context open.

    At most one expert's context is entered at any time. ``get(label)`` returns
    the model yielded by that expert's ``model_on_device()`` context, first
    exiting the previously active context when the label changes. ``close()``
    exits whatever context is still open and is safe to call repeatedly.

    NOTE(review): the handles are presumably ``LoadedModel`` objects whose
    ``model_on_device()`` yields a ``(cached_model, model)`` pair — this class
    only relies on that two-tuple shape.
    """

    HIGH = "high"
    LOW = "low"

    def __init__(self, high_info: Any, low_info: Optional[Any]) -> None:
        """Store the expert handles; nothing is entered until the first ``get``.

        Args:
            high_info: Handle for the high-noise (or only) expert.
            low_info: Handle for the low-noise expert, or ``None`` when the model
                has a single transformer.
        """
        self._high_info = high_info
        self._low_info = low_info
        self._active_label: Optional[str] = None
        self._active_ctx: Optional[Any] = None
        self._active_model: Optional[Any] = None

    def get(self, label: str) -> Any:
        """Return the model for ``label``, swapping the active context if needed.

        Args:
            label: ``_ExpertSwapper.HIGH`` or ``_ExpertSwapper.LOW``.

        Returns:
            The second element of the tuple yielded by ``model_on_device()``.

        Raises:
            ValueError: If ``label`` is unknown, or ``LOW`` is requested but no
                low-noise handle was supplied.
        """
        if label not in (self.HIGH, self.LOW):
            raise ValueError(f"Unknown expert label: {label!r}")
        if label == self.LOW and self._low_info is None:
            raise ValueError("Low-noise expert was requested but is not available.")
        if label == self._active_label:
            assert self._active_model is not None
            return self._active_model

        # Release the current expert before entering the other one so the two
        # contexts are never open simultaneously.
        self._release()

        info = self._high_info if label == self.HIGH else self._low_info
        ctx = info.model_on_device()
        _cached, model = ctx.__enter__()
        # Only record the new expert once __enter__ has succeeded.
        self._active_label = label
        self._active_ctx = ctx
        self._active_model = model
        return model

    def _release(self) -> None:
        """Exit the active context (if any) and clear all bookkeeping."""
        ctx = self._active_ctx
        # Clear state *before* exiting: if __exit__ raises, a later close() must
        # not try to exit the same context a second time.
        self._active_label = None
        self._active_ctx = None
        self._active_model = None
        if ctx is not None:
            ctx.__exit__(None, None, None)

    def close(self) -> None:
        """Release the active expert. Idempotent."""
        self._release()
+ """ + + transformer: WanTransformerField = InputField( + description="Wan transformer field (transformer + optional dual-expert metadata).", + input=Input.Connection, + title="Transformer", + ) + positive_conditioning: WanConditioningField = InputField( + description=FieldDescriptions.positive_cond, input=Input.Connection + ) + negative_conditioning: Optional[WanConditioningField] = InputField( + default=None, description=FieldDescriptions.negative_cond, input=Input.Connection + ) + + latents: Optional[LatentsField] = InputField( + default=None, + description=FieldDescriptions.latents, + input=Input.Connection, + ) + denoise_mask: Optional[DenoiseMaskField] = InputField( + default=None, + description=FieldDescriptions.denoise_mask, + input=Input.Connection, + ) + + denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start) + denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end) + add_noise: bool = InputField(default=True, description="Add noise based on denoising start.") + + guidance_scale: float = InputField( + default=4.0, + ge=1.0, + description="Classifier-free guidance scale. 4.0 is the Wan 2.2 default for A14B; " + "TI2V-5B can tolerate higher values up to ~5.5.", + title="Guidance Scale", + ) + guidance_scale_low_noise: Optional[float] = InputField( + default=None, + ge=1.0, + description="Optional separate CFG scale for the low-noise expert (Wan 2.2 A14B only). " + "If unset, the primary 'Guidance Scale' is reused. 
Ignored for TI2V-5B.", + title="Guidance Scale (Low Noise)", + ) + width: int = InputField(default=1024, multiple_of=8, description="Width of the generated image.") + height: int = InputField(default=1024, multiple_of=8, description="Height of the generated image.") + steps: int = InputField(default=40, gt=0, description="Number of denoising steps.") + seed: int = InputField(default=0, description="Randomness seed for reproducibility.") + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> LatentsOutput: + latents = self._run_diffusion(context) + latents = latents.detach().to("cpu") + name = context.tensors.save(tensor=latents) + return LatentsOutput.build(latents_name=name, latents=latents, seed=None) + + def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: + if self.denoising_start >= self.denoising_end: + raise ValueError( + f"denoising_start ({self.denoising_start}) must be less than denoising_end ({self.denoising_end})." + ) + + device = TorchDevice.choose_torch_device() + inference_dtype = TorchDevice.choose_bfloat16_safe_dtype(device) + + variant = _resolve_variant(context, self.transformer) + spatial_scale = get_spatial_scale_factor(variant) + + scheduler = self._build_scheduler(context, device) + + pos_cond = self._load_conditioning( + context, self.positive_conditioning, device=device, dtype=inference_dtype + ) + do_cfg = self.guidance_scale != 1.0 and self.negative_conditioning is not None + neg_cond: WanConditioningInfo | None = None + if do_cfg: + assert self.negative_conditioning is not None + neg_cond = self._load_conditioning( + context, self.negative_conditioning, device=device, dtype=inference_dtype + ) + + # Schedule timesteps. set_timesteps populates scheduler.timesteps and + # scheduler.sigmas (where sigmas is in [0, 1] flow-matching space). + scheduler.set_timesteps(num_inference_steps=self.steps, device=device) + timesteps = scheduler.timesteps + # sigmas has length steps + 1. 
+ sigmas = scheduler.sigmas + + # Apply denoising_start / denoising_end clipping. + if self.denoising_start > 0 or self.denoising_end < 1: + start_idx = int(self.denoising_start * self.steps) + end_idx = int(self.denoising_end * self.steps) + timesteps = timesteps[start_idx:end_idx] + sigmas = sigmas[start_idx : end_idx + 1] + total_steps = len(timesteps) + + # Load init latents (img2img) and convert 4D → 5D. + init_latents_5d: torch.Tensor | None = None + if self.latents is not None: + loaded = context.tensors.load(self.latents.latents_name).to(device=device, dtype=inference_dtype) + if loaded.ndim == 4: + loaded = loaded.unsqueeze(2) + init_latents_5d = loaded + + # Determine the latent channel count. Prefer init_latents shape; otherwise + # fall back to the variant default. (We avoid loading the transformer just + # to read .config.in_channels; the variant gives us the right answer.) + latent_channels = ( + init_latents_5d.shape[1] + if init_latents_5d is not None + else (48 if variant == WanVariantType.TI2V_5B else 16) + ) + + noise = make_noise( + batch_size=1, + latent_channels=latent_channels, + height=self.height, + width=self.width, + spatial_scale_factor=spatial_scale, + device=device, + dtype=inference_dtype, + seed=self.seed, + ) + + # Combine init latents + noise per the schedule's starting sigma. + if init_latents_5d is not None: + if self.add_noise: + s_0 = float(sigmas[0]) + latents = s_0 * noise + (1.0 - s_0) * init_latents_5d + else: + latents = init_latents_5d + else: + if self.denoising_start > 1e-5: + raise ValueError("denoising_start should be 0 when initial latents are not provided.") + latents = noise + + if total_steps <= 0: + return latents.squeeze(2) + + # Inpaint extension (4D space — the existing extension is shape-agnostic + # but operates on the squeezed-T shape we use for masks). 
+ inpaint_mask = self._prep_inpaint_mask(context, latents.squeeze(2)) + inpaint_extension: RectifiedFlowInpaintExtension | None = None + if inpaint_mask is not None: + if init_latents_5d is None: + raise ValueError("Initial latents are required when using an inpaint mask (img2img inpainting).") + inpaint_extension = RectifiedFlowInpaintExtension( + init_latents=init_latents_5d.squeeze(2), + inpaint_mask=inpaint_mask, + noise=noise.squeeze(2), + ) + + step_callback = self._build_step_callback(context) + + # Resolve experts and the boundary timestep that triggers the MoE swap. + high_info = context.models.load(self.transformer.transformer) + low_info = ( + context.models.load(self.transformer.transformer_low_noise) + if self.transformer.transformer_low_noise is not None + else None + ) + # FlowMatchEulerDiscreteScheduler stores num_train_timesteps in its config + # (default 1000). Diffusers' WanPipeline computes: + # boundary_timestep = boundary_ratio * num_train_timesteps + num_train_timesteps = int(scheduler.config.num_train_timesteps) + boundary_timestep = ( + self.transformer.boundary_ratio * num_train_timesteps if low_info is not None else None + ) + + with ExitStack() as exit_stack: + swapper = _ExpertSwapper(high_info, low_info) + exit_stack.callback(swapper.close) + + for step_idx, t in enumerate(tqdm(timesteps, desc="Denoising (Wan 2.2)", total=total_steps)): + timestep = t.expand(latents.shape[0]) + + # Pick the active expert: high-noise for t >= boundary_timestep, + # low-noise below. Single-transformer models always use HIGH. 
+ if low_info is not None and float(t) < float(boundary_timestep): + active_label = _ExpertSwapper.LOW + active_cfg = ( + self.guidance_scale_low_noise + if self.guidance_scale_low_noise is not None + else self.guidance_scale + ) + else: + active_label = _ExpertSwapper.HIGH + active_cfg = self.guidance_scale + + transformer = swapper.get(active_label) + + noise_pred_cond = transformer( + hidden_states=latents, + timestep=timestep, + encoder_hidden_states=pos_cond.prompt_embeds.unsqueeze(0), + attention_kwargs=None, + return_dict=False, + )[0] + + if do_cfg and neg_cond is not None: + noise_pred_uncond = transformer( + hidden_states=latents, + timestep=timestep, + encoder_hidden_states=neg_cond.prompt_embeds.unsqueeze(0), + attention_kwargs=None, + return_dict=False, + )[0] + noise_pred = noise_pred_uncond + active_cfg * (noise_pred_cond - noise_pred_uncond) + else: + noise_pred = noise_pred_cond + + latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0] + + if inpaint_extension is not None: + sigma_prev = float(sigmas[step_idx + 1]) + latents_4d = latents.squeeze(2) + latents_4d = inpaint_extension.merge_intermediate_latents_with_init_latents( + latents_4d, sigma_prev + ) + latents = latents_4d.unsqueeze(2) + + step_callback( + PipelineIntermediateState( + step=step_idx + 1, + order=1, + total_steps=total_steps, + timestep=int(t.item()), + latents=latents.squeeze(2), + ) + ) + + # Squeeze T for downstream 4D consumers. + return latents.squeeze(2) + + def _build_scheduler(self, context: InvocationContext, device: torch.device): + """Construct ``FlowMatchEulerDiscreteScheduler`` for this run. + + Loads the model's on-disk scheduler config when available so per-model + ``shift`` settings are honoured; falls back to defaults otherwise. 
+ """ + from diffusers import FlowMatchEulerDiscreteScheduler + + scheduler_dir = _scheduler_path_for_transformer(context, self.transformer) + if scheduler_dir is not None: + return FlowMatchEulerDiscreteScheduler.from_pretrained( + str(scheduler_dir), local_files_only=True + ) + return FlowMatchEulerDiscreteScheduler() + + def _load_conditioning( + self, + context: InvocationContext, + cond_field: WanConditioningField, + *, + device: torch.device, + dtype: torch.dtype, + ) -> WanConditioningInfo: + cond_data = context.conditioning.load(cond_field.conditioning_name) + assert len(cond_data.conditionings) == 1 + cond_info = cond_data.conditionings[0] + assert isinstance(cond_info, WanConditioningInfo) + return cond_info.to(device=device, dtype=dtype) + + def _prep_inpaint_mask(self, context: InvocationContext, latents_4d: torch.Tensor) -> torch.Tensor | None: + """Resize the user-supplied mask down to latent resolution. + + Convention matches Anima/FLUX: the original mask has 0 = preserve and + 1 = denoise; the extension expects the inverted form. + """ + if self.denoise_mask is None: + return None + mask = context.tensors.load(self.denoise_mask.mask_name) + mask = 1.0 - mask + _, _, latent_h, latent_w = latents_4d.shape + mask = tv_resize( + img=mask, + size=[latent_h, latent_w], + interpolation=tv_transforms.InterpolationMode.BILINEAR, + antialias=False, + ) + return mask.to(device=latents_4d.device, dtype=latents_4d.dtype) + + def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]: + def step_callback(state: PipelineIntermediateState) -> None: + context.util.sd_step_callback(state, BaseModelType.Wan) + + return step_callback + + def _lora_iterator(self, context: InvocationContext) -> Iterator: + # Phase 5 will populate this with the actual LoRA application path. 
@invocation(
    "wan_i2l",
    title="Image to Latents - Wan 2.2",
    tags=["image", "latents", "vae", "i2l", "wan"],
    category="image",
    version="1.0.0",
    classification=Classification.Prototype,
)
class WanImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Encodes an image with the Wan VAE (AutoencoderKLWan).

    The output latents have the temporal dimension squeezed out, so downstream
    nodes see 4D ``[B, C, H, W]``. The denoise loop re-adds ``T=1`` before
    feeding the transformer.
    """

    image: ImageField = InputField(description="The image to encode.")
    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)

    @staticmethod
    def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tensor:
        """Encode ``image_tensor`` with the Wan VAE and normalise the result.

        The posterior is sampled with a fixed-seed generator so that encoding
        the same image twice yields the same latents; an unseeded draw would
        make img2img / inpaint results irreproducible for a fixed noise seed.

        Args:
            vae_info: Loaded model handle; its ``model`` must be an ``AutoencoderKLWan``.
            image_tensor: Image tensor; a 4D input gets a singleton axis
                inserted at index 2 before encoding.

        Returns:
            Latents normalised as ``(latents - latents_mean) / latents_std``
            using the VAE config, with axis 2 squeezed out when 5D.

        Raises:
            TypeError: If ``vae_info.model`` is not an ``AutoencoderKLWan``.
        """
        if not isinstance(vae_info.model, AutoencoderKLWan):
            raise TypeError(
                f"Expected AutoencoderKLWan for Wan VAE, got {type(vae_info.model).__name__}."
            )

        estimated_working_memory = estimate_vae_working_memory_flux(
            operation="encode",
            image_tensor=image_tensor,
            vae=vae_info.model,
        )

        with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
            assert isinstance(vae, AutoencoderKLWan)

            # Run the encode in whatever dtype the VAE weights are stored in.
            vae_dtype = next(iter(vae.parameters())).dtype
            image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae_dtype)

            with torch.inference_mode():
                # Wan VAE expects 5D [B, C, T, H, W].
                if image_tensor.ndim == 4:
                    image_tensor = image_tensor.unsqueeze(2)  # [B, C, H, W] -> [B, C, 1, H, W]

                encoded = vae.encode(image_tensor, return_dict=False)[0]

                # Fixed-seed CPU generator: makes the posterior draw deterministic.
                # NOTE(review): a CPU generator is used on the assumption that
                # diffusers draws on the generator's device and then moves the
                # sample — confirm against the installed diffusers version.
                generator = torch.Generator(device="cpu").manual_seed(0)
                latents = encoded.sample(generator=generator).to(dtype=vae_dtype)

                # Normalise to the denoiser's expected zero-centred space:
                #   (latents - mean) / std
                latents_mean = torch.tensor(vae.config.latents_mean).view(1, -1, 1, 1, 1).to(latents)
                latents_std = torch.tensor(vae.config.latents_std).view(1, -1, 1, 1, 1).to(latents)
                latents = (latents - latents_mean) / latents_std

                # Drop the temporal dim to keep the rest of the InvokeAI pipeline 4D.
                if latents.ndim == 5:
                    latents = latents.squeeze(2)

        return latents

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> LatentsOutput:
        """Load the input image, encode it, and save the latents as a tensor."""
        image = context.images.get_pil(self.image.image_name)

        image_tensor = image_resized_to_grid_as_tensor(image.convert("RGB"))
        # Add a leading batch axis when the helper returns an unbatched [C, H, W] tensor.
        if image_tensor.dim() == 3:
            image_tensor = einops.rearrange(image_tensor, "c h w -> 1 c h w")

        vae_info = context.models.load(self.vae.vae)

        context.util.signal_progress("Running Wan VAE encode")
        latents = self.vae_encode(vae_info=vae_info, image_tensor=image_tensor)

        latents = latents.to("cpu")
        name = context.tensors.save(tensor=latents)
        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
@invocation(
    "wan_l2i",
    title="Latents to Image - Wan 2.2",
    tags=["latents", "image", "vae", "l2i", "wan"],
    category="latents",
    version="1.0.0",
    classification=Classification.Prototype,
)
class WanLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
    """Decodes Wan latents back to RGB."""

    latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
    vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)

    @torch.no_grad()
    def invoke(self, context: InvocationContext) -> ImageOutput:
        """Denormalise the stored latents, decode them with the Wan VAE, and save the image.

        Raises:
            TypeError: If the loaded VAE is not an ``AutoencoderKLWan``.
        """
        z = context.tensors.load(self.latents.latents_name)

        loaded_vae = context.models.load(self.vae.vae)
        model = loaded_vae.model
        if not isinstance(model, AutoencoderKLWan):
            raise TypeError(
                f"Expected AutoencoderKLWan for Wan VAE, got {type(model).__name__}."
            )

        working_mem = estimate_vae_working_memory_flux(
            operation="decode",
            image_tensor=z,
            vae=model,
        )

        with loaded_vae.model_on_device(working_mem_bytes=working_mem) as (_, vae):
            context.util.signal_progress("Running Wan VAE decode")
            assert isinstance(vae, AutoencoderKLWan)

            # Decode in the dtype of the VAE weights.
            weight_dtype = next(iter(vae.parameters())).dtype
            z = z.to(device=TorchDevice.choose_torch_device(), dtype=weight_dtype)

            TorchDevice.empty_cache()

            with torch.inference_mode():
                # The VAE takes 5D input; restore the singleton axis at index 2.
                z = z.unsqueeze(2) if z.ndim == 4 else z

                # Per-channel statistics from the VAE config, broadcast over [B, C, T, H, W].
                mean = torch.tensor(vae.config.latents_mean).view(1, -1, 1, 1, 1).to(z)
                std = torch.tensor(vae.config.latents_std).view(1, -1, 1, 1, 1).to(z)
                # Inverse of the encode-side normalisation: z * std + mean.
                z = z * std + mean

                pixels = vae.decode(z, return_dict=False)[0]
                if pixels.ndim == 5:
                    pixels = pixels.squeeze(2)

            # Map [-1, 1] to [0, 255] and lay the first batch item out as H x W x C.
            hwc = rearrange(pixels.clamp(-1, 1)[0], "c h w -> h w c")
            as_uint8 = (127.5 * (hwc + 1.0)).byte()
            img_pil = Image.fromarray(as_uint8.cpu().numpy())

        TorchDevice.empty_cache()

        image_dto = context.images.save(image=img_pil)
        return ImageOutput.build(image_dto)
invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType, SubModelType + + +@invocation_output("wan_model_loader_output") +class WanModelLoaderOutput(BaseInvocationOutput): + """Wan 2.2 model loader output.""" + + transformer: WanTransformerField = OutputField( + description="Wan transformer (one or two experts depending on the variant)", + title="Transformer", + ) + wan_t5_encoder: WanT5EncoderField = OutputField( + description=FieldDescriptions.wan_t5_encoder, + title="UMT5-XXL Encoder", + ) + vae: VAEField = OutputField(description=FieldDescriptions.vae, title="VAE") + + +@invocation( + "wan_model_loader", + title="Main Model - Wan 2.2", + tags=["model", "wan"], + category="model", + version="1.0.0", + classification=Classification.Prototype, +) +class WanModelLoaderInvocation(BaseInvocation): + """Loads a Wan 2.2 model, outputting its submodels. + + Diffusers-format only for now; the transformer(s), VAE, and UMT5-XXL encoder + are pulled from the main model's submodel folders. + + For Wan 2.2 A14B (dual-expert MoE) the loader emits both ``transformer`` (the + high-noise expert at ``transformer/``) and ``transformer_low_noise`` (the + low-noise expert at ``transformer_2/``), along with the model's recorded + ``boundary_ratio`` for the denoise loop's expert swap. + + The standalone VAE picker is forward-compatibility wiring for Phase 3 (where + it becomes required for GGUF transformers). + """ + + model: ModelIdentifierField = InputField( + description=FieldDescriptions.wan_model, + input=Input.Direct, + ui_model_base=BaseModelType.Wan, + ui_model_type=ModelType.Main, + title="Transformer", + ) + + vae_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="Standalone Wan VAE model. 
If not set, the VAE is loaded from the main " + "model (when in Diffusers format).", + input=Input.Direct, + ui_model_base=BaseModelType.Wan, + ui_model_type=ModelType.VAE, + title="VAE", + ) + + def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: + main_config = context.models.get_config(self.model) + main_is_diffusers = main_config.format == ModelFormat.Diffusers + + # Primary transformer: the high-noise expert for A14B, or the only + # transformer for TI2V-5B. + transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + + # Dual-expert (A14B) wiring. The probe stores ``has_dual_expert`` and the + # ``boundary_ratio`` it read from model_index.json on the model config. + transformer_low_noise = None + boundary_ratio = 0.875 # Sensible Wan A14B default; overridden by model config when present. + if getattr(main_config, "has_dual_expert", False): + transformer_low_noise = self.model.model_copy(update={"submodel_type": SubModelType.Transformer2}) + recorded = getattr(main_config, "boundary_ratio", None) + if recorded is not None: + boundary_ratio = float(recorded) + + # VAE: standalone override > main (if Diffusers). + if self.vae_model is not None: + vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE}) + elif main_is_diffusers: + vae = self.model.model_copy(update={"submodel_type": SubModelType.VAE}) + else: + raise ValueError( + "No source for VAE. Either set 'VAE' to a standalone Wan VAE model, " + "or use a Diffusers Wan main model." + ) + + # Tokenizer + text encoder: only from the main model in Phase 1. + # Phase 3 will add a standalone WanT5Encoder picker so GGUF mains can run + # without a Diffusers Wan checkpoint installed. + if not main_is_diffusers: + raise ValueError( + "Only Diffusers-format Wan models are supported in this build. " + "Standalone Wan T5 encoders will be supported in a future release." 
+ ) + tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + text_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + + return WanModelLoaderOutput( + transformer=WanTransformerField( + transformer=transformer, + transformer_low_noise=transformer_low_noise, + boundary_ratio=boundary_ratio, + ), + wan_t5_encoder=WanT5EncoderField(tokenizer=tokenizer, text_encoder=text_encoder), + vae=VAEField(vae=vae), + ) diff --git a/invokeai/app/invocations/wan_text_encoder.py b/invokeai/app/invocations/wan_text_encoder.py new file mode 100644 index 00000000000..183195f5a21 --- /dev/null +++ b/invokeai/app/invocations/wan_text_encoder.py @@ -0,0 +1,111 @@ +import torch + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + FieldDescriptions, + Input, + InputField, + UIComponent, +) +from invokeai.app.invocations.model import WanT5EncoderField +from invokeai.app.invocations.primitives import WanConditioningOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.load.model_cache.utils import get_effective_device +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( + ConditioningFieldData, + WanConditioningInfo, +) + +# Matches the diffusers WanPipeline default — Wan was trained at this prompt length. +WAN_T5_MAX_SEQ_LEN = 226 + + +@invocation( + "wan_text_encoder", + title="Prompt - Wan 2.2", + tags=["prompt", "conditioning", "wan"], + category="conditioning", + version="1.0.0", + classification=Classification.Prototype, +) +class WanTextEncoderInvocation(BaseInvocation): + """Encodes a text prompt for Wan 2.2 using the UMT5-XXL encoder. + + Output is the encoder's last hidden state (shape: [seq_len=226, 4096]) plus + an attention mask marking valid (non-padding) tokens. 
The Wan transformer + consumes these directly as ``encoder_hidden_states``. + """ + + prompt: str = InputField(description="Text prompt for Wan 2.2.", ui_component=UIComponent.Textarea) + wan_t5_encoder: WanT5EncoderField = InputField( + title="UMT5-XXL Encoder", + description=FieldDescriptions.wan_t5_encoder, + input=Input.Connection, + ) + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> WanConditioningOutput: + prompt_embeds, attention_mask = self._encode(context) + + # Persist on CPU; the denoise loop will move to device as needed. + prompt_embeds = prompt_embeds.detach().to("cpu") + attention_mask = attention_mask.detach().to("cpu") if attention_mask is not None else None + + conditioning_data = ConditioningFieldData( + conditionings=[ + WanConditioningInfo(prompt_embeds=prompt_embeds, prompt_attention_mask=attention_mask) + ] + ) + conditioning_name = context.conditioning.save(conditioning_data) + return WanConditioningOutput.build(conditioning_name) + + def _encode(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor | None]: + from diffusers.pipelines.wan.pipeline_wan import prompt_clean + from transformers import AutoTokenizer, UMT5EncoderModel + + cleaned = prompt_clean(self.prompt) + + # The tokenizer is small enough to load directly from disk without going + # through the model cache. + tokenizer_config = context.models.get_config(self.wan_t5_encoder.tokenizer) + tokenizer_path = context.models.get_absolute_path(tokenizer_config) + tokenizer = AutoTokenizer.from_pretrained(str(tokenizer_path), local_files_only=True) + + text_inputs = tokenizer( + [cleaned], + padding="max_length", + max_length=WAN_T5_MAX_SEQ_LEN, + truncation=True, + add_special_tokens=True, + return_attention_mask=True, + return_tensors="pt", + ) + + # Load the text encoder via the model cache. 
+ text_encoder_info = context.models.load(self.wan_t5_encoder.text_encoder) + with text_encoder_info.model_on_device() as (_, text_encoder): + assert isinstance(text_encoder, UMT5EncoderModel) + device = get_effective_device(text_encoder) + + input_ids = text_inputs.input_ids.to(device) + attention_mask = text_inputs.attention_mask.to(device) + + context.util.signal_progress("Running UMT5-XXL text encoder") + outputs = text_encoder(input_ids, attention_mask) + # Drop the batch dim (we always encode one prompt at a time). + prompt_embeds = outputs.last_hidden_state.squeeze(0) + attention_mask_out = attention_mask.squeeze(0) + + # Match the Diffusers reference: zero out the embeddings past the valid + # token count so the transformer sees clean padding. + valid_len = int(attention_mask_out.sum().item()) + if valid_len < prompt_embeds.shape[0]: + prompt_embeds = prompt_embeds.clone() + prompt_embeds[valid_len:] = 0 + + # If every token is valid we don't need the mask downstream. + mask_out: torch.Tensor | None = attention_mask_out + if attention_mask_out.all(): + mask_out = None + + return prompt_embeds.to(dtype=torch.bfloat16), mask_out diff --git a/invokeai/app/util/step_callback.py b/invokeai/app/util/step_callback.py index 08dc9a2265c..cb6e7cdf1ce 100644 --- a/invokeai/app/util/step_callback.py +++ b/invokeai/app/util/step_callback.py @@ -179,6 +179,31 @@ ANIMA_LATENT_RGB_BIAS = [-0.1835, -0.0868, -0.3360] +# Wan 2.2 A14B uses the standard 16-channel Wan VAE (same as Anima / Qwen Image). +# Wan 2.2 TI2V-5B uses Wan2.2-VAE with 48 latent channels — for now we slice the +# first 16 channels for the preview. TODO: generate dedicated 48-channel factors via +# scripts/generate_vae_linear_approximation.py once we have a TI2V-5B model on hand. 
+WAN_LATENT_RGB_FACTORS = [ + [-0.1299, -0.1692, 0.2932], + [0.0671, 0.0406, 0.0442], + [0.3568, 0.2548, 0.1747], + [0.0372, 0.2344, 0.1420], + [0.0313, 0.0189, -0.0328], + [0.0296, -0.0956, -0.0665], + [-0.3477, -0.4059, -0.2925], + [0.0166, 0.1902, 0.1975], + [-0.0412, 0.0267, -0.1364], + [-0.1293, 0.0740, 0.1636], + [0.0680, 0.3019, 0.1128], + [0.0032, 0.0581, 0.0639], + [-0.1251, 0.0927, 0.1699], + [0.0060, -0.0633, 0.0005], + [0.3477, 0.2275, 0.2950], + [0.1984, 0.0913, 0.1861], +] + +WAN_LATENT_RGB_BIAS = [-0.1835, -0.0868, -0.3360] + def sample_to_lowres_estimated_image( samples: torch.Tensor, @@ -270,6 +295,13 @@ def diffusion_step_callback( # Anima uses Wan 2.1 VAE with 16 latent channels latent_rgb_factors = ANIMA_LATENT_RGB_FACTORS latent_rgb_bias = ANIMA_LATENT_RGB_BIAS + elif base_model == BaseModelType.Wan: + latent_rgb_factors = WAN_LATENT_RGB_FACTORS + latent_rgb_bias = WAN_LATENT_RGB_BIAS + # TI2V-5B latents have 48 channels; slice the first 16 as a degraded preview + # until proper 48-channel factors are generated. 
+ if sample.shape[-3] > 16: + sample = sample[..., :16, :, :] else: raise ValueError(f"Unsupported base model: {base_model}") diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index 985cb982d30..cae7c421f4d 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -78,6 +78,7 @@ Main_Diffusers_SD3_Config, Main_Diffusers_SDXL_Config, Main_Diffusers_SDXLRefiner_Config, + Main_Diffusers_Wan_Config, Main_Diffusers_ZImage_Config, Main_GGUF_Flux2_Config, Main_GGUF_FLUX_Config, @@ -173,6 +174,7 @@ Annotated[Main_Diffusers_Flux2_Config, Main_Diffusers_Flux2_Config.get_tag()], Annotated[Main_Diffusers_CogView4_Config, Main_Diffusers_CogView4_Config.get_tag()], Annotated[Main_Diffusers_QwenImage_Config, Main_Diffusers_QwenImage_Config.get_tag()], + Annotated[Main_Diffusers_Wan_Config, Main_Diffusers_Wan_Config.get_tag()], Annotated[Main_Diffusers_ZImage_Config, Main_Diffusers_ZImage_Config.get_tag()], # Main (Pipeline) - checkpoint format # IMPORTANT: FLUX.2 must be checked BEFORE FLUX.1 because FLUX.2 has specific validation diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py index 2d002d68dc5..324e014dd10 100644 --- a/invokeai/backend/model_manager/configs/main.py +++ b/invokeai/backend/model_manager/configs/main.py @@ -31,6 +31,7 @@ QwenImageVariantType, SchedulerPredictionType, SubModelType, + WanVariantType, ZImageVariantType, ) from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor @@ -59,7 +60,12 @@ class MainModelDefaultSettings(BaseModel): def from_base( cls, base: BaseModelType, - variant: Flux2VariantType | FluxVariantType | ModelVariantType | ZImageVariantType | None = None, + variant: Flux2VariantType + | FluxVariantType + | ModelVariantType + | WanVariantType + | ZImageVariantType + | None = None, ) -> Self | None: match base: case BaseModelType.StableDiffusion1: 
@@ -89,6 +95,12 @@ def from_base( return cls(steps=4, cfg_scale=1.0, width=1024, height=1024) case BaseModelType.QwenImage: return cls(steps=40, cfg_scale=4.0, width=1024, height=1024) + case BaseModelType.Wan: + # Wan 2.2 recommended defaults differ by variant. + if variant == WanVariantType.TI2V_5B: + return cls(steps=30, cfg_scale=5.0, width=1024, height=1024) + # Default to A14B settings (also used when variant is unknown). + return cls(steps=40, cfg_scale=4.0, width=1024, height=1024) case _: # TODO(psyche): Do we want defaults for other base types? return None @@ -1379,6 +1391,106 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) - return cls(**override_fields, variant=explicit_variant) +class Main_Diffusers_Wan_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base): + """Model config for Wan 2.2 diffusers models. + + Covers both the dual-expert T2V-A14B family and the single-transformer TI2V-5B + family. Variant is detected from the presence of a sibling ``transformer_2/`` + directory, then the VAE config's ``z_dim``, then a folder-name heuristic. + """ + + base: Literal[BaseModelType.Wan] = Field(default=BaseModelType.Wan) + variant: WanVariantType = Field() + has_dual_expert: bool = Field( + default=False, + description="Whether this model ships two transformer experts (Wan 2.2 A14B MoE). False for TI2V-5B.", + ) + boundary_ratio: float | None = Field( + default=None, + description="MoE expert switch point as a fraction of num_train_timesteps (typically 1000). " + "None for single-transformer models. Read from model_index.json by Diffusers' WanPipeline.", + ) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_dir(mod) + + raise_for_override_fields(cls, override_fields) + + # Wan repos ship with WanPipeline (T2V) or WanImageToVideoPipeline (I2V/TI2V). + # Either class name is sufficient to identify a Wan diffusers model. 
+ raise_for_class_name( + common_config_paths(mod.path), + { + "WanPipeline", + "WanImageToVideoPipeline", + }, + ) + + repo_variant = override_fields.pop("repo_variant", None) or cls._get_repo_variant_or_raise(mod) + + explicit_variant = override_fields.pop("variant", None) + has_dual_expert = (mod.path / "transformer_2" / "config.json").exists() + variant = explicit_variant or cls._detect_wan_variant(mod, has_dual_expert) + boundary_ratio = override_fields.pop("boundary_ratio", None) + if boundary_ratio is None: + boundary_ratio = cls._read_boundary_ratio(mod) + + return cls( + **override_fields, + repo_variant=repo_variant, + variant=variant, + has_dual_expert=has_dual_expert, + boundary_ratio=boundary_ratio, + ) + + @classmethod + def _read_boundary_ratio(cls, mod: ModelOnDisk) -> float | None: + """Pull ``boundary_ratio`` from ``model_index.json`` if present. + + Diffusers' ``WanPipeline.__init__`` registers it via ``register_to_config``, + which persists it as a top-level key in the saved pipeline config. + """ + try: + model_index = get_config_dict_or_raise(mod.path / "model_index.json") + except NotAMatchError: + return None + value = model_index.get("boundary_ratio") + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + @classmethod + def _detect_wan_variant(cls, mod: ModelOnDisk, has_dual_expert: bool) -> WanVariantType: + """Detect Wan variant from transformer + VAE config. + + - A14B: dual transformer experts, standard Wan VAE (z_dim=16). + - TI2V-5B: single transformer, Wan2.2-VAE (z_dim=48). + """ + if has_dual_expert: + return WanVariantType.T2V_A14B + + # Single-transformer model: distinguish TI2V-5B from any future single-expert + # A14B-derived release by inspecting the VAE latent dimension. 
+ try: + vae_config = get_config_dict_or_raise(mod.path / "vae" / "config.json") + z_dim = vae_config.get("z_dim") + if z_dim is not None and int(z_dim) >= 32: + return WanVariantType.TI2V_5B + except NotAMatchError: + # No VAE config to inspect — fall through to the heuristic path below. + pass + + # Filename / repo-name heuristic as a last resort. + name = mod.path.name.lower() + if "5b" in name or "ti2v" in name: + return WanVariantType.TI2V_5B + return WanVariantType.T2V_A14B + + class Main_Checkpoint_Anima_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base): """Model config for Anima single-file checkpoint models (safetensors). diff --git a/invokeai/backend/model_manager/load/model_loaders/wan.py b/invokeai/backend/model_manager/load/model_loaders/wan.py new file mode 100644 index 00000000000..36bb198523b --- /dev/null +++ b/invokeai/backend/model_manager/load/model_loaders/wan.py @@ -0,0 +1,82 @@ +"""Loader registrations for Wan 2.2 image-generation models. + +Phase 1-2 scope: +- Diffusers-format Wan 2.2 (TI2V-5B and the dual-expert A14B). +- Submodels handled: Transformer, Transformer2, VAE, TextEncoder, Tokenizer, Scheduler. + +``Transformer2`` is A14B's low-noise expert, loaded from ``transformer_2/``. +Phase 4 will add a GGUFQuantized loader for community single-file transformers. 
+""" + +from pathlib import Path +from typing import Optional + +import torch + +from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Diffusers_Config_Base +from invokeai.backend.model_manager.configs.factory import AnyModelConfig +from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry +from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader +from invokeai.backend.model_manager.taxonomy import ( + AnyModel, + BaseModelType, + ModelFormat, + ModelType, + SubModelType, +) + + +@ModelLoaderRegistry.register(base=BaseModelType.Wan, type=ModelType.Main, format=ModelFormat.Diffusers) +class WanDiffusersModel(GenericDiffusersLoader): + """Loader for Wan 2.2 diffusers-format models (T2V-A14B and TI2V-5B). + + Requests bfloat16 for every submodel it loads — fp16 is unstable on Wan VAE + (same issue affects the Flux VAE). Resolves the appropriate Hugging Face + class for each submodel via the parent loader's ``get_hf_load_class``. + """ + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if isinstance(config, Checkpoint_Config_Base): + raise NotImplementedError("Single-file checkpoint format is not yet supported for Wan models.") + + if submodel_type is None: + raise Exception("A submodel type must be provided when loading Wan main pipelines.") + + model_path = Path(config.path) + load_class = self.get_hf_load_class(model_path, submodel_type) + repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None + variant = repo_variant.value if repo_variant else None + model_path = model_path / submodel_type.value + + # bfloat16 across the board: matches Diffusers WanPipeline reference and + # avoids the fp16 instability seen in the Wan VAE. 
+ dtype_kwarg = {"dtype": torch.bfloat16} + try: + result: AnyModel = load_class.from_pretrained( + model_path, + **dtype_kwarg, + variant=variant, + local_files_only=True, + ) + except TypeError: + # Older diffusers releases use torch_dtype instead of dtype. + dtype_kwarg = {"torch_dtype": torch.bfloat16} + result = load_class.from_pretrained( + model_path, + **dtype_kwarg, + variant=variant, + local_files_only=True, + ) + except OSError as e: + # Some Wan repos ship without a fp16 variant suffix on every submodel. + # If the requested variant isn't on disk, fall back to the default weights. + if variant and "no file named" in str(e): + result = load_class.from_pretrained(model_path, **dtype_kwarg, local_files_only=True) + else: + raise + + return result diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py index a2e4e58bdc4..618585ea83a 100644 --- a/invokeai/backend/model_manager/taxonomy.py +++ b/invokeai/backend/model_manager/taxonomy.py @@ -58,6 +58,8 @@ class BaseModelType(str, Enum): """Indicates the model is associated with Qwen Image Edit 2511 model architecture.""" Anima = "anima" """Indicates the model is associated with Anima model architecture (Cosmos Predict2 DiT + LLM Adapter).""" + Wan = "wan" + """Indicates the model is associated with the Wan 2.2 model architecture (T2V-A14B / TI2V-5B), used for image generation at num_frames=1.""" Unknown = "unknown" """Indicates the model's base architecture is unknown.""" @@ -93,6 +95,7 @@ class SubModelType(str, Enum): UNet = "unet" Transformer = "transformer" + Transformer2 = "transformer_2" TextEncoder = "text_encoder" TextEncoder2 = "text_encoder_2" TextEncoder3 = "text_encoder_3" @@ -165,6 +168,22 @@ class QwenImageVariantType(str, Enum): """Qwen Image Edit - image editing model with reference image support.""" +class WanVariantType(str, Enum): + """Wan 2.2 model variants. + + Both variants are used for image generation at num_frames=1. 
They differ in + architecture: A14B is a Mixture-of-Experts model with two transformer experts + (high-noise and low-noise) totalling ~28B params; TI2V-5B is a single ~5B + transformer with a higher-compression VAE (z_dim=48). + """ + + T2V_A14B = "t2v_a14b" + """Wan 2.2 T2V-A14B - dual-expert MoE flagship (high-noise + low-noise transformers, standard 16-channel Wan VAE).""" + + TI2V_5B = "ti2v_5b" + """Wan 2.2 TI2V-5B - smaller single-transformer model with Wan2.2-VAE (48 latent channels).""" + + class Qwen3VariantType(str, Enum): """Qwen3 text encoder variants based on model size.""" @@ -248,6 +267,7 @@ class FluxLoRAFormat(str, Enum): Flux2VariantType, ZImageVariantType, QwenImageVariantType, + WanVariantType, Qwen3VariantType, ] variant_type_adapter = TypeAdapter[ @@ -257,6 +277,7 @@ class FluxLoRAFormat(str, Enum): | Flux2VariantType | ZImageVariantType | QwenImageVariantType + | WanVariantType | Qwen3VariantType ]( ModelVariantType @@ -265,5 +286,6 @@ class FluxLoRAFormat(str, Enum): | Flux2VariantType | ZImageVariantType | QwenImageVariantType + | WanVariantType | Qwen3VariantType ) diff --git a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py index 6a9959f1e87..2274b34890b 100644 --- a/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +++ b/invokeai/backend/stable_diffusion/diffusion/conditioning_data.py @@ -130,6 +130,27 @@ def to(self, device: torch.device | None = None, dtype: torch.dtype | None = Non return self +@dataclass +class WanConditioningInfo: + """Wan 2.2 text conditioning information from the UMT5-XXL encoder. + + The Wan transformer takes the encoder's last hidden state directly as + cross-attention context (``encoder_hidden_states``). + """ + + prompt_embeds: torch.Tensor + """UMT5-XXL hidden states. 
Shape: (seq_len, hidden_size) where hidden_size=4096.""" + + prompt_attention_mask: torch.Tensor | None = None + """Attention mask marking valid (non-padding) tokens. Shape: (seq_len,). 1 for valid, 0 for padding.""" + + def to(self, device: torch.device | None = None, dtype: torch.dtype | None = None): + self.prompt_embeds = self.prompt_embeds.to(device=device, dtype=dtype) + if self.prompt_attention_mask is not None: + self.prompt_attention_mask = self.prompt_attention_mask.to(device=device) + return self + + @dataclass class ConditioningFieldData: # If you change this class, adding more types, you _must_ update the instantiation of ObjectSerializerDisk in @@ -144,6 +165,7 @@ class ConditioningFieldData: | List[ZImageConditioningInfo] | List[QwenImageConditioningInfo] | List[AnimaConditioningInfo] + | List[WanConditioningInfo] ) diff --git a/invokeai/backend/wan/__init__.py b/invokeai/backend/wan/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/invokeai/backend/wan/sampling_utils.py b/invokeai/backend/wan/sampling_utils.py new file mode 100644 index 00000000000..2ccbdf470fd --- /dev/null +++ b/invokeai/backend/wan/sampling_utils.py @@ -0,0 +1,62 @@ +"""Sampling utilities for Wan 2.2 image generation. + +Single-frame inference uses 5D ``[B, C, T=1, H, W]`` latent tensors. The +scale factors are dictated by the model variant: + +* A14B — standard Wan VAE: spatial 8x, latent channels 16 +* TI2V-5B — Wan2.2-VAE: spatial 16x, latent channels 48 +""" + +from __future__ import annotations + +import torch + +from invokeai.backend.model_manager.taxonomy import WanVariantType + + +def get_spatial_scale_factor(variant: WanVariantType) -> int: + """Return the VAE spatial downsampling factor for a Wan variant.""" + if variant == WanVariantType.TI2V_5B: + return 16 + return 8 # A14B and any future single-expert variant default to standard Wan VAE. 
+ + +def get_default_latent_channels(variant: WanVariantType) -> int: + """Return the default latent-channel count for a Wan variant. + + Use the actual transformer ``in_channels`` from the loaded model when + possible; this helper is for cases where we need the count before the + transformer is on device (e.g. building the noise tensor before entering + the model-on-device context). + """ + if variant == WanVariantType.TI2V_5B: + return 48 + return 16 + + +def make_noise( + *, + batch_size: int, + latent_channels: int, + height: int, + width: int, + spatial_scale_factor: int, + device: torch.device, + dtype: torch.dtype, + seed: int, +) -> torch.Tensor: + """Generate Wan-shaped noise: ``[B, C, 1, H/s, W/s]``. + + Mirrors Anima's ``_get_noise``: noise is generated on CPU (deterministic + across CUDA / ROCm / MPS) and moved to ``device`` afterwards. + """ + return torch.randn( + batch_size, + latent_channels, + 1, # T = 1 for image generation + height // spatial_scale_factor, + width // spatial_scale_factor, + device="cpu", + dtype=torch.float32, + generator=torch.Generator(device="cpu").manual_seed(seed), + ).to(device=device, dtype=dtype) diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index fb2a1ce946a..cb4f68f8f8d 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -100,6 +100,7 @@ export const zBaseModelType = z.enum([ 'z-image', 'external', 'anima', + 'wan', 'unknown', ]); export type BaseModelType = z.infer; @@ -114,6 +115,7 @@ export const zMainModelBase = z.enum([ 'qwen-image', 'z-image', 'anima', + 'wan', ]); type MainModelBase = z.infer; export const isMainModelBase = (base: unknown): base is MainModelBase => zMainModelBase.safeParse(base).success; @@ -144,6 +146,7 @@ export type ModelType = z.infer; export const zSubModelType = z.enum([ 'unet', 'transformer', + 'transformer_2', 
'text_encoder', 'text_encoder_2', 'text_encoder_3', @@ -163,6 +166,7 @@ export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']); export const zFlux2VariantType = z.enum(['klein_4b', 'klein_4b_base', 'klein_9b', 'klein_9b_base']); export const zZImageVariantType = z.enum(['turbo', 'zbase']); const zQwenImageVariantType = z.enum(['generate', 'edit']); +export const zWanVariantType = z.enum(['t2v_a14b', 'ti2v_5b']); export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b', 'qwen3_06b']); export const zAnyModelVariant = z.union([ zModelVariantType, @@ -171,6 +175,7 @@ export const zAnyModelVariant = z.union([ zFlux2VariantType, zZImageVariantType, zQwenImageVariantType, + zWanVariantType, zQwen3VariantType, ]); export type AnyModelVariant = z.infer; diff --git a/invokeai/frontend/web/src/features/nodes/types/constants.ts b/invokeai/frontend/web/src/features/nodes/types/constants.ts index 9da499ab91c..7383629eb84 100644 --- a/invokeai/frontend/web/src/features/nodes/types/constants.ts +++ b/invokeai/frontend/web/src/features/nodes/types/constants.ts @@ -57,6 +57,7 @@ export const FIELD_COLORS: { [key: string]: string } = { CogView4MainModelField: 'teal.500', ZImageMainModelField: 'teal.500', AnimaMainModelField: 'teal.500', + WanMainModelField: 'teal.500', SDXLMainModelField: 'teal.500', SDXLRefinerModelField: 'teal.500', SpandrelImageToImageModelField: 'teal.500', diff --git a/invokeai/frontend/web/src/features/parameters/types/constants.ts b/invokeai/frontend/web/src/features/parameters/types/constants.ts index a3ffa24cc64..1674c16009e 100644 --- a/invokeai/frontend/web/src/features/parameters/types/constants.ts +++ b/invokeai/frontend/web/src/features/parameters/types/constants.ts @@ -49,6 +49,10 @@ export const CLIP_SKIP_MAP: { [key in BaseModelType]?: { maxClip: number; marker maxClip: 0, markers: [], }, + wan: { + maxClip: 0, + markers: [], + }, }; /** diff --git a/tests/app/invocations/test_wan_denoise.py 
b/tests/app/invocations/test_wan_denoise.py new file mode 100644 index 00000000000..e8fe15d5932 --- /dev/null +++ b/tests/app/invocations/test_wan_denoise.py @@ -0,0 +1,482 @@ +"""CPU-only integration tests for ``WanDenoiseInvocation``. + +These tests substitute a synthetic transformer (no weights) for the real +``WanTransformer3DModel`` so the denoise loop's shape-handling, scheduler +integration, CFG branch, and step-callback wiring can be exercised on a CPU +runner. End-to-end tests against real Wan checkpoints are gated behind +``INVOKEAI_HEAVY_TESTS=1`` and require a working CUDA install. +""" + +from __future__ import annotations + +import os +from contextlib import contextmanager +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest +import torch +import torch.nn as nn + +from invokeai.app.invocations.fields import WanConditioningField +from invokeai.app.invocations.model import WanTransformerField +from invokeai.app.invocations.wan_denoise import WanDenoiseInvocation +from invokeai.backend.model_manager.taxonomy import WanVariantType +from invokeai.backend.stable_diffusion.diffusion.conditioning_data import ( + ConditioningFieldData, + WanConditioningInfo, +) + + +class _ZeroTransformer(nn.Module): + """Stand-in for ``WanTransformer3DModel``. + + Returns ``torch.zeros_like(hidden_states)`` so the flow-matching scheduler + treats every step as a no-op velocity. After N steps the latents equal the + initial noise — a useful invariant for shape correctness. + + ``label`` lets dual-expert tests record which expert was invoked. 
+ """ + + def __init__(self, label: str = "single") -> None: + super().__init__() + self.dtype = torch.float32 + self.label = label + self.calls: list[tuple[int, ...]] = [] + self.timesteps_seen: list[float] = [] + + def forward( # noqa: D401 — match diffusers signature + self, + hidden_states: torch.Tensor, + timestep: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_kwargs=None, + return_dict: bool = True, + ): + # Record the call so assertions can verify shape contracts. + self.calls.append( + ( + tuple(hidden_states.shape), + tuple(timestep.shape), + tuple(encoder_hidden_states.shape), + ) + ) + # Record the timestep (t.expand(B) → take first element). + self.timesteps_seen.append(float(timestep.flatten()[0].item())) + out = torch.zeros_like(hidden_states) + if return_dict: + return type("Out", (), {"sample": out}) + return (out,) + + +@contextmanager +def _model_on_device_ctx(model: nn.Module): + yield (None, model) + + +def _make_loaded_model(model: nn.Module) -> MagicMock: + """Mock ``LoadedModel`` exposing only the methods the denoise loop touches.""" + loaded = MagicMock() + loaded.model_on_device = lambda: _model_on_device_ctx(model) + return loaded + + +def _build_context( + transformer: nn.Module, + *, + variant: WanVariantType, + model_root: Path, + pos_cond: WanConditioningInfo, + neg_cond: WanConditioningInfo | None, + transformer_low: nn.Module | None = None, +) -> MagicMock: + """Build a MagicMock InvocationContext sufficient for ``_run_diffusion``. + + When ``transformer_low`` is provided, ``context.models.load`` routes the + request based on the ``ModelIdentifierField.submodel_type`` so dual-expert + code paths see two distinct loaded models. 
+ """ + config = MagicMock() + config.variant = variant + config.format = "diffusers" + + context = MagicMock() + context.models.get_config.return_value = config + context.models.get_absolute_path.return_value = model_root + + def _load(model_id) -> MagicMock: + submodel_type = getattr(model_id, "submodel_type", None) + if transformer_low is not None and str(submodel_type) == "SubModelType.Transformer2": + return _make_loaded_model(transformer_low) + return _make_loaded_model(transformer) + + context.models.load.side_effect = _load + + def _load_conditioning(name: str) -> ConditioningFieldData: + if name == "pos": + return ConditioningFieldData(conditionings=[pos_cond]) + if name == "neg" and neg_cond is not None: + return ConditioningFieldData(conditionings=[neg_cond]) + raise KeyError(name) + + context.conditioning.load.side_effect = _load_conditioning + context.util.signal_progress = MagicMock() + context.util.sd_step_callback = MagicMock() + context.logger = MagicMock() + return context + + +def _make_conditioning(seq_len: int = 226, hidden: int = 4096) -> WanConditioningInfo: + return WanConditioningInfo( + prompt_embeds=torch.zeros(seq_len, hidden), + prompt_attention_mask=None, + ) + + +def _make_invocation( + transformer_field: WanTransformerField, + pos_field: WanConditioningField, + neg_field: WanConditioningField | None, + *, + width: int, + height: int, + steps: int, + guidance_scale: float, + guidance_scale_low_noise: float | None = None, +) -> WanDenoiseInvocation: + return WanDenoiseInvocation( + id="test", + transformer=transformer_field, + positive_conditioning=pos_field, + negative_conditioning=neg_field, + width=width, + height=height, + steps=steps, + guidance_scale=guidance_scale, + guidance_scale_low_noise=guidance_scale_low_noise, + seed=42, + ) + + +@pytest.fixture +def fake_model_root(): + """A directory layout the denoise helpers can read. 
+ + No ``scheduler/`` subfolder, so the scheduler falls back to defaults — that + keeps the test self-contained. + """ + with TemporaryDirectory() as tmp: + yield Path(tmp) + + +@pytest.fixture(autouse=True) +def _force_cpu(monkeypatch): + """Pin TorchDevice to CPU + float32 for deterministic, GPU-free tests.""" + from invokeai.backend.util.devices import TorchDevice + + monkeypatch.setattr(TorchDevice, "choose_torch_device", classmethod(lambda cls: torch.device("cpu"))) + monkeypatch.setattr( + TorchDevice, "choose_bfloat16_safe_dtype", classmethod(lambda cls, device=None: torch.float32) + ) + + +def _wan_transformer_field(*, dual: bool = False, boundary_ratio: float = 0.875) -> WanTransformerField: + """Build a WanTransformerField. With ``dual=True`` a low-noise expert slot + is also populated so the denoise loop exercises the MoE swap path.""" + base_id = { + "key": "wan-test", + "name": "wan-test", + "base": "wan", + "type": "main", + "hash": "h", + } + field_kwargs: dict = { + "transformer": {**base_id, "submodel_type": "transformer"}, + "boundary_ratio": boundary_ratio, + } + if dual: + field_kwargs["transformer_low_noise"] = {**base_id, "submodel_type": "transformer_2"} + return WanTransformerField(**field_kwargs) + + +class TestWanDenoiseShapes: + """Verify the denoise loop runs end-to-end on CPU for both variants.""" + + @pytest.mark.parametrize( + "variant,latent_channels,scale,height,width", + [ + (WanVariantType.T2V_A14B, 16, 8, 64, 64), + (WanVariantType.TI2V_5B, 48, 16, 64, 64), + ], + ) + def test_run_diffusion_returns_4d_finite( + self, variant, latent_channels, scale, height, width, fake_model_root + ) -> None: + transformer = _ZeroTransformer() + pos = _make_conditioning() + ctx = _build_context( + transformer, + variant=variant, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=None, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=None, + 
width=width, + height=height, + steps=4, + guidance_scale=1.0, # disables CFG, so neg conditioning isn't required + ) + + latents = inv._run_diffusion(ctx) + + # Output is 4D [B, C, H/scale, W/scale] — temporal dim squeezed. + assert latents.ndim == 4 + assert latents.shape == (1, latent_channels, height // scale, width // scale) + assert torch.isfinite(latents).all() + + # Transformer should have been called exactly steps times. + assert len(transformer.calls) == 4 + # Hidden states are 5D with T=1. + h_shape, t_shape, ctx_shape = transformer.calls[0] + assert h_shape == (1, latent_channels, 1, height // scale, width // scale) + assert t_shape == (1,) + assert ctx_shape == (1, 226, 4096) + + # Step callback invoked once per step. + assert ctx.util.sd_step_callback.call_count == 4 + + def test_cfg_doubles_transformer_calls(self, fake_model_root) -> None: + """With cfg_scale != 1.0 and a negative prompt, each step runs the model twice.""" + transformer = _ZeroTransformer() + pos = _make_conditioning() + neg = _make_conditioning() + ctx = _build_context( + transformer, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=neg, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=WanConditioningField(conditioning_name="neg"), + width=64, + height=64, + steps=3, + guidance_scale=4.0, + ) + + inv._run_diffusion(ctx) + # 3 steps × 2 (cond + uncond) = 6 forward calls. 
+ assert len(transformer.calls) == 6 + + def test_zero_velocity_preserves_initial_noise(self, fake_model_root) -> None: + """A zero-output transformer means the flow-match step never updates latents.""" + transformer = _ZeroTransformer() + pos = _make_conditioning() + ctx = _build_context( + transformer, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=None, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=None, + width=64, + height=64, + steps=4, + guidance_scale=1.0, + ) + + latents = inv._run_diffusion(ctx) + + # Reproduce the same noise the loop would have generated and compare. + from invokeai.backend.wan.sampling_utils import make_noise + + expected = make_noise( + batch_size=1, + latent_channels=16, + height=64, + width=64, + spatial_scale_factor=8, + device=torch.device("cpu"), + dtype=torch.float32, + seed=42, + ).squeeze(2) + + assert torch.allclose(latents, expected, atol=1e-5) + + +class TestWanDenoiseDualExpert: + """Verify the A14B dual-expert MoE swap behaves correctly.""" + + def test_swap_fires_at_boundary(self, fake_model_root) -> None: + """High expert handles t >= boundary_timestep, low expert handles t < boundary_timestep.""" + high = _ZeroTransformer(label="high") + low = _ZeroTransformer(label="low") + pos = _make_conditioning() + ctx = _build_context( + high, + transformer_low=low, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=None, + ) + + # boundary_ratio=0.5 → boundary_timestep=500 (default num_train_timesteps=1000). + inv = _make_invocation( + transformer_field=_wan_transformer_field(dual=True, boundary_ratio=0.5), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=None, + width=64, + height=64, + steps=10, + guidance_scale=1.0, + ) + + inv._run_diffusion(ctx) + + # Both experts called. 
+ assert len(high.timesteps_seen) > 0, "high-noise expert never invoked" + assert len(low.timesteps_seen) > 0, "low-noise expert never invoked" + + # Every high-noise timestep is >= 500; every low-noise timestep is < 500. + for t in high.timesteps_seen: + assert t >= 500.0, f"high-noise expert saw t={t}, should be >= 500" + for t in low.timesteps_seen: + assert t < 500.0, f"low-noise expert saw t={t}, should be < 500" + + # Total steps adds up. + assert len(high.timesteps_seen) + len(low.timesteps_seen) == 10 + + def test_no_swap_when_boundary_skipped(self, fake_model_root) -> None: + """boundary_ratio=0.0 → boundary_timestep=0 → all timesteps go to high-noise expert.""" + high = _ZeroTransformer(label="high") + low = _ZeroTransformer(label="low") + pos = _make_conditioning() + ctx = _build_context( + high, + transformer_low=low, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=None, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(dual=True, boundary_ratio=0.0), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=None, + width=64, + height=64, + steps=4, + guidance_scale=1.0, + ) + + inv._run_diffusion(ctx) + + # boundary_timestep=0 → t >= 0 always → high-noise expert handles every step. + assert len(high.timesteps_seen) == 4 + assert len(low.timesteps_seen) == 0 + + def test_full_low_noise_when_boundary_at_max(self, fake_model_root) -> None: + """boundary_ratio=1.0 → boundary_timestep=1000 → almost all steps go to low-noise expert. + + With FlowMatchEuler the first timestep is exactly 1000 so the high-noise + expert handles it (>= boundary), and every subsequent timestep is < 1000. 
+ """ + high = _ZeroTransformer(label="high") + low = _ZeroTransformer(label="low") + pos = _make_conditioning() + ctx = _build_context( + high, + transformer_low=low, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=None, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(dual=True, boundary_ratio=1.0), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=None, + width=64, + height=64, + steps=4, + guidance_scale=1.0, + ) + + inv._run_diffusion(ctx) + + # First step is t==1000 → high. All later steps are < 1000 → low. + assert len(high.timesteps_seen) == 1 + assert high.timesteps_seen[0] == 1000.0 + assert len(low.timesteps_seen) == 3 + + def test_cfg_with_dual_experts_doubles_calls_per_step(self, fake_model_root) -> None: + """With negative conditioning + cfg_scale != 1, every step runs the active expert twice.""" + high = _ZeroTransformer(label="high") + low = _ZeroTransformer(label="low") + pos = _make_conditioning() + neg = _make_conditioning() + ctx = _build_context( + high, + transformer_low=low, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + pos_cond=pos, + neg_cond=neg, + ) + + inv = _make_invocation( + transformer_field=_wan_transformer_field(dual=True, boundary_ratio=0.5), + pos_field=WanConditioningField(conditioning_name="pos"), + neg_field=WanConditioningField(conditioning_name="neg"), + width=64, + height=64, + steps=6, + guidance_scale=4.0, + guidance_scale_low_noise=2.0, # Field accepted by the invocation; effect is implicit. + ) + + inv._run_diffusion(ctx) + + # Total transformer invocations: 6 steps × 2 (cond + uncond) = 12, split across experts. + total = len(high.timesteps_seen) + len(low.timesteps_seen) + assert total == 12 + + # Each unique timestep appears twice (cond + uncond) on the same expert. 
+ from collections import Counter + + high_counts = Counter(high.timesteps_seen) + low_counts = Counter(low.timesteps_seen) + assert all(v == 2 for v in high_counts.values()), high_counts + assert all(v == 2 for v in low_counts.values()), low_counts + + # And the swap actually happened — both experts saw work. + assert len(high_counts) > 0 and len(low_counts) > 0 + + +@pytest.mark.skipif( + os.environ.get("INVOKEAI_HEAVY_TESTS") != "1", + reason="End-to-end test requires real Wan weights and CUDA; opt in with INVOKEAI_HEAVY_TESTS=1", +) +class TestWanDenoiseHeavy: + """Placeholder for a real-weights smoke test once CUDA is available.""" + + def test_real_ti2v_5b_runs(self) -> None: + pytest.skip("Heavy test stub — implement once a TI2V-5B checkpoint is installable.") diff --git a/tests/backend/model_manager/configs/test_wan_main_config.py b/tests/backend/model_manager/configs/test_wan_main_config.py new file mode 100644 index 00000000000..3109b5a9767 --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_main_config.py @@ -0,0 +1,110 @@ +"""Tests for Wan 2.2 model identification (Main_Diffusers_Wan_Config).""" + +import json +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest + +from invokeai.backend.model_manager.configs.main import Main_Diffusers_Wan_Config +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, WanVariantType + + +def _write_json(path: Path, data: dict) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w") as f: + json.dump(data, f) + + +def _build_a14b_layout(root: Path) -> None: + """Synthetic on-disk layout for Wan-AI/Wan2.2-T2V-A14B: dual transformers, z_dim=16.""" + _write_json(root / "model_index.json", {"_class_name": "WanPipeline"}) + _write_json(root / "transformer" / "config.json", {"_class_name": "WanTransformer3DModel", "in_channels": 16}) + _write_json(root / "transformer_2" / "config.json", 
{"_class_name": "WanTransformer3DModel", "in_channels": 16}) + _write_json(root / "vae" / "config.json", {"_class_name": "AutoencoderKLWan", "z_dim": 16}) + + +def _build_ti2v_5b_layout(root: Path) -> None: + """Synthetic on-disk layout for Wan-AI/Wan2.2-TI2V-5B: single transformer, z_dim=48.""" + _write_json(root / "model_index.json", {"_class_name": "WanImageToVideoPipeline"}) + _write_json(root / "transformer" / "config.json", {"_class_name": "WanTransformer3DModel", "in_channels": 48}) + _write_json(root / "vae" / "config.json", {"_class_name": "AutoencoderKLWan", "z_dim": 48}) + + +def _build_overrides(model_path: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(model_path), + "file_size": 0, + "name": name, + "source": str(model_path), + "source_type": "path", + } + + +def _make_mod(model_path: Path) -> MagicMock: + mod = MagicMock() + mod.path = model_path + return mod + + +class TestWanDiffusersIdentification: + """Wan diffusers probe: variant detection from transformer / VAE / dir layout.""" + + def test_a14b_detected_from_dual_transformer(self) -> None: + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-T2V-A14B" + _build_a14b_layout(root) + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "A14B")) + + assert cfg.base == BaseModelType.Wan + assert cfg.format == ModelFormat.Diffusers + assert cfg.variant == WanVariantType.T2V_A14B + assert cfg.has_dual_expert is True + + def test_ti2v_5b_detected_from_z_dim(self) -> None: + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-TI2V-5B" + _build_ti2v_5b_layout(root) + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "TI2V-5B")) + + assert cfg.variant == WanVariantType.TI2V_5B + assert cfg.has_dual_expert is False + + def test_filename_heuristic_when_vae_config_missing(self) -> None: + """When ``vae/config.json`` is missing, fall back to the directory name.""" + with 
TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-TI2V-5B" + _write_json(root / "model_index.json", {"_class_name": "WanPipeline"}) + _write_json(root / "transformer" / "config.json", {"_class_name": "WanTransformer3DModel"}) + # No vae/config.json — single-transformer + dirname containing "5b" → TI2V-5B. + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "TI2V-5B")) + + assert cfg.variant == WanVariantType.TI2V_5B + + def test_explicit_variant_override_takes_precedence(self) -> None: + with TemporaryDirectory() as tmp: + root = Path(tmp) / "wan-something" + _build_a14b_layout(root) + overrides = _build_overrides(root, "Custom A14B") + overrides["variant"] = WanVariantType.TI2V_5B # Explicit override. + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), overrides) + assert cfg.variant == WanVariantType.TI2V_5B + # has_dual_expert is still detected from disk; the override only forces variant. + assert cfg.has_dual_expert is True + + def test_rejects_non_wan_pipeline(self) -> None: + """A model_index.json that isn't a Wan class name must not match.""" + from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError + + with TemporaryDirectory() as tmp: + root = Path(tmp) / "not-wan" + _write_json(root / "model_index.json", {"_class_name": "FluxPipeline"}) + + with pytest.raises(NotAMatchError): + Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "fake")) diff --git a/tests/backend/model_manager/test_wan_default_settings.py b/tests/backend/model_manager/test_wan_default_settings.py new file mode 100644 index 00000000000..ff66cf4f067 --- /dev/null +++ b/tests/backend/model_manager/test_wan_default_settings.py @@ -0,0 +1,25 @@ +"""Tests for Wan 2.2 default settings.""" + +from invokeai.backend.model_manager.configs.main import MainModelDefaultSettings +from invokeai.backend.model_manager.taxonomy import BaseModelType, WanVariantType + + 
+class TestWanDefaultSettings: + def test_a14b_defaults(self) -> None: + s = MainModelDefaultSettings.from_base(BaseModelType.Wan, WanVariantType.T2V_A14B) + assert s is not None + assert s.steps == 40 + assert s.cfg_scale == 4.0 + assert s.width == 1024 + assert s.height == 1024 + + def test_ti2v_5b_defaults(self) -> None: + s = MainModelDefaultSettings.from_base(BaseModelType.Wan, WanVariantType.TI2V_5B) + assert s is not None + assert s.steps == 30 + assert s.cfg_scale == 5.0 + + def test_no_variant_falls_back_to_a14b_settings(self) -> None: + s = MainModelDefaultSettings.from_base(BaseModelType.Wan) + assert s is not None + assert s.steps == 40 diff --git a/tests/backend/wan/__init__.py b/tests/backend/wan/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/backend/wan/test_sampling_utils.py b/tests/backend/wan/test_sampling_utils.py new file mode 100644 index 00000000000..a1d27a53f7e --- /dev/null +++ b/tests/backend/wan/test_sampling_utils.py @@ -0,0 +1,79 @@ +"""Tests for Wan 2.2 sampling utilities.""" + +import torch + +from invokeai.backend.model_manager.taxonomy import WanVariantType +from invokeai.backend.wan.sampling_utils import ( + get_default_latent_channels, + get_spatial_scale_factor, + make_noise, +) + + +class TestVariantConstants: + def test_a14b_uses_8x_spatial(self) -> None: + assert get_spatial_scale_factor(WanVariantType.T2V_A14B) == 8 + + def test_ti2v_5b_uses_16x_spatial(self) -> None: + assert get_spatial_scale_factor(WanVariantType.TI2V_5B) == 16 + + def test_a14b_default_channels(self) -> None: + assert get_default_latent_channels(WanVariantType.T2V_A14B) == 16 + + def test_ti2v_5b_default_channels(self) -> None: + assert get_default_latent_channels(WanVariantType.TI2V_5B) == 48 + + +class TestMakeNoise: + def test_a14b_shape_at_1024(self) -> None: + noise = make_noise( + batch_size=1, + latent_channels=16, + height=1024, + width=1024, + spatial_scale_factor=8, + device=torch.device("cpu"), + 
dtype=torch.bfloat16, + seed=42, + ) + assert noise.shape == (1, 16, 1, 128, 128) + assert noise.dtype == torch.bfloat16 + + def test_ti2v_shape_at_1024(self) -> None: + noise = make_noise( + batch_size=1, + latent_channels=48, + height=1024, + width=1024, + spatial_scale_factor=16, + device=torch.device("cpu"), + dtype=torch.bfloat16, + seed=42, + ) + assert noise.shape == (1, 48, 1, 64, 64) + + def test_seed_is_deterministic(self) -> None: + kwargs = dict( + batch_size=1, + latent_channels=16, + height=256, + width=256, + spatial_scale_factor=8, + device=torch.device("cpu"), + dtype=torch.float32, + seed=123, + ) + a = make_noise(**kwargs) + b = make_noise(**kwargs) + assert torch.allclose(a, b) + + def test_seed_changes_output(self) -> None: + a = make_noise( + batch_size=1, latent_channels=16, height=256, width=256, spatial_scale_factor=8, + device=torch.device("cpu"), dtype=torch.float32, seed=1, + ) + b = make_noise( + batch_size=1, latent_channels=16, height=256, width=256, spatial_scale_factor=8, + device=torch.device("cpu"), dtype=torch.float32, seed=2, + ) + assert not torch.allclose(a, b) From ad4a31ea47a7dde20aad7be131636cd843febd01 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 9 May 2026 11:01:54 -0400 Subject: [PATCH 02/12] feat(model): Wan 2.2 Phase 3 + tokenizer-load fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 3 adds standalone VAE and UMT5-XXL encoder configs so users can run GGUF-quantized Wan transformers (Phase 4) without installing the full ~30 GB Diffusers pipeline. VAE configs: - VAE_Checkpoint_Wan_Config + VAE_Diffusers_Wan_Config (16-channel A14B vs 48-channel TI2V-5B, distinguished by decoder.conv_in z_dim). - 16-channel files share the AutoencoderKLWan architecture with Qwen Image; disambiguated via filename heuristic ("wan" in name -> Wan, otherwise -> Qwen Image). Mirror exclusion in QwenImage's probe. - VAELoader gets a Wan branch that builds AutoencoderKLWan(z_dim=...) 
via init_empty_weights, mirroring the QwenImage single-file pattern. - Existing standard VAE probe excludes both QwenImage- and Wan-style state dicts. UMT5-XXL encoder: - New ModelType.WanT5Encoder + ModelFormat.WanT5Encoder. - WanT5Encoder_WanT5Encoder_Config probes the diffusers folder layout (text_encoder/config.json with model_type=umt5, or flat layout with config.json at root). Refuses full Wan pipelines. - WanT5EncoderLoader handles both layouts and loads UMT5EncoderModel + AutoTokenizer. Component-source plumbing: - WanModelLoaderInvocation now exposes wan_t5_encoder_model and component_source pickers (mirrors QwenImage pattern). Resolution order: standalone > main (if Diffusers) > component_source. Required when the main model is a single-file format in Phase 4. Bug fix in wan_text_encoder: - Tokenizer was loading via AutoTokenizer.from_pretrained() directly, which fails for nested layouts where files live in /tokenizer/. Now routed through the model cache so the registered loaders handle layout differences correctly. Frontend: - New type guards (isWanVAEModelConfig, isWanT5EncoderModelConfig, isWanMainModelConfig, isWanDiffusersMainModelConfig) and hooks/ selectors (useWanVAEModels, useWanT5EncoderModels, useWanDiffusersModels). New zSubModelType / zModelType / zModelFormat enum entries for transformer_2 and wan_t5_encoder. Tests: - 16 new tests covering z_dim detection, VAE checkpoint/diffusers probes, the bidirectional Qwen-vs-Wan filename deferral, and the UMT5 encoder probe (nested + flat + T5 + full-pipeline rejection). - Total Wan test count: 41 passing, 1 heavy-test placeholder skipped. - Full config test suite (63 tests) still passes. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): unbreak frontend lint after Wan additions Five issues turned up running `make frontend-lint`: 1. wan_denoise.py used `from __future__ import annotations`, which made the `invoke()` return annotation a string ('LatentsOutput'). 
The InvocationRegistry's `get_output_annotation()` returns the raw annotation, so OpenAPI generation crashed with `'str' object has no attribute '__name__'`. Removed the future-import and added `Any` to the typing imports. 2. ModelRecordChanges.variant didn't list WanVariantType, so the generated schema's install/update endpoints rejected `t2v_a14b` and `ti2v_5b`. Added it. 3. Regenerated frontend/web/src/services/api/schema.ts from the live backend so it now includes BaseModelType.wan, ModelType.wan_t5_encoder, SubModelType.transformer_2, ModelFormat.wan_t5_encoder, the Wan variants, all Wan invocation types and their conditioning/transformer field types. 4. modelManagerV2/models.ts: added `wan_t5_encoder` to the category map, `wan` to the base color/long-name/short-name maps, the two Wan variants to the variant-name map, and `wan_t5_encoder` to the format-name map. 5. ModelManagerPanel/ModelFormatBadge.tsx: added `wan_t5_encoder` to FORMAT_NAME_MAP and FORMAT_COLOR_MAP. `make frontend-lint` now passes cleanly (tsc, dpdm, eslint, prettier). All 41 Wan Python tests still pass. Co-Authored-By: Claude Opus 4.7 (1M context) chore(wan): drop unused FE exports flagged by knip These were forward-compatibility wiring for Phase 9 (the FE graph builder) that has no consumers yet; knip rightly flagged them. Removed or de-exported. They'll come back when the graph builder lands and needs them. - common.ts: zWanVariantType drops `export` (still used internally by zAnyModelVariant). - types.ts: drop isWanMainModelConfig, isWanDiffusersMainModelConfig, isWanVAEModelConfig (no callers). The remaining isWanT5EncoderModelConfig is used by models.ts. WanT5EncoderModelConfig type drops `export` (still used as the type guard's narrowing target). - modelsByType.ts: drop the six unused useWan*/selectWan* hooks + selectors and their type-guard imports. `make frontend-lint` (tsc + dpdm + eslint + prettier + knip) now green. 
Co-Authored-By: Claude Opus 4.7 (1M context) docs(wan): use *-Diffusers HF repo names in plan The Wan-AI org publishes two flavours of each release: * Wan-AI/Wan2.2-{TI2V-5B,T2V-A14B,I2V-A14B} ← upstream native * Wan-AI/Wan2.2-{TI2V-5B,T2V-A14B,I2V-A14B}-Diffusers ← convertible The native release has _class_name=WanModel in config.json and ships weights flat at the repo root with no transformer/, vae/, text_encoder/ subdirs. It is not loadable by Diffusers' WanPipeline.from_pretrained. Update plan doc to reference the -Diffusers repos throughout (probe notes, starter-model entries) so the plumbing path matches what the Diffusers loader actually expects. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): accept 0 as 'unset' sentinel for guidance_scale_low_noise The frontend renders Optional[float] inputs with default 0 in the numeric input rather than passing null/unset. Combined with ge=1.0, this caused every wan_denoise invocation to fail Pydantic validation with "Input should be greater than or equal to 1" until the user manually entered a value (or knew to leave the field disconnected). The request was rejected during validation, before invocation logging, so it never showed up in the server log either - making the failure hard to diagnose. This change relaxes the constraint to ge=0.0 and treats values below 1.0 as the "fall back to primary Guidance Scale" sentinel. The user's natural FE default (0) now works as expected. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): correct preview dimensions and colors for TI2V-5B Two bugs in the Wan branch of the diffusion step callback: 1. Wrong dimensions. The reported preview size hardcoded `* 8` for the spatial downscale ratio, but TI2V-5B's Wan2.2-VAE uses 16x. A 1024x1024 target was being announced to the FE as 512x512. 2. Wrong colors. The previous fallback for 48-channel TI2V-5B latents sliced the first 16 channels and applied the standard 16-channel Wan-VAE projection.
Those channel layouts are unrelated, so the projection produced meaningless colors. This change adds the proper Wan2.2-VAE 48-channel RGB projection matrix (and bias) from ComfyUI's Wan22 latent format, and selects the right matrix + spatial-scale by latent channel count: 16 → A14B (Wan VAE, 8x), 48 → TI2V-5B (Wan2.2-VAE, 16x). Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): honor model's _class_name when building scheduler TI2V-5B's scheduler_config.json declares _class_name=UniPCMultistepScheduler with flow_shift=5.0. The previous code hardcoded FlowMatchEulerDiscreteScheduler.from_pretrained(...), which silently constructed a default-config FlowMatch instead of the UniPC the model expects. The mismatched noise schedule manifests as soft / under-denoised faces and global graininess in the final images. Now: read scheduler_config.json, look up the named class on the diffusers module, and instantiate that class via from_pretrained. UniPC and FlowMatch share the same step()/set_timesteps()/sigmas/num_train_timesteps interfaces, so the denoise loop works transparently for either. A14B continues to use FlowMatchEulerDiscreteScheduler when its scheduler config says so (its reference is FlowMatchEuler with shift=8.0). Falls back to FlowMatchEulerDiscreteScheduler defaults when no on-disk config is available. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): match diffusers WanPipeline tokenizer length and latent dtype Two divergences from the Diffusers reference that were hurting image quality (soft / grainy / distorted faces at default settings): 1. Tokenizer max_sequence_length was 226 in wan_text_encoder, but the model was trained with 512-token sequences. The upstream native config.json has text_len: 512, and Diffusers' WanPipeline.__call__ default is 512 (overriding _get_t5_prompt_embeds's stale 226 default). Wan's cross-attention sees padded zeros past the prompt's actual length but expects to be looking at a 512-position context window. 2.
Latents were stored in bf16 throughout the denoise loop. Diffusers' WanPipeline.prepare_latents explicitly uses dtype=torch.float32 and only casts to the transformer's dtype right at the forward call: latent_model_input = latents.to(transformer_dtype) Storing in bf16 between steps accumulates ~40 steps of bf16 quantization on the scheduler's small per-step deltas. Now latent_dtype = torch.float32 throughout, with a per-step cast for the transformer forward pass. Co-Authored-By: Claude Opus 4.7 (1M context) chore(wan): add diffusers reference comparison script scripts/wan_diffusers_reference.py runs a Diffusers-format Wan 2.2 checkpoint directly via WanPipeline.from_pretrained, with the same arguments InvokeAI's wan_denoise uses. Use to A/B against InvokeAI output when image quality is questionable. Defaults to enable_model_cpu_offload so the script fits on 16 GB cards where the full pipeline (transformer + UMT5-XXL + VAE) would otherwise OOM. --offload {model,sequential,none} controls the strategy. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- WAN_2_2_IMPLEMENTATION.md | 12 +- invokeai/app/invocations/wan_denoise.py | 77 +- invokeai/app/invocations/wan_model_loader.py | 89 +- invokeai/app/invocations/wan_text_encoder.py | 41 +- .../model_records/model_records_base.py | 2 + invokeai/app/util/step_callback.py | 84 +- .../backend/model_manager/configs/factory.py | 11 + invokeai/backend/model_manager/configs/vae.py | 128 ++- .../model_manager/configs/wan_t5_encoder.py | 84 ++ .../model_manager/load/model_loaders/vae.py | 31 + .../model_manager/load/model_loaders/wan.py | 63 +- invokeai/backend/model_manager/taxonomy.py | 2 + .../web/src/features/modelManagerV2/models.ts | 13 + .../ModelManagerPanel/ModelFormatBadge.tsx | 2 + .../web/src/features/nodes/types/common.ts | 4 +- .../frontend/web/src/services/api/schema.ts | 868 +++++++++++++++++- .../frontend/web/src/services/api/types.ts | 5 + scripts/wan_diffusers_reference.py | 85 ++ .../configs/test_wan_t5_encoder_config.py | 100 ++ .../configs/test_wan_vae_config.py | 173 ++++ 20 files changed, 1754 insertions(+), 120 deletions(-) create mode 100644 invokeai/backend/model_manager/configs/wan_t5_encoder.py create mode 100644 scripts/wan_diffusers_reference.py create mode 100644 tests/backend/model_manager/configs/test_wan_t5_encoder_config.py create mode 100644 tests/backend/model_manager/configs/test_wan_vae_config.py diff --git a/WAN_2_2_IMPLEMENTATION.md b/WAN_2_2_IMPLEMENTATION.md index 2899865d77f..1ef71daaa85 100644 --- a/WAN_2_2_IMPLEMENTATION.md +++ b/WAN_2_2_IMPLEMENTATION.md @@ -146,7 +146,7 @@ For Phase 1 (single transformer): ### Open questions -- Does `WanPipeline` use `FlowMatchEulerDiscreteScheduler`? Confirm against `Wan-AI/Wan2.2-TI2V-5B/scheduler/scheduler_config.json`. +- Does `WanPipeline` use `FlowMatchEulerDiscreteScheduler`? Confirm against `Wan-AI/Wan2.2-TI2V-5B-Diffusers/scheduler/scheduler_config.json`. - New `WanT5EncoderConfig` rather than reuse of `T5Encoder_T5Encoder_Config`? 
**Yes** — UMT5-XXL is not bit-compatible with T5-XXL. See Phase 3. - Does `WanTransformer3DModel` accept attention mask through `attention_kwargs`? @@ -496,16 +496,16 @@ Inpaint = image-to-image with denoise mask. `RectifiedFlowInpaintExtension` alre - `invokeai/backend/model_manager/starter_models.py` — append `# region Wan` block: ```python wan_t5_encoder = StarterModel(name="Wan T5 Encoder (UMT5-XXL)", - base=BaseModelType.Any, source="Wan-AI/Wan2.2-T2V-A14B::text_encoder+tokenizer", + base=BaseModelType.Any, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::text_encoder+tokenizer", type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder, ...) wan_vae = StarterModel(name="Wan VAE", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B::vae/diffusion_pytorch_model.safetensors", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::vae/diffusion_pytorch_model.safetensors", type=ModelType.VAE, format=ModelFormat.Checkpoint, ...) wan_vae_2_2 = StarterModel(name="Wan2.2 VAE", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B::vae/...", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B-Diffusers::vae/...", type=ModelType.VAE, ...) wan_t2v_a14b = StarterModel(name="Wan 2.2 T2V A14B", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers", type=ModelType.Main, variant=WanVariantType.T2V_A14B, ...) wan_t2v_a14b_high_q4 = StarterModel(name="Wan 2.2 T2V A14B High Noise (Q4_K_M)", base=BaseModelType.Wan, @@ -513,7 +513,7 @@ Inpaint = image-to-image with denoise mask. `RectifiedFlowInpaintExtension` alre ..., dependencies=[wan_t5_encoder, wan_vae]) wan_t2v_a14b_low_q4 = ... wan_ti2v_5b = StarterModel(name="Wan 2.2 TI2V 5B", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B", + base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B-Diffusers", variant=WanVariantType.TI2V_5B, ...) ``` - Verify each `source` URL exists before merge. 
diff --git a/invokeai/app/invocations/wan_denoise.py b/invokeai/app/invocations/wan_denoise.py index 57c69675f3a..787b0674718 100644 --- a/invokeai/app/invocations/wan_denoise.py +++ b/invokeai/app/invocations/wan_denoise.py @@ -22,11 +22,9 @@ )[0] """ -from __future__ import annotations - from contextlib import ExitStack from pathlib import Path -from typing import Callable, Iterator, Optional +from typing import Any, Callable, Iterator, Optional import torch import torchvision.transforms as tv_transforms @@ -183,9 +181,10 @@ class WanDenoiseInvocation(BaseInvocation): ) guidance_scale_low_noise: Optional[float] = InputField( default=None, - ge=1.0, + ge=0.0, description="Optional separate CFG scale for the low-noise expert (Wan 2.2 A14B only). " - "If unset, the primary 'Guidance Scale' is reused. Ignored for TI2V-5B.", + "Values below 1.0 (including 0) fall back to the primary 'Guidance Scale'. " + "Ignored for TI2V-5B.", title="Guidance Scale (Low Noise)", ) width: int = InputField(default=1024, multiple_of=8, description="Width of the generated image.") @@ -240,10 +239,17 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: sigmas = sigmas[start_idx : end_idx + 1] total_steps = len(timesteps) + # Latents stay in fp32 throughout the denoise loop to avoid accumulating + # bf16 quantization across the scheduler's small per-step deltas. We + # cast to bf16 only when calling the transformer, matching Diffusers' + # WanPipeline (which calls ``prepare_latents(..., dtype=torch.float32)`` + # then ``latent_model_input = latents.to(transformer_dtype)``). + latent_dtype = torch.float32 + # Load init latents (img2img) and convert 4D → 5D. 
init_latents_5d: torch.Tensor | None = None if self.latents is not None: - loaded = context.tensors.load(self.latents.latents_name).to(device=device, dtype=inference_dtype) + loaded = context.tensors.load(self.latents.latents_name).to(device=device, dtype=latent_dtype) if loaded.ndim == 4: loaded = loaded.unsqueeze(2) init_latents_5d = loaded @@ -264,7 +270,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: width=self.width, spatial_scale_factor=spatial_scale, device=device, - dtype=inference_dtype, + dtype=latent_dtype, seed=self.seed, ) @@ -324,19 +330,22 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: # low-noise below. Single-transformer models always use HIGH. if low_info is not None and float(t) < float(boundary_timestep): active_label = _ExpertSwapper.LOW - active_cfg = ( - self.guidance_scale_low_noise - if self.guidance_scale_low_noise is not None - else self.guidance_scale - ) + # Treat None or values below 1.0 (incl. the FE's default 0) + # as "use the primary guidance_scale". + low_cfg = self.guidance_scale_low_noise + active_cfg = low_cfg if (low_cfg is not None and low_cfg >= 1.0) else self.guidance_scale else: active_label = _ExpertSwapper.HIGH active_cfg = self.guidance_scale transformer = swapper.get(active_label) + # Cast latents to the transformer's dtype only for the forward + # pass; keep the scheduler-level latents in fp32. 
+ latent_model_input = latents.to(dtype=inference_dtype) + noise_pred_cond = transformer( - hidden_states=latents, + hidden_states=latent_model_input, timestep=timestep, encoder_hidden_states=pos_cond.prompt_embeds.unsqueeze(0), attention_kwargs=None, @@ -345,7 +354,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: if do_cfg and neg_cond is not None: noise_pred_uncond = transformer( - hidden_states=latents, + hidden_states=latent_model_input, timestep=timestep, encoder_hidden_states=neg_cond.prompt_embeds.unsqueeze(0), attention_kwargs=None, @@ -379,19 +388,41 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: return latents.squeeze(2) def _build_scheduler(self, context: InvocationContext, device: torch.device): - """Construct ``FlowMatchEulerDiscreteScheduler`` for this run. - - Loads the model's on-disk scheduler config when available so per-model - ``shift`` settings are honoured; falls back to defaults otherwise. + """Construct the scheduler matching the model's on-disk ``scheduler_config.json``. + + Wan model variants ship different schedulers — e.g. TI2V-5B uses + ``UniPCMultistepScheduler`` with ``flow_shift=5.0``, while the + standard A14B reference uses ``FlowMatchEulerDiscreteScheduler``. + We dispatch on ``_class_name`` so the noise schedule matches what the + model was trained against. Falls back to ``FlowMatchEulerDiscreteScheduler`` + defaults when no on-disk config is available. """ + import json + + import diffusers from diffusers import FlowMatchEulerDiscreteScheduler scheduler_dir = _scheduler_path_for_transformer(context, self.transformer) - if scheduler_dir is not None: - return FlowMatchEulerDiscreteScheduler.from_pretrained( - str(scheduler_dir), local_files_only=True - ) - return FlowMatchEulerDiscreteScheduler() + if scheduler_dir is None: + return FlowMatchEulerDiscreteScheduler() + + # Read the on-disk class name and instantiate that class. 
Diffusers' + # SchedulerMixin.from_pretrained does class dispatch internally, but + # only when called from the abstract base; calling a concrete subclass + # silently builds the wrong type. Resolve it explicitly. + config_path = scheduler_dir / "scheduler_config.json" + try: + with config_path.open("r", encoding="utf-8") as f: + cfg = json.load(f) + class_name = cfg.get("_class_name") + scheduler_cls = getattr(diffusers, class_name, None) if class_name else None + except (OSError, json.JSONDecodeError): + scheduler_cls = None + + if scheduler_cls is None: + scheduler_cls = FlowMatchEulerDiscreteScheduler + + return scheduler_cls.from_pretrained(str(scheduler_dir), local_files_only=True) def _load_conditioning( self, diff --git a/invokeai/app/invocations/wan_model_loader.py b/invokeai/app/invocations/wan_model_loader.py index 48d049ceb85..38a1d60705a 100644 --- a/invokeai/app/invocations/wan_model_loader.py +++ b/invokeai/app/invocations/wan_model_loader.py @@ -44,16 +44,19 @@ class WanModelLoaderOutput(BaseInvocationOutput): class WanModelLoaderInvocation(BaseInvocation): """Loads a Wan 2.2 model, outputting its submodels. - Diffusers-format only for now; the transformer(s), VAE, and UMT5-XXL encoder - are pulled from the main model's submodel folders. - - For Wan 2.2 A14B (dual-expert MoE) the loader emits both ``transformer`` (the - high-noise expert at ``transformer/``) and ``transformer_low_noise`` (the - low-noise expert at ``transformer_2/``), along with the model's recorded - ``boundary_ratio`` for the denoise loop's expert swap. - - The standalone VAE picker is forward-compatibility wiring for Phase 3 (where - it becomes required for GGUF transformers). + Components can be mixed and matched, mirroring the Qwen Image loader pattern: + + - Transformer(s) always come from the main model. For A14B that's both + ``transformer/`` (high-noise) and ``transformer_2/`` (low-noise); for + TI2V-5B it's the single ``transformer/``. 
+ - VAE: standalone Wan VAE > main (if Diffusers) > Component Source (Diffusers). + - UMT5-XXL encoder: standalone Wan T5 encoder > main (if Diffusers) > + Component Source (Diffusers). + + The Component Source slot lets users supply a Diffusers Wan main model purely + for VAE / encoder extraction when the actual transformer is in a single-file + format (GGUF in Phase 4). Together, the standalone VAE + standalone encoder + let a GGUF transformer run without a full ~30 GB Diffusers install. """ model: ModelIdentifierField = InputField( @@ -66,14 +69,35 @@ class WanModelLoaderInvocation(BaseInvocation): vae_model: Optional[ModelIdentifierField] = InputField( default=None, - description="Standalone Wan VAE model. If not set, the VAE is loaded from the main " - "model (when in Diffusers format).", + description="Standalone Wan VAE model. If not set, the VAE is loaded from the main model " + "(when in Diffusers format) or from the Component Source.", input=Input.Direct, ui_model_base=BaseModelType.Wan, ui_model_type=ModelType.VAE, title="VAE", ) + wan_t5_encoder_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="Standalone Wan UMT5-XXL encoder. If not set, the encoder is loaded from the main " + "model (when in Diffusers format) or from the Component Source.", + input=Input.Direct, + ui_model_type=ModelType.WanT5Encoder, + title="Wan T5 Encoder", + ) + + component_source: Optional[ModelIdentifierField] = InputField( + default=None, + description="Diffusers Wan main model to extract VAE and/or encoder from. " + "Use this if you don't have separate VAE/encoder models. 
" + "Ignored for any submodel that is provided separately.", + input=Input.Direct, + ui_model_base=BaseModelType.Wan, + ui_model_type=ModelType.Main, + ui_model_format=ModelFormat.Diffusers, + title="Component Source (Diffusers)", + ) + def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: main_config = context.models.get_config(self.model) main_is_diffusers = main_config.format == ModelFormat.Diffusers @@ -92,27 +116,37 @@ def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: if recorded is not None: boundary_ratio = float(recorded) - # VAE: standalone override > main (if Diffusers). + # VAE: standalone override > main (if Diffusers) > component source. if self.vae_model is not None: vae = self.vae_model.model_copy(update={"submodel_type": SubModelType.VAE}) elif main_is_diffusers: vae = self.model.model_copy(update={"submodel_type": SubModelType.VAE}) + elif self.component_source is not None: + self._validate_component_source_format(context, self.component_source) + vae = self.component_source.model_copy(update={"submodel_type": SubModelType.VAE}) else: raise ValueError( - "No source for VAE. Either set 'VAE' to a standalone Wan VAE model, " - "or use a Diffusers Wan main model." + "No source for VAE. Either set 'VAE' to a standalone Wan VAE, " + "or set 'Component Source' to a Diffusers Wan main model." ) - # Tokenizer + text encoder: only from the main model in Phase 1. - # Phase 3 will add a standalone WanT5Encoder picker so GGUF mains can run - # without a Diffusers Wan checkpoint installed. - if not main_is_diffusers: + # Tokenizer + text encoder: standalone override > main (if Diffusers) > component source. 
+ if self.wan_t5_encoder_model is not None: + tokenizer = self.wan_t5_encoder_model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + text_encoder = self.wan_t5_encoder_model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + elif main_is_diffusers: + tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + text_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + elif self.component_source is not None: + self._validate_component_source_format(context, self.component_source) + tokenizer = self.component_source.model_copy(update={"submodel_type": SubModelType.Tokenizer}) + text_encoder = self.component_source.model_copy(update={"submodel_type": SubModelType.TextEncoder}) + else: raise ValueError( - "Only Diffusers-format Wan models are supported in this build. " - "Standalone Wan T5 encoders will be supported in a future release." + "No source for Wan T5 encoder. " + "Either set 'Wan T5 Encoder' to a standalone UMT5-XXL encoder, " + "or set 'Component Source' to a Diffusers Wan main model." ) - tokenizer = self.model.model_copy(update={"submodel_type": SubModelType.Tokenizer}) - text_encoder = self.model.model_copy(update={"submodel_type": SubModelType.TextEncoder}) return WanModelLoaderOutput( transformer=WanTransformerField( @@ -123,3 +157,12 @@ def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: wan_t5_encoder=WanT5EncoderField(tokenizer=tokenizer, text_encoder=text_encoder), vae=VAEField(vae=vae), ) + + @staticmethod + def _validate_component_source_format(context: InvocationContext, model: ModelIdentifierField) -> None: + source_config = context.models.get_config(model) + if source_config.format != ModelFormat.Diffusers: + raise ValueError( + f"The Component Source model must be in Diffusers format. " + f"The selected model '{source_config.name}' is in {source_config.format.value} format." 
+ ) diff --git a/invokeai/app/invocations/wan_text_encoder.py b/invokeai/app/invocations/wan_text_encoder.py index 183195f5a21..9cbb15476e0 100644 --- a/invokeai/app/invocations/wan_text_encoder.py +++ b/invokeai/app/invocations/wan_text_encoder.py @@ -16,8 +16,12 @@ WanConditioningInfo, ) -# Matches the diffusers WanPipeline default — Wan was trained at this prompt length. -WAN_T5_MAX_SEQ_LEN = 226 +# Wan models are trained with 512-token text sequences (matches the +# upstream config.json's ``text_len: 512`` and the WanPipeline.__call__ +# default). Diffusers' ``_get_t5_prompt_embeds`` has a stale 226 default +# that gets overridden by ``__call__``; using 512 here matches the actual +# pipeline behaviour. +WAN_T5_MAX_SEQ_LEN = 512 @invocation( @@ -61,27 +65,26 @@ def invoke(self, context: InvocationContext) -> WanConditioningOutput: def _encode(self, context: InvocationContext) -> tuple[torch.Tensor, torch.Tensor | None]: from diffusers.pipelines.wan.pipeline_wan import prompt_clean - from transformers import AutoTokenizer, UMT5EncoderModel + from transformers import UMT5EncoderModel cleaned = prompt_clean(self.prompt) - # The tokenizer is small enough to load directly from disk without going - # through the model cache. - tokenizer_config = context.models.get_config(self.wan_t5_encoder.tokenizer) - tokenizer_path = context.models.get_absolute_path(tokenizer_config) - tokenizer = AutoTokenizer.from_pretrained(str(tokenizer_path), local_files_only=True) - - text_inputs = tokenizer( - [cleaned], - padding="max_length", - max_length=WAN_T5_MAX_SEQ_LEN, - truncation=True, - add_special_tokens=True, - return_attention_mask=True, - return_tensors="pt", - ) + # Tokenizer + text encoder both routed through the model cache so the + # registered loaders handle the nested-vs-flat directory layout for us + # (main-model layout: /tokenizer/ + /text_encoder/; + # standalone WanT5Encoder layout may also be flat). 
+ tokenizer_info = context.models.load(self.wan_t5_encoder.tokenizer) + with tokenizer_info.model_on_device() as (_, tokenizer): + text_inputs = tokenizer( + [cleaned], + padding="max_length", + max_length=WAN_T5_MAX_SEQ_LEN, + truncation=True, + add_special_tokens=True, + return_attention_mask=True, + return_tensors="pt", + ) - # Load the text encoder via the model cache. text_encoder_info = context.models.load(self.wan_t5_encoder.text_encoder) with text_encoder_info.model_on_device() as (_, text_encoder): assert isinstance(text_encoder, UMT5EncoderModel) diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py index e06f8f2df91..3fedd8fb07c 100644 --- a/invokeai/app/services/model_records/model_records_base.py +++ b/invokeai/app/services/model_records/model_records_base.py @@ -33,6 +33,7 @@ Qwen3VariantType, QwenImageVariantType, SchedulerPredictionType, + WanVariantType, ZImageVariantType, ) @@ -134,6 +135,7 @@ def validate_source_url(cls, v: Any) -> Optional[str]: | Flux2VariantType | ZImageVariantType | QwenImageVariantType + | WanVariantType | Qwen3VariantType ] = Field(description="The variant of the model.", default=None) prediction_type: Optional[SchedulerPredictionType] = Field( diff --git a/invokeai/app/util/step_callback.py b/invokeai/app/util/step_callback.py index cb6e7cdf1ce..9364ec9b8ce 100644 --- a/invokeai/app/util/step_callback.py +++ b/invokeai/app/util/step_callback.py @@ -179,10 +179,8 @@ ANIMA_LATENT_RGB_BIAS = [-0.1835, -0.0868, -0.3360] -# Wan 2.2 A14B uses the standard 16-channel Wan VAE (same as Anima / Qwen Image). -# Wan 2.2 TI2V-5B uses Wan2.2-VAE with 48 latent channels — for now we slice the -# first 16 channels for the preview. TODO: generate dedicated 48-channel factors via -# scripts/generate_vae_linear_approximation.py once we have a TI2V-5B model on hand. +# Wan 2.2 A14B uses the standard 16-channel Wan VAE. 
+# Factors come from ComfyUI's Wan21 latent_format (same VAE as A14B). WAN_LATENT_RGB_FACTORS = [ [-0.1299, -0.1692, 0.2932], [0.0671, 0.0406, 0.0442], @@ -204,6 +202,61 @@ WAN_LATENT_RGB_BIAS = [-0.1835, -0.0868, -0.3360] +# Wan 2.2 TI2V-5B uses Wan2.2-VAE with 48 latent channels and 16x spatial downscale. +# Factors come from ComfyUI's Wan22 latent_format. +WAN22_LATENT_RGB_FACTORS = [ + [0.0119, 0.0103, 0.0046], + [-0.1062, -0.0504, 0.0165], + [0.0140, 0.0409, 0.0491], + [-0.0813, -0.0677, 0.0607], + [0.0656, 0.0851, 0.0808], + [0.0264, 0.0463, 0.0912], + [0.0295, 0.0326, 0.0590], + [-0.0244, -0.0270, 0.0025], + [0.0443, -0.0102, 0.0288], + [-0.0465, -0.0090, -0.0205], + [0.0359, 0.0236, 0.0082], + [-0.0776, 0.0854, 0.1048], + [0.0564, 0.0264, 0.0561], + [0.0006, 0.0594, 0.0418], + [-0.0319, -0.0542, -0.0637], + [-0.0268, 0.0024, 0.0260], + [0.0539, 0.0265, 0.0358], + [-0.0359, -0.0312, -0.0287], + [-0.0285, -0.1032, -0.1237], + [0.1041, 0.0537, 0.0622], + [-0.0086, -0.0374, -0.0051], + [0.0390, 0.0670, 0.2863], + [0.0069, 0.0144, 0.0082], + [0.0006, -0.0167, 0.0079], + [0.0313, -0.0574, -0.0232], + [-0.1454, -0.0902, -0.0481], + [0.0714, 0.0827, 0.0447], + [-0.0304, -0.0574, -0.0196], + [0.0401, 0.0384, 0.0204], + [-0.0758, -0.0297, -0.0014], + [0.0568, 0.1307, 0.1372], + [-0.0055, -0.0310, -0.0380], + [0.0239, -0.0305, 0.0325], + [-0.0663, -0.0673, -0.0140], + [-0.0416, -0.0047, -0.0023], + [0.0166, 0.0112, -0.0093], + [-0.0211, 0.0011, 0.0331], + [0.1833, 0.1466, 0.2250], + [-0.0368, 0.0370, 0.0295], + [-0.3441, -0.3543, -0.2008], + [-0.0479, -0.0489, -0.0420], + [-0.0660, -0.0153, 0.0800], + [-0.0101, 0.0068, 0.0156], + [-0.0690, -0.0452, -0.0927], + [-0.0145, 0.0041, 0.0015], + [0.0421, 0.0451, 0.0373], + [0.0504, -0.0483, -0.0356], + [-0.0837, 0.0168, 0.0055], +] + +WAN22_LATENT_RGB_BIAS = [0.0317, -0.0878, -0.1388] + def sample_to_lowres_estimated_image( samples: torch.Tensor, @@ -296,12 +349,14 @@ def diffusion_step_callback( latent_rgb_factors = 
ANIMA_LATENT_RGB_FACTORS latent_rgb_bias = ANIMA_LATENT_RGB_BIAS elif base_model == BaseModelType.Wan: - latent_rgb_factors = WAN_LATENT_RGB_FACTORS - latent_rgb_bias = WAN_LATENT_RGB_BIAS - # TI2V-5B latents have 48 channels; slice the first 16 as a degraded preview - # until proper 48-channel factors are generated. - if sample.shape[-3] > 16: - sample = sample[..., :16, :, :] + # A14B (16-ch standard Wan VAE, 8x spatial) vs TI2V-5B (48-ch Wan2.2-VAE, + # 16x spatial). The latent channel count uniquely identifies the variant. + if sample.shape[-3] == 48: + latent_rgb_factors = WAN22_LATENT_RGB_FACTORS + latent_rgb_bias = WAN22_LATENT_RGB_BIAS + else: + latent_rgb_factors = WAN_LATENT_RGB_FACTORS + latent_rgb_bias = WAN_LATENT_RGB_BIAS else: raise ValueError(f"Unsupported base model: {base_model}") @@ -319,8 +374,13 @@ def diffusion_step_callback( latent_rgb_bias=latent_rgb_bias_torch, ) - width = image.width * 8 - height = image.height * 8 + # Spatial downscale ratio: 8x is the SD/SDXL/FLUX/Wan-A14B default; + # Wan TI2V-5B's Wan2.2-VAE uses 16x. 
+ spatial_scale = 8 + if base_model == BaseModelType.Wan and sample.shape[-3] == 48: + spatial_scale = 16 + width = image.width * spatial_scale + height = image.height * spatial_scale percentage = calc_percentage(intermediate_state) signal_progress("Denoising", percentage, image, (width, height)) diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index cae7c421f4d..8b0c35f98e8 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -103,6 +103,7 @@ ) from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config from invokeai.backend.model_manager.configs.text_llm import TextLLM_Diffusers_Config +from invokeai.backend.model_manager.configs.wan_t5_encoder import WanT5Encoder_WanT5Encoder_Config from invokeai.backend.model_manager.configs.textual_inversion import ( TI_File_SD1_Config, TI_File_SD2_Config, @@ -120,9 +121,11 @@ VAE_Checkpoint_SD1_Config, VAE_Checkpoint_SD2_Config, VAE_Checkpoint_SDXL_Config, + VAE_Checkpoint_Wan_Config, VAE_Diffusers_Flux2_Config, VAE_Diffusers_SD1_Config, VAE_Diffusers_SDXL_Config, + VAE_Diffusers_Wan_Config, ) from invokeai.backend.model_manager.model_on_disk import ModelOnDisk from invokeai.backend.model_manager.taxonomy import ( @@ -201,12 +204,18 @@ Annotated[VAE_Checkpoint_SDXL_Config, VAE_Checkpoint_SDXL_Config.get_tag()], Annotated[VAE_Checkpoint_FLUX_Config, VAE_Checkpoint_FLUX_Config.get_tag()], Annotated[VAE_Checkpoint_Flux2_Config, VAE_Checkpoint_Flux2_Config.get_tag()], + # IMPORTANT: VAE_Checkpoint_Wan_Config must be checked BEFORE QwenImage — + # both share the AutoencoderKLWan architecture and the Wan config relies + # on a filename heuristic to claim 16-channel files; ordering here lets + # Wan win when the filename suggests it. 
+ Annotated[VAE_Checkpoint_Wan_Config, VAE_Checkpoint_Wan_Config.get_tag()], Annotated[VAE_Checkpoint_QwenImage_Config, VAE_Checkpoint_QwenImage_Config.get_tag()], Annotated[VAE_Checkpoint_Anima_Config, VAE_Checkpoint_Anima_Config.get_tag()], # VAE - diffusers format Annotated[VAE_Diffusers_SD1_Config, VAE_Diffusers_SD1_Config.get_tag()], Annotated[VAE_Diffusers_SDXL_Config, VAE_Diffusers_SDXL_Config.get_tag()], Annotated[VAE_Diffusers_Flux2_Config, VAE_Diffusers_Flux2_Config.get_tag()], + Annotated[VAE_Diffusers_Wan_Config, VAE_Diffusers_Wan_Config.get_tag()], # ControlNet - checkpoint format Annotated[ControlNet_Checkpoint_SD1_Config, ControlNet_Checkpoint_SD1_Config.get_tag()], Annotated[ControlNet_Checkpoint_SD2_Config, ControlNet_Checkpoint_SD2_Config.get_tag()], @@ -253,6 +262,8 @@ # Qwen VL Encoder (Qwen2.5-VL multimodal encoder for Qwen Image) Annotated[QwenVLEncoder_Diffusers_Config, QwenVLEncoder_Diffusers_Config.get_tag()], Annotated[QwenVLEncoder_Checkpoint_Config, QwenVLEncoder_Checkpoint_Config.get_tag()], + # Wan T5 Encoder (UMT5-XXL for Wan 2.2) + Annotated[WanT5Encoder_WanT5Encoder_Config, WanT5Encoder_WanT5Encoder_Config.get_tag()], # TI - file format Annotated[TI_File_SD1_Config, TI_File_SD1_Config.get_tag()], Annotated[TI_File_SD2_Config, TI_File_SD2_Config.get_tag()], diff --git a/invokeai/backend/model_manager/configs/vae.py b/invokeai/backend/model_manager/configs/vae.py index 5a88cf12781..49f272147d6 100644 --- a/invokeai/backend/model_manager/configs/vae.py +++ b/invokeai/backend/model_manager/configs/vae.py @@ -40,6 +40,11 @@ def _is_qwen_image_vae(state_dict: dict[str | int, Any]) -> bool: 1. Diffusers-format encoder/decoder keys (`encoder.conv_in`, `decoder.conv_in`) 2. 5-dimensional convolution weights (3D causal convolutions vs. standard 2D conv in SD/SDXL/FLUX VAEs) 3. 16-dimensional latent space (z_dim=16) + + Note: Wan 2.2 A14B reuses the same architecture (AutoencoderKLWan with z_dim=16), + so this function returns True for both. 
Disambiguation between the two for + standalone files relies on the filename heuristic in :func:`_filename_suggests_wan` and + config registration order. """ decoder_conv_in_key = "decoder.conv_in.weight" if decoder_conv_in_key not in state_dict: @@ -52,6 +57,34 @@ def _is_qwen_image_vae(state_dict: dict[str | int, Any]) -> bool: return shape[1] == 16 +def _wan_vae_z_dim(state_dict: dict[str | int, Any]) -> int | None: + """Return ``z_dim`` for a Wan-family VAE state dict, or ``None`` if it isn't one. + + Wan-family VAEs (AutoencoderKLWan) have 5D convolution weights and a + decoder.conv_in input channel count of 16 (Wan 2.1 / A14B / Qwen Image) or + 48 (Wan 2.2 TI2V-5B's Wan2.2-VAE). + """ + decoder_conv_in_key = "decoder.conv_in.weight" + if decoder_conv_in_key not in state_dict: + return None + weight = state_dict[decoder_conv_in_key] + shape = getattr(weight, "shape", None) + if shape is None or len(shape) != 5: + return None + z = int(shape[1]) + return z if z in (16, 48) else None + + +def _filename_suggests_wan(mod: ModelOnDisk) -> bool: + """Filename heuristic to distinguish standalone Wan VAE files from Qwen Image VAEs. + + Both use the same ``AutoencoderKLWan`` architecture for 16-channel files, so the + state dict alone can't tell them apart. Filenames in the wild (community ports, + ComfyUI repacks) typically include ``wan`` for Wan releases. + """ + return "wan" in mod.path.name.lower() + + def _is_flux2_vae(state_dict: dict[str | int, Any]) -> bool: """Check if state dict is a FLUX.2 VAE (AutoencoderKLFlux2).
@@ -113,9 +146,10 @@ def _validate_looks_like_vae(cls, mod: ModelOnDisk) -> None: if _is_flux2_vae(state_dict): raise NotAMatchError("model is a FLUX.2 VAE, not a standard VAE") - # Exclude Qwen Image VAEs - they have their own config class - if _is_qwen_image_vae(state_dict): - raise NotAMatchError("model is a Qwen Image VAE, not a standard VAE") + # Exclude Qwen Image / Wan VAEs - they share the AutoencoderKLWan + # architecture and each has its own config class. + if _is_qwen_image_vae(state_dict) or _wan_vae_z_dim(state_dict) is not None: + raise NotAMatchError("model is a Wan-family VAE, not a standard VAE") @classmethod def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: @@ -215,9 +249,97 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) - if not _is_qwen_image_vae(state_dict): raise NotAMatchError("state dict does not look like a Qwen Image VAE") + # Defer to VAE_Checkpoint_Wan_Config for files whose names indicate Wan + # (both architectures are 16-channel AutoencoderKLWan and otherwise + # indistinguishable from the state dict alone). + if _filename_suggests_wan(mod): + raise NotAMatchError("filename suggests a Wan VAE, not Qwen Image") + return cls(**override_fields) +class VAE_Checkpoint_Wan_Config(Checkpoint_Config_Base, Config_Base): + """Model config for Wan 2.2 VAE checkpoint models (AutoencoderKLWan). + + Distinguishes A14B (z_dim=16, standard Wan VAE) from TI2V-5B (z_dim=48, + Wan2.2-VAE) via the input channel count of ``decoder.conv_in.weight``. + """ + + type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) + format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint) + base: Literal[BaseModelType.Wan] = Field(default=BaseModelType.Wan) + latent_channels: Literal[16, 48] = Field( + description="VAE latent channel count: 16 for A14B (standard Wan VAE) or 48 for TI2V-5B (Wan2.2-VAE)." 
+ ) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_file(mod) + + raise_for_override_fields(cls, override_fields) + + state_dict = mod.load_state_dict() + z_dim = _wan_vae_z_dim(state_dict) + if z_dim is None: + raise NotAMatchError("state dict does not look like a Wan VAE") + + # 48-channel files are unambiguously Wan2.2-VAE (TI2V-5B). 16-channel + # files are architecturally identical to Qwen Image's VAE; require the + # filename to suggest Wan to claim them, otherwise let the QwenImage + # config win. + latent_channels: int = z_dim + if latent_channels == 16 and not _filename_suggests_wan(mod): + raise NotAMatchError( + "16-channel AutoencoderKLWan VAE without 'wan' in filename — " + "deferring to Qwen Image VAE config." + ) + + explicit = override_fields.pop("latent_channels", None) + if explicit is not None: + latent_channels = int(explicit) + + return cls(**override_fields, latent_channels=latent_channels) + + +class VAE_Diffusers_Wan_Config(Diffusers_Config_Base, Config_Base): + """Model config for Wan 2.2 VAE in diffusers folder layout (AutoencoderKLWan).""" + + type: Literal[ModelType.VAE] = Field(default=ModelType.VAE) + format: Literal[ModelFormat.Diffusers] = Field(default=ModelFormat.Diffusers) + base: Literal[BaseModelType.Wan] = Field(default=BaseModelType.Wan) + latent_channels: Literal[16, 48] = Field( + default=16, + description="VAE latent channel count: 16 for A14B or 48 for TI2V-5B's Wan2.2-VAE.", + ) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_dir(mod) + + raise_for_override_fields(cls, override_fields) + + raise_for_class_name( + common_config_paths(mod.path), + {"AutoencoderKLWan"}, + ) + + # Read z_dim from the diffusers config to set latent_channels. 
+ latent_channels: int = 16 + try: + config = get_config_dict_or_raise(common_config_paths(mod.path)) + z = config.get("z_dim") + if z is not None and int(z) in (16, 48): + latent_channels = int(z) + except NotAMatchError: + pass + + explicit = override_fields.pop("latent_channels", None) + if explicit is not None: + latent_channels = int(explicit) + + return cls(**override_fields, latent_channels=latent_channels) + + def _has_anima_vae_keys(state_dict: dict[str | int, Any]) -> bool: """Check if state dict looks like an Anima QwenImage VAE (AutoencoderKLQwenImage). diff --git a/invokeai/backend/model_manager/configs/wan_t5_encoder.py b/invokeai/backend/model_manager/configs/wan_t5_encoder.py new file mode 100644 index 00000000000..efda6a551a2 --- /dev/null +++ b/invokeai/backend/model_manager/configs/wan_t5_encoder.py @@ -0,0 +1,84 @@ +"""Configurations for the UMT5-XXL text encoder used by Wan 2.2. + +Wan ships a UMT5-XXL encoder (not the more common T5-XXL). The two are not +weight-compatible — UMT5 has a different vocabulary and ``model_type``. We +register a dedicated config + ModelType so users can't accidentally wire a +FLUX/SD3-style T5-XXL into a Wan slot. + +For Phase 3 we accept the diffusers-folder layout only. Single-file UMT5 +checkpoints are uncommon; if they show up later, a checkpoint config can be +added alongside this one. 
+""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any, Literal, Self + +from pydantic import Field + +from invokeai.backend.model_manager.configs.base import Config_Base +from invokeai.backend.model_manager.configs.identification_utils import ( + NotAMatchError, + raise_for_override_fields, + raise_if_not_dir, +) +from invokeai.backend.model_manager.model_on_disk import ModelOnDisk +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType + + +def _read_text_encoder_model_type(mod: ModelOnDisk) -> str | None: + """Return ``model_type`` from the encoder's ``config.json``. + + Diffusers encoder folders may live at the root (``config.json``) or under a + ``text_encoder/`` subdirectory. UMT5-XXL sets ``model_type`` to ``"umt5"``; + a regular T5-XXL would be ``"t5"``. + """ + candidates: list[Path] = [mod.path / "text_encoder" / "config.json", mod.path / "config.json"] + for path in candidates: + if path.exists(): + try: + with path.open("r", encoding="utf-8") as f: + config = json.load(f) + except (json.JSONDecodeError, OSError): + continue + mt = config.get("model_type") + if isinstance(mt, str): + return mt.lower() + return None + + +class WanT5Encoder_WanT5Encoder_Config(Config_Base): + """UMT5-XXL encoder in diffusers folder layout. + + Accepts either: + - A directory containing ``text_encoder/`` (and typically ``tokenizer/``) — the + shape produced by ``Wan-AI/Wan2.2-T2V-A14B-Diffusers::text_encoder+tokenizer``. + - A bare ``text_encoder/`` directory whose own ``config.json`` declares + ``model_type: umt5``.
+ """ + + base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any) + type: Literal[ModelType.WanT5Encoder] = Field(default=ModelType.WanT5Encoder) + format: Literal[ModelFormat.WanT5Encoder] = Field(default=ModelFormat.WanT5Encoder) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_dir(mod) + raise_for_override_fields(cls, override_fields) + + # Refuse to claim full Wan pipelines — they should match Main_Diffusers_Wan_Config. + if (mod.path / "model_index.json").exists() or (mod.path / "transformer").exists(): + raise NotAMatchError( + "directory looks like a full Wan pipeline (model_index.json or transformer/), " + "not a standalone Wan T5 encoder" + ) + + model_type = _read_text_encoder_model_type(mod) + if model_type is None: + raise NotAMatchError("no encoder config.json found at root or text_encoder/") + if model_type != "umt5": + raise NotAMatchError(f"encoder model_type is {model_type!r}, not 'umt5'") + + return cls(**override_fields) diff --git a/invokeai/backend/model_manager/load/model_loaders/vae.py b/invokeai/backend/model_manager/load/model_loaders/vae.py index 720821f3af8..75443cd2897 100644 --- a/invokeai/backend/model_manager/load/model_loaders/vae.py +++ b/invokeai/backend/model_manager/load/model_loaders/vae.py @@ -10,6 +10,7 @@ VAE_Checkpoint_Anima_Config, VAE_Checkpoint_Config_Base, VAE_Checkpoint_QwenImage_Config, + VAE_Checkpoint_Wan_Config, ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader @@ -39,6 +40,8 @@ def _load_model( config.path, torch_dtype=self._torch_dtype, ) + elif isinstance(config, VAE_Checkpoint_Wan_Config): + return self._load_wan_vae(config) elif isinstance(config, VAE_Checkpoint_QwenImage_Config): return self._load_qwen_image_vae(config) elif isinstance(config, VAE_Checkpoint_Config_Base): @@ -49,6 
def _load_wan_vae(self, config: VAE_Checkpoint_Wan_Config) -> AnyModel:
    """Load a Wan 2.2 VAE from a single safetensors file.

    The ``AutoencoderKLWan`` is constructed with ``z_dim`` taken from
    ``config.latent_channels`` so that the 48-channel Wan2.2-VAE used by
    TI2V-5B is built with the right latent width. The model is instantiated
    under ``accelerate.init_empty_weights()`` and then populated from the
    checkpoint's state dict, following the same init-empty-then-
    ``load_state_dict`` approach as the QwenImage single-file VAE path.

    Args:
        config: Wan VAE checkpoint config; ``path`` and ``latent_channels``
            are read.

    Returns:
        The loaded ``AutoencoderKLWan`` in eval mode.
    """
    import accelerate
    from diffusers.models.autoencoders.autoencoder_kl_wan import AutoencoderKLWan
    from safetensors.torch import load_file

    state_dict = load_file(config.path)

    # Cast floating-point tensors to the loader's dtype, replacing entries in
    # place so the old and new copies of the full state dict never coexist.
    target_dtype = self._torch_dtype
    if target_dtype is not None:
        for name, tensor in state_dict.items():
            if tensor.is_floating_point():
                state_dict[name] = tensor.to(target_dtype)

    # Ask the RAM cache to free space for the (possibly re-cast) weights.
    required_bytes = sum(t.nelement() * t.element_size() for t in state_dict.values())
    self._ram_cache.make_room(required_bytes)

    with accelerate.init_empty_weights():
        vae = AutoencoderKLWan(z_dim=config.latent_channels)

    # assign=True adopts the loaded tensors as the parameters instead of
    # copying into the placeholders created above.
    vae.load_state_dict(state_dict, strict=True, assign=True)
    vae.eval()
    return vae
@ModelLoaderRegistry.register(
    base=BaseModelType.Any, type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder
)
class WanT5EncoderLoader(ModelLoader):
    """Loader for the standalone Wan UMT5-XXL encoder.

    Two on-disk layouts are supported:
    1. A parent directory with ``text_encoder/`` (and typically ``tokenizer/``)
       subdirectories — what ``Wan-AI/Wan2.2-T2V-A14B::text_encoder+tokenizer``
       produces.
    2. A flat ``text_encoder/`` folder whose root ``config.json`` declares
       ``model_type: umt5``. Here the tokenizer is loaded from that same folder
       via ``AutoTokenizer.from_pretrained``.
    """

    def _load_model(
        self,
        config: AnyModelConfig,
        submodel_type: Optional[SubModelType] = None,
    ) -> AnyModel:
        """Load the text encoder or the tokenizer from ``config.path``.

        Args:
            config: Model config; only ``config.path`` is read.
            submodel_type: ``SubModelType.TextEncoder`` or
                ``SubModelType.Tokenizer``.

        Returns:
            A ``UMT5EncoderModel`` (loaded as bfloat16) or a tokenizer.

        Raises:
            ValueError: If ``submodel_type`` is ``None`` or is neither of the
                two supported submodels.
        """
        if submodel_type is None:
            raise ValueError("A submodel type (Tokenizer or TextEncoder) must be provided.")

        model_root = Path(config.path)
        encoder_dir = model_root / "text_encoder"
        tokenizer_dir = model_root / "tokenizer"

        if submodel_type == SubModelType.TextEncoder:
            from transformers import UMT5EncoderModel

            # Nested layout keeps the weights under text_encoder/; the flat
            # layout keeps them at the root.
            source = encoder_dir if encoder_dir.exists() else model_root
            return UMT5EncoderModel.from_pretrained(
                str(source),
                torch_dtype=torch.bfloat16,
                local_files_only=True,
            )

        if submodel_type == SubModelType.Tokenizer:
            from transformers import AutoTokenizer

            # Preference order: sibling tokenizer/ directory, then the nested
            # encoder directory, then the root itself ("flat" downloads).
            source = next(
                (candidate for candidate in (tokenizer_dir, encoder_dir) if candidate.exists()),
                model_root,
            )
            return AutoTokenizer.from_pretrained(str(source), local_files_only=True)

        raise ValueError(
            f"Unsupported submodel type for WanT5Encoder: {submodel_type.value if submodel_type else 'None'}"
        )
a/invokeai/frontend/web/src/features/modelManagerV2/models.ts b/invokeai/frontend/web/src/features/modelManagerV2/models.ts index 8f0e31ef5cd..60adf6474f5 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/models.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/models.ts @@ -22,6 +22,7 @@ import { isTIModelConfig, isUnknownModelConfig, isVAEModelConfig, + isWanT5EncoderModelConfig, } from 'services/api/types'; import { objectEntries } from 'tsafe'; @@ -84,6 +85,11 @@ const MODEL_CATEGORIES: Record = { i18nKey: 'modelManager.qwenVLEncoder', filter: isQwenVLEncoderModelConfig, }, + wan_t5_encoder: { + category: 'wan_t5_encoder', + i18nKey: 'modelManager.wanT5Encoder', + filter: isWanT5EncoderModelConfig, + }, control_lora: { category: 'control_lora', i18nKey: 'modelManager.controlLora', @@ -164,6 +170,7 @@ export const MODEL_BASE_TO_COLOR: Record = { 'z-image': 'cyan', external: 'orange', anima: 'invokePurple', + wan: 'cyan', unknown: 'red', }; @@ -186,6 +193,7 @@ export const MODEL_TYPE_TO_LONG_NAME: Record = { t5_encoder: 'T5 Encoder', qwen3_encoder: 'Qwen3 Encoder', qwen_vl_encoder: 'Qwen2.5-VL Encoder', + wan_t5_encoder: 'Wan T5 Encoder', clip_embed: 'CLIP Embed', siglip: 'SigLIP', flux_redux: 'FLUX Redux', @@ -211,6 +219,7 @@ export const MODEL_BASE_TO_LONG_NAME: Record = { 'z-image': 'Z-Image', external: 'External', anima: 'Anima', + wan: 'Wan 2.2', unknown: 'Unknown', }; @@ -231,6 +240,7 @@ export const MODEL_BASE_TO_SHORT_NAME: Record = { 'z-image': 'Z-Image', external: 'External', anima: 'Anima', + wan: 'Wan', unknown: 'Unknown', }; @@ -251,6 +261,8 @@ export const MODEL_VARIANT_TO_LONG_NAME: Record = { gigantic: 'CLIP G', generate: 'Qwen Image', edit: 'Qwen Image Edit', + t2v_a14b: 'Wan 2.2 T2V A14B', + ti2v_5b: 'Wan 2.2 TI2V 5B', qwen3_4b: 'Qwen3 4B', qwen3_8b: 'Qwen3 8B', qwen3_06b: 'Qwen3 0.6B', @@ -270,6 +282,7 @@ export const MODEL_FORMAT_TO_LONG_NAME: Record = { t5_encoder: 'T5 Encoder', qwen3_encoder: 'Qwen3 Encoder', 
qwen_vl_encoder: 'Qwen2.5-VL Encoder', + wan_t5_encoder: 'Wan T5 Encoder (UMT5-XXL)', bnb_quantized_int8b: 'BNB Quantized (int8b)', bnb_quantized_nf4b: 'BNB Quantized (nf4b)', gguf_quantized: 'GGUF Quantized', diff --git a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx index 71d2efe0e45..1473e6dd076 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx +++ b/invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelFormatBadge.tsx @@ -16,6 +16,7 @@ const FORMAT_NAME_MAP: Record = { t5_encoder: 't5_encoder', qwen3_encoder: 'qwen3_encoder', qwen_vl_encoder: 'qwen_vl_encoder', + wan_t5_encoder: 'wan_t5_encoder', bnb_quantized_int8b: 'bnb_quantized_int8b', bnb_quantized_nf4b: 'quantized', gguf_quantized: 'gguf', @@ -37,6 +38,7 @@ const FORMAT_COLOR_MAP: Record = { t5_encoder: 'base', qwen3_encoder: 'base', qwen_vl_encoder: 'base', + wan_t5_encoder: 'base', bnb_quantized_int8b: 'base', bnb_quantized_nf4b: 'base', gguf_quantized: 'base', diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index cb4f68f8f8d..24bc5d8b0f6 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -136,6 +136,7 @@ export const zModelType = z.enum([ 't5_encoder', 'qwen3_encoder', 'qwen_vl_encoder', + 'wan_t5_encoder', 'clip_embed', 'siglip', 'flux_redux', @@ -166,7 +167,7 @@ export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']); export const zFlux2VariantType = z.enum(['klein_4b', 'klein_4b_base', 'klein_9b', 'klein_9b_base']); export const zZImageVariantType = z.enum(['turbo', 'zbase']); const zQwenImageVariantType = z.enum(['generate', 'edit']); -export const zWanVariantType = 
z.enum(['t2v_a14b', 'ti2v_5b']); +const zWanVariantType = z.enum(['t2v_a14b', 'ti2v_5b']); export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b', 'qwen3_06b']); export const zAnyModelVariant = z.union([ zModelVariantType, @@ -192,6 +193,7 @@ export const zModelFormat = z.enum([ 't5_encoder', 'qwen3_encoder', 'qwen_vl_encoder', + 'wan_t5_encoder', 'bnb_quantized_int8b', 'bnb_quantized_nf4b', 'gguf_quantized', diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 62070fcbbbe..22952097c8d 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -3558,7 +3558,7 @@ export type components = { */ type: "anima_text_encoder"; }; - AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | 
components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | 
components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | 
components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | 
components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | 
components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * AppVersion * @description App Version Response @@ -3710,7 +3710,7 @@ export type components = { * fallback/null value `BaseModelType.Any` for these models, instead of making the model base optional. 
* @enum {string} */ - BaseModelType: "any" | "sd-1" | "sd-2" | "sd-3" | "sdxl" | "sdxl-refiner" | "flux" | "flux2" | "cogview4" | "z-image" | "external" | "qwen-image" | "anima" | "unknown"; + BaseModelType: "any" | "sd-1" | "sd-2" | "sd-3" | "sdxl" | "sdxl-refiner" | "flux" | "flux2" | "cogview4" | "z-image" | "external" | "qwen-image" | "anima" | "wan" | "unknown"; /** Batch */ Batch: { /** @@ -12248,7 +12248,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | 
components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | 
components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | 
components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | 
components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | 
components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | 
components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + [key: 
string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | 
components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | 
components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | 
components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | 
components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | 
components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | 
components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; }; /** * Edges @@ -12285,7 +12285,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | 
components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | 
components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | 
components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + [key: string]: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | 
components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | 
components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * Errors @@ -15646,7 +15646,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | 
components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | 
components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | 
components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | 
components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] 
| components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | 
components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | 
components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | 
components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | 
components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | 
components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] 
| components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | 
components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | 
components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of 
the prepared invocation's source node @@ -15656,7 +15656,7 @@ export type components = { * Result * @description The result of the invocation */ - result: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | 
components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | 
components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + result: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | 
components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | 
components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * InvocationErrorEvent @@ -15710,7 +15710,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | 
components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | 
components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | 
components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | 
components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | 
components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | 
components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | 
components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | 
components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | 
components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | 
components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | 
components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | 
components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | 
components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | 
components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -15975,6 +15975,11 @@ export type components = { tomask: components["schemas"]["ImageOutput"]; unsharp_mask: components["schemas"]["ImageOutput"]; vae_loader: components["schemas"]["VAEOutput"]; + wan_denoise: components["schemas"]["LatentsOutput"]; + wan_i2l: components["schemas"]["LatentsOutput"]; + wan_l2i: components["schemas"]["ImageOutput"]; + wan_model_loader: components["schemas"]["WanModelLoaderOutput"]; + wan_text_encoder: components["schemas"]["WanConditioningOutput"]; z_image_control: components["schemas"]["ZImageControlOutput"]; z_image_denoise: components["schemas"]["LatentsOutput"]; z_image_denoise_meta: components["schemas"]["LatentsMetaOutput"]; @@ -16038,7 +16043,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | 
components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | 
components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | 
components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | 
components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | 
components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | 
components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | 
components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | 
components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | 
components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | 
components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | 
components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] 
| components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | 
components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | 
components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -16113,7 +16118,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | 
components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | 
components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | 
components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | 
components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | 
components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | 
components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | 
components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | 
components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | 
components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | 
components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] 
| components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | 
components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | 
components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of 
the prepared invocation's source node @@ -21085,6 +21090,107 @@ export type components = { */ base: "sdxl"; }; + /** + * Main_Diffusers_Wan_Config + * @description Model config for Wan 2.2 diffusers models. + * + * Covers both the dual-expert T2V-A14B family and the single-transformer TI2V-5B + * family. Variant is detected from the on-disk transformer config (latent channel + * count) plus the presence of a sibling ``transformer_2/`` directory. + */ + Main_Diffusers_Wan_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). 
+ */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Type + * @default main + * @constant + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases: string[] | null; + /** @description Default settings for this model */ + default_settings: components["schemas"]["MainModelDefaultSettings"] | null; + /** + * Format + * @default diffusers + * @constant + */ + format: "diffusers"; + /** @default */ + repo_variant: components["schemas"]["ModelRepoVariant"]; + /** + * Base + * @default wan + * @constant + */ + base: "wan"; + variant: components["schemas"]["WanVariantType"]; + /** + * Has Dual Expert + * @description Whether this model ships two transformer experts (Wan 2.2 A14B MoE). False for TI2V-5B. + * @default false + */ + has_dual_expert: boolean; + /** + * Boundary Ratio + * @description MoE expert switch point as a fraction of num_train_timesteps (typically 1000). None for single-transformer models. Read from model_index.json by Diffusers' WanPipeline. + */ + boundary_ratio: number | null; + }; /** * Main_Diffusers_ZImage_Config * @description Model config for Z-Image diffusers models (Z-Image-Turbo, Z-Image-Base). @@ -23218,7 +23324,7 @@ export type components = { * @description Storage format of model. 
* @enum {string} */ - ModelFormat: "omi" | "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "qwen3_encoder" | "qwen_vl_encoder" | "bnb_quantized_int8b" | "bnb_quantized_nf4b" | "gguf_quantized" | "external_api" | "unknown"; + ModelFormat: "omi" | "diffusers" | "checkpoint" | "lycoris" | "onnx" | "olive" | "embedding_file" | "embedding_folder" | "invokeai" | "t5_encoder" | "qwen3_encoder" | "qwen_vl_encoder" | "wan_t5_encoder" | "bnb_quantized_int8b" | "bnb_quantized_nf4b" | "gguf_quantized" | "external_api" | "unknown"; /** ModelIdentifierField */ ModelIdentifierField: { /** @@ -23355,7 +23461,7 @@ export type components = { * Config * @description The installed model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | 
components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | 
components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | 
components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | 
components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | 
components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; /** * ModelInstallDownloadProgressEvent @@ -23521,7 +23627,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. 
*/ - config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] 
| components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; + config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | 
components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | 
components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | 
components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -23607,7 +23713,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | 
components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | 
components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | 
components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -23628,7 +23734,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | 
components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | 
components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | 
components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] 
| components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -23754,7 +23860,7 @@ export type components = { * Variant * @description The variant of the model. 
*/ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** @description The prediction type of the model. */ prediction_type?: components["schemas"]["SchedulerPredictionType"] | null; /** @@ -23812,7 +23918,7 @@ export type components = { * @description Model type. * @enum {string} */ - ModelType: "onnx" | "main" | "vae" | "lora" | "control_lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "clip_embed" | "t2i_adapter" | "t5_encoder" | "qwen3_encoder" | "qwen_vl_encoder" | "spandrel_image_to_image" | "siglip" | "flux_redux" | "llava_onevision" | "text_llm" | "external_image_generator" | "unknown"; + ModelType: "onnx" | "main" | "vae" | "lora" | "control_lora" | "controlnet" | "embedding" | "ip_adapter" | "clip_vision" | "clip_embed" | "t2i_adapter" | "t5_encoder" | "qwen3_encoder" | "qwen_vl_encoder" | "wan_t5_encoder" | "spandrel_image_to_image" | "siglip" | "flux_redux" | "llava_onevision" | "text_llm" | "external_image_generator" | "unknown"; /** * ModelVariantType * @description Variant type. 
@@ -23825,7 +23931,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | 
components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; + models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | 
components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | 
components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; }; /** * Multiply Integers @@ -28371,7 +28477,7 @@ export type components = { type: components["schemas"]["ModelType"]; format?: components["schemas"]["ModelFormat"] | null; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** * Is Installed * @default false @@ -28416,7 +28522,7 @@ export type components = { type: components["schemas"]["ModelType"]; format?: components["schemas"]["ModelFormat"] | null; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | 
components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** * Is Installed * @default false @@ -28929,14 +29035,14 @@ export type components = { * @description Submodel type. * @enum {string} */ - SubModelType: "unet" | "transformer" | "text_encoder" | "text_encoder_2" | "text_encoder_3" | "tokenizer" | "tokenizer_2" | "tokenizer_3" | "vae" | "vae_decoder" | "vae_encoder" | "scheduler" | "safety_checker"; + SubModelType: "unet" | "transformer" | "transformer_2" | "text_encoder" | "text_encoder_2" | "text_encoder_3" | "tokenizer" | "tokenizer_2" | "tokenizer_3" | "vae" | "vae_decoder" | "vae_encoder" | "scheduler" | "safety_checker"; /** SubmodelDefinition */ SubmodelDefinition: { /** Path Or Prefix */ path_or_prefix: string; model_type: components["schemas"]["ModelType"]; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; }; /** * Subtract Integers @@ -31297,6 +31403,96 @@ export type components = { */ base: "sdxl"; }; + /** + * VAE_Checkpoint_Wan_Config + * @description Model config for Wan 2.2 VAE checkpoint models (AutoencoderKLWan). 
+ * + * Distinguishes A14B (z_dim=16, standard Wan VAE) from TI2V-5B (z_dim=48, + * Wan2.2-VAE) via the input channel count of ``decoder.conv_in.weight``. + */ + VAE_Checkpoint_Wan_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). + */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Config Path + * @description Path to the config for this model, if any. + */ + config_path: string | null; + /** + * Type + * @default vae + * @constant + */ + type: "vae"; + /** + * Format + * @default checkpoint + * @constant + */ + format: "checkpoint"; + /** + * Base + * @default wan + * @constant + */ + base: "wan"; + /** + * Latent Channels + * @description VAE latent channel count: 16 for A14B (standard Wan VAE) or 48 for TI2V-5B (Wan2.2-VAE). 
+ * @enum {integer} + */ + latent_channels: 16 | 48; + }; /** * VAE_Diffusers_Flux2_Config * @description Model config for FLUX.2 VAE models in diffusers format (AutoencoderKLFlux2). @@ -31525,6 +31721,91 @@ export type components = { */ base: "sdxl"; }; + /** + * VAE_Diffusers_Wan_Config + * @description Model config for Wan 2.2 VAE in diffusers folder layout (AutoencoderKLWan). + */ + VAE_Diffusers_Wan_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). + */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Format + * @default diffusers + * @constant + */ + format: "diffusers"; + /** @default */ + repo_variant: components["schemas"]["ModelRepoVariant"]; + /** + * Type + * @default vae + * @constant + */ + type: "vae"; + /** + * Base + * @default wan + * @constant + */ + base: "wan"; + /** + * Latent Channels + * @description VAE latent channel count: 16 for A14B or 48 for TI2V-5B's Wan2.2-VAE. 
+ * @default 16 + * @enum {integer} + */ + latent_channels: 16 | 48; + }; /** ValidationError */ ValidationError: { /** Location */ @@ -31570,6 +31851,539 @@ export type components = { */ cover_image_name?: string | null; }; + /** + * WanConditioningField + * @description A Wan 2.2 conditioning tensor primitive value. + * + * Wan conditioning is the UMT5-XXL hidden state for the prompt plus an attention + * mask marking valid (non-padding) tokens. + */ + WanConditioningField: { + /** + * Conditioning Name + * @description The name of conditioning tensor + */ + conditioning_name: string; + }; + /** + * WanConditioningOutput + * @description Base class for nodes that output a Wan 2.2 text conditioning tensor. + */ + WanConditioningOutput: { + /** @description Conditioning tensor */ + conditioning: components["schemas"]["WanConditioningField"]; + /** + * type + * @default wan_conditioning_output + * @constant + */ + type: "wan_conditioning_output"; + }; + /** + * Denoise - Wan 2.2 + * @description Run the denoising process with a Wan 2.2 model. + * + * Drives a flow-matching Euler schedule via Diffusers' + * ``FlowMatchEulerDiscreteScheduler``. CFG is supported when negative + * conditioning is provided and ``guidance_scale != 1.0``. + * + * For Wan 2.2 A14B the high-noise expert handles timesteps at and above + * ``boundary_ratio * num_train_timesteps``; the low-noise expert handles + * timesteps below. Both experts share the model cache; only the active one is + * GPU-resident at any time. + */ + WanDenoiseInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. 
+ * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Transformer + * @description Wan transformer field (transformer + optional dual-expert metadata). + * @default null + */ + transformer?: components["schemas"]["WanTransformerField"] | null; + /** + * @description Positive conditioning tensor + * @default null + */ + positive_conditioning?: components["schemas"]["WanConditioningField"] | null; + /** + * @description Negative conditioning tensor + * @default null + */ + negative_conditioning?: components["schemas"]["WanConditioningField"] | null; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * @description A mask of the region to apply the denoising process to. Values of 0.0 represent the regions to be fully denoised, and 1.0 represent the regions to be preserved. + * @default null + */ + denoise_mask?: components["schemas"]["DenoiseMaskField"] | null; + /** + * Denoising Start + * @description When to start denoising, expressed a percentage of total steps + * @default 0 + */ + denoising_start?: number; + /** + * Denoising End + * @description When to stop denoising, expressed a percentage of total steps + * @default 1 + */ + denoising_end?: number; + /** + * Add Noise + * @description Add noise based on denoising start. + * @default true + */ + add_noise?: boolean; + /** + * Guidance Scale + * @description Classifier-free guidance scale. 4.0 is the Wan 2.2 default for A14B; TI2V-5B can tolerate higher values up to ~5.5. + * @default 4 + */ + guidance_scale?: number; + /** + * Guidance Scale (Low Noise) + * @description Optional separate CFG scale for the low-noise expert (Wan 2.2 A14B only). If unset, the primary 'Guidance Scale' is reused. Ignored for TI2V-5B. 
+ * @default null + */ + guidance_scale_low_noise?: number | null; + /** + * Width + * @description Width of the generated image. + * @default 1024 + */ + width?: number; + /** + * Height + * @description Height of the generated image. + * @default 1024 + */ + height?: number; + /** + * Steps + * @description Number of denoising steps. + * @default 40 + */ + steps?: number; + /** + * Seed + * @description Randomness seed for reproducibility. + * @default 0 + */ + seed?: number; + /** + * type + * @default wan_denoise + * @constant + */ + type: "wan_denoise"; + }; + /** + * Image to Latents - Wan 2.2 + * @description Encodes an image with the Wan VAE (AutoencoderKLWan). + * + * The output latents have the temporal dimension squeezed out, so downstream + * nodes see 4D ``[B, C, H, W]``. The denoise loop re-adds ``T=1`` before + * feeding the transformer. + */ + WanImageToLatentsInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description The image to encode. + * @default null + */ + image?: components["schemas"]["ImageField"] | null; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * type + * @default wan_i2l + * @constant + */ + type: "wan_i2l"; + }; + /** + * Latents to Image - Wan 2.2 + * @description Decodes Wan latents back to RGB. 
+ */ + WanLatentsToImageInvocation: { + /** + * @description The board to save the image to + * @default null + */ + board?: components["schemas"]["BoardField"] | null; + /** + * @description Optional metadata to be saved with the image + * @default null + */ + metadata?: components["schemas"]["MetadataField"] | null; + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Latents tensor + * @default null + */ + latents?: components["schemas"]["LatentsField"] | null; + /** + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * type + * @default wan_l2i + * @constant + */ + type: "wan_l2i"; + }; + /** + * Main Model - Wan 2.2 + * @description Loads a Wan 2.2 model, outputting its submodels. + * + * Components can be mixed and matched, mirroring the Qwen Image loader pattern: + * + * - Transformer(s) always come from the main model. For A14B that's both + * ``transformer/`` (high-noise) and ``transformer_2/`` (low-noise); for + * TI2V-5B it's the single ``transformer/``. + * - VAE: standalone Wan VAE > main (if Diffusers) > Component Source (Diffusers). + * - UMT5-XXL encoder: standalone Wan T5 encoder > main (if Diffusers) > + * Component Source (Diffusers). + * + * The Component Source slot lets users supply a Diffusers Wan main model purely + * for VAE / encoder extraction when the actual transformer is in a single-file + * format (GGUF in Phase 4). Together, the standalone VAE + standalone encoder + * let a GGUF transformer run without a full ~30 GB Diffusers install. 
+ */ + WanModelLoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Transformer + * @description Wan 2.2 model (Transformer) to load + */ + model: components["schemas"]["ModelIdentifierField"]; + /** + * VAE + * @description Standalone Wan VAE model. If not set, the VAE is loaded from the main model (when in Diffusers format) or from the Component Source. + * @default null + */ + vae_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Wan T5 Encoder + * @description Standalone Wan UMT5-XXL encoder. If not set, the encoder is loaded from the main model (when in Diffusers format) or from the Component Source. + * @default null + */ + wan_t5_encoder_model?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Component Source (Diffusers) + * @description Diffusers Wan main model to extract VAE and/or encoder from. Use this if you don't have separate VAE/encoder models. Ignored for any submodel that is provided separately. + * @default null + */ + component_source?: components["schemas"]["ModelIdentifierField"] | null; + /** + * type + * @default wan_model_loader + * @constant + */ + type: "wan_model_loader"; + }; + /** + * WanModelLoaderOutput + * @description Wan 2.2 model loader output. 
+ */ + WanModelLoaderOutput: { + /** + * Transformer + * @description Wan transformer (one or two experts depending on the variant) + */ + transformer: components["schemas"]["WanTransformerField"]; + /** + * UMT5-XXL Encoder + * @description UMT5-XXL tokenizer and text encoder for Wan 2.2 + */ + wan_t5_encoder: components["schemas"]["WanT5EncoderField"]; + /** + * VAE + * @description VAE + */ + vae: components["schemas"]["VAEField"]; + /** + * type + * @default wan_model_loader_output + * @constant + */ + type: "wan_model_loader_output"; + }; + /** + * WanT5EncoderField + * @description Field for the UMT5-XXL text encoder used by Wan 2.2 models. + */ + WanT5EncoderField: { + /** @description Info to load tokenizer submodel */ + tokenizer: components["schemas"]["ModelIdentifierField"]; + /** @description Info to load text_encoder submodel */ + text_encoder: components["schemas"]["ModelIdentifierField"]; + /** + * Loras + * @description LoRAs to apply on model loading + */ + loras?: components["schemas"]["LoRAField"][]; + }; + /** + * WanT5Encoder_WanT5Encoder_Config + * @description UMT5-XXL encoder in diffusers folder layout. + * + * Accepts either: + * - A directory containing ``text_encoder/`` (and typically ``tokenizer/``) ─ the + * shape produced by ``Wan-AI/Wan2.2-T2V-A14B::text_encoder+tokenizer``. + * - A bare ``text_encoder/`` directory whose own ``config.json`` declares + * ``model_type: umt5``. + */ + WanT5Encoder_WanT5Encoder_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. 
+ */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). + */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Base + * @default any + * @constant + */ + base: "any"; + /** + * Type + * @default wan_t5_encoder + * @constant + */ + type: "wan_t5_encoder"; + /** + * Format + * @default wan_t5_encoder + * @constant + */ + format: "wan_t5_encoder"; + }; + /** + * Prompt - Wan 2.2 + * @description Encodes a text prompt for Wan 2.2 using the UMT5-XXL encoder. + * + * Output is the encoder's last hidden state (shape: [seq_len=226, 4096]) plus + * an attention mask marking valid (non-padding) tokens. The Wan transformer + * consumes these directly as ``encoder_hidden_states``. + */ + WanTextEncoderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * Prompt + * @description Text prompt for Wan 2.2. 
+ * @default null + */ + prompt?: string | null; + /** + * UMT5-XXL Encoder + * @description UMT5-XXL tokenizer and text encoder for Wan 2.2 + * @default null + */ + wan_t5_encoder?: components["schemas"]["WanT5EncoderField"] | null; + /** + * type + * @default wan_text_encoder + * @constant + */ + type: "wan_text_encoder"; + }; + /** + * WanTransformerField + * @description Transformer field for Wan 2.2 models. + * + * Wan 2.2 A14B is a Mixture-of-Experts model with two transformer experts: + * a high-noise expert (active at large timesteps) and a low-noise expert + * (active at small timesteps). TI2V-5B is a single-transformer model and only + * populates ``transformer``. + * + * ``boundary_ratio`` matches Diffusers' ``WanPipeline`` semantics: it's the + * boundary timestep as a fraction of ``num_train_timesteps`` (typically 1000), + * so ``boundary_ratio=0.875`` means the high-noise expert handles t >= 875 and + * the low-noise expert handles t < 875. + */ + WanTransformerField: { + /** @description Primary transformer submodel. For A14B this is the high-noise expert. */ + transformer: components["schemas"]["ModelIdentifierField"]; + /** + * @description Low-noise transformer expert (Wan 2.2 A14B only). None for TI2V-5B. + * @default null + */ + transformer_low_noise?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Loras + * @description LoRAs to apply to the primary transformer. For A14B applied to the high-noise expert. + */ + loras?: components["schemas"]["LoRAField"][]; + /** + * Loras Low Noise + * @description Optional separate LoRAs for the low-noise expert (Wan 2.2 A14B). If empty and transformer_low_noise is set, the primary 'loras' list is reused. + */ + loras_low_noise?: components["schemas"]["LoRAField"][]; + /** + * Boundary Ratio + * @description Boundary timestep as a fraction of num_train_timesteps (Wan 2.2 A14B only). High-noise expert: t >= boundary_ratio * num_train_timesteps. Low-noise expert: t below. Ignored for TI2V-5B. 
+ * @default 0.875 + */ + boundary_ratio?: number; + }; + /** + * WanVariantType + * @description Wan 2.2 model variants. + * + * Both variants are used for image generation at num_frames=1. They differ in + * architecture: A14B is a Mixture-of-Experts model with two transformer experts + * (high-noise and low-noise) totalling ~28B params; TI2V-5B is a single ~5B + * transformer with a higher-compression VAE (z_dim=48). + * @enum {string} + */ + WanVariantType: "t2v_a14b" | "ti2v_5b"; /** Workflow */ Workflow: { /** @@ -33226,7 +34040,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | 
components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | 
components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | 
components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -33258,7 +34072,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | 
components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | 
components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | 
components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] 
| components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -33308,7 +34122,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | 
components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | 
components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -33413,7 +34227,7 @@ export interface 
operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | 
components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | 
components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | 
components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -33484,7 +34298,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | 
components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | 
components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | 
components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -34217,7 +35031,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | 
components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | 
components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | 
components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | 
components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | 
components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 27c6fcbf3c3..962765603ee 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -117,6 +117,7 @@ export type T5EncoderBnbQuantizedLlmInt8bModelConfig = Extract< >; export type Qwen3EncoderModelConfig = Extract; export type QwenVLEncoderModelConfig = Extract; +type WanT5EncoderModelConfig = Extract; export type 
SpandrelImageToImageModelConfig = Extract; export type CheckpointModelConfig = Extract; export type CLIPVisionModelConfig = Extract; @@ -379,6 +380,10 @@ export const isQwenVLEncoderModelConfig = (config: AnyModelConfig): config is Qw return config.type === 'qwen_vl_encoder'; }; +export const isWanT5EncoderModelConfig = (config: AnyModelConfig): config is WanT5EncoderModelConfig => { + return config.type === 'wan_t5_encoder'; +}; + export const isCLIPEmbedModelConfigOrSubmodel = ( config: AnyModelConfig, excludeSubmodels?: boolean diff --git a/scripts/wan_diffusers_reference.py b/scripts/wan_diffusers_reference.py new file mode 100644 index 00000000000..0e67ceac225 --- /dev/null +++ b/scripts/wan_diffusers_reference.py @@ -0,0 +1,85 @@ +"""Run TI2V-5B (or any Wan 2.2 Diffusers checkpoint) via the upstream +WanPipeline directly, with the same arguments InvokeAI's wan_denoise uses. + +Use to A/B against InvokeAI output when image quality is questionable. +Generates one image and saves it next to this script. 
+ +Example: + python scripts/wan_diffusers_reference.py \ + --model-path /home/lstein/invokeai-delete/models/ \ + --prompt "a photograph of a young redheaded woman sitting on a three-legged stool next to a potted fern" \ + --seed 42 --steps 40 --cfg 4.0 --width 1024 --height 1024 +""" + +import argparse +from pathlib import Path + +import torch +from diffusers import WanPipeline + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--model-path", required=True, help="Path to a Diffusers Wan model directory.") + p.add_argument("--prompt", required=True) + p.add_argument( + "--negative", + default="", + help="Negative prompt (default empty string — matches WanPipeline.encode_prompt behaviour).", + ) + p.add_argument("--seed", type=int, default=42) + p.add_argument("--steps", type=int, default=40) + p.add_argument("--cfg", type=float, default=4.0) + p.add_argument("--width", type=int, default=1024) + p.add_argument("--height", type=int, default=1024) + p.add_argument("--output", default="wan_diffusers_reference.png") + p.add_argument( + "--offload", + choices=["model", "sequential", "none"], + default="model", + help="VRAM-saving strategy. 'model' (default) keeps one component on GPU at a time — fits TI2V-5B " + "in ~16 GB. 'sequential' is even more aggressive (per-module offload) and slower. " + "'none' loads everything to GPU at once (~24 GB+).", + ) + args = p.parse_args() + + print(f"Loading WanPipeline from {args.model_path} ...") + pipe = WanPipeline.from_pretrained(args.model_path, torch_dtype=torch.bfloat16) + + if args.offload == "model": + # enable_model_cpu_offload puts each component (transformer, vae, text_encoder) + # on GPU only while it's actively running; the rest sit on CPU. Adds a little + # latency between stages but cuts peak VRAM dramatically. 
+ pipe.enable_model_cpu_offload() + elif args.offload == "sequential": + pipe.enable_sequential_cpu_offload() + else: + pipe.to("cuda") + + generator = torch.Generator(device="cuda").manual_seed(args.seed) + + print( + f"Generating: prompt={args.prompt!r}\n" + f" steps={args.steps}, cfg={args.cfg}, size={args.width}x{args.height}, seed={args.seed}" + ) + # num_frames=1 → image generation + result = pipe( + prompt=args.prompt, + negative_prompt=args.negative, + height=args.height, + width=args.width, + num_frames=1, + num_inference_steps=args.steps, + guidance_scale=args.cfg, + generator=generator, + output_type="pil", + ) + # WanPipelineOutput.frames is a list of [PIL.Image] sequences (one per video). + image = result.frames[0][0] + out = Path(args.output) + image.save(out) + print(f"Saved {out.resolve()}") + + +if __name__ == "__main__": + main() diff --git a/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py b/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py new file mode 100644 index 00000000000..9fac29db374 --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py @@ -0,0 +1,100 @@ +"""Tests for the WanT5Encoder config probe (UMT5-XXL diffusers folder).""" + +import json +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest + +from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError +from invokeai.backend.model_manager.configs.wan_t5_encoder import WanT5Encoder_WanT5Encoder_Config +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType + + +def _build_overrides(model_path: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(model_path), + "file_size": 0, + "name": name, + "source": str(model_path), + "source_type": "path", + } + + +def _make_mod(model_path: Path) -> MagicMock: + mod = MagicMock() + mod.path = model_path + return mod + + +def 
_write_encoder_config(target: Path, model_type: str) -> None: + target.parent.mkdir(parents=True, exist_ok=True) + with target.open("w") as f: + json.dump({"model_type": model_type, "architectures": ["UMT5EncoderModel"]}, f) + + +class TestWanT5EncoderProbe: + def test_accepts_nested_text_encoder_layout(self): + """Standard layout: /text_encoder/config.json with model_type=umt5.""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "wan-encoder-bundle" + root.mkdir() + _write_encoder_config(root / "text_encoder" / "config.json", "umt5") + + cfg = WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( + _make_mod(root), _build_overrides(root, "wan-encoder") + ) + + assert cfg.base == BaseModelType.Any + assert cfg.type == ModelType.WanT5Encoder + assert cfg.format == ModelFormat.WanT5Encoder + + def test_accepts_flat_encoder_layout(self): + """Flat layout: /config.json directly (just the encoder folder).""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "umt5-xxl" + root.mkdir() + _write_encoder_config(root / "config.json", "umt5") + + cfg = WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( + _make_mod(root), _build_overrides(root, "umt5-xxl") + ) + assert cfg.format == ModelFormat.WanT5Encoder + + def test_rejects_t5(self): + """A regular T5-XXL encoder must not match (different vocabulary).""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "t5-xxl" + root.mkdir() + _write_encoder_config(root / "config.json", "t5") + + with pytest.raises(NotAMatchError, match="not 'umt5'"): + WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( + _make_mod(root), _build_overrides(root, "t5-xxl") + ) + + def test_rejects_full_pipeline(self): + """A folder with model_index.json or transformer/ is a full pipeline, not an encoder.""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "full-pipeline" + root.mkdir() + _write_encoder_config(root / "text_encoder" / "config.json", "umt5") + (root / "model_index.json").touch() + + with 
pytest.raises(NotAMatchError, match="full Wan pipeline"): + WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( + _make_mod(root), _build_overrides(root, "full-pipeline") + ) + + def test_rejects_missing_config(self): + """Empty directory has no encoder config to read.""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "empty" + root.mkdir() + + with pytest.raises(NotAMatchError, match="no encoder config"): + WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( + _make_mod(root), _build_overrides(root, "empty") + ) diff --git a/tests/backend/model_manager/configs/test_wan_vae_config.py b/tests/backend/model_manager/configs/test_wan_vae_config.py new file mode 100644 index 00000000000..21c3f42a7b8 --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_vae_config.py @@ -0,0 +1,173 @@ +"""Tests for Wan 2.2 VAE config probes (checkpoint + diffusers).""" + +import json +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest +import torch + +from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError +from invokeai.backend.model_manager.configs.vae import ( + VAE_Checkpoint_QwenImage_Config, + VAE_Checkpoint_Wan_Config, + VAE_Diffusers_Wan_Config, + _wan_vae_z_dim, +) +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat + + +def _build_overrides(model_path: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(model_path), + "file_size": 0, + "name": name, + "source": str(model_path), + "source_type": "path", + } + + +def _make_mod(model_path: Path, state_dict: dict | None = None) -> MagicMock: + mod = MagicMock() + mod.path = model_path + if state_dict is not None: + mod.load_state_dict.return_value = state_dict + return mod + + +def _wan_vae_state_dict(z_dim: int) -> dict: + """Synthetic 5D Wan-style VAE state dict.""" + return { + "decoder.conv_in.weight": torch.zeros(96, z_dim, 1, 3, 3), + 
"encoder.conv_in.weight": torch.zeros(z_dim, 3, 1, 3, 3), + } + + +class TestZDimDetection: + def test_detects_16_channel(self): + assert _wan_vae_z_dim(_wan_vae_state_dict(16)) == 16 + + def test_detects_48_channel(self): + assert _wan_vae_z_dim(_wan_vae_state_dict(48)) == 48 + + def test_rejects_unknown_z_dim(self): + # Some other 5D conv weight (not Wan). + sd = {"decoder.conv_in.weight": torch.zeros(96, 32, 1, 3, 3)} + assert _wan_vae_z_dim(sd) is None + + def test_rejects_4d_conv(self): + # Standard SD/SDXL 4D conv — not Wan. + sd = {"decoder.conv_in.weight": torch.zeros(96, 16, 3, 3)} + assert _wan_vae_z_dim(sd) is None + + +class TestVAECheckpointWanConfig: + """Probe + filename-heuristic disambiguation from Qwen Image VAE.""" + + def test_48_channel_unambiguous_wan(self): + with TemporaryDirectory() as tmp: + vae_path = Path(tmp) / "wan2.2-vae.safetensors" + vae_path.touch() + + cfg = VAE_Checkpoint_Wan_Config.from_model_on_disk( + _make_mod(vae_path, state_dict=_wan_vae_state_dict(48)), + _build_overrides(vae_path, "Wan2.2-VAE"), + ) + + assert cfg.base == BaseModelType.Wan + assert cfg.format == ModelFormat.Checkpoint + assert cfg.latent_channels == 48 + + def test_16_channel_with_wan_in_filename(self): + with TemporaryDirectory() as tmp: + vae_path = Path(tmp) / "wan-vae.safetensors" + vae_path.touch() + + cfg = VAE_Checkpoint_Wan_Config.from_model_on_disk( + _make_mod(vae_path, state_dict=_wan_vae_state_dict(16)), + _build_overrides(vae_path, "Wan VAE"), + ) + + assert cfg.latent_channels == 16 + + def test_16_channel_without_wan_in_filename_defers(self): + """Filename without 'wan' should let Qwen Image VAE win.""" + with TemporaryDirectory() as tmp: + vae_path = Path(tmp) / "qwen_vae.safetensors" + vae_path.touch() + + with pytest.raises(NotAMatchError, match="deferring to Qwen Image"): + VAE_Checkpoint_Wan_Config.from_model_on_disk( + _make_mod(vae_path, state_dict=_wan_vae_state_dict(16)), + _build_overrides(vae_path, "QwenImage VAE"), + ) + + def 
test_qwen_image_defers_when_filename_says_wan(self): + """The mirror case — QwenImage config refuses files whose filenames suggest Wan.""" + with TemporaryDirectory() as tmp: + vae_path = Path(tmp) / "wan-vae.safetensors" + vae_path.touch() + + with pytest.raises(NotAMatchError, match="filename suggests a Wan"): + VAE_Checkpoint_QwenImage_Config.from_model_on_disk( + _make_mod(vae_path, state_dict=_wan_vae_state_dict(16)), + _build_overrides(vae_path, "Wan VAE"), + ) + + def test_rejects_non_wan_state_dict(self): + with TemporaryDirectory() as tmp: + vae_path = Path(tmp) / "wan-junk.safetensors" + vae_path.touch() + sd = {"foo.bar": torch.zeros(1)} + + with pytest.raises(NotAMatchError): + VAE_Checkpoint_Wan_Config.from_model_on_disk( + _make_mod(vae_path, state_dict=sd), + _build_overrides(vae_path, "junk"), + ) + + +class TestVAEDiffusersWanConfig: + """Diffusers-folder probe; latent_channels read from vae/config.json.""" + + def test_z_dim_from_config_json(self): + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-VAE" + root.mkdir() + with (root / "config.json").open("w") as f: + json.dump({"_class_name": "AutoencoderKLWan", "z_dim": 48}, f) + + cfg = VAE_Diffusers_Wan_Config.from_model_on_disk( + _make_mod(root), + _build_overrides(root, "Wan2.2-VAE"), + ) + assert cfg.latent_channels == 48 + assert cfg.format == ModelFormat.Diffusers + + def test_default_to_16_when_z_dim_missing(self): + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan-VAE" + root.mkdir() + with (root / "config.json").open("w") as f: + json.dump({"_class_name": "AutoencoderKLWan"}, f) # No z_dim. 
+ + cfg = VAE_Diffusers_Wan_Config.from_model_on_disk( + _make_mod(root), + _build_overrides(root, "Wan-VAE"), + ) + assert cfg.latent_channels == 16 + + def test_rejects_non_wan_class(self): + with TemporaryDirectory() as tmp: + root = Path(tmp) / "FluxVAE" + root.mkdir() + with (root / "config.json").open("w") as f: + json.dump({"_class_name": "AutoencoderKL"}, f) + + with pytest.raises(NotAMatchError): + VAE_Diffusers_Wan_Config.from_model_on_disk( + _make_mod(root), + _build_overrides(root, "FluxVAE"), + ) From f9a22c1ea56d1b24b9ce09c02e21a5103a800458 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 9 May 2026 13:40:10 -0400 Subject: [PATCH 03/12] feat(model): Wan 2.2 Phase 4 - GGUF transformer support Adds single-file GGUF support for Wan 2.2 transformers, the path that makes A14B usable on consumer GPUs (~7 GB/expert at Q4_K_M instead of ~28 GB at bf16). Probe (configs/main.py): - New helpers: _has_wan_keys (Wan vs Qwen/FLUX/Z-Image fingerprint via condition_embedder.text_embedder.linear_1 + patch_embedding); _detect_wan_gguf_variant (16ch -> A14B, 48ch -> TI2V-5B from patch_embedding.weight.shape[1]); _detect_wan_gguf_expert (filename heuristic for high_noise / low_noise / none). - Main_GGUF_Wan_Config(base=Wan, format=GGUFQuantized, variant, expert). Tolerates the ComfyUI 'model.diffusion_model.' / 'diffusion_model.' prefixes via _has_wan_keys' multi-prefix scan. - Registered in factory.py. Loader (model_loaders/wan.py): - WanGGUFCheckpointModel mirrors the QwenImage GGUF pattern: gguf_sd_loader -> strip ComfyUI prefix -> auto-detect arch from state dict shapes (num_layers, inner_dim, ffn_dim, text_dim, in_channels, num_heads = inner_dim/128) -> init_empty_weights + load_state_dict(strict=False, assign=True). Loader invocation (wan_model_loader.py): - New 'Transformer (Low Noise)' picker: optional second GGUF for the A14B dual-expert MoE. Auto-swaps if the user wired the experts in the wrong order. 
Warns when an A14B GGUF is loaded without a paired low-noise expert (single-expert run, degraded quality). - GGUF mains require either a standalone VAE+encoder or a Diffusers Component Source (which can also supply boundary_ratio). - Diffusers main path unchanged (still pulls both experts from transformer/ + transformer_2/). Tests (tests/.../test_wan_gguf_config.py): - 14 tests across key fingerprint, variant detection, expert filename heuristic, and the full probe (A14B high/low, TI2V-5B, non-GGUF rejection, unrecognised state-dict rejection, explicit override). Total Wan tests: 55 passing (no regressions). FE lint clean. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): support QuantStack-style GGUFs and standalone Diffusers VAE The city96 Wan 2.2 GGUF repos have been removed from Hugging Face, leaving QuantStack as the surviving distributor. QuantStack ships the native upstream Wan key layout (text_embedding.0/2, self_attn/cross_attn, ffn.0/2, head.head, head.modulation, ...) rather than the diffusers naming city96 used; biases are stored as F16 rather than BF16; and the standalone Wan VAE installs as a flat AutoencoderKLWan folder which the generic loader rejects. Three fixes: 1. Probe now recognises both diffusers and native key layouts via a new _is_native_wan_layout helper; _has_wan_keys accepts either text-proj fingerprint. 2. GGUF loader converts native -> diffusers keys (mirroring diffusers' convert_wan_transformer_to_diffusers) and unwraps non-quantized GGMLTensors to plain tensors at compute_dtype. The unwrap is needed because conv3d isn't in GGMLTensor's dispatch table, so the F16 patch_embedding bias would otherwise hit conv3d against bf16 latents. 3. VAELoader gains a VAE_Diffusers_Wan_Config branch that loads AutoencoderKLWan directly; the generic path can't handle a flat single-class folder when a submodel_type is provided. Adds 12 tests covering the native layout (probe + converter + unwrap).
Verified end-to-end against Wan2.2-T2V-A14B-Q4_K_M from QuantStack: 1095 tensors round-trip key-for-key against WanTransformer3DModel. Co-Authored-By: Claude Opus 4.7 (1M context) --- invokeai/app/invocations/wan_model_loader.py | 118 +++++++-- .../backend/model_manager/configs/factory.py | 2 + .../backend/model_manager/configs/main.py | 126 +++++++++ .../model_manager/load/model_loaders/vae.py | 25 ++ .../model_manager/load/model_loaders/wan.py | 208 +++++++++++++++ .../frontend/web/src/services/api/schema.ts | 144 +++++++++-- .../configs/test_wan_gguf_config.py | 242 ++++++++++++++++++ .../model_manager/load/test_wan_loader.py | 175 +++++++++++++ 8 files changed, 1002 insertions(+), 38 deletions(-) create mode 100644 tests/backend/model_manager/configs/test_wan_gguf_config.py create mode 100644 tests/backend/model_manager/load/test_wan_loader.py diff --git a/invokeai/app/invocations/wan_model_loader.py b/invokeai/app/invocations/wan_model_loader.py index 38a1d60705a..dc063d5ba21 100644 --- a/invokeai/app/invocations/wan_model_loader.py +++ b/invokeai/app/invocations/wan_model_loader.py @@ -46,17 +46,19 @@ class WanModelLoaderInvocation(BaseInvocation): Components can be mixed and matched, mirroring the Qwen Image loader pattern: - - Transformer(s) always come from the main model. For A14B that's both - ``transformer/`` (high-noise) and ``transformer_2/`` (low-noise); for - TI2V-5B it's the single ``transformer/``. + - Transformer(s): + * Diffusers main: emits ``transformer/`` and (for A14B) ``transformer_2/`` + from the same model record. + * GGUF main: emits the single GGUF as the primary transformer; for A14B + the second-expert GGUF must be wired to ``Transformer (Low Noise)``. - VAE: standalone Wan VAE > main (if Diffusers) > Component Source (Diffusers). - UMT5-XXL encoder: standalone Wan T5 encoder > main (if Diffusers) > Component Source (Diffusers). 
The Component Source slot lets users supply a Diffusers Wan main model purely for VAE / encoder extraction when the actual transformer is in a single-file - format (GGUF in Phase 4). Together, the standalone VAE + standalone encoder - let a GGUF transformer run without a full ~30 GB Diffusers install. + format. Together, the standalone VAE + standalone encoder let a GGUF + transformer run without a full ~30 GB Diffusers install. """ model: ModelIdentifierField = InputField( @@ -67,6 +69,19 @@ class WanModelLoaderInvocation(BaseInvocation): title="Transformer", ) + transformer_low_noise_model: Optional[ModelIdentifierField] = InputField( + default=None, + description="Optional second GGUF transformer for the A14B low-noise expert. " + "Only relevant when the main model is a single-file GGUF and the variant is A14B; " + "ignored when the main is a Diffusers A14B (both experts are pulled from " + "transformer/ and transformer_2/ already) or when the variant is TI2V-5B.", + input=Input.Direct, + ui_model_base=BaseModelType.Wan, + ui_model_type=ModelType.Main, + ui_model_format=ModelFormat.GGUFQuantized, + title="Transformer (Low Noise)", + ) + vae_model: Optional[ModelIdentifierField] = InputField( default=None, description="Standalone Wan VAE model. If not set, the VAE is loaded from the main model " @@ -100,21 +115,84 @@ class WanModelLoaderInvocation(BaseInvocation): def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: main_config = context.models.get_config(self.model) - main_is_diffusers = main_config.format == ModelFormat.Diffusers - - # Primary transformer: the high-noise expert for A14B, or the only - # transformer for TI2V-5B. - transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) - - # Dual-expert (A14B) wiring. The probe records ``has_dual_expert`` and - # the recorded ``boundary_ratio`` from model_index.json on the config. 
- transformer_low_noise = None - boundary_ratio = 0.875 # Sensible Wan A14B default; overridden by model config when present. - if getattr(main_config, "has_dual_expert", False): - transformer_low_noise = self.model.model_copy(update={"submodel_type": SubModelType.Transformer2}) - recorded = getattr(main_config, "boundary_ratio", None) - if recorded is not None: - boundary_ratio = float(recorded) + main_format = main_config.format + main_is_diffusers = main_format == ModelFormat.Diffusers + main_is_gguf = main_format == ModelFormat.GGUFQuantized + + # Resolve transformer + dual-expert wiring + boundary_ratio. + # + # Diffusers main: transformer/ is the primary, transformer_2/ is the + # low-noise expert (A14B only). boundary_ratio comes from the probed + # model_index.json. + # + # GGUF main: the file itself is one expert (high or low). For A14B, + # the user wires the other expert to transformer_low_noise_model. + # We swap so the *high*-noise expert is always the primary if needed. + # boundary_ratio falls back to 0.875 unless a Diffusers component_source + # provides a recorded value. 
+ boundary_ratio = 0.875 + transformer_low_noise: Optional[ModelIdentifierField] = None + + if main_is_diffusers: + transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + if getattr(main_config, "has_dual_expert", False): + transformer_low_noise = self.model.model_copy( + update={"submodel_type": SubModelType.Transformer2} + ) + recorded = getattr(main_config, "boundary_ratio", None) + if recorded is not None: + boundary_ratio = float(recorded) + elif main_is_gguf: + primary_expert = getattr(main_config, "expert", "none") + primary_id = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) + + if self.transformer_low_noise_model is not None: + low_config = context.models.get_config(self.transformer_low_noise_model) + if low_config.format != ModelFormat.GGUFQuantized: + raise ValueError( + f"'Transformer (Low Noise)' must be a GGUF-format Wan model. " + f"'{low_config.name}' is in {low_config.format.value} format." + ) + low_id = self.transformer_low_noise_model.model_copy( + update={"submodel_type": SubModelType.Transformer} + ) + low_expert = getattr(low_config, "expert", "none") + + # Make sure 'transformer' is the high-noise expert and + # 'transformer_low_noise' is the low-noise expert. If the user + # accidentally swapped them, swap back. + if primary_expert == "low" and low_expert == "high": + transformer = low_id + transformer_low_noise = primary_id + else: + transformer = primary_id + transformer_low_noise = low_id + else: + transformer = primary_id + # A14B without a paired low-noise GGUF will produce degraded + # quality (only the high-noise expert runs). Warn but don't + # abort — TI2V-5B GGUFs are single-expert and totally fine. + if ( + getattr(main_config, "variant", None) + and main_config.variant.value == "t2v_a14b" + ): + context.logger.warning( + "A14B GGUF main was provided without a paired 'Transformer (Low Noise)'. " + "Only the high-noise expert will run; image quality will be reduced." 
+ ) + + # Borrow the boundary_ratio recorded on the optional Diffusers + # component_source, when one is wired. + if self.component_source is not None: + src_cfg = context.models.get_config(self.component_source) + src_boundary = getattr(src_cfg, "boundary_ratio", None) + if src_boundary is not None: + boundary_ratio = float(src_boundary) + else: + raise ValueError( + f"Unsupported main model format for Wan: {main_format.value}. " + "Use a Diffusers folder or a GGUF single-file checkpoint." + ) # VAE: standalone override > main (if Diffusers) > component source. if self.vae_model is not None: diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index 8b0c35f98e8..a3f47f26dce 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -83,6 +83,7 @@ Main_GGUF_Flux2_Config, Main_GGUF_FLUX_Config, Main_GGUF_QwenImage_Config, + Main_GGUF_Wan_Config, Main_GGUF_ZImage_Config, MainModelDefaultSettings, ) @@ -197,6 +198,7 @@ Annotated[Main_GGUF_Flux2_Config, Main_GGUF_Flux2_Config.get_tag()], Annotated[Main_GGUF_FLUX_Config, Main_GGUF_FLUX_Config.get_tag()], Annotated[Main_GGUF_QwenImage_Config, Main_GGUF_QwenImage_Config.get_tag()], + Annotated[Main_GGUF_Wan_Config, Main_GGUF_Wan_Config.get_tag()], Annotated[Main_GGUF_ZImage_Config, Main_GGUF_ZImage_Config.get_tag()], # VAE - checkpoint format Annotated[VAE_Checkpoint_SD1_Config, VAE_Checkpoint_SD1_Config.get_tag()], diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py index 324e014dd10..b336f1d3860 100644 --- a/invokeai/backend/model_manager/configs/main.py +++ b/invokeai/backend/model_manager/configs/main.py @@ -1391,6 +1391,132 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) - return cls(**override_fields, variant=explicit_variant) +def _has_wan_keys(state_dict: dict[str | int, Any]) -> bool: + """Check if 
state dict contains Wan 2.2 transformer keys. + + Two layouts are accepted: + + * **Diffusers** (city96-style GGUF, Wan-AI/*-Diffusers safetensors): the text + projection is named ``condition_embedder.text_embedder.linear_1``. + * **Native upstream** (QuantStack-style GGUF, ComfyUI, Wan-AI's non-Diffusers + releases): the text projection is named ``text_embedding.0``. + + Both layouts share ``patch_embedding.weight`` as the input conv. Combined with + the text-projection fingerprint, this won't collide with FLUX + (``double_blocks/single_blocks``), Qwen Image (``txt_in/img_in``), Z-Image + (``cap_embedder``), or Anima (``llm_adapter``). + + Tolerates both bare keys and the ComfyUI ``model.diffusion_model.`` / + ``diffusion_model.`` prefixes. + """ + text_proj_options = ( + "condition_embedder.text_embedder.linear_1.weight", + "text_embedding.0.weight", + ) + prefixes = ("", "model.diffusion_model.", "diffusion_model.") + keys = state_dict.keys() + if not any((p + "patch_embedding.weight") in keys for p in prefixes): + return False + return any((p + needle) in keys for p in prefixes for needle in text_proj_options) + + +def _is_native_wan_layout(state_dict: dict[str | int, Any]) -> bool: + """True if the state dict uses the native upstream Wan key layout. + + Native layout uses ``text_embedding.0/2``, ``self_attn``/``cross_attn``, + ``ffn.0/2``, ``head.head``, ``head.modulation``, etc. — what ComfyUI and + QuantStack ship. Diffusers layout uses ``condition_embedder.*``, ``attn1``/ + ``attn2``, ``ffn.net.*``, ``proj_out``, ``scale_shift_table``. + """ + prefixes = ("", "model.diffusion_model.", "diffusion_model.") + keys = state_dict.keys() + return any((p + "text_embedding.0.weight") in keys for p in prefixes) + + +def _detect_wan_gguf_variant(state_dict: dict[str | int, Any]) -> WanVariantType | None: + """Determine A14B vs TI2V-5B from the GGUF state dict. 
+ + ``patch_embedding.weight`` has shape ``[inner_dim, in_channels, T, H, W]`` + where ``in_channels`` is the latent channel count: 16 for the standard Wan + VAE (A14B family) or 48 for Wan2.2-VAE (TI2V-5B). Returns None if the + tensor isn't found or the channel count is unrecognised. + """ + candidates = ( + "patch_embedding.weight", + "model.diffusion_model.patch_embedding.weight", + "diffusion_model.patch_embedding.weight", + ) + for key in candidates: + if key in state_dict: + tensor = state_dict[key] + shape = getattr(tensor, "tensor_shape", None) or getattr(tensor, "shape", None) + if shape is None or len(shape) < 2: + return None + in_channels = int(shape[1]) + if in_channels == 16: + return WanVariantType.T2V_A14B + if in_channels == 48: + return WanVariantType.TI2V_5B + return None + return None + + +def _detect_wan_gguf_expert(filename: str) -> Literal["high", "low", "none"]: + """Filename heuristic for the A14B dual-expert MoE. + + Community releases tag each expert in the filename — typically + ``high_noise`` / ``low_noise`` (or hyphenated/concatenated variants). + Returns 'none' when neither marker is present (single-expert model or + ambiguous filename). + """ + name = filename.lower() + if any(s in name for s in ("high_noise", "high-noise", "highnoise")): + return "high" + if any(s in name for s in ("low_noise", "low-noise", "lownoise")): + return "low" + return "none" + + +class Main_GGUF_Wan_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base): + """Model config for GGUF-quantized Wan 2.2 transformer models. + + A14B's MoE ships as two GGUF files (one per expert); ``expert`` records + which one this is so the model loader invocation can pair them. TI2V-5B + is a single-transformer model and stores ``expert='none'``. 
+ """ + + base: Literal[BaseModelType.Wan] = Field(default=BaseModelType.Wan) + format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized) + variant: WanVariantType = Field() + expert: Literal["high", "low", "none"] = Field( + default="none", + description="For Wan 2.2 A14B's dual-expert MoE: 'high' for the high-noise expert, " + "'low' for the low-noise expert. 'none' for single-transformer models (TI2V-5B).", + ) + + @classmethod + def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self: + raise_if_not_file(mod) + raise_for_override_fields(cls, override_fields) + + sd = mod.load_state_dict() + + if not _has_ggml_tensors(sd): + raise NotAMatchError("state dict does not look like GGUF quantized") + if not _has_wan_keys(sd): + raise NotAMatchError("state dict does not look like a Wan transformer") + + explicit_variant = override_fields.pop("variant", None) + variant = explicit_variant or _detect_wan_gguf_variant(sd) + if variant is None: + raise NotAMatchError("could not determine Wan variant from state dict") + + explicit_expert = override_fields.pop("expert", None) + expert = explicit_expert or _detect_wan_gguf_expert(mod.path.stem) + + return cls(**override_fields, variant=variant, expert=expert) + + class Main_Diffusers_Wan_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base): """Model config for Wan 2.2 diffusers models. 
diff --git a/invokeai/backend/model_manager/load/model_loaders/vae.py b/invokeai/backend/model_manager/load/model_loaders/vae.py index 75443cd2897..e68413c9af9 100644 --- a/invokeai/backend/model_manager/load/model_loaders/vae.py +++ b/invokeai/backend/model_manager/load/model_loaders/vae.py @@ -11,6 +11,7 @@ VAE_Checkpoint_Config_Base, VAE_Checkpoint_QwenImage_Config, VAE_Checkpoint_Wan_Config, + VAE_Diffusers_Wan_Config, ) from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader @@ -42,6 +43,8 @@ def _load_model( ) elif isinstance(config, VAE_Checkpoint_Wan_Config): return self._load_wan_vae(config) + elif isinstance(config, VAE_Diffusers_Wan_Config): + return self._load_wan_vae_diffusers(config) elif isinstance(config, VAE_Checkpoint_QwenImage_Config): return self._load_qwen_image_vae(config) elif isinstance(config, VAE_Checkpoint_Config_Base): @@ -80,6 +83,28 @@ def _load_wan_vae(self, config: VAE_Checkpoint_Wan_Config) -> AnyModel: model.eval() return model + def _load_wan_vae_diffusers(self, config: VAE_Diffusers_Wan_Config) -> AnyModel: + """Load a Wan 2.2 VAE from a flat diffusers folder (AutoencoderKLWan). + + The standalone install ``Wan-AI/Wan2.2-T2V-A14B-Diffusers::vae`` lands as a + single-class folder (``config.json`` + ``diffusion_pytorch_model.safetensors``, + no ``model_index.json``). The generic loader rejects this when a + ``submodel_type`` is requested — we always pass ``SubModelType.VAE`` from + the model loader invocation since that's how cached entries are keyed. + Loading ``AutoencoderKLWan`` directly here sidesteps the submodel check. + + Forces bfloat16 (same as ``WanDiffusersModel``) — fp16 is unstable on the + Wan VAE. 
+ """ + import torch + from diffusers.models.autoencoders.autoencoder_kl_wan import AutoencoderKLWan + + return AutoencoderKLWan.from_pretrained( + config.path, + torch_dtype=torch.bfloat16, + local_files_only=True, + ) + def _load_qwen_image_vae(self, config: VAE_Checkpoint_QwenImage_Config) -> AnyModel: """Load a Qwen Image VAE from a single safetensors file. diff --git a/invokeai/backend/model_manager/load/model_loaders/wan.py b/invokeai/backend/model_manager/load/model_loaders/wan.py index 42d8cb3e184..09065129f6d 100644 --- a/invokeai/backend/model_manager/load/model_loaders/wan.py +++ b/invokeai/backend/model_manager/load/model_loaders/wan.py @@ -15,6 +15,7 @@ from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Diffusers_Config_Base from invokeai.backend.model_manager.configs.factory import AnyModelConfig +from invokeai.backend.model_manager.configs.main import Main_GGUF_Wan_Config, _is_native_wan_layout from invokeai.backend.model_manager.load.load_default import ModelLoader from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader @@ -24,7 +25,12 @@ ModelFormat, ModelType, SubModelType, + WanVariantType, ) +from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor +from invokeai.backend.quantization.gguf.loaders import gguf_sd_loader +from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES +from invokeai.backend.util.devices import TorchDevice @ModelLoaderRegistry.register(base=BaseModelType.Wan, type=ModelType.Main, format=ModelFormat.Diffusers) @@ -83,6 +89,208 @@ def _load_model( return result +# Native (upstream) -> Diffusers key rename rules. +# +# Mirrors diffusers.loaders.single_file_utils.convert_wan_transformer_to_diffusers +# (T2V subset; we don't ship VACE / motion / face-adapter conversion). 
Order +# matters — `cross_attn`/`self_attn` must come before `.q. .k. .v. .o.` so the +# attention blocks are renamed before the projection suffix swap. The norm2/3 +# swap uses a placeholder to avoid collisions during the substring rewrite. +_WAN_NATIVE_TO_DIFFUSERS_RENAMES: tuple[tuple[str, str], ...] = ( + ("time_embedding.0", "condition_embedder.time_embedder.linear_1"), + ("time_embedding.2", "condition_embedder.time_embedder.linear_2"), + ("text_embedding.0", "condition_embedder.text_embedder.linear_1"), + ("text_embedding.2", "condition_embedder.text_embedder.linear_2"), + ("time_projection.1", "condition_embedder.time_proj"), + ("cross_attn", "attn2"), + ("self_attn", "attn1"), + (".o.", ".to_out.0."), + (".q.", ".to_q."), + (".k.", ".to_k."), + (".v.", ".to_v."), + (".k_img.", ".add_k_proj."), + (".v_img.", ".add_v_proj."), + (".norm_k_img.", ".norm_added_k."), + ("head.modulation", "scale_shift_table"), + ("head.head", "proj_out"), + ("modulation", "scale_shift_table"), + ("ffn.0", "ffn.net.0.proj"), + ("ffn.2", "ffn.net.2"), + # norm2 <-> norm3 swap via placeholder + ("norm2", "norm__placeholder"), + ("norm3", "norm2"), + ("norm__placeholder", "norm3"), + # I2V-only keys (harmless on T2V) + ("img_emb.proj.0", "condition_embedder.image_embedder.norm1"), + ("img_emb.proj.1", "condition_embedder.image_embedder.ff.net.0.proj"), + ("img_emb.proj.3", "condition_embedder.image_embedder.ff.net.2"), + ("img_emb.proj.4", "condition_embedder.image_embedder.norm2"), +) + + +def _convert_wan_native_to_diffusers(state_dict: dict) -> dict: + """Rename native upstream Wan keys (ComfyUI / QuantStack) to diffusers names. + + Pure substring replacement — no tensor manipulation — so it's safe to apply + to a dict of GGMLTensors. Returns a new dict; the input is not mutated. 
+ """ + converted: dict = {} + for key, value in state_dict.items(): + if not isinstance(key, str): + converted[key] = value + continue + new_key = key + for needle, replacement in _WAN_NATIVE_TO_DIFFUSERS_RENAMES: + new_key = new_key.replace(needle, replacement) + converted[new_key] = value + return converted + + +def _unwrap_unquantized_to_compute_dtype(state_dict: dict) -> dict: + """Replace non-quantized GGMLTensor entries with plain tensors at compute_dtype. + + Why: QuantStack-style GGUFs store biases (and other small tensors) as F16, + while Wan's ``patch_embedding`` is an ``nn.Conv3d``. ``conv3d`` isn't in + GGMLTensor's dispatch table, so PyTorch reads the wrapper's underlying F16 + storage directly and crashes against bf16 latents + (``Input type (c10::BFloat16) and bias type (c10::Half) should be the same``). + + For compatible qtypes (F16/F32/BF16) we just pre-cast to compute_dtype here — + they're not quantized, there's no benefit to keeping them wrapped, and + unwrapping them sidesteps the missing-op problem entirely. Genuinely + quantized tensors (Q4_K, Q6_K, etc.) stay wrapped — their on-demand + dequantization through the linear/addmm dispatch path still works. + """ + unwrapped: dict = {} + for key, value in state_dict.items(): + if ( + isinstance(value, GGMLTensor) + and value._ggml_quantization_type in TORCH_COMPATIBLE_QTYPES + ): + # GGMLTensor.get_dequantized_tensor() already casts to compute_dtype. + unwrapped[key] = value.get_dequantized_tensor() + else: + unwrapped[key] = value + return unwrapped + + +@ModelLoaderRegistry.register(base=BaseModelType.Wan, type=ModelType.Main, format=ModelFormat.GGUFQuantized) +class WanGGUFCheckpointModel(ModelLoader): + """Loader for GGUF-quantized Wan 2.2 transformer models. + + The community typically distributes Wan A14B as two files (one per expert + — high-noise + low-noise). Each file is loaded independently here; the + pairing happens at the WanModelLoaderInvocation layer. 
TI2V-5B ships as a + single file. + + Mirrors the QwenImage GGUF loader pattern: ``gguf_sd_loader`` -> strip the + ComfyUI ``model.diffusion_model.`` / ``diffusion_model.`` prefix if present + -> auto-detect arch from state-dict shapes -> ``init_empty_weights`` + + ``load_state_dict(strict=False, assign=True)``. + """ + + def _load_model( + self, + config: AnyModelConfig, + submodel_type: Optional[SubModelType] = None, + ) -> AnyModel: + if not isinstance(config, Main_GGUF_Wan_Config): + raise TypeError(f"Expected Main_GGUF_Wan_Config, got {type(config).__name__}.") + + if submodel_type != SubModelType.Transformer: + raise ValueError( + "Only the Transformer submodel is available from a GGUF Wan checkpoint. " + "Pair with a standalone Wan VAE and Wan T5 encoder for the other components." + ) + + return self._load_from_singlefile(config) + + def _load_from_singlefile(self, config: Main_GGUF_Wan_Config) -> AnyModel: + import accelerate + from diffusers import WanTransformer3DModel + + model_path = Path(config.path) + target_device = TorchDevice.choose_torch_device() + compute_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device) + + sd = gguf_sd_loader(model_path, compute_dtype=compute_dtype) + + # Strip ComfyUI-style prefixes if present. + for prefix in ("model.diffusion_model.", "diffusion_model."): + if any(isinstance(k, str) and k.startswith(prefix) for k in sd.keys()): + sd = { + (k[len(prefix):] if isinstance(k, str) and k.startswith(prefix) else k): v + for k, v in sd.items() + } + break + + # QuantStack and other community releases ship the native upstream Wan key + # layout (text_embedding.0, self_attn/cross_attn, ffn.0/2, head.head, ...); + # diffusers' WanTransformer3DModel expects condition_embedder.*, attn1/attn2, + # ffn.net.*, proj_out. Convert in place if needed. 
+ if _is_native_wan_layout(sd): + sd = _convert_wan_native_to_diffusers(sd) + + # Pre-cast non-quantized tensors (F16/F32/BF16 biases, scale_shift_table, + # patch_embedding.weight, etc.) to compute_dtype. This avoids dtype + # mismatches in conv3d at the input (patch_embedding is the only Conv3d + # in WanTransformer3DModel; conv3d isn't in GGMLTensor's dispatch table + # so the wrapper's underlying storage dtype reaches PyTorch directly). + sd = _unwrap_unquantized_to_compute_dtype(sd) + + # Auto-detect architecture from the state dict. + num_layers = 0 + for key in sd.keys(): + if isinstance(key, str) and key.startswith("blocks."): + parts = key.split(".") + if len(parts) >= 2: + try: + num_layers = max(num_layers, int(parts[1]) + 1) + except ValueError: + pass + + # Patch embedding gives us in_channels (16=A14B, 48=TI2V-5B) and inner dim. + patch_w = sd.get("patch_embedding.weight") + if patch_w is None: + raise RuntimeError("GGUF state dict missing patch_embedding.weight after prefix strip") + patch_shape = patch_w.tensor_shape if isinstance(patch_w, GGMLTensor) else patch_w.shape + inner_dim = int(patch_shape[0]) + in_channels = int(patch_shape[1]) + + # Wan uses head_dim=128 throughout the family; num_heads = inner_dim / 128. 
+ attention_head_dim = 128 + num_attention_heads = inner_dim // attention_head_dim + + ffn_w = sd.get("blocks.0.ffn.net.0.proj.weight") + if ffn_w is None: + raise RuntimeError("GGUF state dict missing blocks.0.ffn.net.0.proj.weight after prefix strip") + ffn_shape = ffn_w.tensor_shape if isinstance(ffn_w, GGMLTensor) else ffn_w.shape + ffn_dim = int(ffn_shape[0]) + + text_w = sd.get("condition_embedder.text_embedder.linear_1.weight") + text_dim = 4096 + if text_w is not None: + text_shape = text_w.tensor_shape if isinstance(text_w, GGMLTensor) else text_w.shape + text_dim = int(text_shape[1]) + + model_config: dict = { + "patch_size": (1, 2, 2), + "in_channels": in_channels, + "out_channels": in_channels, + "num_layers": num_layers if num_layers > 0 else (40 if config.variant == WanVariantType.T2V_A14B else 30), + "attention_head_dim": attention_head_dim, + "num_attention_heads": num_attention_heads, + "ffn_dim": ffn_dim, + "text_dim": text_dim, + } + + with accelerate.init_empty_weights(): + model = WanTransformer3DModel(**model_config) + + model.load_state_dict(sd, strict=False, assign=True) + return model + + @ModelLoaderRegistry.register( base=BaseModelType.Any, type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder ) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 22952097c8d..ca979819522 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -3558,7 +3558,7 @@ export type components = { */ type: "anima_text_encoder"; }; - AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | 
components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | 
components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | 
components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | 
components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * AppVersion * @description App Version Response @@ -21544,6 +21544,106 @@ export type components = { format: "gguf_quantized"; variant: components["schemas"]["QwenImageVariantType"] | null; }; + /** + * Main_GGUF_Wan_Config + * @description Model config for GGUF-quantized Wan 2.2 transformer models. + * + * A14B's MoE ships as two GGUF files (one per expert); ``expert`` records + * which one this is so the model loader invocation can pair them. TI2V-5B + * is a single-transformer model and stores ``expert='none'``. + */ + Main_GGUF_Wan_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). 
+ */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Type + * @default main + * @constant + */ + type: "main"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases: string[] | null; + /** @description Default settings for this model */ + default_settings: components["schemas"]["MainModelDefaultSettings"] | null; + /** + * Config Path + * @description Path to the config for this model, if any. + */ + config_path: string | null; + /** + * Base + * @default wan + * @constant + */ + base: "wan"; + /** + * Format + * @default gguf_quantized + * @constant + */ + format: "gguf_quantized"; + variant: components["schemas"]["WanVariantType"]; + /** + * Expert + * @description For Wan 2.2 A14B's dual-expert MoE: 'high' for the high-noise expert, 'low' for the low-noise expert. 'none' for single-transformer models (TI2V-5B). + * @default none + * @enum {string} + */ + expert: "high" | "low" | "none"; + }; /** * Main_GGUF_ZImage_Config * @description Model config for GGUF-quantized Z-Image transformer models. 
@@ -23461,7 +23561,7 @@ export type components = { * Config * @description The installed model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | 
components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | 
components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | 
components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; /** * ModelInstallDownloadProgressEvent @@ -23627,7 +23727,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. */ - config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | 
components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | 
components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; + config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | 
components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | 
components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -23713,7 +23813,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | 
components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | 
components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] 
| components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | 
components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | 
components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | 
components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -23734,7 +23834,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | 
components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | 
components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | 
components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | 
components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -23931,7 +24031,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | 
components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | 
components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; + models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | 
components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; }; /** * Multiply Integers @@ -31962,7 +32062,7 @@ export type components = { 
guidance_scale?: number; /** * Guidance Scale (Low Noise) - * @description Optional separate CFG scale for the low-noise expert (Wan 2.2 A14B only). If unset, the primary 'Guidance Scale' is reused. Ignored for TI2V-5B. + * @description Optional separate CFG scale for the low-noise expert (Wan 2.2 A14B only). Values below 1.0 (including 0) fall back to the primary 'Guidance Scale'. Ignored for TI2V-5B. * @default null */ guidance_scale_low_noise?: number | null; @@ -32105,17 +32205,19 @@ export type components = { * * Components can be mixed and matched, mirroring the Qwen Image loader pattern: * - * - Transformer(s) always come from the main model. For A14B that's both - * ``transformer/`` (high-noise) and ``transformer_2/`` (low-noise); for - * TI2V-5B it's the single ``transformer/``. + * - Transformer(s): + * * Diffusers main: emits ``transformer/`` and (for A14B) ``transformer_2/`` + * from the same model record. + * * GGUF main: emits the single GGUF as the primary transformer; for A14B + * the second-expert GGUF must be wired to ``Transformer (Low Noise)``. * - VAE: standalone Wan VAE > main (if Diffusers) > Component Source (Diffusers). * - UMT5-XXL encoder: standalone Wan T5 encoder > main (if Diffusers) > * Component Source (Diffusers). * * The Component Source slot lets users supply a Diffusers Wan main model purely * for VAE / encoder extraction when the actual transformer is in a single-file - * format (GGUF in Phase 4). Together, the standalone VAE + standalone encoder - * let a GGUF transformer run without a full ~30 GB Diffusers install. + * format. Together, the standalone VAE + standalone encoder let a GGUF + * transformer run without a full ~30 GB Diffusers install. 
*/ WanModelLoaderInvocation: { /** @@ -32140,6 +32242,12 @@ export type components = { * @description Wan 2.2 model (Transformer) to load */ model: components["schemas"]["ModelIdentifierField"]; + /** + * Transformer (Low Noise) + * @description Optional second GGUF transformer for the A14B low-noise expert. Only relevant when the main model is a single-file GGUF and the variant is A14B; ignored when the main is a Diffusers A14B (both experts are pulled from transformer/ and transformer_2/ already) or when the variant is TI2V-5B. + * @default null + */ + transformer_low_noise_model?: components["schemas"]["ModelIdentifierField"] | null; /** * VAE * @description Standalone Wan VAE model. If not set, the VAE is loaded from the main model (when in Diffusers format) or from the Component Source. @@ -34040,7 +34148,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | 
components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | 
components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | 
components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | 
components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -34072,7 +34180,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | 
components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | 
components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | 
components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | 
components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -34122,7 +34230,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | 
components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | 
components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | 
components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | 
components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | 
components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -34227,7 +34335,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | 
components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | 
components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | 
components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | 
components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | 
components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -34298,7 +34406,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": 
false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | 
components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | 
components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | 
components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -35031,7 +35139,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | 
components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | 
components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | 
components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | 
components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | 
components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ diff --git a/tests/backend/model_manager/configs/test_wan_gguf_config.py b/tests/backend/model_manager/configs/test_wan_gguf_config.py new file mode 100644 index 00000000000..ca8ef671844 --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_gguf_config.py @@ -0,0 +1,242 @@ +"""Tests for the GGUF Wan probe (Main_GGUF_Wan_Config).""" + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import gguf +import pytest +import torch + +from 
invokeai.backend.model_manager.configs.identification_utils import NotAMatchError +from invokeai.backend.model_manager.configs.main import ( + Main_GGUF_Wan_Config, + _detect_wan_gguf_expert, + _detect_wan_gguf_variant, + _has_wan_keys, + _is_native_wan_layout, +) +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, WanVariantType +from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor + + +def _ggml(shape: tuple[int, ...]) -> GGMLTensor: + return GGMLTensor( + data=torch.zeros((1,), dtype=torch.uint8), + ggml_quantization_type=gguf.GGMLQuantizationType.Q4_0, + tensor_shape=torch.Size(shape), + compute_dtype=torch.float32, + ) + + +def _wan_a14b_state_dict(prefix: str = "") -> dict: + """Synthetic Wan A14B GGUF state dict (16-channel patch embed).""" + return { + f"{prefix}patch_embedding.weight": _ggml((5120, 16, 1, 2, 2)), + f"{prefix}condition_embedder.text_embedder.linear_1.weight": _ggml((5120, 4096)), + f"{prefix}blocks.0.attn1.to_q.weight": _ggml((5120, 5120)), + f"{prefix}blocks.0.ffn.net.0.proj.weight": _ggml((13824, 5120)), + } + + +def _wan_ti2v_state_dict() -> dict: + """Synthetic Wan TI2V-5B GGUF state dict (48-channel patch embed).""" + return { + "patch_embedding.weight": _ggml((3072, 48, 1, 2, 2)), + "condition_embedder.text_embedder.linear_1.weight": _ggml((3072, 4096)), + "blocks.0.attn1.to_q.weight": _ggml((3072, 3072)), + "blocks.0.ffn.net.0.proj.weight": _ggml((14336, 3072)), + } + + +def _wan_a14b_native_state_dict() -> dict: + """Synthetic Wan A14B GGUF state dict using the native upstream key layout + (text_embedding/self_attn/cross_attn/ffn.0 — what QuantStack and ComfyUI ship).""" + return { + "patch_embedding.weight": _ggml((5120, 16, 1, 2, 2)), + "text_embedding.0.weight": _ggml((5120, 4096)), + "text_embedding.2.weight": _ggml((5120, 5120)), + "blocks.0.self_attn.q.weight": _ggml((5120, 5120)), + "blocks.0.cross_attn.q.weight": _ggml((5120, 5120)), + "blocks.0.ffn.0.weight": _ggml((13824, 
5120)), + "blocks.0.modulation": _ggml((1, 6, 5120)), + "head.head.weight": _ggml((64, 5120)), + "head.modulation": _ggml((1, 2, 5120)), + } + + +def _build_overrides(model_path: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(model_path), + "file_size": 0, + "name": name, + "source": str(model_path), + "source_type": "path", + } + + +def _make_mod(path: Path, sd: dict) -> MagicMock: + mod = MagicMock() + mod.path = path + mod.load_state_dict.return_value = sd + return mod + + +class TestKeyFingerprint: + def test_recognises_bare_keys(self): + assert _has_wan_keys(_wan_ti2v_state_dict()) is True + + def test_recognises_comfyui_prefix(self): + assert _has_wan_keys(_wan_a14b_state_dict(prefix="model.diffusion_model.")) is True + + def test_recognises_diffusion_model_prefix(self): + assert _has_wan_keys(_wan_a14b_state_dict(prefix="diffusion_model.")) is True + + def test_recognises_native_upstream_layout(self): + assert _has_wan_keys(_wan_a14b_native_state_dict()) is True + + def test_rejects_qwen_image(self): + sd = {"txt_in.weight": _ggml((1, 1)), "img_in.weight": _ggml((1, 1))} + assert _has_wan_keys(sd) is False + + def test_rejects_flux(self): + sd = {"double_blocks.0.img_attn.proj.weight": _ggml((1, 1))} + assert _has_wan_keys(sd) is False + + +class TestNativeLayoutDetection: + def test_native_a14b(self): + assert _is_native_wan_layout(_wan_a14b_native_state_dict()) is True + + def test_diffusers_a14b_is_not_native(self): + assert _is_native_wan_layout(_wan_a14b_state_dict()) is False + + def test_diffusers_ti2v_is_not_native(self): + assert _is_native_wan_layout(_wan_ti2v_state_dict()) is False + + +class TestVariantDetection: + def test_a14b_from_16ch(self): + sd = _wan_a14b_state_dict() + assert _detect_wan_gguf_variant(sd) == WanVariantType.T2V_A14B + + def test_ti2v_from_48ch(self): + sd = _wan_ti2v_state_dict() + assert _detect_wan_gguf_variant(sd) == WanVariantType.TI2V_5B + + def test_unknown_channel_count_returns_none(self): 
+ sd = {"patch_embedding.weight": _ggml((1, 32, 1, 2, 2))} + assert _detect_wan_gguf_variant(sd) is None + + def test_missing_patch_embedding_returns_none(self): + sd = {"blocks.0.attn1.to_q.weight": _ggml((1, 1))} + assert _detect_wan_gguf_variant(sd) is None + + +class TestExpertFilenameHeuristic: + @pytest.mark.parametrize( + "name, expected", + [ + ("wan2.2-t2v-a14b-high_noise-Q4_K_M", "high"), + ("Wan2.2-T2V-A14B-High-Noise-Q4_K_M", "high"), + ("wan_a14b_highnoise_q4", "high"), + ("wan2.2-t2v-a14b-low_noise-Q4_K_M", "low"), + ("Wan2.2-A14B-LowNoise-Q4", "low"), + ("wan2.2-ti2v-5b-Q4_K_M", "none"), + ("wan-A14B-flagship", "none"), + ], + ) + def test_filename_heuristic(self, name: str, expected: str): + assert _detect_wan_gguf_expert(name) == expected + + +class TestProbe: + def test_a14b_high_noise_filename(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan2.2-t2v-a14b-high_noise-Q4_K_M.gguf" + f.touch() + + cfg = Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, _wan_a14b_state_dict()), + _build_overrides(f, "Wan A14B (high)"), + ) + assert cfg.base == BaseModelType.Wan + assert cfg.format == ModelFormat.GGUFQuantized + assert cfg.variant == WanVariantType.T2V_A14B + assert cfg.expert == "high" + + def test_a14b_low_noise_filename(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan2.2-t2v-a14b-low_noise-Q4_K_M.gguf" + f.touch() + + cfg = Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, _wan_a14b_state_dict()), + _build_overrides(f, "Wan A14B (low)"), + ) + assert cfg.expert == "low" + + def test_ti2v_5b_unambiguous(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan2.2-ti2v-5b-Q4_K_M.gguf" + f.touch() + + cfg = Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, _wan_ti2v_state_dict()), + _build_overrides(f, "Wan TI2V-5B"), + ) + assert cfg.variant == WanVariantType.TI2V_5B + assert cfg.expert == "none" + + def test_rejects_non_gguf(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / 
"wan-a14b.safetensors" + f.touch() + sd = {"patch_embedding.weight": torch.zeros(5120, 16, 1, 2, 2)} # NOT a GGMLTensor + + with pytest.raises(NotAMatchError, match="GGUF"): + Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, sd), + _build_overrides(f, "non-gguf"), + ) + + def test_rejects_unrecognised_state_dict(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "junk.gguf" + f.touch() + sd = {"random.key": _ggml((1, 1))} + + with pytest.raises(NotAMatchError, match="Wan transformer"): + Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, sd), + _build_overrides(f, "junk"), + ) + + def test_native_upstream_a14b_high_noise(self): + """QuantStack-style GGUF: native upstream keys + HighNoise filename.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "Wan2.2-T2V-A14B-HighNoise-Q4_K_M.gguf" + f.touch() + + cfg = Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, _wan_a14b_native_state_dict()), + _build_overrides(f, "Wan A14B QuantStack (high)"), + ) + assert cfg.base == BaseModelType.Wan + assert cfg.format == ModelFormat.GGUFQuantized + assert cfg.variant == WanVariantType.T2V_A14B + assert cfg.expert == "high" + + def test_explicit_expert_override(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan-a14b-flagship.gguf" + f.touch() + overrides = _build_overrides(f, "user-tagged") + overrides["expert"] = "low" + + cfg = Main_GGUF_Wan_Config.from_model_on_disk( + _make_mod(f, _wan_a14b_state_dict()), + overrides, + ) + assert cfg.expert == "low" diff --git a/tests/backend/model_manager/load/test_wan_loader.py b/tests/backend/model_manager/load/test_wan_loader.py new file mode 100644 index 00000000000..31d30522446 --- /dev/null +++ b/tests/backend/model_manager/load/test_wan_loader.py @@ -0,0 +1,175 @@ +"""Tests for Wan loader helpers (native -> diffusers key conversion).""" + +import gguf +import torch + +from invokeai.backend.model_manager.load.model_loaders.wan import ( + _convert_wan_native_to_diffusers, + 
_unwrap_unquantized_to_compute_dtype, +) +from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor + + +def test_converts_text_and_time_embedders(): + sd = { + "text_embedding.0.weight": "a", + "text_embedding.0.bias": "b", + "text_embedding.2.weight": "c", + "time_embedding.0.weight": "d", + "time_embedding.2.weight": "e", + "time_projection.1.weight": "f", + } + out = _convert_wan_native_to_diffusers(sd) + assert "condition_embedder.text_embedder.linear_1.weight" in out + assert "condition_embedder.text_embedder.linear_1.bias" in out + assert "condition_embedder.text_embedder.linear_2.weight" in out + assert "condition_embedder.time_embedder.linear_1.weight" in out + assert "condition_embedder.time_embedder.linear_2.weight" in out + assert "condition_embedder.time_proj.weight" in out + + +def test_converts_attention_blocks(): + sd = { + "blocks.0.self_attn.q.weight": 1, + "blocks.0.self_attn.k.weight": 2, + "blocks.0.self_attn.v.weight": 3, + "blocks.0.self_attn.o.weight": 4, + "blocks.0.self_attn.norm_q.weight": 5, + "blocks.0.self_attn.norm_k.weight": 6, + "blocks.0.cross_attn.q.weight": 7, + "blocks.0.cross_attn.k.weight": 8, + "blocks.0.cross_attn.v.weight": 9, + "blocks.0.cross_attn.o.weight": 10, + } + out = _convert_wan_native_to_diffusers(sd) + assert "blocks.0.attn1.to_q.weight" in out + assert "blocks.0.attn1.to_k.weight" in out + assert "blocks.0.attn1.to_v.weight" in out + assert "blocks.0.attn1.to_out.0.weight" in out + assert "blocks.0.attn1.norm_q.weight" in out + assert "blocks.0.attn1.norm_k.weight" in out + assert "blocks.0.attn2.to_q.weight" in out + assert "blocks.0.attn2.to_out.0.weight" in out + + +def test_converts_ffn_and_modulation(): + sd = { + "blocks.0.ffn.0.weight": 1, + "blocks.0.ffn.0.bias": 2, + "blocks.0.ffn.2.weight": 3, + "blocks.0.modulation": 4, + } + out = _convert_wan_native_to_diffusers(sd) + assert "blocks.0.ffn.net.0.proj.weight" in out + assert "blocks.0.ffn.net.0.proj.bias" in out + assert 
"blocks.0.ffn.net.2.weight" in out + assert "blocks.0.scale_shift_table" in out + + +def test_swaps_norm2_and_norm3(): + """Native norm3 has params (cross-attn norm in diffusers norm2 slot) + while native norm2 is the elementwise-affine-False norm. The swap + via placeholder must not collide.""" + sd = { + "blocks.0.norm2.weight": "native_norm2", + "blocks.0.norm3.weight": "native_norm3", + } + out = _convert_wan_native_to_diffusers(sd) + assert out["blocks.0.norm3.weight"] == "native_norm2" + assert out["blocks.0.norm2.weight"] == "native_norm3" + + +def test_converts_head_keys(): + sd = { + "head.head.weight": 1, + "head.head.bias": 2, + "head.modulation": 3, + } + out = _convert_wan_native_to_diffusers(sd) + assert "proj_out.weight" in out + assert "proj_out.bias" in out + assert "scale_shift_table" in out + + +def test_diffusers_keys_pass_through_unchanged(): + """If a state dict is already in diffusers form, the substring rules + must be no-ops — none of the native fingerprints are present.""" + sd = { + "patch_embedding.weight": 1, + "condition_embedder.text_embedder.linear_1.weight": 2, + "blocks.0.attn1.to_q.weight": 3, + "blocks.0.ffn.net.0.proj.weight": 4, + "scale_shift_table": 5, + "proj_out.weight": 6, + } + out = _convert_wan_native_to_diffusers(sd) + assert set(out.keys()) == set(sd.keys()) + assert all(out[k] == sd[k] for k in sd) + + +def test_does_not_mutate_input(): + sd = {"text_embedding.0.weight": 1} + snapshot = dict(sd) + _convert_wan_native_to_diffusers(sd) + assert sd == snapshot + + +def test_non_string_keys_pass_through(): + sd = {0: "ignored", "text_embedding.0.weight": "renamed"} + out = _convert_wan_native_to_diffusers(sd) + assert out[0] == "ignored" + assert "condition_embedder.text_embedder.linear_1.weight" in out + + +def _ggml(data: torch.Tensor, qtype: gguf.GGMLQuantizationType, compute_dtype: torch.dtype) -> GGMLTensor: + return GGMLTensor( + data=data, + ggml_quantization_type=qtype, + tensor_shape=data.shape, + 
compute_dtype=compute_dtype, + ) + + +class TestUnwrapUnquantized: + """The QuantStack GGUFs store ``patch_embedding.bias`` as F16 while latents + flow through the model as bf16. Conv3d isn't in GGMLTensor's dispatch table, + so without unwrapping the F16 wrapper goes into conv3d as-is and crashes + with ``Input type (c10::BFloat16) and bias type (c10::Half) should be the same``. + These tests guard the unwrap step that prevents that.""" + + def test_f16_compatible_qtype_is_unwrapped_and_cast(self): + # F16 storage that should become bf16 plain tensor. + f16_data = torch.zeros((4,), dtype=torch.float16) + sd = {"bias": _ggml(f16_data, gguf.GGMLQuantizationType.F16, torch.bfloat16)} + out = _unwrap_unquantized_to_compute_dtype(sd) + + result = out["bias"] + assert not isinstance(result, GGMLTensor) + assert result.dtype == torch.bfloat16 + + def test_f32_compatible_qtype_is_unwrapped_and_cast(self): + # patch_embedding.weight in QuantStack is F32 — same path. + f32_data = torch.zeros((4,), dtype=torch.float32) + sd = {"weight": _ggml(f32_data, gguf.GGMLQuantizationType.F32, torch.bfloat16)} + out = _unwrap_unquantized_to_compute_dtype(sd) + + result = out["weight"] + assert not isinstance(result, GGMLTensor) + assert result.dtype == torch.bfloat16 + + def test_quantized_tensor_stays_wrapped(self): + # Q4_K and friends must remain GGMLTensor so on-demand dequant works + # via the linear/addmm dispatch path. The byte storage shape is fake + # but irrelevant for this test. 
+ q4_data = torch.zeros((1,), dtype=torch.uint8) + sd = {"linear.weight": _ggml(q4_data, gguf.GGMLQuantizationType.Q4_K, torch.bfloat16)} + out = _unwrap_unquantized_to_compute_dtype(sd) + + assert isinstance(out["linear.weight"], GGMLTensor) + assert out["linear.weight"]._ggml_quantization_type == gguf.GGMLQuantizationType.Q4_K + + def test_plain_torch_tensor_passes_through(self): + plain = torch.zeros((4,), dtype=torch.bfloat16) + sd = {"plain": plain} + out = _unwrap_unquantized_to_compute_dtype(sd) + assert out["plain"] is plain From 11b7eb307ce94952055e0c2c063e6c1ffdfcba61 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 10 May 2026 18:42:51 -0400 Subject: [PATCH 04/12] feat(model): Wan 2.2 Phase 5 - LoRA support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probe + config (LoRA_LyCORIS_Wan_Config): - Detects Wan LoRAs in three layouts: diffusers PEFT, native upstream PEFT (ComfyUI), and Kohya (both naming variants). - Anti-pattern guards prevent collisions with Anima (Cosmos DiT q_proj convention), QwenImage (transformer_blocks), Flux (double/single blocks), and Z-Image (diffusion_model.layers). - Optional ``expert: "high" | "low" | None`` field; auto-detected from filename (high_noise / low_noise / hyphenated / concatenated variants). Key conversion (wan_lora_conversion_utils): - Native upstream keys (self_attn/cross_attn, ffn.0/2) -> diffusers (attn1/attn2, ffn.net.0.proj / ffn.net.2). - Strips ``transformer.``, ``diffusion_model.``, ``base_model.model.transformer.`` prefixes from PEFT-style keys. - Kohya layer names mapped through an explicit longest-match table. - Output paths use diffusers naming so the LayerPatcher can resolve them against WanTransformer3DModel parameter paths. Loader integration: - Adds BaseModelType.Wan branch to LoRALoader._load_model. Invocation nodes (wan_lora_loader.py): - WanLoRALoaderInvocation: single LoRA with auto/both/high/low target field. 
- WanLoRACollectionLoader: list of LoRAs, auto-routed by each LoRA's recorded expert tag. - Output WanLoRALoaderOutput carries the WanTransformerField with updated ``loras`` / ``loras_low_noise`` lists. Denoise integration: - _ExpertSwapper now manages both the model_on_device context and the LayerPatcher.apply_smart_model_patches context per expert. LoRA patches are entered after device load and exited before device release, with fresh iterators per swap. - GGUF (quantized) experts request sidecar patching so GGMLTensor weights aren't touched directly. - Low-noise expert falls back to the primary loras list when ``loras_low_noise`` is empty (matches WanTransformerField semantics). Tests: 81 new tests covering probe accept/reject across formats, anti-pattern guards on competing architectures, converter round-trips for all three layouts, invocation target resolution + routing + duplicate guards, and the _ExpertSwapper lifecycle (lora context opens/closes in the right order around the device swap, quantized flag forwards, no-LoRA path skips the patch context, re-entering the same label is a no-op). Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): probe Wan LoRA before Anima in the config union Native-PEFT Wan LoRAs (lightx2v's Lightning, most ComfyUI-trained Wan LoRAs) carry keys like ``diffusion_model.blocks.X.cross_attn.k.lora_A.weight``. Anima's probe matches on the bare ``cross_attn``/``self_attn`` substring — it does not require the Anima-specific ``_proj`` suffix nor any of the ``mlp``/``adaln_modulation`` Cosmos DiT markers — so these Wan LoRAs were classified as ``BaseModelType.Anima`` because Anima happened to run first. Reorder the LyCORIS section of ``AnyModelConfig`` so Wan probes first. Wan's probe is strictly more restrictive (it rejects Anima's ``_proj`` attention suffix via the anti-pattern guard added in the previous commit), so Anima LoRAs are still correctly classified after this reorder. 
Existing users with mis-tagged installs need to delete the affected LoRA records and reinstall. Adds two regression tests: a union-ordering assertion, and a sanity check that demonstrates Anima's probe *would* match Wan native keys if asked directly — pinning the constraint that motivates the ordering. Co-Authored-By: Claude Opus 4.7 (1M context) chore(i18n): add Wan2.2 T5 Encoder model-manager label The frontend source already references ``modelManager.wanT5Encoder``; the locale key was added with a casing typo (``want5Encoder``). Fix the key so the Wan T5 Encoder model type renders its display name correctly in the model manager UI. Co-Authored-By: Claude Opus 4.7 (1M context) --- invokeai/app/invocations/wan_denoise.py | 131 ++++++- invokeai/app/invocations/wan_lora_loader.py | 189 ++++++++++ .../backend/model_manager/configs/factory.py | 9 + .../backend/model_manager/configs/lora.py | 84 +++++ .../model_manager/load/model_loaders/lora.py | 5 + .../lora_conversions/wan_lora_constants.py | 115 ++++++ .../wan_lora_conversion_utils.py | 255 +++++++++++++ invokeai/frontend/web/public/locales/en.json | 1 + .../invocations/test_wan_expert_swapper.py | 270 ++++++++++++++ tests/app/invocations/test_wan_lora_loader.py | 244 +++++++++++++ .../configs/test_wan_lora_config.py | 344 ++++++++++++++++++ .../test_wan_lora_conversion_utils.py | 201 ++++++++++ 12 files changed, 1830 insertions(+), 18 deletions(-) create mode 100644 invokeai/app/invocations/wan_lora_loader.py create mode 100644 invokeai/backend/patches/lora_conversions/wan_lora_constants.py create mode 100644 invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py create mode 100644 tests/app/invocations/test_wan_expert_swapper.py create mode 100644 tests/app/invocations/test_wan_lora_loader.py create mode 100644 tests/backend/model_manager/configs/test_wan_lora_config.py create mode 100644 tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py diff --git 
a/invokeai/app/invocations/wan_denoise.py b/invokeai/app/invocations/wan_denoise.py index 787b0674718..0a95ae08a17 100644 --- a/invokeai/app/invocations/wan_denoise.py +++ b/invokeai/app/invocations/wan_denoise.py @@ -24,7 +24,7 @@ from contextlib import ExitStack from pathlib import Path -from typing import Any, Callable, Iterator, Optional +from typing import Any, Callable, Iterable, Iterator, Optional, Tuple import torch import torchvision.transforms as tv_transforms @@ -40,16 +40,25 @@ LatentsField, WanConditioningField, ) -from invokeai.app.invocations.model import WanTransformerField +from invokeai.app.invocations.model import LoRAField, WanTransformerField from invokeai.app.invocations.primitives import LatentsOutput from invokeai.app.services.shared.invocation_context import InvocationContext -from invokeai.backend.model_manager.taxonomy import BaseModelType, WanVariantType +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, WanVariantType +from invokeai.backend.patches.layer_patcher import LayerPatcher +from invokeai.backend.patches.lora_conversions.wan_lora_constants import WAN_LORA_TRANSFORMER_PREFIX +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState from invokeai.backend.stable_diffusion.diffusion.conditioning_data import WanConditioningInfo from invokeai.backend.util.devices import TorchDevice from invokeai.backend.wan.sampling_utils import get_spatial_scale_factor, make_noise +# Type alias: a factory that produces a fresh iterator of (LoRA patch, weight) +# pairs each time it is called. We need fresh iterators because the patcher +# consumes the iterator once per ``apply_smart_model_patches`` invocation, and +# the expert may be swapped (and re-entered) multiple times in a render. 
+LoRAIteratorFactory = Callable[[], Iterable[Tuple[ModelPatchRaw, float]]] + def _resolve_variant(context: InvocationContext, transformer_field: WanTransformerField) -> WanVariantType: """Look up the Wan variant from the main model config that produced this transformer.""" @@ -75,22 +84,43 @@ def _scheduler_path_for_transformer(context: InvocationContext, transformer_fiel class _ExpertSwapper: - """Manages GPU residency of one or two Wan transformer experts. + """Manages GPU residency and LoRA patching of one or two Wan transformer experts. Both experts are kept in the model cache (system RAM); only one is on device at a time. ``get(label)`` returns the model for the requested label, - swapping GPU residency when the label changes. The first ``get`` call also - enters the underlying ``model_on_device`` context for the requested expert. + swapping GPU residency when the label changes and applying that expert's + LoRA patches via ``LayerPatcher.apply_smart_model_patches``. + + Ordering on swap: exit the active expert's LoRA context (restores weights) + -> exit ``model_on_device`` (returns expert to RAM) -> enter the new + expert's device context -> apply the new expert's LoRAs. This mirrors the + pattern used by ``flux_denoise``/``anima_denoise`` but adds the extra + context layer needed for dual experts. 
""" HIGH = "high" LOW = "low" - def __init__(self, high_info: Any, low_info: Any | None) -> None: + def __init__( + self, + high_info: Any, + low_info: Any | None, + inference_dtype: torch.dtype, + high_lora_factory: LoRAIteratorFactory | None = None, + low_lora_factory: LoRAIteratorFactory | None = None, + high_is_quantized: bool = False, + low_is_quantized: bool = False, + ) -> None: self._high_info = high_info self._low_info = low_info + self._inference_dtype = inference_dtype + self._high_lora_factory = high_lora_factory + self._low_lora_factory = low_lora_factory + self._high_is_quantized = high_is_quantized + self._low_is_quantized = low_is_quantized self._active_label: str | None = None - self._active_ctx: Any | None = None + self._active_device_ctx: Any | None = None + self._active_lora_ctx: Any | None = None self._active_model: Any | None = None def get(self, label: str) -> Any: @@ -106,18 +136,44 @@ def get(self, label: str) -> Any: self._release() info = self._high_info if label == self.HIGH else self._low_info - ctx = info.model_on_device() - _cached, model = ctx.__enter__() + device_ctx = info.model_on_device() + cached_weights, model = device_ctx.__enter__() + + # Apply LoRA patches for this expert. GGUF transformers need sidecar + # patching since direct patching of GGMLTensors isn't supported. 
+ lora_factory = ( + self._high_lora_factory if label == self.HIGH else self._low_lora_factory + ) + is_quantized = ( + self._high_is_quantized if label == self.HIGH else self._low_is_quantized + ) + lora_ctx: Any | None = None + if lora_factory is not None: + lora_ctx = LayerPatcher.apply_smart_model_patches( + model=model, + patches=lora_factory(), + prefix=WAN_LORA_TRANSFORMER_PREFIX, + dtype=self._inference_dtype, + cached_weights=cached_weights, + force_sidecar_patching=is_quantized, + ) + lora_ctx.__enter__() + self._active_label = label - self._active_ctx = ctx + self._active_device_ctx = device_ctx + self._active_lora_ctx = lora_ctx self._active_model = model return model def _release(self) -> None: - if self._active_ctx is not None: - self._active_ctx.__exit__(None, None, None) + # LoRA context first so weights are restored before the model leaves GPU. + if self._active_lora_ctx is not None: + self._active_lora_ctx.__exit__(None, None, None) + if self._active_device_ctx is not None: + self._active_device_ctx.__exit__(None, None, None) self._active_label = None - self._active_ctx = None + self._active_device_ctx = None + self._active_lora_ctx = None self._active_model = None def close(self) -> None: @@ -319,8 +375,34 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: self.transformer.boundary_ratio * num_train_timesteps if low_info is not None else None ) + # LoRA wiring. The high-noise expert uses ``transformer.loras``; the + # low-noise expert uses ``transformer.loras_low_noise``, falling back + # to the primary list if empty (matches the WanTransformerField semantics). + # Quantized (GGUF) experts force sidecar patching so GGMLTensor weights + # aren't touched directly. 
+ high_loras = self.transformer.loras + low_loras = self.transformer.loras_low_noise or self.transformer.loras + high_is_quantized = high_info.config.format == ModelFormat.GGUFQuantized + low_is_quantized = ( + low_info.config.format == ModelFormat.GGUFQuantized if low_info is not None else False + ) + + def high_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: + return self._lora_iterator(context, high_loras) + + def low_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: + return self._lora_iterator(context, low_loras) + with ExitStack() as exit_stack: - swapper = _ExpertSwapper(high_info, low_info) + swapper = _ExpertSwapper( + high_info=high_info, + low_info=low_info, + inference_dtype=inference_dtype, + high_lora_factory=high_lora_factory if high_loras else None, + low_lora_factory=low_lora_factory if low_loras else None, + high_is_quantized=high_is_quantized, + low_is_quantized=low_is_quantized, + ) exit_stack.callback(swapper.close) for step_idx, t in enumerate(tqdm(timesteps, desc="Denoising (Wan 2.2)", total=total_steps)): @@ -463,6 +545,19 @@ def step_callback(state: PipelineIntermediateState) -> None: return step_callback - def _lora_iterator(self, context: InvocationContext) -> Iterator: - # Phase 5 will populate this with the actual LoRA application path. - return iter([]) + def _lora_iterator( + self, context: InvocationContext, loras: list[LoRAField] + ) -> Iterator[Tuple[ModelPatchRaw, float]]: + """Yield (ModelPatchRaw, weight) pairs for the given LoRA list. + + The caller passes either ``transformer.loras`` (high-noise expert) or + ``transformer.loras_low_noise`` (low-noise expert) — the fallback to + the primary list when low-noise is empty is handled at the call site. 
+ """ + for lora_field in loras: + lora_info = context.models.load(lora_field.lora) + assert isinstance(lora_info.model, ModelPatchRaw), ( + f"Wan LoRA model must be ModelPatchRaw, got {type(lora_info.model).__name__}" + ) + yield (lora_info.model, lora_field.weight) + del lora_info diff --git a/invokeai/app/invocations/wan_lora_loader.py b/invokeai/app/invocations/wan_lora_loader.py new file mode 100644 index 00000000000..4cf6e1dfe2b --- /dev/null +++ b/invokeai/app/invocations/wan_lora_loader.py @@ -0,0 +1,189 @@ +from typing import Literal, Optional + +from invokeai.app.invocations.baseinvocation import ( + BaseInvocation, + BaseInvocationOutput, + Classification, + invocation, + invocation_output, +) +from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField +from invokeai.app.invocations.model import LoRAField, ModelIdentifierField, WanTransformerField +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType + + +# Target option for routing a LoRA to one or both Wan A14B expert lists. +# +# - ``auto``: read the LoRA config's ``expert`` field (set by the probe / from +# filename). ``"high"`` -> primary list only, ``"low"`` -> low-noise list +# only, ``None`` -> both lists. +# - ``both``: append to both lists regardless of the config. +# - ``high``: append only to the primary list (high-noise expert). +# - ``low``: append only to the low-noise list (low-noise expert). 
+WanLoRATarget = Literal["auto", "both", "high", "low"] + + +def _resolve_target(target: WanLoRATarget, lora_expert: str | None) -> tuple[bool, bool]: + """Return (apply_to_primary, apply_to_low_noise) based on the requested + target and the LoRA's recorded expert tag.""" + if target == "both": + return True, True + if target == "high": + return True, False + if target == "low": + return False, True + # auto + if lora_expert == "high": + return True, False + if lora_expert == "low": + return False, True + return True, True + + +@invocation_output("wan_lora_loader_output") +class WanLoRALoaderOutput(BaseInvocationOutput): + """Wan 2.2 LoRA loader output.""" + + transformer: Optional[WanTransformerField] = OutputField( + default=None, description=FieldDescriptions.transformer, title="Wan Transformer" + ) + + +@invocation( + "wan_lora_loader", + title="Apply LoRA - Wan 2.2", + tags=["lora", "model", "wan"], + category="model", + version="1.0.0", + classification=Classification.Prototype, +) +class WanLoRALoaderInvocation(BaseInvocation): + """Apply a LoRA to the Wan 2.2 transformer(s). + + For A14B (dual expert) the LoRA's recorded ``expert`` field determines + which expert list it lands in: ``"high"`` -> primary list, ``"low"`` -> + low-noise list, ``None`` (untagged) -> both lists. Use the ``target`` + field to override. + + For TI2V-5B (single transformer) only the primary list is used at denoise + time; the low-noise routing is harmless but ignored. + """ + + lora: ModelIdentifierField = InputField( + description=FieldDescriptions.lora_model, + title="LoRA", + ui_model_base=BaseModelType.Wan, + ui_model_type=ModelType.LoRA, + ) + weight: float = InputField(default=0.75, description=FieldDescriptions.lora_weight) + target: WanLoRATarget = InputField( + default="auto", + description="Which expert(s) to apply this LoRA to. 
'auto' uses the LoRA's " + "recorded expert tag (or both if untagged); 'both'/'high'/'low' override it.", + ) + transformer: WanTransformerField | None = InputField( + default=None, + description=FieldDescriptions.transformer, + input=Input.Connection, + title="Wan Transformer", + ) + + def invoke(self, context: InvocationContext) -> WanLoRALoaderOutput: + lora_key = self.lora.key + + if not context.models.exists(lora_key): + raise ValueError(f"Unknown lora: {lora_key}!") + + output = WanLoRALoaderOutput() + if self.transformer is None: + return output + + lora_config = context.models.get_config(self.lora) + lora_expert = getattr(lora_config, "expert", None) + to_primary, to_low_noise = _resolve_target(self.target, lora_expert) + + # Reject duplicates on whichever list(s) we're about to append to. + if to_primary and any(item.lora.key == lora_key for item in self.transformer.loras): + raise ValueError(f'LoRA "{lora_key}" already applied to primary transformer list.') + if to_low_noise and any(item.lora.key == lora_key for item in self.transformer.loras_low_noise): + raise ValueError(f'LoRA "{lora_key}" already applied to low-noise transformer list.') + + output.transformer = self.transformer.model_copy(deep=True) + new_lora = LoRAField(lora=self.lora, weight=self.weight) + if to_primary: + output.transformer.loras.append(new_lora) + if to_low_noise: + output.transformer.loras_low_noise.append(new_lora) + + return output + + +@invocation( + "wan_lora_collection_loader", + title="Apply LoRA Collection - Wan 2.2", + tags=["lora", "model", "wan"], + category="model", + version="1.0.0", + classification=Classification.Prototype, +) +class WanLoRACollectionLoader(BaseInvocation): + """Apply a collection of LoRAs to the Wan 2.2 transformer(s). + + Each LoRA is routed to the primary and/or low-noise list based on its + recorded ``expert`` tag (set by the probe from the filename). Untagged + LoRAs go to both lists. 
+ """ + + loras: Optional[LoRAField | list[LoRAField]] = InputField( + default=None, + description="LoRAs to apply. May be a single LoRA or a collection.", + title="LoRAs", + ) + transformer: Optional[WanTransformerField] = InputField( + default=None, + description=FieldDescriptions.transformer, + input=Input.Connection, + title="Wan Transformer", + ) + + def invoke(self, context: InvocationContext) -> WanLoRALoaderOutput: + output = WanLoRALoaderOutput() + + if self.transformer is None: + return output + + output.transformer = self.transformer.model_copy(deep=True) + + if self.loras is None: + return output + + loras = self.loras if isinstance(self.loras, list) else [self.loras] + added: set[str] = set() + + for lora in loras: + if lora is None or lora.lora.key in added: + continue + + if not context.models.exists(lora.lora.key): + raise ValueError(f"Unknown lora: {lora.lora.key}!") + + if lora.lora.base is not BaseModelType.Wan: + raise ValueError( + f"LoRA '{lora.lora.key}' is for " + f"{lora.lora.base.value if lora.lora.base else 'unknown'} models, " + "not Wan 2.2." 
+ ) + + lora_config = context.models.get_config(lora.lora) + lora_expert = getattr(lora_config, "expert", None) + to_primary, to_low_noise = _resolve_target("auto", lora_expert) + + added.add(lora.lora.key) + + if to_primary: + output.transformer.loras.append(lora) + if to_low_noise: + output.transformer.loras_low_noise.append(lora) + + return output diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index a3f47f26dce..03dd31b5d1f 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -54,6 +54,7 @@ LoRA_LyCORIS_SD1_Config, LoRA_LyCORIS_SD2_Config, LoRA_LyCORIS_SDXL_Config, + LoRA_LyCORIS_Wan_Config, LoRA_LyCORIS_ZImage_Config, LoRA_OMI_FLUX_Config, LoRA_OMI_SDXL_Config, @@ -239,6 +240,14 @@ Annotated[LoRA_LyCORIS_FLUX_Config, LoRA_LyCORIS_FLUX_Config.get_tag()], Annotated[LoRA_LyCORIS_ZImage_Config, LoRA_LyCORIS_ZImage_Config.get_tag()], Annotated[LoRA_LyCORIS_QwenImage_Config, LoRA_LyCORIS_QwenImage_Config.get_tag()], + # Wan must come BEFORE Anima: Anima's probe only checks for the bare + # ``cross_attn``/``self_attn`` substring (it doesn't require Cosmos + # DiT's ``_proj`` suffix or ``mlp``/``adaln_modulation``), so a Wan + # native PEFT LoRA (``diffusion_model.blocks.X.cross_attn.k...``) + # would otherwise match Anima first. Wan's probe is strictly more + # restrictive — it rejects Anima's ``_proj`` suffix via the + # anti-pattern — so trying Wan first is safe for both directions. 
class LoRA_LyCORIS_Wan_Config(LoRA_LyCORIS_Config_Base, Config_Base):
    """Model config for Wan 2.2 LoRA models in LyCORIS format.

    ``expert`` records which half of the dual-expert A14B family a LoRA was trained
    against ("high" = high-noise expert, "low" = low-noise expert). ``None`` marks an
    expert-agnostic LoRA (TI2V-5B, or a community LoRA without a tag), which the
    loader invocations apply to both experts.
    """

    base: Literal[BaseModelType.Wan] = Field(default=BaseModelType.Wan)
    expert: Literal["high", "low"] | None = Field(
        default=None,
        description="For Wan 2.2 A14B dual-expert LoRAs: 'high' targets the high-noise expert, "
        "'low' targets the low-noise expert. None means the LoRA is expert-agnostic "
        "(TI2V-5B, or community LoRAs without explicit tagging) and is applied to both.",
    )

    @classmethod
    def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
        """Raise ``NotAMatchError`` unless the state dict looks like a Wan LoRA.

        A match needs all three of: Wan-shaped keys (Kohya or PEFT), at least one
        recognised LoRA weight suffix, and no key carrying another architecture's
        signature.
        """
        weights = mod.load_state_dict()
        key_names = [key for key in weights.keys() if isinstance(key, str)]

        looks_wan = has_wan_kohya_keys(key_names) or has_wan_peft_keys(key_names)
        looks_lora = state_dict_has_any_keys_ending_with(
            weights,
            {
                "lora_A.weight",
                "lora_B.weight",
                "lora_down.weight",
                "lora_up.weight",
                "dora_scale",
                ".lokr_w1",
                ".lokr_w2",
            },
        )

        # The anti-pattern check stops a LoRA for another architecture that shares
        # the self_attn / cross_attn naming from being claimed as Wan.
        if not (looks_wan and looks_lora) or has_non_wan_architecture_keys(key_names):
            raise NotAMatchError("model does not match Wan LoRA heuristics")

    @classmethod
    def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
        """Return ``BaseModelType.Wan`` for Wan-shaped keys, else raise ``NotAMatchError``."""
        key_names = [key for key in mod.load_state_dict().keys() if isinstance(key, str)]

        looks_wan = has_wan_kohya_keys(key_names) or has_wan_peft_keys(key_names)
        if not looks_wan or has_non_wan_architecture_keys(key_names):
            raise NotAMatchError("model does not look like a Wan LoRA")

        return BaseModelType.Wan

    @classmethod
    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
        """Build the config via the base-class probe, then infer ``expert`` from the filename.

        The filename heuristic only runs when ``expert`` is still ``None`` after the
        base probe; the high-noise markers are checked before the low-noise ones.
        """
        config = super().from_model_on_disk(mod, override_fields)
        if config.expert is not None:
            return config

        stem = mod.path.stem.lower()
        names_high = any(marker in stem for marker in ("high_noise", "high-noise", "highnoise"))
        names_low = any(marker in stem for marker in ("low_noise", "low-noise", "lownoise"))

        if names_high:
            config.expert = "high"
        elif names_low:
            config.expert = "low"

        return config
# Wan 2.2 LoRA prefix constants and key-shape detection helpers.
#
# Wan LoRAs come in three shapes in the wild:
#
# 1. **Diffusers PEFT** (HF naming), with or without a "transformer." prefix:
#       blocks.0.attn1.to_q.lora_A.weight
#       transformer.blocks.0.attn1.to_q.lora_A.weight
#
# 2. **Native upstream PEFT** (ComfyUI / Wan-AI checkpoint naming) with a
#    "diffusion_model." or "transformer." prefix:
#       diffusion_model.blocks.0.self_attn.q.lora_A.weight
#       transformer.blocks.0.cross_attn.k.lora_A.weight
#
# 3. **Kohya**, with the standard ``lora_unet_blocks_<idx>_<submodule>`` shape,
#    in either diffusers naming (``attn1_to_q``) or native naming (``self_attn_q``):
#       lora_unet_blocks_0_attn1_to_q.lora_down.weight
#       lora_unet_blocks_0_self_attn_q.lora_down.weight
#
# The detection helpers below are shared with ``configs/lora.py`` so the probe
# and the conversion code agree on what counts as a Wan LoRA. This module only
# imports the standard library, which keeps it free of circular imports.

import re

# Prefix for Wan transformer LoRA layers in the ModelPatchRaw layer dict.
WAN_LORA_TRANSFORMER_PREFIX = "lora_transformer-"


# Diffusers Wan submodules: attn1/attn2 (self/cross attention) and ffn.net (FFN).
_WAN_DIFFUSERS_SUBMODULES = r"(attn1\.|attn2\.|ffn\.net\.)"

# Native upstream Wan submodules. ``self_attn`` / ``cross_attn`` also appear in
# Anima's naming, so only the bare ``q``/``k``/``v``/``o`` child (followed by a
# dot or end of key, i.e. no ``_proj`` tail) or the numbered ``ffn.<n>.`` layout
# counts as Wan.
_WAN_NATIVE_SUBMODULES = r"(self_attn\.[qkvo](\.|$)|cross_attn\.[qkvo](\.|$)|ffn\.\d+\.)"

# Anti-patterns: key fragments that mark a different architecture. If any key
# matches one of these, the LoRA is NOT treated as Wan.
#   - Anima/Cosmos: ``mlp`` / ``adaln_modulation`` directly under a block
_ANIMA_ANTI_RE = re.compile(r"blocks[\._]\d+[\._](mlp|adaln_modulation)")
#   - Anima/Cosmos attention: ``q_proj`` / ``k_proj`` / ``v_proj`` / ``output_proj``
#     under self_attn/cross_attn, in Kohya (``_``) or PEFT (``.``) form
_ANIMA_ATTN_ANTI_RE = re.compile(r"(self_attn|cross_attn)[\._]([qkv]_proj|output_proj)")
#   - QwenImage: ``transformer_blocks.<n>.``
_QWEN_ANTI_RE = re.compile(r"(^|\.)transformer_blocks\.\d+\.")
#   - FLUX: ``double_blocks`` / ``single_blocks`` / ``single_transformer_blocks``
_FLUX_ANTI_RE = re.compile(r"(^|\.|_)(double_blocks|single_blocks|single_transformer_blocks)[\._]\d+")
#   - Z-Image: ``diffusion_model.layers.<n>.``
_Z_IMAGE_ANTI_RE = re.compile(r"diffusion_model\.layers\.\d+\.")

# Every anti-pattern, in the order they are tried.
_NON_WAN_ANTI_PATTERNS = (
    _ANIMA_ANTI_RE,
    _ANIMA_ATTN_ANTI_RE,
    _QWEN_ANTI_RE,
    _FLUX_ANTI_RE,
    _Z_IMAGE_ANTI_RE,
)


# Kohya format: ``lora_unet_blocks_<idx>_`` followed by one of
#   attn1/attn2 children (to_q/to_k/to_v/to_out_0/norm_q/norm_k),
#   (self|cross)_attn_<X> where X is a single q/k/v/o letter, or
#   an ffn child (ffn_<n>, ffn_net_<n>, ffn_net_<n>_proj).
# The ``(?!_proj)`` lookahead is what actually rejects Anima's
# ``cross_attn_q_proj``: without it the ``_`` alternative that follows the
# letter would accept the ``_`` that starts ``_proj``.
_KOHYA_WAN_RE = re.compile(
    r"lora_unet_blocks_\d+_"
    r"(attn[12]_(to_[qkv]|to_out_0|norm_[qk])"
    r"|(self_attn|cross_attn)_[qkvo](?!_proj)(_|\.|$)"
    r"|ffn_(\d+|net_\d+_proj|net_\d+))"
)

# PEFT format: ``<prefix>blocks.<idx>.<submodule>``.
# Prefix may be empty, "transformer.", "diffusion_model.", or "base_model.model.transformer."
_PEFT_WAN_DIFFUSERS_RE = re.compile(
    r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\."
    + _WAN_DIFFUSERS_SUBMODULES
)
_PEFT_WAN_NATIVE_RE = re.compile(
    r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\."
    + _WAN_NATIVE_SUBMODULES
)


def has_wan_kohya_keys(str_keys: list[str]) -> bool:
    """Return True if any key is a Kohya-style key naming a Wan submodule.

    Recognised submodules are attn1/attn2, self_attn/cross_attn with a bare
    q/k/v/o child, and ffn. Anima-style ``*_attn_<x>_proj`` keys do not match.
    An empty key list yields False.
    """
    return any(_KOHYA_WAN_RE.search(key) for key in str_keys)


def has_wan_peft_keys(str_keys: list[str]) -> bool:
    """Return True if any key is a PEFT-style Wan key in diffusers or native layout.

    An empty key list yields False.
    """
    return any(
        _PEFT_WAN_DIFFUSERS_RE.search(key) or _PEFT_WAN_NATIVE_RE.search(key) for key in str_keys
    )


def has_non_wan_architecture_keys(str_keys: list[str]) -> bool:
    """Return True if any key carries a non-Wan signature (Anima, Qwen, Flux, Z-Image).

    Used as an exclusion guard: finding one of these patterns is grounds to
    reject the Wan probe. An empty key list yields False.
    """
    return any(pattern.search(key) for key in str_keys for pattern in _NON_WAN_ANTI_PATTERNS)
+# +# Longest-match-first ordering matters because some keys are prefixes of others +# (e.g. ``attn1_to_q`` vs ``attn1_to_out_0``). The lookup is exact (not prefix), +# so this is purely cosmetic, but kept consistent with QwenImage's convention. +_KOHYA_SUBMODULE_MAP: list[tuple[str, str]] = [ + # --- Diffusers naming --- + # Self-attention (attn1) + ("attn1_to_q", "attn1.to_q"), + ("attn1_to_k", "attn1.to_k"), + ("attn1_to_v", "attn1.to_v"), + ("attn1_to_out_0", "attn1.to_out.0"), + ("attn1_norm_q", "attn1.norm_q"), + ("attn1_norm_k", "attn1.norm_k"), + # Cross-attention (attn2) + ("attn2_to_q", "attn2.to_q"), + ("attn2_to_k", "attn2.to_k"), + ("attn2_to_v", "attn2.to_v"), + ("attn2_to_out_0", "attn2.to_out.0"), + ("attn2_norm_q", "attn2.norm_q"), + ("attn2_norm_k", "attn2.norm_k"), + # FFN diffusers + ("ffn_net_0_proj", "ffn.net.0.proj"), + ("ffn_net_2", "ffn.net.2"), + # --- Native naming (mapped onto diffusers paths) --- + # self_attn -> attn1 + ("self_attn_q", "attn1.to_q"), + ("self_attn_k", "attn1.to_k"), + ("self_attn_v", "attn1.to_v"), + ("self_attn_o", "attn1.to_out.0"), + ("self_attn_norm_q", "attn1.norm_q"), + ("self_attn_norm_k", "attn1.norm_k"), + # cross_attn -> attn2 + ("cross_attn_q", "attn2.to_q"), + ("cross_attn_k", "attn2.to_k"), + ("cross_attn_v", "attn2.to_v"), + ("cross_attn_o", "attn2.to_out.0"), + ("cross_attn_norm_q", "attn2.norm_q"), + ("cross_attn_norm_k", "attn2.norm_k"), + # FFN native + ("ffn_0", "ffn.net.0.proj"), + ("ffn_2", "ffn.net.2"), +] + + +# Layer-path rules used for PEFT-style keys: applied as substring replacements +# to the *layer path* (everything between an optional prefix and the LoRA suffix). +# Order matters — see ``convert_wan_transformer_to_diffusers`` in diffusers for +# the equivalent state-dict-key rules. We use trailing-dot semantics so e.g. +# ``.q.`` matches ``self_attn.q.something`` but not ``norm_q``. 
+# +# Paths are augmented with a sentinel trailing ``.`` before applying these +# rules so that bare endings like ``blocks.0.self_attn.q`` get rewritten as +# ``blocks.0.attn1.to_q``. +_NATIVE_TO_DIFFUSERS_PATH_RULES: tuple[tuple[str, str], ...] = ( + ("cross_attn.", "attn2."), + ("self_attn.", "attn1."), + (".o.", ".to_out.0."), + (".q.", ".to_q."), + (".k.", ".to_k."), + (".v.", ".to_v."), + ("ffn.0.", "ffn.net.0.proj."), + ("ffn.2.", "ffn.net.2."), +) + +# Prefixes seen on PEFT-style Wan LoRA keys. +_PEFT_PREFIXES_TO_STRIP: tuple[str, ...] = ( + "base_model.model.transformer.", + "transformer.", + "diffusion_model.", +) + + +def lora_model_from_wan_state_dict( + state_dict: Dict[str, torch.Tensor], alpha: float | None = None +) -> ModelPatchRaw: + """Convert any supported Wan LoRA state dict into a ``ModelPatchRaw``. + + Detects Kohya vs PEFT layouts and dispatches accordingly. Layer paths in + the returned patch use diffusers naming (``blocks.X.attn1.to_q``) prefixed + with ``WAN_LORA_TRANSFORMER_PREFIX`` so the runtime ``LayerPatcher`` can + match them against ``WanTransformer3DModel`` parameters. + """ + str_keys = [k for k in state_dict.keys() if isinstance(k, str)] + if has_wan_kohya_keys(str_keys): + return _convert_kohya_format(state_dict, alpha) + return _convert_peft_format(state_dict, alpha) + + +def _convert_kohya_format(state_dict: Dict[str, torch.Tensor], alpha: float | None) -> ModelPatchRaw: + """Convert a Kohya-format Wan LoRA state dict. + + Keys look like ``lora_unet_blocks__.{lora_down,lora_up,alpha}.weight``. + Unrecognised submodules are silently skipped (logged at conversion debug level + by the layer factory if needed). 
+ """ + layers: dict[str, BaseLayerPatch] = {} + grouped = _group_by_layer(state_dict) + + for kohya_layer, layer_dict in grouped.items(): + path = _kohya_layer_to_diffusers_path(kohya_layer) + if path is None: + continue + values = _normalize_lora_param_names(layer_dict, alpha) + layers[f"{WAN_LORA_TRANSFORMER_PREFIX}{path}"] = any_lora_layer_from_state_dict(values) + + return ModelPatchRaw(layers=layers) + + +def _convert_peft_format(state_dict: Dict[str, torch.Tensor], alpha: float | None) -> ModelPatchRaw: + """Convert a Diffusers-PEFT or native-PEFT Wan LoRA state dict.""" + layers: dict[str, BaseLayerPatch] = {} + grouped = _group_by_layer(state_dict) + + for raw_layer_key, layer_dict in grouped.items(): + stripped = _strip_peft_prefix(raw_layer_key) + path = _native_layer_path_to_diffusers(stripped) + if path is None: + continue + values = _normalize_lora_param_names(layer_dict, alpha) + layers[f"{WAN_LORA_TRANSFORMER_PREFIX}{path}"] = any_lora_layer_from_state_dict(values) + + return ModelPatchRaw(layers=layers) + + +def _kohya_layer_to_diffusers_path(kohya_layer: str) -> str | None: + """``lora_unet_blocks_0_self_attn_q`` -> ``blocks.0.attn1.to_q``.""" + m = _KOHYA_KEY_REGEX.match(kohya_layer) + if not m: + return None + block_idx = m.group(1) + sub = m.group(2) + for kohya_sub, diffusers_sub in _KOHYA_SUBMODULE_MAP: + if sub == kohya_sub: + return f"blocks.{block_idx}.{diffusers_sub}" + return None + + +def _strip_peft_prefix(layer_key: str) -> str: + """Strip ``transformer.``, ``diffusion_model.``, ``base_model.model.transformer.`` if present.""" + for prefix in _PEFT_PREFIXES_TO_STRIP: + if layer_key.startswith(prefix): + return layer_key[len(prefix):] + return layer_key + + +def _native_layer_path_to_diffusers(path: str) -> str | None: + """Rewrite a stripped PEFT layer path to diffusers naming. + + No-op if the path is already in diffusers form (contains attn1./attn2./ffn.net.). + Returns None only if the path can't be plausibly identified as Wan. 
+ """ + if not path.startswith("blocks."): + return None + + if "attn1." in path or "attn2." in path or "ffn.net." in path: + return path + + # Apply the native-to-diffusers replacements with a sentinel trailing dot + # so rules like ``.q.`` fire on a bare-ending ``...self_attn.q``. + augmented = path + "." + for needle, replacement in _NATIVE_TO_DIFFUSERS_PATH_RULES: + augmented = augmented.replace(needle, replacement) + return augmented.rstrip(".") + + +def _normalize_lora_param_names( + layer_dict: dict[str, torch.Tensor], alpha: float | None +) -> dict[str, torch.Tensor]: + """Map PEFT-style ``lora_A``/``lora_B`` to ``lora_down``/``lora_up``. + + Kohya-style ``lora_down``/``lora_up`` pass through unchanged. + """ + if "lora_A.weight" in layer_dict: + values: dict[str, torch.Tensor] = { + "lora_down.weight": layer_dict["lora_A.weight"], + "lora_up.weight": layer_dict["lora_B.weight"], + } + if alpha is not None: + values["alpha"] = torch.tensor(alpha) + if "alpha" in layer_dict: + values["alpha"] = layer_dict["alpha"] + if "dora_scale" in layer_dict: + values["dora_scale"] = layer_dict["dora_scale"] + return values + return layer_dict + + +def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]: + """Group state-dict keys by their layer path (everything before the LoRA-suffix tail).""" + grouped: dict[str, dict[str, torch.Tensor]] = {} + + known_suffixes = [ + ".lora_A.weight", + ".lora_B.weight", + ".lora_down.weight", + ".lora_up.weight", + ".dora_scale", + ".alpha", + ] + + for key in state_dict: + if not isinstance(key, str): + continue + + layer_name = None + key_name = None + for suffix in known_suffixes: + if key.endswith(suffix): + layer_name = key[: -len(suffix)] + key_name = suffix[1:] # drop leading dot + break + + if layer_name is None: + parts = key.rsplit(".", maxsplit=2) + layer_name = parts[0] + key_name = ".".join(parts[1:]) + + grouped.setdefault(layer_name, {})[key_name] = state_dict[key] + + return 
grouped diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 05edf886890..6685fe3bf2b 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -1367,6 +1367,7 @@ "qwenImageQuantizationNone": "None (bf16)", "qwenImageQuantizationInt8": "8-bit (int8)", "qwenImageQuantizationNf4": "4-bit (nf4)", + "wanT5Encoder": "Wan2.2 T5 Encoder", "upcastAttention": "Upcast Attention", "uploadImage": "Upload Image", "urlOrLocalPath": "URL or Local Path", diff --git a/tests/app/invocations/test_wan_expert_swapper.py b/tests/app/invocations/test_wan_expert_swapper.py new file mode 100644 index 00000000000..f8897b46343 --- /dev/null +++ b/tests/app/invocations/test_wan_expert_swapper.py @@ -0,0 +1,270 @@ +"""Tests for ``_ExpertSwapper``'s LoRA-context lifecycle. + +The swapper is responsible for entering and exiting both the +``model_on_device`` context and the ``LayerPatcher.apply_smart_model_patches`` +context in the right order across an expert swap: + + enter HIGH: enter device(HIGH) -> enter lora(HIGH) + swap: exit lora(HIGH) -> exit device(HIGH) + enter device(LOW) -> enter lora(LOW) + close: exit lora(LOW) -> exit device(LOW) + +These tests use a tiny ``nn.Linear`` standing in for each transformer expert +so we can verify the swapper hands back the right model and routes the right +LoRA factory at each step. +""" + +from typing import Iterable, Iterator, Tuple +from unittest.mock import MagicMock, patch + +import torch +import torch.nn as nn + +from invokeai.app.invocations.wan_denoise import _ExpertSwapper +from invokeai.backend.patches.model_patch_raw import ModelPatchRaw + + +class _FakeModelOnDevice: + """Minimal stand-in for the model-cache record's ``model_on_device`` context. 
# Dotted path of the patcher entry point that every test below replaces.
_PATCH_TARGET = "invokeai.app.invocations.wan_denoise.LayerPatcher.apply_smart_model_patches"


class _FakeModelOnDevice:
    """Context-manager stand-in for the object ``model_on_device()`` returns.

    Appends ``device-enter:<label>`` / ``device-exit:<label>`` to the shared log so
    tests can assert the order in which the swapper enters and leaves it.
    """

    def __init__(self, label: str, model: nn.Module, log: list[str]) -> None:
        self._label, self._model, self._log = label, model, log

    def __enter__(self):
        self._log.append(f"device-enter:{self._label}")
        # Two-tuple of (cached_weights, model); cached weights are not needed here.
        return (None, self._model)

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._log.append(f"device-exit:{self._label}")
        return False


class _FakeInfo:
    """Loaded-model stand-in exposing only ``model_on_device()``."""

    def __init__(self, label: str, model: nn.Module, log: list[str]) -> None:
        self._label, self._model, self._log = label, model, log

    def model_on_device(self):
        return _FakeModelOnDevice(self._label, self._model, self._log)


def _make_factory(log: list[str], label: str) -> "callable":
    """Return a zero-arg LoRA factory that logs ``lora-factory-call:<label>`` and yields nothing."""

    def factory() -> Iterable[Tuple[ModelPatchRaw, float]]:
        log.append(f"lora-factory-call:{label}")
        return iter([])

    return factory


def _stub_lora_context_manager(log: list[str]):
    """Build a replacement for ``LayerPatcher.apply_smart_model_patches``.

    Returns ``(factory, calls)``. ``factory`` has the patcher's call signature and
    returns a context manager that logs ``lora-enter`` / ``lora-exit``; ``calls``
    collects one dict per invocation with the model, prefix, dtype and
    ``force_sidecar_patching`` flag it received.
    """
    calls: list[dict] = []

    class _Stub:
        def __init__(self, model, patches, prefix, dtype, cached_weights, force_sidecar_patching):
            self.model = model
            self.patches = patches
            self.prefix = prefix
            self.dtype = dtype
            self.cached_weights = cached_weights
            self.force_sidecar_patching = force_sidecar_patching
            record = {
                "model": model,
                "prefix": prefix,
                "dtype": dtype,
                "force_sidecar_patching": force_sidecar_patching,
            }
            calls.append(record)

        def __enter__(self):
            log.append("lora-enter")
            # Drain the patch iterator so the tests can observe that it was consumed.
            for _ in self.patches:
                pass
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            log.append("lora-exit")
            return False

    def factory(model, patches, prefix, dtype, cached_weights, force_sidecar_patching=False):
        return _Stub(model, patches, prefix, dtype, cached_weights, force_sidecar_patching)

    return factory, calls


def test_lifecycle_high_only():
    """Only the HIGH expert is present: enter HIGH (device, then LoRA), then close."""
    events: list[str] = []
    expert = nn.Linear(1, 1)
    patcher_stub, patcher_calls = _stub_lora_context_manager(events)

    with patch(_PATCH_TARGET, side_effect=patcher_stub):
        swapper = _ExpertSwapper(
            high_info=_FakeInfo("HIGH", expert, events),
            low_info=None,
            inference_dtype=torch.bfloat16,
            high_lora_factory=_make_factory(events, "HIGH"),
            low_lora_factory=None,
        )
        assert swapper.get(_ExpertSwapper.HIGH) is expert
        swapper.close()

    assert events == [
        "device-enter:HIGH",
        "lora-factory-call:HIGH",
        "lora-enter",
        "lora-exit",
        "device-exit:HIGH",
    ]
    assert len(patcher_calls) == 1
    assert patcher_calls[0]["model"] is expert
    assert patcher_calls[0]["prefix"] == "lora_transformer-"


def test_lifecycle_dual_expert_swap():
    """HIGH then LOW: each expert's LoRA context opens and closes inside its device context."""
    events: list[str] = []
    high_expert = nn.Linear(1, 1)
    low_expert = nn.Linear(1, 1)
    patcher_stub, patcher_calls = _stub_lora_context_manager(events)

    with patch(_PATCH_TARGET, side_effect=patcher_stub):
        swapper = _ExpertSwapper(
            high_info=_FakeInfo("HIGH", high_expert, events),
            low_info=_FakeInfo("LOW", low_expert, events),
            inference_dtype=torch.bfloat16,
            high_lora_factory=_make_factory(events, "HIGH"),
            low_lora_factory=_make_factory(events, "LOW"),
        )
        assert swapper.get(_ExpertSwapper.HIGH) is high_expert
        assert swapper.get(_ExpertSwapper.LOW) is low_expert
        swapper.close()

    enter_high = ["device-enter:HIGH", "lora-factory-call:HIGH", "lora-enter"]
    # Swap order: LoRA out -> device out -> device in -> LoRA in.
    swap_to_low = [
        "lora-exit",
        "device-exit:HIGH",
        "device-enter:LOW",
        "lora-factory-call:LOW",
        "lora-enter",
    ]
    close_low = ["lora-exit", "device-exit:LOW"]
    assert events == enter_high + swap_to_low + close_low

    # One patcher invocation per expert, each bound to that expert's model.
    assert len(patcher_calls) == 2
    assert patcher_calls[0]["model"] is high_expert
    assert patcher_calls[1]["model"] is low_expert


def test_quantized_flag_forwards_to_sidecar():
    """``high_is_quantized=True`` must reach the patcher as ``force_sidecar_patching=True``."""
    events: list[str] = []
    expert = nn.Linear(1, 1)
    patcher_stub, patcher_calls = _stub_lora_context_manager(events)

    with patch(_PATCH_TARGET, side_effect=patcher_stub):
        swapper = _ExpertSwapper(
            high_info=_FakeInfo("HIGH", expert, events),
            low_info=None,
            inference_dtype=torch.bfloat16,
            high_lora_factory=_make_factory(events, "HIGH"),
            high_is_quantized=True,
        )
        swapper.get(_ExpertSwapper.HIGH)
        swapper.close()

    assert patcher_calls[0]["force_sidecar_patching"] is True


def test_no_lora_factory_skips_lora_context():
    """With no LoRA factories the patcher is never invoked."""
    events: list[str] = []
    expert = nn.Linear(1, 1)
    patcher_stub, patcher_calls = _stub_lora_context_manager(events)

    with patch(_PATCH_TARGET, side_effect=patcher_stub):
        swapper = _ExpertSwapper(
            high_info=_FakeInfo("HIGH", expert, events),
            low_info=None,
            inference_dtype=torch.bfloat16,
            high_lora_factory=None,  # no LoRAs
            low_lora_factory=None,
        )
        swapper.get(_ExpertSwapper.HIGH)
        swapper.close()

    # Neither LoRA marker may appear, and the stub must not have been called.
    assert "lora-enter" not in events
    assert "lora-exit" not in events
    assert len(patcher_calls) == 0


def test_repeat_get_same_label_is_a_no_op():
    """A second ``get(HIGH)`` in a row must not re-enter either context."""
    events: list[str] = []
    expert = nn.Linear(1, 1)
    patcher_stub, patcher_calls = _stub_lora_context_manager(events)

    with patch(_PATCH_TARGET, side_effect=patcher_stub):
        swapper = _ExpertSwapper(
            high_info=_FakeInfo("HIGH", expert, events),
            low_info=None,
            inference_dtype=torch.bfloat16,
            high_lora_factory=_make_factory(events, "HIGH"),
        )
        swapper.get(_ExpertSwapper.HIGH)
        swapper.get(_ExpertSwapper.HIGH)  # should be a no-op
        swapper.close()

    # Every lifecycle marker shows up exactly once.
    assert events.count("device-enter:HIGH") == 1
    assert events.count("lora-enter") == 1
    assert events.count("lora-exit") == 1
    assert events.count("device-exit:HIGH") == 1
class TestResolveTarget:
    """Table-driven check of ``_resolve_target(target, expert)`` -> (to_primary, to_low_noise)."""

    @pytest.mark.parametrize(
        "target, expert, expected",
        [
            ("auto", None, (True, True)),
            ("auto", "high", (True, False)),
            ("auto", "low", (False, True)),
            ("both", None, (True, True)),
            ("both", "high", (True, True)),
            ("both", "low", (True, True)),
            ("high", None, (True, False)),
            ("high", "low", (True, False)),  # explicit target overrides config
            ("low", None, (False, True)),
            ("low", "high", (False, True)),
        ],
    )
    def test_target_resolution(self, target, expert, expected):
        assert _resolve_target(target, expert) == expected


# --------------------------------------------------------------------------
# WanLoRALoaderInvocation — routing into primary vs low-noise lists.
# --------------------------------------------------------------------------


def _make_lora_field(key: str = "lora-1") -> ModelIdentifierField:
    """Identifier for a Wan LoRA whose hash and name are derived from ``key``."""
    return ModelIdentifierField(
        key=key,
        hash=f"hash-{key}",
        name=f"name-{key}",
        base=BaseModelType.Wan,
        type=ModelType.LoRA,
    )


def _make_transformer_field() -> WanTransformerField:
    """Transformer field pointing at a Wan main model, with no LoRAs attached yet."""
    main_model = ModelIdentifierField(
        key="wan-main",
        hash="wan-main-hash",
        name="wan-main",
        base=BaseModelType.Wan,
        type=ModelType.Main,
    )
    return WanTransformerField(transformer=main_model)


def _make_context(lora_expert: str | None) -> MagicMock:
    """Mock context: every LoRA key exists and every config reports ``expert == lora_expert``."""
    ctx = MagicMock()
    ctx.models.exists.return_value = True
    ctx.models.get_config.return_value = MagicMock(expert=lora_expert)
    return ctx


def _invoke_single(lora_expert: str | None, **fields):
    """Run a single-LoRA loader on a fresh transformer; ``fields`` are extra invocation kwargs."""
    inv = WanLoRALoaderInvocation(
        id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field(), **fields
    )
    return inv.invoke(_make_context(lora_expert=lora_expert))


class TestSingleLoaderRouting:
    def test_auto_untagged_goes_to_both(self):
        out = _invoke_single(None)
        assert out.transformer is not None
        assert len(out.transformer.loras) == 1
        assert len(out.transformer.loras_low_noise) == 1

    def test_auto_high_tag_goes_to_primary_only(self):
        out = _invoke_single("high")
        assert out.transformer is not None
        assert len(out.transformer.loras) == 1
        assert len(out.transformer.loras_low_noise) == 0

    def test_auto_low_tag_goes_to_low_only(self):
        out = _invoke_single("low")
        assert out.transformer is not None
        assert len(out.transformer.loras) == 0
        assert len(out.transformer.loras_low_noise) == 1

    def test_explicit_target_overrides_tag(self):
        # Config says "low" but the node's explicit target wins.
        out = _invoke_single("low", target="high")
        assert out.transformer is not None
        assert len(out.transformer.loras) == 1
        assert len(out.transformer.loras_low_noise) == 0

    def test_weight_propagates(self):
        out = _invoke_single(None, weight=0.42)
        assert out.transformer is not None
        assert out.transformer.loras[0].weight == pytest.approx(0.42)

    def test_unknown_lora_raises(self):
        ctx = _make_context(lora_expert=None)
        ctx.models.exists.return_value = False
        inv = WanLoRALoaderInvocation(
            id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field()
        )
        with pytest.raises(ValueError, match="Unknown lora"):
            inv.invoke(ctx)

    def test_duplicate_on_primary_raises(self):
        transformer = _make_transformer_field()
        transformer.loras.append(LoRAField(lora=_make_lora_field(key="dup"), weight=0.5))

        inv = WanLoRALoaderInvocation(
            id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer
        )
        with pytest.raises(ValueError, match="already applied to primary"):
            inv.invoke(_make_context(lora_expert="high"))

    def test_duplicate_on_low_noise_raises(self):
        transformer = _make_transformer_field()
        transformer.loras_low_noise.append(LoRAField(lora=_make_lora_field(key="dup"), weight=0.5))

        inv = WanLoRALoaderInvocation(
            id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer
        )
        with pytest.raises(ValueError, match="already applied to low-noise"):
            inv.invoke(_make_context(lora_expert="low"))

    def test_no_transformer_returns_empty_output(self):
        inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(), transformer=None)
        out = inv.invoke(_make_context(lora_expert=None))
        assert out.transformer is None


# --------------------------------------------------------------------------
# Collection loader — list routing + base validation.
# --------------------------------------------------------------------------


class TestCollectionLoaderRouting:
    def test_routes_mixed_tagged_loras(self):
        """Three LoRAs tagged high / low / untagged land in the expected lists."""
        expert_by_key = {"lora-high": "high", "lora-low": "low", "lora-any": None}
        weights = {"lora-high": 0.5, "lora-low": 0.6, "lora-any": 0.7}
        loras = [
            LoRAField(lora=_make_lora_field(key=key), weight=weights[key])
            for key in ("lora-high", "lora-low", "lora-any")
        ]
        inv = WanLoRACollectionLoader(
            id="inv-1", loras=loras, transformer=_make_transformer_field()
        )

        # Each LoRA's config is fetched separately; answer per key.
        ctx = MagicMock()
        ctx.models.exists.return_value = True
        ctx.models.get_config.side_effect = lambda field: MagicMock(expert=expert_by_key[field.key])

        out = inv.invoke(ctx)
        assert out.transformer is not None

        primary_keys = {item.lora.key for item in out.transformer.loras}
        low_keys = {item.lora.key for item in out.transformer.loras_low_noise}
        # high -> primary only; low -> low only; untagged -> both
        assert primary_keys == {"lora-high", "lora-any"}
        assert low_keys == {"lora-low", "lora-any"}

    def test_rejects_non_wan_base(self):
        flux_identifier = ModelIdentifierField(
            key="not-wan", hash="h", name="n", base=BaseModelType.Flux, type=ModelType.LoRA
        )
        inv = WanLoRACollectionLoader(
            id="inv-1",
            loras=[LoRAField(lora=flux_identifier, weight=0.5)],
            transformer=_make_transformer_field(),
        )
        ctx = MagicMock()
        ctx.models.exists.return_value = True
        with pytest.raises(ValueError, match="not Wan 2.2"):
            inv.invoke(ctx)

    def test_skips_duplicates(self):
        repeated = LoRAField(lora=_make_lora_field(key="dup"), weight=0.5)
        inv = WanLoRACollectionLoader(
            id="inv-1",
            loras=[repeated, repeated],
            transformer=_make_transformer_field(),
        )
        ctx = MagicMock()
        ctx.models.exists.return_value = True
        ctx.models.get_config.return_value = MagicMock(expert=None)

        out = inv.invoke(ctx)
        assert out.transformer is not None
        assert len(out.transformer.loras) == 1

    def test_no_loras_returns_clean_copy(self):
        inv = WanLoRACollectionLoader(
            id="inv-1", loras=None, transformer=_make_transformer_field()
        )
        out = inv.invoke(MagicMock())
        assert out.transformer is not None
        assert len(out.transformer.loras) == 0
        assert len(out.transformer.loras_low_noise) == 0
a/tests/backend/model_manager/configs/test_wan_lora_config.py b/tests/backend/model_manager/configs/test_wan_lora_config.py new file mode 100644 index 00000000000..0b95554ca74 --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_lora_config.py @@ -0,0 +1,344 @@ +"""Tests for the Wan LoRA probe (LoRA_LyCORIS_Wan_Config). + +These tests cover detection across the three formats Wan LoRAs ship in: + +- **Diffusers PEFT**, with or without a ``transformer.`` prefix +- **Native upstream PEFT** with ``diffusion_model.`` prefix (ComfyUI-trained) +- **Kohya** ``lora_unet_blocks_N_`` with both diffusers and native + attention naming + +And the anti-pattern guards that prevent false positives on: + +- Anima (Cosmos DiT — ``cross_attn_q_proj`` / ``mlp`` / ``adaln_modulation``) +- QwenImage (``transformer_blocks.``) +- Flux (``double_blocks`` / ``single_blocks`` / ``single_transformer_blocks``) +- Z-Image (``diffusion_model.layers.``) +""" + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest +import torch + +from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError +from invokeai.backend.model_manager.configs.lora import LoRA_LyCORIS_Wan_Config +from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat +from invokeai.backend.patches.lora_conversions.wan_lora_constants import ( + has_non_wan_architecture_keys, + has_wan_kohya_keys, + has_wan_peft_keys, +) + + +def _make_mod(path: Path, sd: dict) -> MagicMock: + mod = MagicMock() + mod.path = path + mod.load_state_dict.return_value = sd + return mod + + +def _overrides(model_path: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(model_path), + "file_size": 0, + "name": name, + "source": str(model_path), + "source_type": "path", + } + + +def _t(shape: tuple[int, ...]) -> torch.Tensor: + return torch.zeros(shape) + + +class TestDiffusersPEFTPositives: + def 
test_attn1_to_q(self): + keys = ["transformer.blocks.0.attn1.to_q.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_attn2_to_k(self): + keys = ["blocks.0.attn2.to_k.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_ffn_net(self): + keys = ["transformer.blocks.0.ffn.net.0.proj.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_base_model_peft_prefix(self): + keys = ["base_model.model.transformer.blocks.0.attn1.to_q.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + +class TestNativePEFTPositives: + def test_self_attn_q(self): + keys = ["diffusion_model.blocks.0.self_attn.q.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_cross_attn_k(self): + keys = ["diffusion_model.blocks.0.cross_attn.k.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_cross_attn_o(self): + keys = ["transformer.blocks.0.cross_attn.o.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_ffn_native(self): + keys = ["diffusion_model.blocks.0.ffn.0.lora_A.weight"] + assert has_wan_peft_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + +class TestKohyaPositives: + def test_kohya_diffusers_attn1_to_q(self): + keys = ["lora_unet_blocks_0_attn1_to_q.lora_down.weight"] + assert has_wan_kohya_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_kohya_diffusers_attn2_to_out(self): + keys = ["lora_unet_blocks_0_attn2_to_out_0.lora_down.weight"] + assert has_wan_kohya_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def 
test_kohya_native_self_attn_q(self): + keys = ["lora_unet_blocks_0_self_attn_q.lora_down.weight"] + assert has_wan_kohya_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_kohya_native_cross_attn_v(self): + keys = ["lora_unet_blocks_5_cross_attn_v.lora_down.weight"] + assert has_wan_kohya_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + def test_kohya_native_ffn_0(self): + keys = ["lora_unet_blocks_0_ffn_0.lora_down.weight"] + assert has_wan_kohya_keys(keys) is True + assert has_non_wan_architecture_keys(keys) is False + + +class TestArchitectureGuards: + """Anti-pattern checks: non-Wan architectures must be flagged so the + probe rejects them even if a wan-ish substring matches.""" + + @pytest.mark.parametrize( + "label, keys", + [ + ("anima_kohya_q_proj", + ["lora_unet_blocks_0_cross_attn_q_proj.lora_down.weight"]), + ("anima_peft_mlp", + ["transformer.blocks.0.mlp.layer1.lora_A.weight"]), + ("anima_peft_adaln", + ["transformer.blocks.0.adaln_modulation.linear.lora_A.weight"]), + ("anima_peft_self_attn_q_proj", + ["transformer.blocks.0.self_attn.q_proj.lora_A.weight"]), + ("qwen_image", + ["transformer_blocks.0.attn.to_q.lora_A.weight"]), + ("flux_kohya_double", + ["lora_unet_double_blocks_0_img_attn_qkv.lora_down.weight"]), + ("flux_kohya_single", + ["lora_unet_single_blocks_0_linear1.lora_down.weight"]), + ("flux_diffusers_single_transformer", + ["transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight"]), + ("z_image", + ["diffusion_model.layers.0.attn.to_q.lora_A.weight"]), + ], + ) + def test_non_wan_archs_are_flagged(self, label: str, keys: list[str]): + assert has_non_wan_architecture_keys(keys) is True + + +class TestProbeAcceptance: + """End-to-end probe behavior — Wan LoRA must be accepted, non-Wan rejected.""" + + def _wan_diffusers_sd(self) -> dict: + return { + "transformer.blocks.0.attn1.to_q.lora_A.weight": _t((128, 5120)), + "transformer.blocks.0.attn1.to_q.lora_B.weight": 
_t((5120, 128)), + "transformer.blocks.0.ffn.net.0.proj.lora_A.weight": _t((128, 5120)), + "transformer.blocks.0.ffn.net.0.proj.lora_B.weight": _t((13824, 128)), + } + + def _wan_native_sd(self) -> dict: + return { + "diffusion_model.blocks.0.self_attn.q.lora_A.weight": _t((128, 5120)), + "diffusion_model.blocks.0.self_attn.q.lora_B.weight": _t((5120, 128)), + } + + def _wan_kohya_sd(self) -> dict: + return { + "lora_unet_blocks_0_attn1_to_q.lora_down.weight": _t((128, 5120)), + "lora_unet_blocks_0_attn1_to_q.lora_up.weight": _t((5120, 128)), + } + + def test_accepts_diffusers_wan(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "my-wan-lora.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + _overrides(f, "wan-lora"), + ) + assert cfg.base == BaseModelType.Wan + assert cfg.format == ModelFormat.LyCORIS + assert cfg.expert is None + + def test_accepts_native_wan(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan-style-lora.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_native_sd()), + _overrides(f, "wan-native"), + ) + assert cfg.base == BaseModelType.Wan + + def test_accepts_kohya_wan(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan-kohya.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_kohya_sd()), + _overrides(f, "wan-kohya"), + ) + assert cfg.base == BaseModelType.Wan + + def test_filename_marks_high_noise_expert(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "stylize-high_noise.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + _overrides(f, "high-noise lora"), + ) + assert cfg.expert == "high" + + def test_filename_marks_low_noise_expert(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "fine-detail-LowNoise.safetensors" + f.touch() + cfg = 
LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + _overrides(f, "low-noise lora"), + ) + assert cfg.expert == "low" + + def test_explicit_expert_override_wins(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "ambiguous-name.safetensors" + f.touch() + overrides = _overrides(f, "override") + overrides["expert"] = "low" + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + overrides, + ) + assert cfg.expert == "low" + + def test_expert_none_for_untagged_filename(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "my-lora.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + _overrides(f, "untagged"), + ) + assert cfg.expert is None + + def test_rejects_anima_lora(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "anima.safetensors" + f.touch() + sd = { + "transformer.blocks.0.cross_attn.q_proj.lora_A.weight": _t((128, 4096)), + "transformer.blocks.0.mlp.layer1.lora_A.weight": _t((128, 4096)), + } + with pytest.raises(NotAMatchError, match="Wan LoRA"): + LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "anima")) + + def test_rejects_qwen_image_lora(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "qwen.safetensors" + f.touch() + sd = {"transformer_blocks.0.attn.to_q.lora_A.weight": _t((128, 4096))} + with pytest.raises(NotAMatchError, match="Wan LoRA"): + LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "qwen")) + + def test_rejects_flux_lora(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "flux.safetensors" + f.touch() + sd = {"lora_unet_double_blocks_0_img_attn_qkv.lora_down.weight": _t((128, 3072))} + with pytest.raises(NotAMatchError, match="Wan LoRA"): + LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "flux")) + + +class TestFactoryOrdering: + """Regression: native-PEFT Wan LoRAs share 
the ``cross_attn``/``self_attn`` + substring with Anima/Cosmos DiT. Anima's probe matches on the bare substring + (it doesn't require Anima's ``_proj`` suffix or ``mlp``/``adaln_modulation``), + so a Wan LoRA would be mis-tagged as Anima unless Wan's probe runs first + in the AnyModelConfig union — or unless Anima's probe gets tightened. + + This test pins the order by importing the union and asserting Wan appears + before Anima in the LyCORIS section. + """ + + def test_wan_appears_before_anima_in_lora_union(self): + from typing import get_args + + from invokeai.backend.model_manager.configs.factory import AnyModelConfig + from invokeai.backend.model_manager.configs.lora import ( + LoRA_LyCORIS_Anima_Config, + LoRA_LyCORIS_Wan_Config, + ) + + # AnyModelConfig is an Annotated[Union[...], Discriminator(...)] — the + # first arg of get_args is the Union itself. + union_type = get_args(AnyModelConfig)[0] + union_members = get_args(union_type) + + def _index_of(cls) -> int: + for i, m in enumerate(union_members): + # Each member is Annotated[ConfigClass, Tag(...)]; first get_args is the class. + if get_args(m)[0] is cls: + return i + raise AssertionError(f"{cls.__name__} not in union") + + wan_idx = _index_of(LoRA_LyCORIS_Wan_Config) + anima_idx = _index_of(LoRA_LyCORIS_Anima_Config) + assert wan_idx < anima_idx, ( + f"LoRA_LyCORIS_Wan_Config must come before LoRA_LyCORIS_Anima_Config in " + f"the AnyModelConfig union (Wan at {wan_idx}, Anima at {anima_idx}). " + "Otherwise Anima's cross_attn/self_attn substring match will steal Wan LoRAs." + ) + + def test_anima_would_have_matched_a_wan_native_lora(self): + """Sanity check: confirm that Anima's probe DOES match a Wan native LoRA + if asked directly. 
This is why ordering matters — Wan must run first.""" + from invokeai.backend.model_manager.configs.lora import LoRA_LyCORIS_Anima_Config + + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan_native_lora.safetensors" + f.touch() + # Realistic Wan native PEFT keys: this is what lightx2v's Lightning + # LoRAs and most ComfyUI-trained Wan LoRAs look like. + sd = { + "diffusion_model.blocks.0.self_attn.q.lora_A.weight": _t((128, 5120)), + "diffusion_model.blocks.0.self_attn.q.lora_B.weight": _t((5120, 128)), + "diffusion_model.blocks.0.cross_attn.k.lora_A.weight": _t((128, 5120)), + "diffusion_model.blocks.0.cross_attn.k.lora_B.weight": _t((5120, 128)), + } + # Anima's probe (today) erroneously accepts these. If this assertion + # ever flips, Anima's probe got tightened and the Wan-first ordering + # constraint is no longer required (but it's still safe to keep). + cfg = LoRA_LyCORIS_Anima_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "anima-false-positive")) + assert cfg.base == BaseModelType.Anima # NB: a false positive; protected against by ordering diff --git a/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py new file mode 100644 index 00000000000..21c79cd5c9a --- /dev/null +++ b/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py @@ -0,0 +1,201 @@ +"""Tests for Wan LoRA state-dict conversion to ModelPatchRaw.""" + +import torch + +from invokeai.backend.patches.lora_conversions.wan_lora_constants import WAN_LORA_TRANSFORMER_PREFIX +from invokeai.backend.patches.lora_conversions.wan_lora_conversion_utils import ( + _kohya_layer_to_diffusers_path, + _native_layer_path_to_diffusers, + _strip_peft_prefix, + lora_model_from_wan_state_dict, +) + + +def _ab_pair(in_dim: int, out_dim: int, rank: int = 16) -> dict[str, torch.Tensor]: + """PEFT-style lora_A (in→rank) + lora_B (rank→out) pair.""" + return { + "lora_A.weight": 
torch.zeros((rank, in_dim)), + "lora_B.weight": torch.zeros((out_dim, rank)), + } + + +def _down_up_pair(in_dim: int, out_dim: int, rank: int = 16) -> dict[str, torch.Tensor]: + """Kohya-style lora_down + lora_up pair.""" + return { + "lora_down.weight": torch.zeros((rank, in_dim)), + "lora_up.weight": torch.zeros((out_dim, rank)), + } + + +class TestKohyaLayerToDiffusersPath: + def test_diffusers_self_attention(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_attn1_to_q") == "blocks.0.attn1.to_q" + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_5_attn1_to_out_0") == "blocks.5.attn1.to_out.0" + + def test_diffusers_cross_attention(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_attn2_to_k") == "blocks.0.attn2.to_k" + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_attn2_to_v") == "blocks.0.attn2.to_v" + + def test_native_self_attention_maps_to_attn1(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_self_attn_q") == "blocks.0.attn1.to_q" + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_self_attn_o") == "blocks.0.attn1.to_out.0" + + def test_native_cross_attention_maps_to_attn2(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_2_cross_attn_v") == "blocks.2.attn2.to_v" + + def test_ffn_diffusers(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_ffn_net_0_proj") == "blocks.0.ffn.net.0.proj" + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_ffn_net_2") == "blocks.0.ffn.net.2" + + def test_ffn_native_maps_to_diffusers(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_ffn_0") == "blocks.0.ffn.net.0.proj" + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_ffn_2") == "blocks.0.ffn.net.2" + + def test_unknown_submodule_returns_none(self): + assert _kohya_layer_to_diffusers_path("lora_unet_blocks_0_unknown_thing") is None + + def test_non_kohya_returns_none(self): + assert 
_kohya_layer_to_diffusers_path("transformer.blocks.0.attn1.to_q") is None + + +class TestPEFTPathConversion: + def test_strip_transformer_prefix(self): + assert _strip_peft_prefix("transformer.blocks.0.attn1.to_q") == "blocks.0.attn1.to_q" + + def test_strip_diffusion_model_prefix(self): + assert _strip_peft_prefix("diffusion_model.blocks.0.self_attn.q") == "blocks.0.self_attn.q" + + def test_strip_base_model_prefix(self): + assert _strip_peft_prefix( + "base_model.model.transformer.blocks.0.attn1.to_q" + ) == "blocks.0.attn1.to_q" + + def test_no_prefix_unchanged(self): + assert _strip_peft_prefix("blocks.0.attn1.to_q") == "blocks.0.attn1.to_q" + + def test_diffusers_path_passes_through(self): + assert _native_layer_path_to_diffusers("blocks.0.attn1.to_q") == "blocks.0.attn1.to_q" + assert _native_layer_path_to_diffusers("blocks.0.ffn.net.0.proj") == "blocks.0.ffn.net.0.proj" + + def test_native_self_attn_becomes_attn1(self): + assert _native_layer_path_to_diffusers("blocks.0.self_attn.q") == "blocks.0.attn1.to_q" + assert _native_layer_path_to_diffusers("blocks.0.self_attn.k") == "blocks.0.attn1.to_k" + assert _native_layer_path_to_diffusers("blocks.0.self_attn.v") == "blocks.0.attn1.to_v" + assert _native_layer_path_to_diffusers("blocks.0.self_attn.o") == "blocks.0.attn1.to_out.0" + + def test_native_cross_attn_becomes_attn2(self): + assert _native_layer_path_to_diffusers("blocks.7.cross_attn.q") == "blocks.7.attn2.to_q" + assert _native_layer_path_to_diffusers("blocks.7.cross_attn.o") == "blocks.7.attn2.to_out.0" + + def test_native_ffn_becomes_diffusers_ffn(self): + assert _native_layer_path_to_diffusers("blocks.0.ffn.0") == "blocks.0.ffn.net.0.proj" + assert _native_layer_path_to_diffusers("blocks.0.ffn.2") == "blocks.0.ffn.net.2" + + def test_non_block_path_rejected(self): + assert _native_layer_path_to_diffusers("patch_embedding.weight") is None + + +class TestLoRAModelFromStateDict: + """End-to-end conversion: state dict -> ModelPatchRaw.""" + + def 
test_diffusers_peft_with_transformer_prefix(self): + sd = { + f"transformer.blocks.0.attn1.to_q.{k}": v + for k, v in _ab_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + expected_key = f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" + assert expected_key in patch.layers + + def test_diffusers_peft_bare(self): + sd = {f"blocks.5.attn2.to_k.{k}": v for k, v in _ab_pair(5120, 5120).items()} + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.5.attn2.to_k" in patch.layers + + def test_native_peft_diffusion_model_prefix(self): + sd = { + f"diffusion_model.blocks.0.self_attn.q.{k}": v + for k, v in _ab_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + # native self_attn.q must be rewritten to attn1.to_q + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers + + def test_native_peft_cross_attn_to_attn2(self): + sd = { + f"diffusion_model.blocks.3.cross_attn.o.{k}": v + for k, v in _ab_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.3.attn2.to_out.0" in patch.layers + + def test_native_peft_ffn_to_diffusers(self): + sd = { + f"diffusion_model.blocks.0.ffn.0.{k}": v + for k, v in _ab_pair(5120, 13824).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.ffn.net.0.proj" in patch.layers + + def test_kohya_diffusers_naming(self): + sd = { + f"lora_unet_blocks_0_attn1_to_q.{k}": v + for k, v in _down_up_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers + + def test_kohya_native_naming(self): + sd = { + f"lora_unet_blocks_0_self_attn_q.{k}": v + for k, v in _down_up_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers + + def 
test_kohya_ffn_native_naming(self): + sd = { + f"lora_unet_blocks_0_ffn_0.{k}": v + for k, v in _down_up_pair(5120, 13824).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.ffn.net.0.proj" in patch.layers + + def test_multiple_layers(self): + """Cover a realistic mix of attn + ffn keys across multiple blocks.""" + sd = {} + for block in range(3): + for k, v in _ab_pair(5120, 5120).items(): + sd[f"transformer.blocks.{block}.attn1.to_q.{k}"] = v + sd[f"transformer.blocks.{block}.attn2.to_v.{k}"] = v + for k, v in _ab_pair(5120, 13824).items(): + sd[f"transformer.blocks.{block}.ffn.net.0.proj.{k}"] = v + + patch = lora_model_from_wan_state_dict(sd) + expected_paths = [] + for block in range(3): + expected_paths.append(f"blocks.{block}.attn1.to_q") + expected_paths.append(f"blocks.{block}.attn2.to_v") + expected_paths.append(f"blocks.{block}.ffn.net.0.proj") + for path in expected_paths: + assert f"{WAN_LORA_TRANSFORMER_PREFIX}{path}" in patch.layers + + def test_alpha_override_propagates(self): + sd = {f"blocks.0.attn1.to_q.{k}": v for k, v in _ab_pair(5120, 5120).items()} + patch = lora_model_from_wan_state_dict(sd, alpha=8.0) + layer = patch.layers[f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q"] + # any_lora_layer_from_state_dict picks LoRALayer / LoKR / etc. — the + # layer object should at minimum have processed the alpha into its state. 
+ assert layer is not None + + def test_unknown_kohya_submodule_is_skipped_silently(self): + sd = { + f"lora_unet_blocks_0_unknown_thing.{k}": v + for k, v in _down_up_pair(5120, 5120).items() + } + patch = lora_model_from_wan_state_dict(sd) + assert len(patch.layers) == 0 + + def test_empty_state_dict(self): + patch = lora_model_from_wan_state_dict({}) + assert len(patch.layers) == 0 From 22799af39f09182f7fadc1c2e15c0a9f1c676c7e Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 10 May 2026 22:47:34 -0400 Subject: [PATCH 05/12] feat(model): Wan 2.2 Phase 7 - reference-image (I2V) conditioning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-implementation after the first attempt — which used CLIP-vision conditioning — was reverted. Wan 2.2 I2V-A14B does NOT use a CLIP-vision encoder (the Diffusers repo ships ``image_encoder: [null, null]`` in ``model_index.json``); instead it conditions on a reference image by VAE-encoding it and concatenating the resulting latents (plus a first-frame mask) to the noise latents along the channel dim. The I2V transformer therefore has ``in_channels=36`` (16 noise + 16 ref-image latents + 4 mask) vs ``in_channels=16`` for T2V. Taxonomy: - Re-adds ``WanVariantType.I2V_A14B``. Probes: - Diffusers: ``_detect_wan_variant`` reads ``transformer/config.json::in_channels``; 36 → I2V_A14B, 16 → T2V_A14B (both share the dual-expert layout). - GGUF: ``_detect_wan_gguf_variant`` recognises ``in_channels=36`` from the patch_embedding tensor shape and emits I2V_A14B. Backend extension (``backend/wan/extensions/wan_ref_image_extension.py``): - ``preprocess_reference_image`` resizes + normalises to a 5D pixel tensor. - ``encode_reference_image_to_condition`` VAE-encodes the image and stacks a 4-channel first-frame mask on top, producing the ``[1, 20, 1, H/8, W/8]`` condition tensor the denoise loop consumes. 
- Mirrors diffusers ``WanImageToVideoPipeline.prepare_latents`` with ``num_frames=1`` and ``expand_timesteps=False``. Invocation node (``wan_ref_image_encoder.py``): - "Reference Image - Wan 2.2": image + VAE + width/height pickers. - Output ``WanRefImageConditioningField`` carries the condition tensor name plus the dimensions used (so the denoise step can validate dim parity). Denoise integration: - ``WanDenoiseInvocation`` gains an optional ``ref_image`` field. - Variant gate: rejects ref_image on T2V_A14B and TI2V-5B with a clear error before doing any work. - Dimension gate: rejects ref-image width/height mismatch vs denoise. - At every transformer call, concatenates the 20-channel condition tensor to the 16-channel noise latents along the channel dim before passing to the transformer (giving the 36-channel input I2V expects). Tests: 14 new across the probe, the extension, and the denoise loop. The synthetic ``_ZeroTransformer`` test stand-in now mirrors the real I2V transformer's ``in_channels=36, out_channels=16`` asymmetry by slicing its zero output back to 16 channels when the input is 36-wide. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): derive GGUF out_channels from proj_out shape (I2V support) The GGUF loader was setting ``out_channels = in_channels`` which is wrong for Wan 2.2 I2V-A14B: that variant has ``in_channels=36`` (16 noise + 16 ref-image latents + 4 first-frame mask, concatenated by the denoise loop) but ``out_channels=16`` since the transformer only predicts the noise component back. Loading an I2V GGUF would build a transformer with the wrong proj_out shape and crash: RuntimeError: Error(s) in loading state_dict for WanTransformer3DModel: size mismatch for proj_out.weight: copying a param with shape torch.Size([64, 5120]) from checkpoint, the shape in current model is torch.Size([144, 5120]). (144 = 36 * 4, 64 = 16 * 4 — patch_size=(1, 2, 2) → prod=4) Read out_channels directly from the ``proj_out.weight`` shape in the state dict. 
This is correct for all three Wan 2.2 variants without needing to know the variant in advance. Also tighten the num_layers fallback: T2V_A14B and I2V_A14B share 40 layers; only TI2V-5B has 30. The fallback is rarely hit in practice (the per-block count comes from the state dict scan), but the previous code would have defaulted I2V_A14B to 30 layers. Co-Authored-By: Claude Opus 4.7 (1M context) fix(model): make Anima LoRA probe mutually exclusive with Wan InvokeAI's ``Config_Base.CONFIG_CLASSES`` is a Python ``set``, so iteration order during model probing is non-deterministic across process restarts. First-match-wins ordering in ``AnyModelConfig`` is documentation only — it has no effect on which config is iterated first. Anima's previous probe accepted any state dict containing the substring ``cross_attn`` or ``self_attn``, which collides with Wan's native LoRA key layout (``diffusion_model.blocks.X.cross_attn.q.lora_down.weight``). Both probes accepted Wan native LoRAs (including lightx2v's Lightning T2V and I2V distillations), and the ``matches.sort_key`` tiebreaker only disambiguates by ModelType, not within LoRA configs. So which config "won" depended on the set's hash-based iteration order — sometimes Wan, sometimes Anima. The previous mitigation reordered the AnyModelConfig union to put Wan before Anima. That worked by luck and was inherently fragile. Tighten Anima's probe to require Cosmos-DiT-exclusive subcomponents: ``mlp``, ``adaln_modulation``, or ``_proj``-suffixed attention names (``q_proj``/``k_proj``/``v_proj``/``output_proj``) — none of which appear in any Wan LoRA. Wan native uses bare ``.q``/``.k``/``.v``/``.o`` on ``self_attn``/``cross_attn``, and ``ffn.N``/``ffn.net.N`` instead of ``mlp``. The new strict detectors live alongside the original loose ones so the Anima conversion utility (which runs after probing) still works. 
Regression tests in ``test_wan_lora_probe_independence.py`` cover: - I2V Lightning V1 (the bug-triggering LoRA), T2V Lightning V2, Wan Kohya and Wan diffusers PEFT layouts — Wan probe accepts, Anima probe rejects. - Anima PEFT and Kohya layouts — Anima accepts, Wan rejects. - A meta-test that runs every LoRA config in CONFIG_CLASSES against the Lightning state dicts and asserts exactly one accepts — this catches ANY future probe collision, not just Wan vs Anima. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): defer expert model loading in _ExpertSwapper to avoid cache thrash The swapper used to take pre-loaded ``LoadedModel`` handles at construction: high_info = context.models.load(self.transformer.transformer) low_info = context.models.load(self.transformer.transformer_low_noise) swapper = _ExpertSwapper(high_info=high_info, low_info=low_info, ...) With dual ~9 GB A14B GGUF experts plus the ~10 GB UMT5-XXL encoder competing for the same RAM cache, the LRU policy frequently dropped one expert by the time the denoise loop swapped into it. The model manager then emitted [MODEL CACHE] Locking model cache entry ... but it has already been dropped from the RAM cache. This is a sign that the model loading order is non-optimal in the invocation code (See ... #7513). and reloaded the weights from disk (~1.2s extra per swap). Refactor the swapper to take the ``ModelIdentifierField`` plus the ``InvocationContext`` and call ``context.models.load(model_id)`` lazily inside ``get()``. Each swap obtains a fresh handle, the LRU window is small, and the warning goes away. Config metadata (used to compute ``is_quantized``) is read upfront via ``context.models.get_config()`` — that's metadata, not weights, so it doesn't put pressure on the cache. Tests: existing swapper lifecycle tests refactored to use a fake context whose ``models.load`` is logged. 
A new ``test_lazy_load_per_swap_not_upfront`` pins the regression — it asserts ``models.load`` is NOT called at swapper construction, only at first get() per expert. Co-Authored-By: Claude Opus 4.7 (1M context) --- invokeai/app/invocations/fields.py | 18 + invokeai/app/invocations/primitives.py | 19 + invokeai/app/invocations/wan_denoise.py | 104 ++++- .../app/invocations/wan_ref_image_encoder.py | 89 ++++ .../backend/model_manager/configs/factory.py | 13 +- .../backend/model_manager/configs/lora.py | 29 +- .../backend/model_manager/configs/main.py | 49 ++- .../model_manager/load/model_loaders/wan.py | 21 +- invokeai/backend/model_manager/taxonomy.py | 16 +- .../lora_conversions/anima_lora_constants.py | 55 ++- invokeai/backend/wan/extensions/__init__.py | 0 .../wan/extensions/wan_ref_image_extension.py | 97 +++++ .../frontend/web/src/services/api/schema.ts | 386 ++++++++++++++++-- tests/app/invocations/test_wan_denoise.py | 138 ++++++- .../invocations/test_wan_expert_swapper.py | 157 +++++-- .../configs/test_wan_gguf_config.py | 18 + .../configs/test_wan_lora_config.py | 91 ++--- .../test_wan_lora_probe_independence.py | 277 +++++++++++++ .../configs/test_wan_main_config.py | 42 ++ .../wan/test_wan_ref_image_extension.py | 112 +++++ 20 files changed, 1565 insertions(+), 166 deletions(-) create mode 100644 invokeai/app/invocations/wan_ref_image_encoder.py create mode 100644 invokeai/backend/wan/extensions/__init__.py create mode 100644 invokeai/backend/wan/extensions/wan_ref_image_extension.py create mode 100644 tests/backend/model_manager/configs/test_wan_lora_probe_independence.py create mode 100644 tests/backend/wan/test_wan_ref_image_extension.py diff --git a/invokeai/app/invocations/fields.py b/invokeai/app/invocations/fields.py index f0ec3c9b5b4..62a327eacec 100644 --- a/invokeai/app/invocations/fields.py +++ b/invokeai/app/invocations/fields.py @@ -175,6 +175,7 @@ class FieldDescriptions: qwen_vl_encoder = "Qwen2.5-VL tokenizer, processor and text/vision 
encoder" wan_model = "Wan 2.2 model (Transformer) to load" wan_t5_encoder = "UMT5-XXL tokenizer and text encoder for Wan 2.2" + wan_ref_image = "Reference-image (VAE-latent) conditioning for Wan 2.2 I2V." sdxl_main_model = "SDXL Main model (UNet, VAE, CLIP1, CLIP2) to load" sdxl_refiner_model = "SDXL Refiner Main Modde (UNet, VAE, CLIP2) to load" onnx_main_model = "ONNX Main model (UNet, VAE, CLIP) to load" @@ -376,6 +377,23 @@ class WanConditioningField(BaseModel): conditioning_name: str = Field(description="The name of conditioning tensor") +class WanRefImageConditioningField(BaseModel): + """Reference-image conditioning for Wan 2.2 I2V. + + Carries the 20-channel VAE-latent condition tensor (4-channel first-frame + mask + 16-channel ref-image latents). The denoise loop concatenates this + to the 16-channel noise latents along the channel dim each step, producing + the 36-channel input the I2V-A14B transformer expects. + + Also carries the spatial dims used to encode the image, so the denoise + node can sanity-check that the user's width/height match. 
+ """ + + condition_tensor_name: str = Field(description="Name of the saved [1, 20, 1, H/8, W/8] condition tensor.") + width: int = Field(description="Image width used during VAE encoding (matches denoise width).") + height: int = Field(description="Image height used during VAE encoding (matches denoise height).") + + class ConditioningField(BaseModel): """A conditioning tensor primitive value""" diff --git a/invokeai/app/invocations/primitives.py b/invokeai/app/invocations/primitives.py index 417790e6ea6..f51ece6602e 100644 --- a/invokeai/app/invocations/primitives.py +++ b/invokeai/app/invocations/primitives.py @@ -30,6 +30,7 @@ TensorField, UIComponent, WanConditioningField, + WanRefImageConditioningField, ZImageConditioningField, ) from invokeai.app.services.images.images_common import ImageDTO @@ -509,6 +510,24 @@ def build(cls, conditioning_name: str) -> "WanConditioningOutput": return cls(conditioning=WanConditioningField(conditioning_name=conditioning_name)) +@invocation_output("wan_ref_image_output") +class WanRefImageOutput(BaseInvocationOutput): + """Output of a Wan 2.2 reference-image VAE-encoder.""" + + ref_image: WanRefImageConditioningField = OutputField( + description="VAE-latent reference-image conditioning for Wan 2.2 I2V.", + title="Reference Image", + ) + + @classmethod + def build(cls, condition_tensor_name: str, width: int, height: int) -> "WanRefImageOutput": + return cls( + ref_image=WanRefImageConditioningField( + condition_tensor_name=condition_tensor_name, width=width, height=height + ) + ) + + @invocation_output("conditioning_output") class ConditioningOutput(BaseInvocationOutput): """Base class for nodes that output a single conditioning tensor""" diff --git a/invokeai/app/invocations/wan_denoise.py b/invokeai/app/invocations/wan_denoise.py index 0a95ae08a17..7b9a02ee835 100644 --- a/invokeai/app/invocations/wan_denoise.py +++ b/invokeai/app/invocations/wan_denoise.py @@ -39,6 +39,7 @@ InputField, LatentsField, WanConditioningField, + 
WanRefImageConditioningField, ) from invokeai.app.invocations.model import LoRAField, WanTransformerField from invokeai.app.invocations.primitives import LatentsOutput @@ -92,10 +93,18 @@ class _ExpertSwapper: LoRA patches via ``LayerPatcher.apply_smart_model_patches``. Ordering on swap: exit the active expert's LoRA context (restores weights) - -> exit ``model_on_device`` (returns expert to RAM) -> enter the new - expert's device context -> apply the new expert's LoRAs. This mirrors the - pattern used by ``flux_denoise``/``anima_denoise`` but adds the extra - context layer needed for dual experts. + -> exit ``model_on_device`` (returns expert to RAM) -> load the new expert + (fresh handle) -> enter its device context -> apply its LoRAs. This + mirrors the pattern used by ``flux_denoise``/``anima_denoise`` but adds + the extra context layer needed for dual experts. + + Model handles are obtained lazily inside ``get()`` rather than cached at + construction. With dual ~9 GB GGUF experts plus a UMT5-XXL encoder + competing for the RAM cache, holding both ``LoadedModel`` handles upfront + can leave one of them stale by the time the swap happens — InvokeAI's + model cache emits a ``has already been dropped from the RAM cache`` + warning and reloads from disk per swap. See issue #7513 for the broader + pattern. 
""" HIGH = "high" @@ -103,16 +112,18 @@ class _ExpertSwapper: def __init__( self, - high_info: Any, - low_info: Any | None, + context: InvocationContext, + high_model: Any, + low_model: Any | None, inference_dtype: torch.dtype, high_lora_factory: LoRAIteratorFactory | None = None, low_lora_factory: LoRAIteratorFactory | None = None, high_is_quantized: bool = False, low_is_quantized: bool = False, ) -> None: - self._high_info = high_info - self._low_info = low_info + self._context = context + self._high_model = high_model + self._low_model = low_model self._inference_dtype = inference_dtype self._high_lora_factory = high_lora_factory self._low_lora_factory = low_lora_factory @@ -126,7 +137,7 @@ def __init__( def get(self, label: str) -> Any: if label not in (self.HIGH, self.LOW): raise ValueError(f"Unknown expert label: {label!r}") - if label == self.LOW and self._low_info is None: + if label == self.LOW and self._low_model is None: raise ValueError("Low-noise expert was requested but is not available.") if label == self._active_label: assert self._active_model is not None @@ -135,7 +146,10 @@ def get(self, label: str) -> Any: # Release current GPU residency before bringing the other expert on device. self._release() - info = self._high_info if label == self.HIGH else self._low_info + # Load the requested expert lazily so its ``LoadedModel`` handle is + # always fresh — see class docstring for the cache-eviction reasoning. 
+ model_id = self._high_model if label == self.HIGH else self._low_model + info = self._context.models.load(model_id) device_ctx = info.model_on_device() cached_weights, model = device_ctx.__enter__() @@ -213,6 +227,13 @@ class WanDenoiseInvocation(BaseInvocation): default=None, description=FieldDescriptions.negative_cond, input=Input.Connection ) + ref_image: Optional[WanRefImageConditioningField] = InputField( + default=None, + description=FieldDescriptions.wan_ref_image, + input=Input.Connection, + title="Reference Image", + ) + latents: Optional[LatentsField] = InputField( default=None, description=FieldDescriptions.latents, @@ -280,6 +301,28 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: context, self.negative_conditioning, device=device, dtype=inference_dtype ) + # Reference-image conditioning (Wan 2.2 I2V-A14B only). The condition + # tensor is 20 channels (4 mask + 16 VAE-encoded image latents); it + # gets concatenated to the 16-channel noise latents each step, + # yielding the 36-channel input the I2V transformer expects. + ref_condition: torch.Tensor | None = None + if self.ref_image is not None: + if variant != WanVariantType.I2V_A14B: + raise ValueError( + f"Reference-image conditioning is only supported by the Wan 2.2 I2V variant. " + f"The selected transformer is {variant.value!r}. Remove the Reference Image input " + "or load an I2V model." + ) + if self.ref_image.width != self.width or self.ref_image.height != self.height: + raise ValueError( + f"Reference-image dimensions ({self.ref_image.width}x{self.ref_image.height}) must " + f"match denoise dimensions ({self.width}x{self.height})." + ) + ref_condition = ( + context.tensors.load(self.ref_image.condition_tensor_name) + .to(device=device, dtype=inference_dtype) + ) + # Schedule timesteps. set_timesteps populates scheduler.timesteps and # scheduler.sigmas (where sigmas is in [0, 1] flow-matching space). 
scheduler.set_timesteps(num_inference_steps=self.steps, device=device) @@ -361,18 +404,27 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: step_callback = self._build_step_callback(context) # Resolve experts and the boundary timestep that triggers the MoE swap. - high_info = context.models.load(self.transformer.transformer) - low_info = ( - context.models.load(self.transformer.transformer_low_noise) - if self.transformer.transformer_low_noise is not None - else None - ) + # + # We deliberately do NOT call ``context.models.load(...)`` for the + # transformer experts here — that would put both ~9 GB GGUF handles + # in the model cache concurrently. With UMT5-XXL (~10 GB) competing + # for the same cache, the LRU policy can drop one of them by the + # time the denoise loop swaps in, producing the + # "has already been dropped from the RAM cache" warning and forcing + # a disk reload per swap. The swapper calls ``models.load`` lazily + # inside each ``get()`` instead, so handles are always fresh. + # + # The config metadata (variant / format) is fine to read upfront — + # ``get_config`` doesn't touch the weights cache. + high_model = self.transformer.transformer + low_model = self.transformer.transformer_low_noise + low_config = context.models.get_config(low_model) if low_model is not None else None # FlowMatchEulerDiscreteScheduler stores num_train_timesteps in its config # (default 1000). Diffusers' WanPipeline computes: # boundary_timestep = boundary_ratio * num_train_timesteps num_train_timesteps = int(scheduler.config.num_train_timesteps) boundary_timestep = ( - self.transformer.boundary_ratio * num_train_timesteps if low_info is not None else None + self.transformer.boundary_ratio * num_train_timesteps if low_model is not None else None ) # LoRA wiring. The high-noise expert uses ``transformer.loras``; the @@ -382,9 +434,10 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: # aren't touched directly. 
high_loras = self.transformer.loras low_loras = self.transformer.loras_low_noise or self.transformer.loras - high_is_quantized = high_info.config.format == ModelFormat.GGUFQuantized + high_config = context.models.get_config(high_model) + high_is_quantized = high_config.format == ModelFormat.GGUFQuantized low_is_quantized = ( - low_info.config.format == ModelFormat.GGUFQuantized if low_info is not None else False + low_config.format == ModelFormat.GGUFQuantized if low_config is not None else False ) def high_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: @@ -395,8 +448,9 @@ def low_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: with ExitStack() as exit_stack: swapper = _ExpertSwapper( - high_info=high_info, - low_info=low_info, + context=context, + high_model=high_model, + low_model=low_model, inference_dtype=inference_dtype, high_lora_factory=high_lora_factory if high_loras else None, low_lora_factory=low_lora_factory if low_loras else None, @@ -410,7 +464,7 @@ def low_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: # Pick the active expert: high-noise for t >= boundary_timestep, # low-noise below. Single-transformer models always use HIGH. - if low_info is not None and float(t) < float(boundary_timestep): + if low_model is not None and float(t) < float(boundary_timestep): active_label = _ExpertSwapper.LOW # Treat None or values below 1.0 (incl. the FE's default 0) # as "use the primary guidance_scale". @@ -426,6 +480,12 @@ def low_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: # pass; keep the scheduler-level latents in fp32. latent_model_input = latents.to(dtype=inference_dtype) + # For I2V, concatenate the ref-image condition (4-ch mask + 16-ch + # image latents) along the channel dim, producing the 36-channel + # input the I2V transformer's patch_embedding expects. 
+ if ref_condition is not None: + latent_model_input = torch.cat([latent_model_input, ref_condition], dim=1) + noise_pred_cond = transformer( hidden_states=latent_model_input, timestep=timestep, diff --git a/invokeai/app/invocations/wan_ref_image_encoder.py b/invokeai/app/invocations/wan_ref_image_encoder.py new file mode 100644 index 00000000000..858bf25514c --- /dev/null +++ b/invokeai/app/invocations/wan_ref_image_encoder.py @@ -0,0 +1,89 @@ +"""Reference-image (VAE-latent) encoder for Wan 2.2 I2V-A14B. + +Wan 2.2 I2V conditions on a reference image by VAE-encoding it and +concatenating the resulting latents to the noise latents along the channel +dim. This invocation produces the 20-channel condition tensor (4-ch first- +frame mask + 16-ch image latents) the denoise loop will consume. +""" + +import torch +from diffusers.models.autoencoders import AutoencoderKLWan + +from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation +from invokeai.app.invocations.fields import ( + FieldDescriptions, + ImageField, + Input, + InputField, +) +from invokeai.app.invocations.model import VAEField +from invokeai.app.invocations.primitives import WanRefImageOutput +from invokeai.app.services.shared.invocation_context import InvocationContext +from invokeai.backend.util.devices import TorchDevice +from invokeai.backend.wan.extensions.wan_ref_image_extension import ( + encode_reference_image_to_condition, +) + + +@invocation( + "wan_ref_image_encoder", + title="Reference Image - Wan 2.2", + tags=["image", "conditioning", "wan", "i2v"], + category="conditioning", + version="1.0.0", + classification=Classification.Prototype, +) +class WanRefImageEncoderInvocation(BaseInvocation): + """VAE-encode a reference image into Wan 2.2 I2V conditioning. 
+ + Output is a ``[1, 20, 1, height // 8, width // 8]`` condition tensor that + the denoise loop concatenates to the 16-channel noise latents each step, + producing the 36-channel input the I2V-A14B transformer expects. + + Only works with I2V-A14B (the denoise loop's variant gate enforces this). + For T2V or TI2V-5B, omit this node entirely. + """ + + image: ImageField = InputField(description="Reference image to condition on.") + vae: VAEField = InputField( + description=FieldDescriptions.vae, input=Input.Connection, title="VAE" + ) + width: int = InputField( + default=1024, + multiple_of=8, + description="Width to resize the reference image to (must match denoise width).", + ) + height: int = InputField( + default=1024, + multiple_of=8, + description="Height to resize the reference image to (must match denoise height).", + ) + + @torch.no_grad() + def invoke(self, context: InvocationContext) -> WanRefImageOutput: + pil_image = context.images.get_pil(self.image.image_name, "RGB") + + vae_info = context.models.load(self.vae.vae) + device = TorchDevice.choose_torch_device() + target_dtype = TorchDevice.choose_bfloat16_safe_dtype(device) + + with vae_info.model_on_device() as (_, vae): + if not isinstance(vae, AutoencoderKLWan): + raise TypeError( + f"Reference-image encoder requires AutoencoderKLWan, got {type(vae).__name__}." 
+ ) + context.util.signal_progress("VAE-encoding reference image") + condition = encode_reference_image_to_condition( + image=pil_image, + vae=vae, + width=self.width, + height=self.height, + device=device, + dtype=target_dtype, + ) + + condition = condition.detach().to("cpu") + name = context.tensors.save(tensor=condition) + return WanRefImageOutput.build( + condition_tensor_name=name, width=self.width, height=self.height + ) diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index 03dd31b5d1f..aae06328782 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -240,13 +240,12 @@ Annotated[LoRA_LyCORIS_FLUX_Config, LoRA_LyCORIS_FLUX_Config.get_tag()], Annotated[LoRA_LyCORIS_ZImage_Config, LoRA_LyCORIS_ZImage_Config.get_tag()], Annotated[LoRA_LyCORIS_QwenImage_Config, LoRA_LyCORIS_QwenImage_Config.get_tag()], - # Wan must come BEFORE Anima: Anima's probe only checks for the bare - # ``cross_attn``/``self_attn`` substring (it doesn't require Cosmos - # DiT's ``_proj`` suffix or ``mlp``/``adaln_modulation``), so a Wan - # native PEFT LoRA (``diffusion_model.blocks.X.cross_attn.k...``) - # would otherwise match Anima first. Wan's probe is strictly more - # restrictive — it rejects Anima's ``_proj`` suffix via the - # anti-pattern — so trying Wan first is safe for both directions. + # Wan and Anima both target ``blocks.X`` shapes; their LoRA probes are + # mutually exclusive — Wan rejects Anima's ``_proj``/``mlp``/ + # ``adaln_modulation`` markers, Anima requires at least one of those + # markers (see ``has_cosmos_dit_*_keys_strict``). Order between these + # two doesn't affect correctness; mutual exclusivity is locked in by + # ``test_wan_lora_probe_independence.py``. 
Annotated[LoRA_LyCORIS_Wan_Config, LoRA_LyCORIS_Wan_Config.get_tag()], Annotated[LoRA_LyCORIS_Anima_Config, LoRA_LyCORIS_Anima_Config.get_tag()], # LoRA - OMI format diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index a56139c602a..ed3272fee96 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -33,7 +33,9 @@ from invokeai.backend.model_manager.util.model_util import lora_token_vector_length from invokeai.backend.patches.lora_conversions.anima_lora_constants import ( has_cosmos_dit_kohya_keys, + has_cosmos_dit_kohya_keys_strict, has_cosmos_dit_peft_keys, + has_cosmos_dit_peft_keys_strict, ) from invokeai.backend.patches.lora_conversions.flux_control_lora_utils import is_state_dict_likely_flux_control from invokeai.backend.patches.lora_conversions.wan_lora_constants import ( @@ -890,16 +892,23 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: Anima LoRAs have keys like: - lora_unet_blocks_0_cross_attn_k_proj.lora_down.weight (Kohya format) - diffusion_model.blocks.0.cross_attn.k_proj.lora_A.weight (diffusers PEFT format) - - transformer.blocks.0.cross_attn.k_proj.lora_A.weight (diffusers PEFT format) - - Detection requires Cosmos DiT-specific subcomponent names (cross_attn, - self_attn, mlp, adaln_modulation) to avoid false-positives on other - architectures that also use ``blocks`` in their paths. + - transformer.blocks.0.mlp.layer_0.lora_A.weight (Anima-only MLP layer) + + Uses the **strict** Cosmos-DiT detectors, which require an + Anima-exclusive subcomponent name (``mlp``, ``adaln_modulation``, or + ``_proj``-suffixed attention). The loose detectors would also accept + Wan-native LoRAs (which use ``cross_attn``/``self_attn`` too but with + bare ``.q``/``.k``/``.v``/``.o`` rather than ``_proj``), so they're not + safe for first-match-wins probing — see the regression tests in + ``test_wan_lora_probe_independence.py``. 
""" state_dict = mod.load_state_dict() str_keys = [k for k in state_dict.keys() if isinstance(k, str)] - has_cosmos_keys = has_cosmos_dit_kohya_keys(str_keys) or has_cosmos_dit_peft_keys(str_keys) + has_cosmos_keys = ( + has_cosmos_dit_kohya_keys_strict(str_keys) + or has_cosmos_dit_peft_keys_strict(str_keys) + ) # Also check for LoRA/LoKR weight suffixes has_lora_suffix = state_dict_has_any_keys_ending_with( @@ -922,14 +931,16 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: @classmethod def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: - """Anima LoRAs target Cosmos DiT blocks (blocks.X.cross_attn, blocks.X.self_attn, etc.). + """Anima LoRAs target Cosmos DiT blocks (blocks.X.mlp, blocks.X.adaln_modulation, + blocks.X.cross_attn.q_proj, etc.). - Uses Cosmos DiT-specific subcomponent names to avoid false-positives. + Uses the strict Cosmos-DiT detectors to be mutually exclusive with + Wan-LoRA detection — see ``_validate_looks_like_lora`` for rationale. """ state_dict = mod.load_state_dict() str_keys = [k for k in state_dict.keys() if isinstance(k, str)] - if has_cosmos_dit_kohya_keys(str_keys) or has_cosmos_dit_peft_keys(str_keys): + if has_cosmos_dit_kohya_keys_strict(str_keys) or has_cosmos_dit_peft_keys_strict(str_keys): return BaseModelType.Anima raise NotAMatchError("model does not look like an Anima LoRA") diff --git a/invokeai/backend/model_manager/configs/main.py b/invokeai/backend/model_manager/configs/main.py index b336f1d3860..55389b986f9 100644 --- a/invokeai/backend/model_manager/configs/main.py +++ b/invokeai/backend/model_manager/configs/main.py @@ -1434,12 +1434,18 @@ def _is_native_wan_layout(state_dict: dict[str | int, Any]) -> bool: def _detect_wan_gguf_variant(state_dict: dict[str | int, Any]) -> WanVariantType | None: - """Determine A14B vs TI2V-5B from the GGUF state dict. + """Determine A14B (T2V vs I2V) vs TI2V-5B from the GGUF state dict. 
- ``patch_embedding.weight`` has shape ``[inner_dim, in_channels, T, H, W]`` - where ``in_channels`` is the latent channel count: 16 for the standard Wan - VAE (A14B family) or 48 for Wan2.2-VAE (TI2V-5B). Returns None if the - tensor isn't found or the channel count is unrecognised. + ``patch_embedding.weight`` has shape ``[inner_dim, in_channels, T, H, W]``; + ``in_channels`` uniquely identifies the Wan 2.2 variant: + + - 16 → T2V-A14B (noise latents only). + - 36 → I2V-A14B (16 noise + 16 ref-image latents + 4 first-frame mask, + concatenated along the channel dim — see diffusers + ``WanImageToVideoPipeline.prepare_latents``). + - 48 → TI2V-5B (Wan2.2-VAE z_dim=48). + + Returns None if the tensor is missing or the channel count is unrecognised. """ candidates = ( "patch_embedding.weight", @@ -1455,6 +1461,8 @@ def _detect_wan_gguf_variant(state_dict: dict[str | int, Any]) -> WanVariantType in_channels = int(shape[1]) if in_channels == 16: return WanVariantType.T2V_A14B + if in_channels == 36: + return WanVariantType.I2V_A14B if in_channels == 48: return WanVariantType.TI2V_5B return None @@ -1593,10 +1601,21 @@ def _read_boundary_ratio(cls, mod: ModelOnDisk) -> float | None: def _detect_wan_variant(cls, mod: ModelOnDisk, has_dual_expert: bool) -> WanVariantType: """Detect Wan variant from transformer + VAE config. - - A14B: dual transformer experts, standard Wan VAE (z_dim=16). + - T2V-A14B: dual transformer experts, standard Wan VAE (z_dim=16), + transformer ``in_channels=16`` (text-only conditioning). + - I2V-A14B: dual transformer experts, standard Wan VAE, + transformer ``in_channels=36`` (text + VAE-encoded reference image + + first-frame mask concatenated along the channel dim). - TI2V-5B: single transformer, Wan2.2-VAE (z_dim=48). """ if has_dual_expert: + # Disambiguate T2V vs I2V via the transformer's input channel count. + # Wan 2.2 I2V uses VAE-latent concatenation: 16 noise + 16 ref-image + # latents + 4 first-frame mask = 36. 
(Wan 2.1 I2V used CLIP-vision + # via ``image_dim``; that mechanism is absent in Wan 2.2.) + in_channels = cls._transformer_in_channels(mod) + if in_channels == 36: + return WanVariantType.I2V_A14B return WanVariantType.T2V_A14B # Single-transformer model: distinguish TI2V-5B from any future single-expert @@ -1616,6 +1635,24 @@ def _detect_wan_variant(cls, mod: ModelOnDisk, has_dual_expert: bool) -> WanVari return WanVariantType.TI2V_5B return WanVariantType.T2V_A14B + @staticmethod + def _transformer_in_channels(mod: ModelOnDisk) -> int | None: + """Read ``in_channels`` from ``transformer/config.json``. + + For Wan 2.2 A14B, this is the canonical discriminator between T2V + (``in_channels=16``) and I2V (``in_channels=36``). Returns None if the + config can't be read. + """ + try: + transformer_config = get_config_dict_or_raise(mod.path / "transformer" / "config.json") + except NotAMatchError: + return None + value = transformer_config.get("in_channels") + try: + return int(value) if value is not None else None + except (TypeError, ValueError): + return None + class Main_Checkpoint_Anima_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base): """Model config for Anima single-file checkpoint models (safetensors). 
diff --git a/invokeai/backend/model_manager/load/model_loaders/wan.py b/invokeai/backend/model_manager/load/model_loaders/wan.py index 09065129f6d..4824d61b8dd 100644 --- a/invokeai/backend/model_manager/load/model_loaders/wan.py +++ b/invokeai/backend/model_manager/load/model_loaders/wan.py @@ -273,11 +273,28 @@ def _load_from_singlefile(self, config: Main_GGUF_Wan_Config) -> AnyModel: text_shape = text_w.tensor_shape if isinstance(text_w, GGMLTensor) else text_w.shape text_dim = int(text_shape[1]) + # out_channels is read from proj_out.weight directly rather than assumed + # equal to in_channels: I2V-A14B has in_channels=36 (16 noise + 16 + # ref-image latents + 4 mask, concatenated by the denoise loop) but + # out_channels=16 (only the noise prediction comes back). proj_out is + # ``nn.Linear(inner_dim, out_channels * prod(patch_size))`` and + # patch_size is (1, 2, 2) → prod = 4 for the Wan 2.2 family. + proj_out_w = sd.get("proj_out.weight") + if proj_out_w is None: + raise RuntimeError("GGUF state dict missing proj_out.weight after prefix strip") + proj_out_shape = proj_out_w.tensor_shape if isinstance(proj_out_w, GGMLTensor) else proj_out_w.shape + out_channels = int(proj_out_shape[0]) // 4 + + # Layer count fallback (only triggers if the auto-count loop above + # found zero blocks, which shouldn't happen for a valid GGUF). T2V/I2V + # A14B have 40 layers; TI2V-5B has 30. 
+ layer_count_fallback = 30 if config.variant == WanVariantType.TI2V_5B else 40 + model_config: dict = { "patch_size": (1, 2, 2), "in_channels": in_channels, - "out_channels": in_channels, - "num_layers": num_layers if num_layers > 0 else (40 if config.variant == WanVariantType.T2V_A14B else 30), + "out_channels": out_channels, + "num_layers": num_layers if num_layers > 0 else layer_count_fallback, "attention_head_dim": attention_head_dim, "num_attention_heads": num_attention_heads, "ffn_dim": ffn_dim, diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py index d9819b5fe2b..23f818a1116 100644 --- a/invokeai/backend/model_manager/taxonomy.py +++ b/invokeai/backend/model_manager/taxonomy.py @@ -172,14 +172,20 @@ class QwenImageVariantType(str, Enum): class WanVariantType(str, Enum): """Wan 2.2 model variants. - Both variants are used for image generation at num_frames=1. They differ in - architecture: A14B is a Mixture-of-Experts model with two transformer experts - (high-noise and low-noise) totalling ~28B params; TI2V-5B is a single ~5B - transformer with a higher-compression VAE (z_dim=48). + All variants are used for image generation at num_frames=1. The A14B family + is a Mixture-of-Experts (high-noise + low-noise) totalling ~28B params; the + T2V sub-variant takes text only, while the I2V sub-variant additionally + conditions on a reference image (encoded by the VAE and concatenated to the + noise latents along the channel dim — its transformer has ``in_channels=36`` + instead of ``16``). TI2V-5B is a single ~5B transformer with a + higher-compression VAE (z_dim=48). 
""" T2V_A14B = "t2v_a14b" - """Wan 2.2 T2V-A14B - dual-expert MoE flagship (high-noise + low-noise transformers, standard 16-channel Wan VAE).""" + """Wan 2.2 T2V-A14B - dual-expert MoE (text only, 16-channel Wan VAE, transformer in_channels=16).""" + + I2V_A14B = "i2v_a14b" + """Wan 2.2 I2V-A14B - dual-expert MoE with VAE-latent reference-image conditioning (transformer in_channels=36).""" TI2V_5B = "ti2v_5b" """Wan 2.2 TI2V-5B - smaller single-transformer model with Wan2.2-VAE (48 latent channels).""" diff --git a/invokeai/backend/patches/lora_conversions/anima_lora_constants.py b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py index 380e31998a7..d3d4ac3bcd0 100644 --- a/invokeai/backend/patches/lora_conversions/anima_lora_constants.py +++ b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py @@ -17,7 +17,10 @@ # in ``anima_lora_conversion_utils``) to avoid circular imports. # --------------------------------------------------------------------------- -# Cosmos DiT subcomponent names unique to the Anima / Cosmos Predict2 architecture. +# Cosmos DiT subcomponent names that ALSO appear in Wan (cross_attn, self_attn) +# plus those unique to Cosmos. Used by ``anima_lora_conversion_utils`` to find +# block layers during state-dict conversion, where the architecture is already +# known to be Anima. _COSMOS_DIT_SUBCOMPONENTS_RE = r"(cross_attn|self_attn|mlp|adaln_modulation)" # Kohya format: lora_unet_[llm_adapter_]blocks_N_ @@ -29,17 +32,55 @@ ) +# Subcomponents *uniquely* identifying Anima/Cosmos DiT: ``mlp`` and +# ``adaln_modulation`` (Wan calls those ``ffn`` and ``modulation`` respectively), +# plus the Cosmos attention naming with a ``_proj`` suffix on the projection +# letter (Wan native uses bare ``.q``/``.k``/``.v``/``.o`` — no ``_proj``). 
+# +# Used by the probe in ``configs/lora.py`` to make Anima-LoRA detection +# *mutually exclusive* with Wan-LoRA detection: a state dict carrying only +# ``cross_attn.q`` / ``ffn.0`` (Wan native) will NOT match here, regardless of +# the order configs are tried. +_COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE = ( + r"(mlp|adaln_modulation|" + r"(?:cross|self)_attn[._](?:[qkv]_proj|output_proj))" +) + +_KOHYA_ANIMA_STRICT_RE = re.compile( + r"lora_unet_(llm_adapter_)?blocks_\d+_" + _COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE +) +_PEFT_ANIMA_STRICT_RE = re.compile( + r"(diffusion_model|transformer|base_model\.model\.transformer)\.blocks\.\d+\." + + _COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE +) + + def has_cosmos_dit_kohya_keys(str_keys: list[str]) -> bool: - """Check for Kohya-style keys targeting Cosmos DiT blocks with specific subcomponents. + """Loose detector — matches any Cosmos-shaped block submodule including + those whose names collide with Wan (``cross_attn``, ``self_attn``). - Requires both the ``lora_unet_[llm_adapter_]blocks_N_`` prefix **and** a - Cosmos DiT subcomponent name (cross_attn, self_attn, mlp, adaln_modulation) - to avoid false-positives on other architectures that might also use bare - ``blocks`` in their key paths. + For probe disambiguation between Anima and Wan, prefer + ``has_cosmos_dit_kohya_keys_strict``. This loose form is still useful + inside the Anima conversion utility, where the architecture is already + confirmed to be Anima and we just need to enumerate matching layers. 
""" return any(_KOHYA_ANIMA_RE.search(k) is not None for k in str_keys) def has_cosmos_dit_peft_keys(str_keys: list[str]) -> bool: - """Check for diffusers PEFT keys targeting Cosmos DiT blocks with specific subcomponents.""" + """Loose PEFT-format detector — see ``has_cosmos_dit_kohya_keys`` docstring.""" return any(_PEFT_ANIMA_RE.search(k) is not None for k in str_keys) + + +def has_cosmos_dit_kohya_keys_strict(str_keys: list[str]) -> bool: + """Strict Kohya detector requiring an Anima-exclusive submodule (``mlp``, + ``adaln_modulation``, or Cosmos's ``_proj``-suffixed attention names). + + Mutually exclusive with the Wan LoRA probe — no Wan LoRA can satisfy this. + """ + return any(_KOHYA_ANIMA_STRICT_RE.search(k) is not None for k in str_keys) + + +def has_cosmos_dit_peft_keys_strict(str_keys: list[str]) -> bool: + """Strict PEFT detector. See ``has_cosmos_dit_kohya_keys_strict`` docstring.""" + return any(_PEFT_ANIMA_STRICT_RE.search(k) is not None for k in str_keys) diff --git a/invokeai/backend/wan/extensions/__init__.py b/invokeai/backend/wan/extensions/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/invokeai/backend/wan/extensions/wan_ref_image_extension.py b/invokeai/backend/wan/extensions/wan_ref_image_extension.py new file mode 100644 index 00000000000..0b98053b464 --- /dev/null +++ b/invokeai/backend/wan/extensions/wan_ref_image_extension.py @@ -0,0 +1,97 @@ +"""Wan 2.2 I2V reference-image conditioning. + +Wan 2.2 I2V-A14B conditions on a reference image by **VAE-encoding** it and +concatenating the resulting latents to the noise latents along the channel +dim — its transformer has ``in_channels=36`` (16 noise + 16 ref-image latents ++ 4 first-frame mask) rather than 16. + +This module produces the 20-channel condition tensor ``[B, 20, T_lat, H_lat, W_lat]`` +that the denoise loop will concatenate to the 16-channel noise latents each +step, yielding the 36-channel input the I2V transformer expects. 
+ +Mirrors diffusers ``WanImageToVideoPipeline.prepare_latents`` lines 423–481 +with ``num_frames=1`` and ``expand_timesteps=False`` (the defaults for +single-frame image generation). +""" + +from PIL import Image +import torch +import torchvision.transforms.functional as TF +from diffusers.models.autoencoders import AutoencoderKLWan + +# Wan 2.2 VAE temporal scale factor — single frame still consumes a 4-position +# slice of the mask tensor, which is why the mask contributes 4 channels. +_WAN_VAE_TEMPORAL_SCALE = 4 + + +def preprocess_reference_image( + image: Image.Image, width: int, height: int +) -> torch.Tensor: + """Resize a PIL image to (width, height) and return a normalised [-1, 1] + tensor of shape ``[1, 3, 1, height, width]`` ready for ``AutoencoderKLWan.encode``.""" + if width % 8 != 0 or height % 8 != 0: + raise ValueError( + f"Reference-image dimensions must be multiples of 8 (got {width}x{height})." + ) + resized = image.convert("RGB").resize((width, height), Image.LANCZOS) + # [0, 1] CHW float tensor. + pixel = TF.to_tensor(resized) + # Scale to [-1, 1] to match the Wan VAE's expected input range. + pixel = pixel * 2.0 - 1.0 + # [3, H, W] -> [1, 3, 1, H, W]: add batch + temporal dims. + return pixel.unsqueeze(0).unsqueeze(2) + + +def encode_reference_image_to_condition( + image: Image.Image, + vae: AutoencoderKLWan, + width: int, + height: int, + device: torch.device, + dtype: torch.dtype, +) -> torch.Tensor: + """Build the 20-channel I2V condition tensor for a reference image. + + Returns shape ``[1, 20, 1, height // 8, width // 8]`` (4-channel first-frame + mask concatenated with 16-channel VAE-encoded image latents along the + channel dim). + + The output should later be concatenated with the 16-channel noise latents + inside the denoise loop to produce the 36-channel input the I2V transformer + expects. 
+ """ + vae_dtype = next(iter(vae.parameters())).dtype + pixel = preprocess_reference_image(image, width=width, height=height).to( + device=device, dtype=vae_dtype + ) + + with torch.inference_mode(): + encoded = vae.encode(pixel, return_dict=False)[0] + latents = encoded.sample() # [1, 16, 1, H_lat, W_lat] + + # Normalise against the VAE's per-channel mean/std, matching diffusers' + # ``WanImageToVideoPipeline.prepare_latents`` (lines 440-459). Note the + # multiplication by 1/std == division by std. + latents_mean = ( + torch.tensor(vae.config.latents_mean) + .view(1, -1, 1, 1, 1) + .to(latents.device, latents.dtype) + ) + latents_std = ( + torch.tensor(vae.config.latents_std) + .view(1, -1, 1, 1, 1) + .to(latents.device, latents.dtype) + ) + latent_condition = (latents - latents_mean) / latents_std + + latent_condition = latent_condition.to(dtype=dtype) + + # First-frame mask: at num_frames=1 every position is "the first frame" + # (i.e., conditioned). After the temporal-scale expansion the mask is + # 4 channels of ones at [1, T_lat=1, H_lat, W_lat]. 
+ _, _, t_lat, h_lat, w_lat = latent_condition.shape + mask = torch.ones( + 1, _WAN_VAE_TEMPORAL_SCALE, t_lat, h_lat, w_lat, device=device, dtype=dtype + ) + + return torch.cat([mask, latent_condition], dim=1) diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index ca979819522..76894ce9e4b 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -3558,7 +3558,7 @@ export type components = { */ type: "anima_text_encoder"; }; - AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | 
components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | 
components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + AnyModelConfig: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | 
components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | 
components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * AppVersion * @description App Version Response @@ -12248,7 +12248,7 @@ export type components = { * @description The nodes in this graph */ nodes?: { - [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | 
components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | 
components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | 
components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | 
components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | 
components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | 
components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | 
components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + [key: string]: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | 
components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | 
components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | 
components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | 
components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | 
components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | 
components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | 
components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanLoRACollectionLoader"] | components["schemas"]["WanLoRALoaderInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanRefImageEncoderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; }; /** * Edges @@ -12285,7 +12285,7 @@ export type components = { * @description The results of node executions */ results: { - [key: string]: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | 
components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | 
components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + [key: string]: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | 
components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | 
components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanLoRALoaderOutput"] | 
components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["WanRefImageOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * Errors @@ -15646,7 +15646,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | 
components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | 
components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | 
components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | 
components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | 
components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | 
components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | 
components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | 
components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | 
components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | 
components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | 
components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | 
components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | 
components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanLoRACollectionLoader"] | components["schemas"]["WanLoRALoaderInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanRefImageEncoderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | 
components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -15656,7 +15656,7 @@ export type components = { * Result * @description The result of the invocation */ - result: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | 
components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | 
components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; + result: components["schemas"]["AnimaConditioningOutput"] | components["schemas"]["AnimaLoRALoaderOutput"] | components["schemas"]["AnimaModelLoaderOutput"] | components["schemas"]["BooleanCollectionOutput"] | components["schemas"]["BooleanOutput"] | components["schemas"]["BoundingBoxCollectionOutput"] | components["schemas"]["BoundingBoxOutput"] | components["schemas"]["CLIPOutput"] | components["schemas"]["CLIPSkipInvocationOutput"] | components["schemas"]["CalculateImageTilesOutput"] | components["schemas"]["CogView4ConditioningOutput"] | components["schemas"]["CogView4ModelLoaderOutput"] | components["schemas"]["CollectInvocationOutput"] | components["schemas"]["ColorCollectionOutput"] | components["schemas"]["ColorOutput"] | components["schemas"]["ConditioningCollectionOutput"] | components["schemas"]["ConditioningOutput"] | components["schemas"]["ControlOutput"] | components["schemas"]["DenoiseMaskOutput"] | components["schemas"]["FaceMaskOutput"] | components["schemas"]["FaceOffOutput"] | components["schemas"]["FloatCollectionOutput"] | components["schemas"]["FloatGeneratorOutput"] | components["schemas"]["FloatOutput"] | components["schemas"]["Flux2KleinLoRALoaderOutput"] | components["schemas"]["Flux2KleinModelLoaderOutput"] | components["schemas"]["FluxConditioningCollectionOutput"] | 
components["schemas"]["FluxConditioningOutput"] | components["schemas"]["FluxControlLoRALoaderOutput"] | components["schemas"]["FluxControlNetOutput"] | components["schemas"]["FluxFillOutput"] | components["schemas"]["FluxKontextOutput"] | components["schemas"]["FluxLoRALoaderOutput"] | components["schemas"]["FluxModelLoaderOutput"] | components["schemas"]["FluxReduxOutput"] | components["schemas"]["GradientMaskOutput"] | components["schemas"]["IPAdapterOutput"] | components["schemas"]["IdealSizeOutput"] | components["schemas"]["IfInvocationOutput"] | components["schemas"]["ImageCollectionOutput"] | components["schemas"]["ImageGeneratorOutput"] | components["schemas"]["ImageOutput"] | components["schemas"]["ImagePanelCoordinateOutput"] | components["schemas"]["IntegerCollectionOutput"] | components["schemas"]["IntegerGeneratorOutput"] | components["schemas"]["IntegerOutput"] | components["schemas"]["IterateInvocationOutput"] | components["schemas"]["LatentsCollectionOutput"] | components["schemas"]["LatentsMetaOutput"] | components["schemas"]["LatentsOutput"] | components["schemas"]["LoRALoaderOutput"] | components["schemas"]["LoRASelectorOutput"] | components["schemas"]["MDControlListOutput"] | components["schemas"]["MDIPAdapterListOutput"] | components["schemas"]["MDT2IAdapterListOutput"] | components["schemas"]["MaskOutput"] | components["schemas"]["MetadataItemOutput"] | components["schemas"]["MetadataOutput"] | components["schemas"]["MetadataToLorasCollectionOutput"] | components["schemas"]["MetadataToModelOutput"] | components["schemas"]["MetadataToSDXLModelOutput"] | components["schemas"]["ModelIdentifierOutput"] | components["schemas"]["ModelLoaderOutput"] | components["schemas"]["NoiseOutput"] | components["schemas"]["PBRMapsOutput"] | components["schemas"]["PairTileImageOutput"] | components["schemas"]["PromptTemplateOutput"] | components["schemas"]["QwenImageConditioningOutput"] | components["schemas"]["QwenImageLoRALoaderOutput"] | 
components["schemas"]["QwenImageModelLoaderOutput"] | components["schemas"]["SD3ConditioningOutput"] | components["schemas"]["SDXLLoRALoaderOutput"] | components["schemas"]["SDXLModelLoaderOutput"] | components["schemas"]["SDXLRefinerModelLoaderOutput"] | components["schemas"]["SchedulerOutput"] | components["schemas"]["Sd3ModelLoaderOutput"] | components["schemas"]["SeamlessModeOutput"] | components["schemas"]["String2Output"] | components["schemas"]["StringCollectionOutput"] | components["schemas"]["StringGeneratorOutput"] | components["schemas"]["StringOutput"] | components["schemas"]["StringPosNegOutput"] | components["schemas"]["T2IAdapterOutput"] | components["schemas"]["TileToPropertiesOutput"] | components["schemas"]["UNetOutput"] | components["schemas"]["VAEOutput"] | components["schemas"]["WanConditioningOutput"] | components["schemas"]["WanLoRALoaderOutput"] | components["schemas"]["WanModelLoaderOutput"] | components["schemas"]["WanRefImageOutput"] | components["schemas"]["ZImageConditioningOutput"] | components["schemas"]["ZImageControlOutput"] | components["schemas"]["ZImageLoRALoaderOutput"] | components["schemas"]["ZImageModelLoaderOutput"]; }; /** * InvocationErrorEvent @@ -15710,7 +15710,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | 
components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | 
components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | 
components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | 
components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | 
components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | 
components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | 
components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | 
components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | 
components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | 
components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | 
components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | 
components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | 
components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | 
components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanLoRACollectionLoader"] | components["schemas"]["WanLoRALoaderInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanRefImageEncoderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -15978,7 +15978,10 @@ export type components = { wan_denoise: components["schemas"]["LatentsOutput"]; wan_i2l: components["schemas"]["LatentsOutput"]; wan_l2i: components["schemas"]["ImageOutput"]; + wan_lora_collection_loader: components["schemas"]["WanLoRALoaderOutput"]; + wan_lora_loader: components["schemas"]["WanLoRALoaderOutput"]; wan_model_loader: components["schemas"]["WanModelLoaderOutput"]; + wan_ref_image_encoder: components["schemas"]["WanRefImageOutput"]; wan_text_encoder: components["schemas"]["WanConditioningOutput"]; z_image_control: components["schemas"]["ZImageControlOutput"]; z_image_denoise: components["schemas"]["LatentsOutput"]; @@ -16043,7 +16046,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | 
components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | 
components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | 
components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | 
components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | 
components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | 
components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | 
components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | 
components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | 
components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] 
| components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | 
components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | 
components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | 
components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | 
components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanLoRACollectionLoader"] | components["schemas"]["WanLoRALoaderInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanRefImageEncoderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -16118,7 +16121,7 @@ export type components = { * Invocation * @description The ID of the invocation */ - invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | 
components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | 
components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | 
components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | 
components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | 
components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | 
components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | 
components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; + invocation: components["schemas"]["AddInvocation"] | components["schemas"]["AlibabaCloudImageGenerationInvocation"] | components["schemas"]["AlphaMaskToTensorInvocation"] | components["schemas"]["AnimaDenoiseInvocation"] | components["schemas"]["AnimaImageToLatentsInvocation"] | components["schemas"]["AnimaLatentsToImageInvocation"] | components["schemas"]["AnimaLoRACollectionLoader"] | components["schemas"]["AnimaLoRALoaderInvocation"] | components["schemas"]["AnimaModelLoaderInvocation"] | components["schemas"]["AnimaTextEncoderInvocation"] | 
components["schemas"]["ApplyMaskTensorToImageInvocation"] | components["schemas"]["ApplyMaskToImageInvocation"] | components["schemas"]["BlankImageInvocation"] | components["schemas"]["BlendLatentsInvocation"] | components["schemas"]["BooleanCollectionInvocation"] | components["schemas"]["BooleanInvocation"] | components["schemas"]["BoundingBoxInvocation"] | components["schemas"]["CLIPSkipInvocation"] | components["schemas"]["CV2InfillInvocation"] | components["schemas"]["CalculateImageTilesEvenSplitInvocation"] | components["schemas"]["CalculateImageTilesInvocation"] | components["schemas"]["CalculateImageTilesMinimumOverlapInvocation"] | components["schemas"]["CannyEdgeDetectionInvocation"] | components["schemas"]["CanvasOutputInvocation"] | components["schemas"]["CanvasPasteBackInvocation"] | components["schemas"]["CanvasV2MaskAndCropInvocation"] | components["schemas"]["CenterPadCropInvocation"] | components["schemas"]["CogView4DenoiseInvocation"] | components["schemas"]["CogView4ImageToLatentsInvocation"] | components["schemas"]["CogView4LatentsToImageInvocation"] | components["schemas"]["CogView4ModelLoaderInvocation"] | components["schemas"]["CogView4TextEncoderInvocation"] | components["schemas"]["CollectInvocation"] | components["schemas"]["ColorCorrectInvocation"] | components["schemas"]["ColorInvocation"] | components["schemas"]["ColorMapInvocation"] | components["schemas"]["CompelInvocation"] | components["schemas"]["ConditioningCollectionInvocation"] | components["schemas"]["ConditioningInvocation"] | components["schemas"]["ContentShuffleInvocation"] | components["schemas"]["ControlNetInvocation"] | components["schemas"]["CoreMetadataInvocation"] | components["schemas"]["CreateDenoiseMaskInvocation"] | components["schemas"]["CreateGradientMaskInvocation"] | components["schemas"]["CropImageToBoundingBoxInvocation"] | components["schemas"]["CropLatentsCoreInvocation"] | components["schemas"]["CvInpaintInvocation"] | 
components["schemas"]["DWOpenposeDetectionInvocation"] | components["schemas"]["DecodeInvisibleWatermarkInvocation"] | components["schemas"]["DenoiseLatentsInvocation"] | components["schemas"]["DenoiseLatentsMetaInvocation"] | components["schemas"]["DepthAnythingDepthEstimationInvocation"] | components["schemas"]["DivideInvocation"] | components["schemas"]["DynamicPromptInvocation"] | components["schemas"]["ESRGANInvocation"] | components["schemas"]["ExpandMaskWithFadeInvocation"] | components["schemas"]["FLUXLoRACollectionLoader"] | components["schemas"]["FaceIdentifierInvocation"] | components["schemas"]["FaceMaskInvocation"] | components["schemas"]["FaceOffInvocation"] | components["schemas"]["FloatBatchInvocation"] | components["schemas"]["FloatCollectionInvocation"] | components["schemas"]["FloatGenerator"] | components["schemas"]["FloatInvocation"] | components["schemas"]["FloatLinearRangeInvocation"] | components["schemas"]["FloatMathInvocation"] | components["schemas"]["FloatToIntegerInvocation"] | components["schemas"]["Flux2DenoiseInvocation"] | components["schemas"]["Flux2KleinLoRACollectionLoader"] | components["schemas"]["Flux2KleinLoRALoaderInvocation"] | components["schemas"]["Flux2KleinModelLoaderInvocation"] | components["schemas"]["Flux2KleinTextEncoderInvocation"] | components["schemas"]["Flux2VaeDecodeInvocation"] | components["schemas"]["Flux2VaeEncodeInvocation"] | components["schemas"]["FluxControlLoRALoaderInvocation"] | components["schemas"]["FluxControlNetInvocation"] | components["schemas"]["FluxDenoiseInvocation"] | components["schemas"]["FluxDenoiseLatentsMetaInvocation"] | components["schemas"]["FluxFillInvocation"] | components["schemas"]["FluxIPAdapterInvocation"] | components["schemas"]["FluxKontextConcatenateImagesInvocation"] | components["schemas"]["FluxKontextInvocation"] | components["schemas"]["FluxLoRALoaderInvocation"] | components["schemas"]["FluxModelLoaderInvocation"] | components["schemas"]["FluxReduxInvocation"] | 
components["schemas"]["FluxTextEncoderInvocation"] | components["schemas"]["FluxVaeDecodeInvocation"] | components["schemas"]["FluxVaeEncodeInvocation"] | components["schemas"]["FreeUInvocation"] | components["schemas"]["GeminiImageGenerationInvocation"] | components["schemas"]["GetMaskBoundingBoxInvocation"] | components["schemas"]["GroundingDinoInvocation"] | components["schemas"]["HEDEdgeDetectionInvocation"] | components["schemas"]["HeuristicResizeInvocation"] | components["schemas"]["IPAdapterInvocation"] | components["schemas"]["IdealSizeInvocation"] | components["schemas"]["IfInvocation"] | components["schemas"]["ImageBatchInvocation"] | components["schemas"]["ImageBlurInvocation"] | components["schemas"]["ImageChannelInvocation"] | components["schemas"]["ImageChannelMultiplyInvocation"] | components["schemas"]["ImageChannelOffsetInvocation"] | components["schemas"]["ImageCollectionInvocation"] | components["schemas"]["ImageConvertInvocation"] | components["schemas"]["ImageCropInvocation"] | components["schemas"]["ImageGenerator"] | components["schemas"]["ImageHueAdjustmentInvocation"] | components["schemas"]["ImageInverseLerpInvocation"] | components["schemas"]["ImageInvocation"] | components["schemas"]["ImageLerpInvocation"] | components["schemas"]["ImageMaskToTensorInvocation"] | components["schemas"]["ImageMultiplyInvocation"] | components["schemas"]["ImageNSFWBlurInvocation"] | components["schemas"]["ImageNoiseInvocation"] | components["schemas"]["ImagePanelLayoutInvocation"] | components["schemas"]["ImagePasteInvocation"] | components["schemas"]["ImageResizeInvocation"] | components["schemas"]["ImageScaleInvocation"] | components["schemas"]["ImageToLatentsInvocation"] | components["schemas"]["ImageWatermarkInvocation"] | components["schemas"]["InfillColorInvocation"] | components["schemas"]["InfillPatchMatchInvocation"] | components["schemas"]["InfillTileInvocation"] | components["schemas"]["IntegerBatchInvocation"] | 
components["schemas"]["IntegerCollectionInvocation"] | components["schemas"]["IntegerGenerator"] | components["schemas"]["IntegerInvocation"] | components["schemas"]["IntegerMathInvocation"] | components["schemas"]["InvertTensorMaskInvocation"] | components["schemas"]["InvokeAdjustImageHuePlusInvocation"] | components["schemas"]["InvokeEquivalentAchromaticLightnessInvocation"] | components["schemas"]["InvokeImageBlendInvocation"] | components["schemas"]["InvokeImageCompositorInvocation"] | components["schemas"]["InvokeImageDilateOrErodeInvocation"] | components["schemas"]["InvokeImageEnhanceInvocation"] | components["schemas"]["InvokeImageValueThresholdsInvocation"] | components["schemas"]["IterateInvocation"] | components["schemas"]["LaMaInfillInvocation"] | components["schemas"]["LatentsCollectionInvocation"] | components["schemas"]["LatentsInvocation"] | components["schemas"]["LatentsToImageInvocation"] | components["schemas"]["LineartAnimeEdgeDetectionInvocation"] | components["schemas"]["LineartEdgeDetectionInvocation"] | components["schemas"]["LlavaOnevisionVllmInvocation"] | components["schemas"]["LoRACollectionLoader"] | components["schemas"]["LoRALoaderInvocation"] | components["schemas"]["LoRASelectorInvocation"] | components["schemas"]["MLSDDetectionInvocation"] | components["schemas"]["MainModelLoaderInvocation"] | components["schemas"]["MaskCombineInvocation"] | components["schemas"]["MaskEdgeInvocation"] | components["schemas"]["MaskFromAlphaInvocation"] | components["schemas"]["MaskFromIDInvocation"] | components["schemas"]["MaskTensorToImageInvocation"] | components["schemas"]["MediaPipeFaceDetectionInvocation"] | components["schemas"]["MergeMetadataInvocation"] | components["schemas"]["MergeTilesToImageInvocation"] | components["schemas"]["MetadataFieldExtractorInvocation"] | components["schemas"]["MetadataFromImageInvocation"] | components["schemas"]["MetadataInvocation"] | components["schemas"]["MetadataItemInvocation"] | 
components["schemas"]["MetadataItemLinkedInvocation"] | components["schemas"]["MetadataToBoolCollectionInvocation"] | components["schemas"]["MetadataToBoolInvocation"] | components["schemas"]["MetadataToControlnetsInvocation"] | components["schemas"]["MetadataToFloatCollectionInvocation"] | components["schemas"]["MetadataToFloatInvocation"] | components["schemas"]["MetadataToIPAdaptersInvocation"] | components["schemas"]["MetadataToIntegerCollectionInvocation"] | components["schemas"]["MetadataToIntegerInvocation"] | components["schemas"]["MetadataToLorasCollectionInvocation"] | components["schemas"]["MetadataToLorasInvocation"] | components["schemas"]["MetadataToModelInvocation"] | components["schemas"]["MetadataToSDXLLorasInvocation"] | components["schemas"]["MetadataToSDXLModelInvocation"] | components["schemas"]["MetadataToSchedulerInvocation"] | components["schemas"]["MetadataToStringCollectionInvocation"] | components["schemas"]["MetadataToStringInvocation"] | components["schemas"]["MetadataToT2IAdaptersInvocation"] | components["schemas"]["MetadataToVAEInvocation"] | components["schemas"]["ModelIdentifierInvocation"] | components["schemas"]["MultiplyInvocation"] | components["schemas"]["NoiseInvocation"] | components["schemas"]["NormalMapInvocation"] | components["schemas"]["OpenAIImageGenerationInvocation"] | components["schemas"]["PBRMapsInvocation"] | components["schemas"]["PairTileImageInvocation"] | components["schemas"]["PasteImageIntoBoundingBoxInvocation"] | components["schemas"]["PiDiNetEdgeDetectionInvocation"] | components["schemas"]["PromptTemplateInvocation"] | components["schemas"]["PromptsFromFileInvocation"] | components["schemas"]["QwenImageDenoiseInvocation"] | components["schemas"]["QwenImageImageToLatentsInvocation"] | components["schemas"]["QwenImageLatentsToImageInvocation"] | components["schemas"]["QwenImageLoRACollectionLoader"] | components["schemas"]["QwenImageLoRALoaderInvocation"] | 
components["schemas"]["QwenImageModelLoaderInvocation"] | components["schemas"]["QwenImageTextEncoderInvocation"] | components["schemas"]["RandomFloatInvocation"] | components["schemas"]["RandomIntInvocation"] | components["schemas"]["RandomRangeInvocation"] | components["schemas"]["RangeInvocation"] | components["schemas"]["RangeOfSizeInvocation"] | components["schemas"]["RectangleMaskInvocation"] | components["schemas"]["ResizeLatentsInvocation"] | components["schemas"]["RoundInvocation"] | components["schemas"]["SD3DenoiseInvocation"] | components["schemas"]["SD3ImageToLatentsInvocation"] | components["schemas"]["SD3LatentsToImageInvocation"] | components["schemas"]["SDXLCompelPromptInvocation"] | components["schemas"]["SDXLLoRACollectionLoader"] | components["schemas"]["SDXLLoRALoaderInvocation"] | components["schemas"]["SDXLModelLoaderInvocation"] | components["schemas"]["SDXLRefinerCompelPromptInvocation"] | components["schemas"]["SDXLRefinerModelLoaderInvocation"] | components["schemas"]["SaveImageInvocation"] | components["schemas"]["ScaleLatentsInvocation"] | components["schemas"]["SchedulerInvocation"] | components["schemas"]["Sd3ModelLoaderInvocation"] | components["schemas"]["Sd3TextEncoderInvocation"] | components["schemas"]["SeamlessModeInvocation"] | components["schemas"]["SeedreamImageGenerationInvocation"] | components["schemas"]["SegmentAnythingInvocation"] | components["schemas"]["ShowImageInvocation"] | components["schemas"]["SpandrelImageToImageAutoscaleInvocation"] | components["schemas"]["SpandrelImageToImageInvocation"] | components["schemas"]["StringBatchInvocation"] | components["schemas"]["StringCollectionInvocation"] | components["schemas"]["StringGenerator"] | components["schemas"]["StringInvocation"] | components["schemas"]["StringJoinInvocation"] | components["schemas"]["StringJoinThreeInvocation"] | components["schemas"]["StringReplaceInvocation"] | components["schemas"]["StringSplitInvocation"] | 
components["schemas"]["StringSplitNegInvocation"] | components["schemas"]["SubtractInvocation"] | components["schemas"]["T2IAdapterInvocation"] | components["schemas"]["TextLLMInvocation"] | components["schemas"]["TileToPropertiesInvocation"] | components["schemas"]["TiledMultiDiffusionDenoiseLatents"] | components["schemas"]["UnsharpMaskInvocation"] | components["schemas"]["VAELoaderInvocation"] | components["schemas"]["WanDenoiseInvocation"] | components["schemas"]["WanImageToLatentsInvocation"] | components["schemas"]["WanLatentsToImageInvocation"] | components["schemas"]["WanLoRACollectionLoader"] | components["schemas"]["WanLoRALoaderInvocation"] | components["schemas"]["WanModelLoaderInvocation"] | components["schemas"]["WanRefImageEncoderInvocation"] | components["schemas"]["WanTextEncoderInvocation"] | components["schemas"]["ZImageControlInvocation"] | components["schemas"]["ZImageDenoiseInvocation"] | components["schemas"]["ZImageDenoiseMetaInvocation"] | components["schemas"]["ZImageImageToLatentsInvocation"] | components["schemas"]["ZImageLatentsToImageInvocation"] | components["schemas"]["ZImageLoRACollectionLoader"] | components["schemas"]["ZImageLoRALoaderInvocation"] | components["schemas"]["ZImageModelLoaderInvocation"] | components["schemas"]["ZImageSeedVarianceEnhancerInvocation"] | components["schemas"]["ZImageTextEncoderInvocation"]; /** * Invocation Source Id * @description The ID of the prepared invocation's source node @@ -19019,6 +19022,102 @@ export type components = { */ base: "sdxl"; }; + /** + * LoRA_LyCORIS_Wan_Config + * @description Model config for Wan 2.2 LoRA models in LyCORIS format. + * + * Wan LoRAs target ``WanTransformer3DModel`` blocks. The Wan 2.2 A14B family + * is dual-expert (high-noise + low-noise) — LoRAs are typically trained + * against one expert. ``expert`` records which one so the model loader + * invocation can wire it to the correct ``loras`` / ``loras_low_noise`` list. 
+ * Many LoRAs are expert-agnostic (TI2V-5B family, or community LoRAs that + * just don't tag the expert) — these get ``expert=None`` and are applied to + * both experts by default. + */ + LoRA_LyCORIS_Wan_Config: { + /** + * Key + * @description A unique key for this model. + */ + key: string; + /** + * Hash + * @description The hash of the model file(s). + */ + hash: string; + /** + * Path + * @description Path to the model on the filesystem. Relative paths are relative to the Invoke root directory. + */ + path: string; + /** + * File Size + * @description The size of the model in bytes. + */ + file_size: number; + /** + * Name + * @description Name of the model. + */ + name: string; + /** + * Description + * @description Model description + */ + description: string | null; + /** + * Source + * @description The original source of the model (path, URL or repo_id). + */ + source: string; + /** @description The type of source */ + source_type: components["schemas"]["ModelSourceType"]; + /** + * Source Api Response + * @description The original API response from the source, as stringified JSON. + */ + source_api_response: string | null; + /** + * Source Url + * @description Optional URL for the model (e.g. download page or model page). 
+ */ + source_url: string | null; + /** + * Cover Image + * @description Url for image to preview model + */ + cover_image: string | null; + /** + * Type + * @default lora + * @constant + */ + type: "lora"; + /** + * Trigger Phrases + * @description Set of trigger phrases for this model + */ + trigger_phrases: string[] | null; + /** @description Default settings for this model */ + default_settings: components["schemas"]["LoraModelDefaultSettings"] | null; + /** + * Format + * @default lycoris + * @constant + */ + format: "lycoris"; + /** + * Base + * @default wan + * @constant + */ + base: "wan"; + /** + * Expert + * @description For Wan 2.2 A14B dual-expert LoRAs: 'high' targets the high-noise expert, 'low' targets the low-noise expert. None means the LoRA is expert-agnostic (TI2V-5B, or community LoRAs without explicit tagging) and is applied to both. + */ + expert: ("high" | "low") | null; + }; /** * LoRA_LyCORIS_ZImage_Config * @description Model config for Z-Image LoRA models in LyCORIS format. 
@@ -23561,7 +23660,7 @@ export type components = { * Config * @description The installed model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | 
components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] 
| components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | 
components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; /** * ModelInstallDownloadProgressEvent @@ -23727,7 +23826,7 @@ export type components = { * Config Out * @description After successful installation, this will hold the configuration object. */ - config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | 
components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | 
components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | 
components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; + config_out?: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | 
components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | 
components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]) | null; /** * Inplace * @description Leave model in its current location; otherwise install under models directory @@ -23813,7 +23912,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | 
components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | 
components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | 
components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | 
components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -23834,7 +23933,7 @@ export type components = { * Config * @description The model's config */ - config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | 
components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | 
components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | 
components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + config: components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] 
| components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | 
components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; /** * @description The submodel type, if any * @default null @@ -24031,7 +24130,7 @@ export type components = { */ ModelsList: { /** Models */ - models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | 
components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; + models: (components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | 
components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | 
components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | 
components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"])[]; }; /** * Multiply Integers @@ -32026,6 +32125,12 @@ export type components = { * @default null */ negative_conditioning?: components["schemas"]["WanConditioningField"] | null; + /** + * Reference Image + * @description Reference-image (VAE-latent) conditioning for Wan 2.2 I2V. + * @default null + */ + ref_image?: components["schemas"]["WanRefImageConditioningField"] | null; /** * @description Latents tensor * @default null @@ -32199,6 +32304,131 @@ export type components = { */ type: "wan_l2i"; }; + /** + * Apply LoRA Collection - Wan 2.2 + * @description Apply a collection of LoRAs to the Wan 2.2 transformer(s). + * + * Each LoRA is routed to the primary and/or low-noise list based on its + * recorded ``expert`` tag (set by the probe from the filename). Untagged + * LoRAs go to both lists. + */ + WanLoRACollectionLoader: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRAs + * @description LoRAs to apply. May be a single LoRA or a collection. 
+ * @default null + */ + loras?: components["schemas"]["LoRAField"] | components["schemas"]["LoRAField"][] | null; + /** + * Wan Transformer + * @description Transformer + * @default null + */ + transformer?: components["schemas"]["WanTransformerField"] | null; + /** + * type + * @default wan_lora_collection_loader + * @constant + */ + type: "wan_lora_collection_loader"; + }; + /** + * Apply LoRA - Wan 2.2 + * @description Apply a LoRA to the Wan 2.2 transformer(s). + * + * For A14B (dual expert) the LoRA's recorded ``expert`` field determines + * which expert list it lands in: ``"high"`` -> primary list, ``"low"`` -> + * low-noise list, ``None`` (untagged) -> both lists. Use the ``target`` + * field to override. + * + * For TI2V-5B (single transformer) only the primary list is used at denoise + * time; the low-noise routing is harmless but ignored. + */ + WanLoRALoaderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * LoRA + * @description LoRA model to load + * @default null + */ + lora?: components["schemas"]["ModelIdentifierField"] | null; + /** + * Weight + * @description The weight at which the LoRA is applied to each model + * @default 0.75 + */ + weight?: number; + /** + * Target + * @description Which expert(s) to apply this LoRA to. 'auto' uses the LoRA's recorded expert tag (or both if untagged); 'both'/'high'/'low' override it. 
+ * @default auto + * @enum {string} + */ + target?: "auto" | "both" | "high" | "low"; + /** + * Wan Transformer + * @description Transformer + * @default null + */ + transformer?: components["schemas"]["WanTransformerField"] | null; + /** + * type + * @default wan_lora_loader + * @constant + */ + type: "wan_lora_loader"; + }; + /** + * WanLoRALoaderOutput + * @description Wan 2.2 LoRA loader output. + */ + WanLoRALoaderOutput: { + /** + * Wan Transformer + * @description Transformer + * @default null + */ + transformer: components["schemas"]["WanTransformerField"] | null; + /** + * type + * @default wan_lora_loader_output + * @constant + */ + type: "wan_lora_loader_output"; + }; /** * Main Model - Wan 2.2 * @description Loads a Wan 2.2 model, outputting its submodels. @@ -32300,6 +32530,111 @@ export type components = { */ type: "wan_model_loader_output"; }; + /** + * WanRefImageConditioningField + * @description Reference-image conditioning for Wan 2.2 I2V. + * + * Carries the 20-channel VAE-latent condition tensor (4-channel first-frame + * mask + 16-channel ref-image latents). The denoise loop concatenates this + * to the 16-channel noise latents along the channel dim each step, producing + * the 36-channel input the I2V-A14B transformer expects. + * + * Also carries the spatial dims used to encode the image, so the denoise + * node can sanity-check that the user's width/height match. + */ + WanRefImageConditioningField: { + /** + * Condition Tensor Name + * @description Name of the saved [1, 20, 1, H/8, W/8] condition tensor. + */ + condition_tensor_name: string; + /** + * Width + * @description Image width used during VAE encoding (matches denoise width). + */ + width: number; + /** + * Height + * @description Image height used during VAE encoding (matches denoise height). + */ + height: number; + }; + /** + * Reference Image - Wan 2.2 + * @description VAE-encode a reference image into Wan 2.2 I2V conditioning. 
+ * + * Output is a ``[1, 20, 1, height // 8, width // 8]`` condition tensor that + * the denoise loop concatenates to the 16-channel noise latents each step, + * producing the 36-channel input the I2V-A14B transformer expects. + * + * Only works with I2V-A14B (the denoise loop's variant gate enforces this). + * For T2V or TI2V-5B, omit this node entirely. + */ + WanRefImageEncoderInvocation: { + /** + * Id + * @description The id of this instance of an invocation. Must be unique among all instances of invocations. + */ + id: string; + /** + * Is Intermediate + * @description Whether or not this is an intermediate invocation. + * @default false + */ + is_intermediate?: boolean; + /** + * Use Cache + * @description Whether or not to use the cache + * @default true + */ + use_cache?: boolean; + /** + * @description Reference image to condition on. + * @default null + */ + image?: components["schemas"]["ImageField"] | null; + /** + * VAE + * @description VAE + * @default null + */ + vae?: components["schemas"]["VAEField"] | null; + /** + * Width + * @description Width to resize the reference image to (must match denoise width). + * @default 1024 + */ + width?: number; + /** + * Height + * @description Height to resize the reference image to (must match denoise height). + * @default 1024 + */ + height?: number; + /** + * type + * @default wan_ref_image_encoder + * @constant + */ + type: "wan_ref_image_encoder"; + }; + /** + * WanRefImageOutput + * @description Output of a Wan 2.2 reference-image VAE-encoder. + */ + WanRefImageOutput: { + /** + * Reference Image + * @description VAE-latent reference-image conditioning for Wan 2.2 I2V. + */ + ref_image: components["schemas"]["WanRefImageConditioningField"]; + /** + * type + * @default wan_ref_image_output + * @constant + */ + type: "wan_ref_image_output"; + }; /** * WanT5EncoderField * @description Field for the UMT5-XXL text encoder used by Wan 2.2 models. 
@@ -32485,13 +32820,16 @@ export type components = { * WanVariantType * @description Wan 2.2 model variants. * - * Both variants are used for image generation at num_frames=1. They differ in - * architecture: A14B is a Mixture-of-Experts model with two transformer experts - * (high-noise and low-noise) totalling ~28B params; TI2V-5B is a single ~5B - * transformer with a higher-compression VAE (z_dim=48). + * All variants are used for image generation at num_frames=1. The A14B family + * is a Mixture-of-Experts (high-noise + low-noise) totalling ~28B params; the + * T2V sub-variant takes text only, while the I2V sub-variant additionally + * conditions on a reference image (encoded by the VAE and concatenated to the + * noise latents along the channel dim — its transformer has ``in_channels=36`` + * instead of ``16``). TI2V-5B is a single ~5B transformer with a + * higher-compression VAE (z_dim=48). * @enum {string} */ - WanVariantType: "t2v_a14b" | "ti2v_5b"; + WanVariantType: "t2v_a14b" | "i2v_a14b" | "ti2v_5b"; /** Workflow */ Workflow: { /** @@ -34148,7 +34486,7 @@ export interface operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | 
components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | 
components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | 
components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | 
components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -34180,7 +34518,7 @@ export interface 
operations { [name: string]: unknown; }; content: { - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | 
components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | 
components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | 
components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | 
components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | 
components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Validation Error */ @@ -34230,7 +34568,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | 
components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | 
components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | 
components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | 
components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | 
components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -34335,7 +34673,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | 
components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | 
components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | 
components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | 
components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | 
components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | 
components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -34406,7 +34744,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | 
components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | 
components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | 
components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | 
components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | 
components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ @@ -35139,7 +35477,7 @@ export interface operations { * "repo_variant": "fp16", * "upcast_attention": false * } */ - "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | 
components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | 
components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | 
components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; + "application/json": components["schemas"]["Main_Diffusers_SD1_Config"] | components["schemas"]["Main_Diffusers_SD2_Config"] | components["schemas"]["Main_Diffusers_SDXL_Config"] | components["schemas"]["Main_Diffusers_SDXLRefiner_Config"] | components["schemas"]["Main_Diffusers_SD3_Config"] | components["schemas"]["Main_Diffusers_FLUX_Config"] | components["schemas"]["Main_Diffusers_Flux2_Config"] | components["schemas"]["Main_Diffusers_CogView4_Config"] | components["schemas"]["Main_Diffusers_QwenImage_Config"] | components["schemas"]["Main_Diffusers_Wan_Config"] | components["schemas"]["Main_Diffusers_ZImage_Config"] | components["schemas"]["Main_Checkpoint_SD1_Config"] | components["schemas"]["Main_Checkpoint_SD2_Config"] | components["schemas"]["Main_Checkpoint_SDXL_Config"] | components["schemas"]["Main_Checkpoint_SDXLRefiner_Config"] | components["schemas"]["Main_Checkpoint_Flux2_Config"] | components["schemas"]["Main_Checkpoint_FLUX_Config"] | components["schemas"]["Main_Checkpoint_ZImage_Config"] | 
components["schemas"]["Main_Checkpoint_Anima_Config"] | components["schemas"]["Main_BnBNF4_FLUX_Config"] | components["schemas"]["Main_GGUF_Flux2_Config"] | components["schemas"]["Main_GGUF_FLUX_Config"] | components["schemas"]["Main_GGUF_QwenImage_Config"] | components["schemas"]["Main_GGUF_Wan_Config"] | components["schemas"]["Main_GGUF_ZImage_Config"] | components["schemas"]["VAE_Checkpoint_SD1_Config"] | components["schemas"]["VAE_Checkpoint_SD2_Config"] | components["schemas"]["VAE_Checkpoint_SDXL_Config"] | components["schemas"]["VAE_Checkpoint_FLUX_Config"] | components["schemas"]["VAE_Checkpoint_Flux2_Config"] | components["schemas"]["VAE_Checkpoint_Wan_Config"] | components["schemas"]["VAE_Checkpoint_QwenImage_Config"] | components["schemas"]["VAE_Checkpoint_Anima_Config"] | components["schemas"]["VAE_Diffusers_SD1_Config"] | components["schemas"]["VAE_Diffusers_SDXL_Config"] | components["schemas"]["VAE_Diffusers_Flux2_Config"] | components["schemas"]["VAE_Diffusers_Wan_Config"] | components["schemas"]["ControlNet_Checkpoint_SD1_Config"] | components["schemas"]["ControlNet_Checkpoint_SD2_Config"] | components["schemas"]["ControlNet_Checkpoint_SDXL_Config"] | components["schemas"]["ControlNet_Checkpoint_FLUX_Config"] | components["schemas"]["ControlNet_Checkpoint_ZImage_Config"] | components["schemas"]["ControlNet_Diffusers_SD1_Config"] | components["schemas"]["ControlNet_Diffusers_SD2_Config"] | components["schemas"]["ControlNet_Diffusers_SDXL_Config"] | components["schemas"]["ControlNet_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_SD1_Config"] | components["schemas"]["LoRA_LyCORIS_SD2_Config"] | components["schemas"]["LoRA_LyCORIS_SDXL_Config"] | components["schemas"]["LoRA_LyCORIS_Flux2_Config"] | components["schemas"]["LoRA_LyCORIS_FLUX_Config"] | components["schemas"]["LoRA_LyCORIS_ZImage_Config"] | components["schemas"]["LoRA_LyCORIS_QwenImage_Config"] | components["schemas"]["LoRA_LyCORIS_Wan_Config"] | 
components["schemas"]["LoRA_LyCORIS_Anima_Config"] | components["schemas"]["LoRA_OMI_SDXL_Config"] | components["schemas"]["LoRA_OMI_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_SD1_Config"] | components["schemas"]["LoRA_Diffusers_SD2_Config"] | components["schemas"]["LoRA_Diffusers_SDXL_Config"] | components["schemas"]["LoRA_Diffusers_Flux2_Config"] | components["schemas"]["LoRA_Diffusers_FLUX_Config"] | components["schemas"]["LoRA_Diffusers_ZImage_Config"] | components["schemas"]["ControlLoRA_LyCORIS_FLUX_Config"] | components["schemas"]["T5Encoder_T5Encoder_Config"] | components["schemas"]["T5Encoder_BnBLLMint8_Config"] | components["schemas"]["Qwen3Encoder_Qwen3Encoder_Config"] | components["schemas"]["Qwen3Encoder_Checkpoint_Config"] | components["schemas"]["Qwen3Encoder_GGUF_Config"] | components["schemas"]["QwenVLEncoder_Diffusers_Config"] | components["schemas"]["QwenVLEncoder_Checkpoint_Config"] | components["schemas"]["WanT5Encoder_WanT5Encoder_Config"] | components["schemas"]["TI_File_SD1_Config"] | components["schemas"]["TI_File_SD2_Config"] | components["schemas"]["TI_File_SDXL_Config"] | components["schemas"]["TI_Folder_SD1_Config"] | components["schemas"]["TI_Folder_SD2_Config"] | components["schemas"]["TI_Folder_SDXL_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD1_Config"] | components["schemas"]["IPAdapter_InvokeAI_SD2_Config"] | components["schemas"]["IPAdapter_InvokeAI_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD1_Config"] | components["schemas"]["IPAdapter_Checkpoint_SD2_Config"] | components["schemas"]["IPAdapter_Checkpoint_SDXL_Config"] | components["schemas"]["IPAdapter_Checkpoint_FLUX_Config"] | components["schemas"]["T2IAdapter_Diffusers_SD1_Config"] | components["schemas"]["T2IAdapter_Diffusers_SDXL_Config"] | components["schemas"]["Spandrel_Checkpoint_Config"] | components["schemas"]["CLIPEmbed_Diffusers_G_Config"] | components["schemas"]["CLIPEmbed_Diffusers_L_Config"] | 
components["schemas"]["CLIPVision_Diffusers_Config"] | components["schemas"]["SigLIP_Diffusers_Config"] | components["schemas"]["FLUXRedux_Checkpoint_Config"] | components["schemas"]["LlavaOnevision_Diffusers_Config"] | components["schemas"]["TextLLM_Diffusers_Config"] | components["schemas"]["ExternalApiModelConfig"] | components["schemas"]["Unknown_Config"]; }; }; /** @description Bad request */ diff --git a/tests/app/invocations/test_wan_denoise.py b/tests/app/invocations/test_wan_denoise.py index e8fe15d5932..e10fcbadc54 100644 --- a/tests/app/invocations/test_wan_denoise.py +++ b/tests/app/invocations/test_wan_denoise.py @@ -19,7 +19,7 @@ import torch import torch.nn as nn -from invokeai.app.invocations.fields import WanConditioningField +from invokeai.app.invocations.fields import WanConditioningField, WanRefImageConditioningField from invokeai.app.invocations.model import WanTransformerField from invokeai.app.invocations.wan_denoise import WanDenoiseInvocation from invokeai.backend.model_manager.taxonomy import WanVariantType @@ -64,7 +64,14 @@ def forward( # noqa: D401 — match diffusers signature ) # Record the timestep (t.expand(B) → take first element). self.timesteps_seen.append(float(timestep.flatten()[0].item())) - out = torch.zeros_like(hidden_states) + # Real Wan I2V transformer has in_channels=36 (16 noise + 20 ref-image + # condition) but out_channels=16. T2V is 16/16 and TI2V-5B is 48/48 — + # both have matching in/out. Mirror that by only collapsing the I2V + # input width back to 16 channels. 
+ out_shape = list(hidden_states.shape) + if out_shape[1] == 36: + out_shape[1] = 16 + out = torch.zeros(out_shape, dtype=hidden_states.dtype, device=hidden_states.device) if return_dict: return type("Out", (), {"sample": out}) return (out,) @@ -480,3 +487,130 @@ class TestWanDenoiseHeavy: def test_real_ti2v_5b_runs(self) -> None: pytest.skip("Heavy test stub — implement once a TI2V-5B checkpoint is installable.") + + +class TestWanDenoiseRefImage: + """Phase 7: VAE-latent reference-image conditioning for I2V-A14B. + + The denoise loop must concatenate the 20-channel condition tensor to the + 16-channel noise latents at every transformer call, producing 36-channel + input. Variant gate must fast-fail when ref_image is wired to a non-I2V + transformer.""" + + def _build_ctx_with_condition( + self, + transformer: _ZeroTransformer, + variant: WanVariantType, + model_root: Path, + condition_tensor: torch.Tensor | None, + ) -> MagicMock: + ctx = _build_context( + transformer, + variant=variant, + model_root=model_root, + pos_cond=_make_conditioning(), + neg_cond=None, + ) + if condition_tensor is not None: + ctx.tensors.load.return_value = condition_tensor + return ctx + + def _make_inv_with_ref( + self, + ref_field: "WanRefImageConditioningField | None", + *, + width: int = 64, + height: int = 64, + ) -> WanDenoiseInvocation: + return WanDenoiseInvocation( + id="test", + transformer=_wan_transformer_field(dual=True), + positive_conditioning=WanConditioningField(conditioning_name="pos"), + negative_conditioning=None, + ref_image=ref_field, + width=width, + height=height, + steps=3, + guidance_scale=1.0, + seed=42, + ) + + def test_ref_image_concatenated_to_36_channels(self, fake_model_root: Path) -> None: + """I2V_A14B + ref_image → transformer sees [B, 36, T, H/8, W/8].""" + transformer = _ZeroTransformer() + # Build the 20-channel condition tensor the encoder would have saved: + # 4-ch first-frame mask + 16-ch VAE-encoded image latents. 
+ # At 64x64 → 8x8 latent spatial dims. + condition = torch.zeros(1, 20, 1, 8, 8) + ctx = self._build_ctx_with_condition( + transformer, WanVariantType.I2V_A14B, fake_model_root, condition + ) + + ref_field = WanRefImageConditioningField( + condition_tensor_name="condition", width=64, height=64 + ) + inv = self._make_inv_with_ref(ref_field) + inv._run_diffusion(ctx) + + assert len(transformer.calls) == 3 + # Every call's hidden_states must have 36 channels (16 noise + 20 condition). + for h_shape, *_ in transformer.calls: + assert h_shape == (1, 36, 1, 8, 8), f"expected 36-channel input, got {h_shape}" + + def test_no_ref_image_keeps_16_channels(self, fake_model_root: Path) -> None: + """Without ref_image → transformer sees [B, 16, T, H/8, W/8] as before.""" + transformer = _ZeroTransformer() + ctx = self._build_ctx_with_condition( + transformer, WanVariantType.I2V_A14B, fake_model_root, condition_tensor=None + ) + + inv = self._make_inv_with_ref(ref_field=None) + inv._run_diffusion(ctx) + + for h_shape, *_ in transformer.calls: + assert h_shape == (1, 16, 1, 8, 8), f"expected unchanged 16-channel input, got {h_shape}" + + def test_variant_gate_rejects_ref_image_on_t2v(self, fake_model_root: Path) -> None: + """T2V_A14B + ref_image must raise — fast-fail before doing any work.""" + transformer = _ZeroTransformer() + condition = torch.zeros(1, 20, 1, 8, 8) + ctx = self._build_ctx_with_condition( + transformer, WanVariantType.T2V_A14B, fake_model_root, condition + ) + + ref_field = WanRefImageConditioningField( + condition_tensor_name="condition", width=64, height=64 + ) + inv = self._make_inv_with_ref(ref_field) + with pytest.raises(ValueError, match="only supported by the Wan 2.2 I2V variant"): + inv._run_diffusion(ctx) + + def test_variant_gate_rejects_ref_image_on_ti2v(self, fake_model_root: Path) -> None: + """TI2V-5B + ref_image must raise — TI2V uses a different image path.""" + transformer = _ZeroTransformer() + condition = torch.zeros(1, 20, 1, 8, 8) + ctx = 
self._build_ctx_with_condition( + transformer, WanVariantType.TI2V_5B, fake_model_root, condition + ) + + ref_field = WanRefImageConditioningField( + condition_tensor_name="condition", width=64, height=64 + ) + inv = self._make_inv_with_ref(ref_field) + with pytest.raises(ValueError, match="only supported by the Wan 2.2 I2V variant"): + inv._run_diffusion(ctx) + + def test_dim_mismatch_raises(self, fake_model_root: Path) -> None: + """If the encoder's width/height differ from denoise's, fail clearly.""" + transformer = _ZeroTransformer() + condition = torch.zeros(1, 20, 1, 8, 8) + ctx = self._build_ctx_with_condition( + transformer, WanVariantType.I2V_A14B, fake_model_root, condition + ) + + ref_field = WanRefImageConditioningField( + condition_tensor_name="condition", width=512, height=512 + ) + inv = self._make_inv_with_ref(ref_field, width=64, height=64) + with pytest.raises(ValueError, match="must match denoise dimensions"): + inv._run_diffusion(ctx) diff --git a/tests/app/invocations/test_wan_expert_swapper.py b/tests/app/invocations/test_wan_expert_swapper.py index f8897b46343..3c7d5a94fa3 100644 --- a/tests/app/invocations/test_wan_expert_swapper.py +++ b/tests/app/invocations/test_wan_expert_swapper.py @@ -54,6 +54,30 @@ def model_on_device(self): return _FakeModelOnDevice(self._label, self._model, self._log) +class _FakeContext: + """Mocks ``InvocationContext.models.load`` returning a fresh ``_FakeInfo`` + for each call — mirrors the real behaviour where the swapper expects a + fresh handle per ``get()``.""" + + def __init__(self, infos_by_model_id: dict[str, _FakeInfo], log: list[str]) -> None: + self._infos = infos_by_model_id + self._log = log + # Track how many times each model id was loaded — the lazy-load fix + # depends on this count being 1 per swap, not 1 upfront. 
+ + class _Models: + def __init__(self, outer): + self._outer = outer + self.load_calls: list[str] = [] + + def load(self, model_id): + self.load_calls.append(model_id) + self._outer._log.append(f"models.load:{model_id}") + return self._outer._infos[model_id] + + self.models = _Models(self) + + def _make_factory(log: list[str], label: str) -> "callable": """Build a LoRAIteratorFactory that records each invocation in ``log``.""" @@ -110,8 +134,8 @@ def factory(model, patches, prefix, dtype, cached_weights, force_sidecar_patchin def test_lifecycle_high_only(): """Single-expert (TI2V-5B / A14B with only high loaded): enter HIGH, close.""" log: list[str] = [] - high_model = nn.Linear(1, 1) - high_info = _FakeInfo("HIGH", high_model, log) + high_nn = nn.Linear(1, 1) + ctx = _FakeContext({"high": _FakeInfo("HIGH", high_nn, log)}, log) stub, calls = _stub_lora_context_manager(log) with patch( @@ -119,17 +143,19 @@ def test_lifecycle_high_only(): side_effect=stub, ): swapper = _ExpertSwapper( - high_info=high_info, - low_info=None, + context=ctx, + high_model="high", + low_model=None, inference_dtype=torch.bfloat16, high_lora_factory=_make_factory(log, "HIGH"), low_lora_factory=None, ) model = swapper.get(_ExpertSwapper.HIGH) - assert model is high_model + assert model is high_nn swapper.close() assert log == [ + "models.load:high", "device-enter:HIGH", "lora-factory-call:HIGH", "lora-enter", @@ -137,17 +163,19 @@ def test_lifecycle_high_only(): "device-exit:HIGH", ] assert len(calls) == 1 - assert calls[0]["model"] is high_model + assert calls[0]["model"] is high_nn assert calls[0]["prefix"] == "lora_transformer-" def test_lifecycle_dual_expert_swap(): """A14B: HIGH first, then LOW. 
Each LoRA context opens/closes with its expert.""" log: list[str] = [] - high_model = nn.Linear(1, 1) - low_model = nn.Linear(1, 1) - high_info = _FakeInfo("HIGH", high_model, log) - low_info = _FakeInfo("LOW", low_model, log) + high_nn = nn.Linear(1, 1) + low_nn = nn.Linear(1, 1) + ctx = _FakeContext( + {"high": _FakeInfo("HIGH", high_nn, log), "low": _FakeInfo("LOW", low_nn, log)}, + log, + ) stub, calls = _stub_lora_context_manager(log) with patch( @@ -155,28 +183,31 @@ def test_lifecycle_dual_expert_swap(): side_effect=stub, ): swapper = _ExpertSwapper( - high_info=high_info, - low_info=low_info, + context=ctx, + high_model="high", + low_model="low", inference_dtype=torch.bfloat16, high_lora_factory=_make_factory(log, "HIGH"), low_lora_factory=_make_factory(log, "LOW"), ) first = swapper.get(_ExpertSwapper.HIGH) - assert first is high_model + assert first is high_nn second = swapper.get(_ExpertSwapper.LOW) - assert second is low_model + assert second is low_nn swapper.close() expected = [ - # enter HIGH (device, then lora) + # enter HIGH (models.load first, then device, then lora) + "models.load:high", "device-enter:HIGH", "lora-factory-call:HIGH", "lora-enter", - # swap to LOW: LoRA out -> device out -> device in -> LoRA in + # swap to LOW: LoRA out -> device out -> models.load -> device in -> LoRA in "lora-exit", "device-exit:HIGH", + "models.load:low", "device-enter:LOW", "lora-factory-call:LOW", "lora-enter", @@ -187,15 +218,15 @@ def test_lifecycle_dual_expert_swap(): assert log == expected # Two patcher invocations, each bound to the expected model. 
assert len(calls) == 2 - assert calls[0]["model"] is high_model - assert calls[1]["model"] is low_model + assert calls[0]["model"] is high_nn + assert calls[1]["model"] is low_nn def test_quantized_flag_forwards_to_sidecar(): """GGUF (quantized) experts must request sidecar patching.""" log: list[str] = [] - high_model = nn.Linear(1, 1) - high_info = _FakeInfo("HIGH", high_model, log) + high_nn = nn.Linear(1, 1) + ctx = _FakeContext({"high": _FakeInfo("HIGH", high_nn, log)}, log) stub, calls = _stub_lora_context_manager(log) with patch( @@ -203,8 +234,9 @@ def test_quantized_flag_forwards_to_sidecar(): side_effect=stub, ): swapper = _ExpertSwapper( - high_info=high_info, - low_info=None, + context=ctx, + high_model="high", + low_model=None, inference_dtype=torch.bfloat16, high_lora_factory=_make_factory(log, "HIGH"), high_is_quantized=True, @@ -218,8 +250,8 @@ def test_quantized_flag_forwards_to_sidecar(): def test_no_lora_factory_skips_lora_context(): """When no LoRAs are wired, the swapper doesn't enter the LoRA context.""" log: list[str] = [] - high_model = nn.Linear(1, 1) - high_info = _FakeInfo("HIGH", high_model, log) + high_nn = nn.Linear(1, 1) + ctx = _FakeContext({"high": _FakeInfo("HIGH", high_nn, log)}, log) stub, calls = _stub_lora_context_manager(log) with patch( @@ -227,8 +259,9 @@ def test_no_lora_factory_skips_lora_context(): side_effect=stub, ): swapper = _ExpertSwapper( - high_info=high_info, - low_info=None, + context=ctx, + high_model="high", + low_model=None, inference_dtype=torch.bfloat16, high_lora_factory=None, # no LoRAs low_lora_factory=None, @@ -243,10 +276,14 @@ def test_no_lora_factory_skips_lora_context(): def test_repeat_get_same_label_is_a_no_op(): - """Calling get(HIGH) twice in a row must not re-enter the contexts.""" + """Calling get(HIGH) twice in a row must not re-enter the contexts. + + Critically, ``models.load`` must only be called once per actual swap — + not on every ``get()``. 
Caching the loaded model on first entry, and + short-circuiting re-entry, prevents per-step cache thrash.""" log: list[str] = [] - high_model = nn.Linear(1, 1) - high_info = _FakeInfo("HIGH", high_model, log) + high_nn = nn.Linear(1, 1) + ctx = _FakeContext({"high": _FakeInfo("HIGH", high_nn, log)}, log) stub, calls = _stub_lora_context_manager(log) with patch( @@ -254,8 +291,9 @@ def test_repeat_get_same_label_is_a_no_op(): side_effect=stub, ): swapper = _ExpertSwapper( - high_info=high_info, - low_info=None, + context=ctx, + high_model="high", + low_model=None, inference_dtype=torch.bfloat16, high_lora_factory=_make_factory(log, "HIGH"), ) @@ -263,8 +301,63 @@ def test_repeat_get_same_label_is_a_no_op(): swapper.get(_ExpertSwapper.HIGH) # should be a no-op swapper.close() - # device-enter + lora-enter happen exactly once. + # device-enter + lora-enter happen exactly once, and crucially + # models.load is called only once — repeat get() must short-circuit + # so the cache isn't re-touched every step of the denoise loop. + assert log.count("models.load:high") == 1 assert log.count("device-enter:HIGH") == 1 assert log.count("lora-enter") == 1 assert log.count("lora-exit") == 1 assert log.count("device-exit:HIGH") == 1 + + +def test_lazy_load_per_swap_not_upfront(): + """Regression for the cache-eviction warning that triggered this fix. + + ``models.load`` must NOT be called at swapper construction. It is called + only on the first ``get()`` for each expert. 
This keeps the per-handle + cache window small enough that the LRU policy doesn't drop one expert + while the other is being used.""" + log: list[str] = [] + high_nn = nn.Linear(1, 1) + low_nn = nn.Linear(1, 1) + ctx = _FakeContext( + {"high": _FakeInfo("HIGH", high_nn, log), "low": _FakeInfo("LOW", low_nn, log)}, + log, + ) + + stub, _ = _stub_lora_context_manager(log) + with patch( + "invokeai.app.invocations.wan_denoise.LayerPatcher.apply_smart_model_patches", + side_effect=stub, + ): + # Construction alone must not trigger any models.load call. + swapper = _ExpertSwapper( + context=ctx, + high_model="high", + low_model="low", + inference_dtype=torch.bfloat16, + high_lora_factory=_make_factory(log, "HIGH"), + low_lora_factory=_make_factory(log, "LOW"), + ) + assert ctx.models.load_calls == [], ( + "Swapper must not call models.load until get() is invoked — " + "see issue #7513 for cache-eviction rationale." + ) + + # First get(HIGH): loads HIGH only. + swapper.get(_ExpertSwapper.HIGH) + assert ctx.models.load_calls == ["high"] + + # Swap to LOW: loads LOW only. HIGH is NOT re-loaded — its handle + # was used and released, the next call to it (if any) will re-load. + swapper.get(_ExpertSwapper.LOW) + assert ctx.models.load_calls == ["high", "low"] + + # Back to HIGH: a fresh load (the previous handle is gone). This is + # the right behaviour — each swap gets a guaranteed-fresh handle + # rather than a stale reference into the cache. 
+ swapper.get(_ExpertSwapper.HIGH) + assert ctx.models.load_calls == ["high", "low", "high"] + + swapper.close() diff --git a/tests/backend/model_manager/configs/test_wan_gguf_config.py b/tests/backend/model_manager/configs/test_wan_gguf_config.py index ca8ef671844..46b8ce499fe 100644 --- a/tests/backend/model_manager/configs/test_wan_gguf_config.py +++ b/tests/backend/model_manager/configs/test_wan_gguf_config.py @@ -49,6 +49,17 @@ def _wan_ti2v_state_dict() -> dict: } +def _wan_i2v_a14b_state_dict() -> dict: + """Wan 2.2 I2V-A14B GGUF: same shape as T2V except patch_embedding has 36 + input channels (16 noise + 16 ref-image latents + 4 first-frame mask).""" + return { + "patch_embedding.weight": _ggml((5120, 36, 1, 2, 2)), + "condition_embedder.text_embedder.linear_1.weight": _ggml((5120, 4096)), + "blocks.0.attn1.to_q.weight": _ggml((5120, 5120)), + "blocks.0.ffn.net.0.proj.weight": _ggml((13824, 5120)), + } + + def _wan_a14b_native_state_dict() -> dict: """Synthetic Wan A14B GGUF state dict using the native upstream key layout (text_embedding/self_attn/cross_attn/ffn.0 — what QuantStack and ComfyUI ship).""" @@ -125,6 +136,13 @@ def test_ti2v_from_48ch(self): sd = _wan_ti2v_state_dict() assert _detect_wan_gguf_variant(sd) == WanVariantType.TI2V_5B + def test_i2v_a14b_from_36ch(self): + """Wan 2.2 I2V has the same A14B architecture as T2V but with + in_channels=36 because the ref-image latents and first-frame mask are + concatenated to the noise along the channel dim before patch embedding.""" + sd = _wan_i2v_a14b_state_dict() + assert _detect_wan_gguf_variant(sd) == WanVariantType.I2V_A14B + def test_unknown_channel_count_returns_none(self): sd = {"patch_embedding.weight": _ggml((1, 32, 1, 2, 2))} assert _detect_wan_gguf_variant(sd) is None diff --git a/tests/backend/model_manager/configs/test_wan_lora_config.py b/tests/backend/model_manager/configs/test_wan_lora_config.py index 0b95554ca74..dce64616165 100644 --- 
a/tests/backend/model_manager/configs/test_wan_lora_config.py +++ b/tests/backend/model_manager/configs/test_wan_lora_config.py @@ -281,64 +281,55 @@ def test_rejects_flux_lora(self): LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "flux")) -class TestFactoryOrdering: - """Regression: native-PEFT Wan LoRAs share the ``cross_attn``/``self_attn`` - substring with Anima/Cosmos DiT. Anima's probe matches on the bare substring - (it doesn't require Anima's ``_proj`` suffix or ``mlp``/``adaln_modulation``), - so a Wan LoRA would be mis-tagged as Anima unless Wan's probe runs first - in the AnyModelConfig union — or unless Anima's probe gets tightened. - - This test pins the order by importing the union and asserting Wan appears - before Anima in the LyCORIS section. - """ - - def test_wan_appears_before_anima_in_lora_union(self): - from typing import get_args - - from invokeai.backend.model_manager.configs.factory import AnyModelConfig - from invokeai.backend.model_manager.configs.lora import ( - LoRA_LyCORIS_Anima_Config, - LoRA_LyCORIS_Wan_Config, - ) - - # AnyModelConfig is an Annotated[Union[...], Discriminator(...)] — the - # first arg of get_args is the Union itself. - union_type = get_args(AnyModelConfig)[0] - union_members = get_args(union_type) - - def _index_of(cls) -> int: - for i, m in enumerate(union_members): - # Each member is Annotated[ConfigClass, Tag(...)]; first get_args is the class. - if get_args(m)[0] is cls: - return i - raise AssertionError(f"{cls.__name__} not in union") - - wan_idx = _index_of(LoRA_LyCORIS_Wan_Config) - anima_idx = _index_of(LoRA_LyCORIS_Anima_Config) - assert wan_idx < anima_idx, ( - f"LoRA_LyCORIS_Wan_Config must come before LoRA_LyCORIS_Anima_Config in " - f"the AnyModelConfig union (Wan at {wan_idx}, Anima at {anima_idx}). " - "Otherwise Anima's cross_attn/self_attn substring match will steal Wan LoRAs." 
- ) - - def test_anima_would_have_matched_a_wan_native_lora(self): - """Sanity check: confirm that Anima's probe DOES match a Wan native LoRA - if asked directly. This is why ordering matters — Wan must run first.""" +class TestProbeMutualExclusivity: + """Regression: Anima's probe must REJECT Wan-native LoRA keys, so probing + is correct regardless of which config the factory iterates first. + + ``Config_Base.CONFIG_CLASSES`` is a ``set``, so iteration order is + non-deterministic across Python process restarts. Probes therefore need + to be mutually exclusive at the per-config level — see also + ``test_wan_lora_probe_independence.py`` for the broader cross-architecture + coverage.""" + + def test_anima_rejects_wan_native_lora(self): + """Wan native LoRAs (``diffusion_model.blocks.X.self_attn.q.lora_*``) + used to false-positive on Anima's probe because Anima accepted any + ``cross_attn``/``self_attn`` substring. Anima now requires + Cosmos-DiT-exclusive markers (``mlp``, ``adaln_modulation``, or the + ``_proj`` attention suffix), so a Wan LoRA — which has none of those — + is correctly rejected.""" from invokeai.backend.model_manager.configs.lora import LoRA_LyCORIS_Anima_Config with TemporaryDirectory() as tmp: f = Path(tmp) / "wan_native_lora.safetensors" f.touch() - # Realistic Wan native PEFT keys: this is what lightx2v's Lightning - # LoRAs and most ComfyUI-trained Wan LoRAs look like. + # Realistic Wan native PEFT keys — what lightx2v's Lightning + # distillations and most ComfyUI-trained Wan LoRAs look like. sd = { "diffusion_model.blocks.0.self_attn.q.lora_A.weight": _t((128, 5120)), "diffusion_model.blocks.0.self_attn.q.lora_B.weight": _t((5120, 128)), "diffusion_model.blocks.0.cross_attn.k.lora_A.weight": _t((128, 5120)), "diffusion_model.blocks.0.cross_attn.k.lora_B.weight": _t((5120, 128)), } - # Anima's probe (today) erroneously accepts these. 
If this assertion - # ever flips, Anima's probe got tightened and the Wan-first ordering - # constraint is no longer required (but it's still safe to keep). - cfg = LoRA_LyCORIS_Anima_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "anima-false-positive")) - assert cfg.base == BaseModelType.Anima # NB: a false positive; protected against by ordering + with pytest.raises(NotAMatchError, match="Anima LoRA"): + LoRA_LyCORIS_Anima_Config.from_model_on_disk( + _make_mod(f, sd), _overrides(f, "wan-native-lora") + ) + + def test_wan_rejects_anima_lora(self): + """Mirror direction: a real Anima LoRA must not be matched by Wan. + Wan's anti-patterns already cover ``_proj`` suffix, ``mlp``, and + ``adaln_modulation``.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "anima_lora.safetensors" + f.touch() + sd = { + "transformer.blocks.0.self_attn.q_proj.lora_A.weight": _t((128, 4096)), + "transformer.blocks.0.self_attn.q_proj.lora_B.weight": _t((4096, 128)), + "transformer.blocks.0.mlp.layer1.lora_A.weight": _t((128, 4096)), + "transformer.blocks.0.mlp.layer1.lora_B.weight": _t((4096, 128)), + } + with pytest.raises(NotAMatchError, match="Wan LoRA"): + LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, sd), _overrides(f, "anima-lora") + ) diff --git a/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py b/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py new file mode 100644 index 00000000000..6d54cb5401f --- /dev/null +++ b/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py @@ -0,0 +1,277 @@ +"""Regression tests for Wan vs Anima LoRA probe mutual exclusivity. + +InvokeAI's ``Config_Base.CONFIG_CLASSES`` is a ``set``, so iteration order is +non-deterministic across Python process restarts. The probe MUST therefore be +mutually exclusive at the per-config level — first-match-wins is not safe to +rely on. 
+ +The historic bug these tests guard against: Anima's probe accepted anything +with the ``cross_attn`` or ``self_attn`` substring, which collides with Wan's +native LoRA key layout (``diffusion_model.blocks.X.cross_attn.q.lora_down.weight``). +A Wan native LoRA — including lightx2v's Lightning distillations — would +randomly identify as ``BaseModelType.Anima`` depending on dict hash order. + +The fix tightened Anima's probe to require Cosmos-DiT-exclusive markers +(``mlp``, ``adaln_modulation``, or attention with the ``_proj`` suffix). + +Each test below feeds a fixed state dict shape to BOTH the Wan and Anima +probes individually and asserts at most one accepts — order-independent. +""" + +from pathlib import Path +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock + +import pytest +import torch + +from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError +from invokeai.backend.model_manager.configs.lora import ( + LoRA_LyCORIS_Anima_Config, + LoRA_LyCORIS_Wan_Config, +) +from invokeai.backend.model_manager.taxonomy import BaseModelType + + +def _t(shape: tuple[int, ...]) -> torch.Tensor: + return torch.zeros(shape) + + +def _make_mod(path: Path, sd: dict) -> MagicMock: + mod = MagicMock() + mod.path = path + mod.load_state_dict.return_value = sd + return mod + + +def _overrides(p: Path, name: str) -> dict: + return { + "hash": "test-hash", + "path": str(p), + "file_size": 0, + "name": name, + "source": str(p), + "source_type": "path", + } + + +def _probe(cls, path: Path, sd: dict, name: str): + """Try a probe; return (accepted: bool, instance_or_exc).""" + try: + return True, cls.from_model_on_disk(_make_mod(path, sd), _overrides(path, name)) + except NotAMatchError as e: + return False, e + + +def _i2v_lightning_v1_keys() -> dict: + """Realistic key shape from lightx2v's I2V-A14B Lightning V1 — the actual + LoRA that triggered the bug. 
Native upstream Wan naming with + ``diffusion_model.`` prefix, no ``_proj`` suffix on attention.""" + sd: dict[str, torch.Tensor] = {} + for block in range(3): + for sub in ("self_attn", "cross_attn"): + for proj in ("q", "k", "v", "o"): + base = f"diffusion_model.blocks.{block}.{sub}.{proj}" + sd[f"{base}.lora_down.weight"] = _t((64, 5120)) + sd[f"{base}.lora_up.weight"] = _t((5120, 64)) + sd[f"{base}.alpha"] = torch.tensor(8.0) + for ffn_idx in (0, 2): + base = f"diffusion_model.blocks.{block}.ffn.{ffn_idx}" + sd[f"{base}.lora_down.weight"] = _t((64, 5120)) + sd[f"{base}.lora_up.weight"] = _t((5120, 64)) + sd[f"{base}.alpha"] = torch.tensor(8.0) + return sd + + +def _t2v_lightning_v2_keys() -> dict: + """Same layout as I2V Lightning — both lightx2v releases use native Wan + keys with ``diffusion_model.`` prefix. The T2V version had been working + (after a manual factory reorder), but only by luck of dict-hash order.""" + return _i2v_lightning_v1_keys() # structurally identical to I2V V1 + + +def _wan_kohya_keys() -> dict: + """Hypothetical Kohya-format Wan LoRA — same native naming, underscore + separators. Lightning hasn't shipped in this format, but other community + LoRAs do.""" + sd: dict[str, torch.Tensor] = {} + for block in range(2): + for sub in ("self_attn", "cross_attn"): + for proj in ("q", "k", "v", "o"): + base = f"lora_unet_blocks_{block}_{sub}_{proj}" + sd[f"{base}.lora_down.weight"] = _t((64, 5120)) + sd[f"{base}.lora_up.weight"] = _t((5120, 64)) + return sd + + +def _wan_diffusers_peft_keys() -> dict: + """Wan diffusers-style LoRA: ``transformer.blocks.X.attn1.to_q.lora_A.weight`` + etc. 
Distinct enough from Anima that even the loose probes wouldn't collide, + but covered here for completeness.""" + sd: dict[str, torch.Tensor] = {} + for block in range(2): + for attn in ("attn1", "attn2"): + for to in ("to_q", "to_k", "to_v"): + base = f"transformer.blocks.{block}.{attn}.{to}" + sd[f"{base}.lora_A.weight"] = _t((64, 5120)) + sd[f"{base}.lora_B.weight"] = _t((5120, 64)) + sd[f"transformer.blocks.{block}.ffn.net.0.proj.lora_A.weight"] = _t((64, 5120)) + sd[f"transformer.blocks.{block}.ffn.net.0.proj.lora_B.weight"] = _t((13824, 64)) + return sd + + +def _anima_peft_keys() -> dict: + """Realistic Anima Cosmos-DiT LoRA: ``q_proj``/``k_proj`` attention naming + plus ``mlp`` and ``adaln_modulation`` modules. Wan has none of these.""" + sd: dict[str, torch.Tensor] = {} + for block in range(2): + for sub in ("self_attn", "cross_attn"): + for proj in ("q_proj", "k_proj", "v_proj", "output_proj"): + base = f"transformer.blocks.{block}.{sub}.{proj}" + sd[f"{base}.lora_A.weight"] = _t((64, 4096)) + sd[f"{base}.lora_B.weight"] = _t((4096, 64)) + sd[f"transformer.blocks.{block}.mlp.layer1.lora_A.weight"] = _t((64, 4096)) + sd[f"transformer.blocks.{block}.mlp.layer1.lora_B.weight"] = _t((4096, 64)) + sd[f"transformer.blocks.{block}.adaln_modulation.linear.lora_A.weight"] = _t((64, 4096)) + sd[f"transformer.blocks.{block}.adaln_modulation.linear.lora_B.weight"] = _t((4096, 64)) + return sd + + +def _anima_kohya_keys() -> dict: + """Same Anima content in Kohya format.""" + sd: dict[str, torch.Tensor] = {} + for block in range(2): + for sub in ("self_attn", "cross_attn"): + for proj in ("q_proj", "k_proj", "v_proj", "output_proj"): + base = f"lora_unet_blocks_{block}_{sub}_{proj}" + sd[f"{base}.lora_down.weight"] = _t((64, 4096)) + sd[f"{base}.lora_up.weight"] = _t((4096, 64)) + sd[f"lora_unet_blocks_{block}_mlp_layer1.lora_down.weight"] = _t((64, 4096)) + sd[f"lora_unet_blocks_{block}_mlp_layer1.lora_up.weight"] = _t((4096, 64)) + return sd + + +# 
--------------------------------------------------------------------------- +# Mutual-exclusivity assertions +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "label, sd_builder", + [ + ("i2v_lightning_v1", _i2v_lightning_v1_keys), + ("t2v_lightning_v2", _t2v_lightning_v2_keys), + ("wan_kohya_native", _wan_kohya_keys), + ("wan_diffusers_peft", _wan_diffusers_peft_keys), + ], +) +def test_wan_loras_only_match_wan(label: str, sd_builder) -> None: + """Wan probe accepts; Anima probe rejects. Independent of factory order.""" + sd = sd_builder() + with TemporaryDirectory() as tmp: + f = Path(tmp) / f"{label}.safetensors" + f.touch() + + wan_ok, wan_result = _probe(LoRA_LyCORIS_Wan_Config, f, sd, label) + anima_ok, anima_result = _probe(LoRA_LyCORIS_Anima_Config, f, sd, label) + + assert wan_ok, f"Wan probe must accept {label}; got {wan_result}" + assert wan_result.base == BaseModelType.Wan + assert not anima_ok, ( + f"Anima probe must reject {label} so probing is order-independent. " + f"Instead it accepted: {anima_result}" + ) + + +@pytest.mark.parametrize( + "label, sd_builder", + [ + ("anima_peft", _anima_peft_keys), + ("anima_kohya", _anima_kohya_keys), + ], +) +def test_anima_loras_only_match_anima(label: str, sd_builder) -> None: + """Anima probe accepts; Wan probe rejects. Independent of factory order.""" + sd = sd_builder() + with TemporaryDirectory() as tmp: + f = Path(tmp) / f"{label}.safetensors" + f.touch() + + wan_ok, wan_result = _probe(LoRA_LyCORIS_Wan_Config, f, sd, label) + anima_ok, anima_result = _probe(LoRA_LyCORIS_Anima_Config, f, sd, label) + + assert anima_ok, f"Anima probe must accept {label}; got {anima_result}" + assert anima_result.base == BaseModelType.Anima + assert not wan_ok, ( + f"Wan probe must reject {label} so probing is order-independent. 
" + f"Instead it accepted: {wan_result}" + ) + + +# --------------------------------------------------------------------------- +# Belt-and-suspenders: confirm CONFIG_CLASSES doesn't ALSO produce a match for +# any unrelated LoRA config. This is the test that would have caught the +# original bug regardless of which LoRA configs are registered in the future. +# --------------------------------------------------------------------------- + + +def test_at_most_one_lora_config_matches_wan_lightning() -> None: + """Run every LoRA config in the factory against an I2V Lightning state + dict. Only one should accept. If a future LoRA config (a hypothetical + new model with cross_attn naming) starts matching too, this test fires + so we can tighten that probe rather than relying on factory ordering.""" + from invokeai.backend.model_manager.configs.base import Config_Base + from invokeai.backend.model_manager.taxonomy import ModelType + + sd = _i2v_lightning_v1_keys() + with TemporaryDirectory() as tmp: + f = Path(tmp) / "wan_lightning.safetensors" + f.touch() + mod = _make_mod(f, sd) + overrides = _overrides(f, "wan_lightning") + + accepting: list[str] = [] + for cls in Config_Base.CONFIG_CLASSES: + # Only LoRA configs are at risk of collision with each other; skip + # the rest. (Main models can also probe-accept-then-reject on type + # mismatch, but they're disambiguated by ``matches_sort_key``.) + if getattr(cls.model_fields.get("type", None), "default", None) != ModelType.LoRA: + continue + try: + cls.from_model_on_disk(mod, dict(overrides)) + accepting.append(cls.__name__) + except (NotAMatchError, Exception): + continue + + assert accepting == ["LoRA_LyCORIS_Wan_Config"], ( + f"Exactly one LoRA config must accept a Wan Lightning LoRA; got {accepting}. " + "If a new LoRA config starts matching here, tighten its probe to be " + "mutually exclusive with Wan rather than relying on factory ordering." 
+ ) + + +def test_at_most_one_lora_config_matches_anima_peft() -> None: + """Same exclusivity guarantee for the Anima side.""" + from invokeai.backend.model_manager.configs.base import Config_Base + from invokeai.backend.model_manager.taxonomy import ModelType + + sd = _anima_peft_keys() + with TemporaryDirectory() as tmp: + f = Path(tmp) / "anima_peft.safetensors" + f.touch() + mod = _make_mod(f, sd) + overrides = _overrides(f, "anima_peft") + + accepting: list[str] = [] + for cls in Config_Base.CONFIG_CLASSES: + if getattr(cls.model_fields.get("type", None), "default", None) != ModelType.LoRA: + continue + try: + cls.from_model_on_disk(mod, dict(overrides)) + accepting.append(cls.__name__) + except (NotAMatchError, Exception): + continue + + assert accepting == ["LoRA_LyCORIS_Anima_Config"], ( + f"Exactly one LoRA config must accept an Anima LoRA; got {accepting}." + ) diff --git a/tests/backend/model_manager/configs/test_wan_main_config.py b/tests/backend/model_manager/configs/test_wan_main_config.py index 3109b5a9767..d3f4f00451e 100644 --- a/tests/backend/model_manager/configs/test_wan_main_config.py +++ b/tests/backend/model_manager/configs/test_wan_main_config.py @@ -32,6 +32,25 @@ def _build_ti2v_5b_layout(root: Path) -> None: _write_json(root / "vae" / "config.json", {"_class_name": "AutoencoderKLWan", "z_dim": 48}) +def _build_i2v_a14b_layout(root: Path) -> None: + """Wan-AI/Wan2.2-I2V-A14B: dual transformers, z_dim=16, transformer in_channels=36. + + The Wan 2.2 I2V transformer concatenates noise latents (16) + ref-image + latents (16) + first-frame mask (4) along the channel dim, so its + ``in_channels`` is 36 vs 16 for T2V. 
+ """ + _write_json(root / "model_index.json", {"_class_name": "WanImageToVideoPipeline"}) + _write_json( + root / "transformer" / "config.json", + {"_class_name": "WanTransformer3DModel", "in_channels": 36, "image_dim": None}, + ) + _write_json( + root / "transformer_2" / "config.json", + {"_class_name": "WanTransformer3DModel", "in_channels": 36, "image_dim": None}, + ) + _write_json(root / "vae" / "config.json", {"_class_name": "AutoencoderKLWan", "z_dim": 16}) + + def _build_overrides(model_path: Path, name: str) -> dict: return { "hash": "test-hash", @@ -64,6 +83,29 @@ def test_a14b_detected_from_dual_transformer(self) -> None: assert cfg.variant == WanVariantType.T2V_A14B assert cfg.has_dual_expert is True + def test_i2v_a14b_detected_from_in_channels_36(self) -> None: + """I2V-A14B has the same dual-expert + z_dim=16 layout as T2V, but its + transformer's ``in_channels`` is 36 (16 noise + 16 ref-image latents + + 4 first-frame mask). That's the canonical Wan 2.2 differentiator.""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-I2V-A14B" + _build_i2v_a14b_layout(root) + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "I2V")) + + assert cfg.variant == WanVariantType.I2V_A14B + assert cfg.has_dual_expert is True + + def test_t2v_a14b_kept_when_in_channels_is_16(self) -> None: + """A14B layout with ``in_channels=16`` resolves to T2V (not I2V).""" + with TemporaryDirectory() as tmp: + root = Path(tmp) / "Wan2.2-T2V-A14B" + _build_a14b_layout(root) + + cfg = Main_Diffusers_Wan_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "T2V")) + + assert cfg.variant == WanVariantType.T2V_A14B + def test_ti2v_5b_detected_from_z_dim(self) -> None: with TemporaryDirectory() as tmp: root = Path(tmp) / "Wan2.2-TI2V-5B" diff --git a/tests/backend/wan/test_wan_ref_image_extension.py b/tests/backend/wan/test_wan_ref_image_extension.py new file mode 100644 index 00000000000..d301eda4925 --- /dev/null +++ 
b/tests/backend/wan/test_wan_ref_image_extension.py @@ -0,0 +1,112 @@ +"""Tests for the Wan 2.2 I2V reference-image VAE-latent encoder helper.""" + +from unittest.mock import MagicMock + +import torch +from PIL import Image + +from invokeai.backend.wan.extensions.wan_ref_image_extension import ( + encode_reference_image_to_condition, + preprocess_reference_image, +) + + +def _make_fake_vae(z_dim: int = 16, spatial_scale: int = 8, temporal_scale: int = 4) -> MagicMock: + """Stand-in for ``AutoencoderKLWan`` that returns deterministic latents. + + ``encode(pixel)`` returns a fake distribution whose ``sample()`` yields + a tensor sized exactly as the real Wan VAE would: ``[B, z_dim, T_lat, H/8, W/8]``. + """ + vae = MagicMock() + + # ``next(iter(vae.parameters())).dtype`` is queried; pin to float32. + param = torch.zeros(1, dtype=torch.float32) + vae.parameters = MagicMock(return_value=iter([param])) + + # Config carries per-channel normalisation stats. + vae.config = MagicMock() + vae.config.latents_mean = [0.0] * z_dim + vae.config.latents_std = [1.0] * z_dim + + def fake_encode(pixel: torch.Tensor, return_dict: bool = False): + b, _, t, h, w = pixel.shape + t_lat = (t - 1) // temporal_scale + 1 + h_lat = h // spatial_scale + w_lat = w // spatial_scale + latents = torch.zeros(b, z_dim, t_lat, h_lat, w_lat, dtype=pixel.dtype) + + dist = MagicMock() + dist.sample = MagicMock(return_value=latents) + # The pipeline does ``vae.encode(...)[0]`` for non-dict returns. 
+ return (dist,) if return_dict is False else MagicMock(latent_dist=dist) + + vae.encode = fake_encode + return vae + + +class TestPreprocess: + def test_resize_to_target_dims(self): + img = Image.new("RGB", (200, 300), (128, 128, 128)) + out = preprocess_reference_image(img, width=64, height=64) + # Shape: [batch=1, channels=3, time=1, H, W] + assert out.shape == (1, 3, 1, 64, 64) + + def test_normalised_to_minus_one_to_one(self): + # Pure white and pure black must land on the two ends of the [-1, 1] range. + img = Image.new("RGB", (64, 64), (255, 255, 255)) + out = preprocess_reference_image(img, width=64, height=64) + # White → 1.0 + assert torch.allclose(out, torch.ones_like(out), atol=1e-4) + + black = Image.new("RGB", (64, 64), (0, 0, 0)) + out_b = preprocess_reference_image(black, width=64, height=64) + # Black → -1.0 + assert torch.allclose(out_b, -torch.ones_like(out_b), atol=1e-4) + + def test_rejects_non_multiple_of_8(self): + img = Image.new("RGB", (100, 100)) + import pytest + + with pytest.raises(ValueError, match="multiples of 8"): + preprocess_reference_image(img, width=65, height=64) + + +class TestEncodeReferenceImageToCondition: + """The condition tensor must be 20-channel (4 mask + 16 image latents) + and shaped for the denoise step's later concat with 16-ch noise latents.""" + + def test_shape_at_64x64(self): + img = Image.new("RGB", (64, 64)) + vae = _make_fake_vae() + cond = encode_reference_image_to_condition( + image=img, vae=vae, width=64, height=64, device=torch.device("cpu"), dtype=torch.float32 + ) + # [1, 20, 1, 8, 8] — 4-ch mask + 16-ch latents at H/8, W/8. + assert cond.shape == (1, 20, 1, 8, 8) + + def test_shape_at_1024x1024(self): + img = Image.new("RGB", (1024, 1024)) + vae = _make_fake_vae() + cond = encode_reference_image_to_condition( + image=img, vae=vae, width=1024, height=1024, device=torch.device("cpu"), dtype=torch.float32 + ) + # 1024/8 = 128 latent spatial dim. 
+ assert cond.shape == (1, 20, 1, 128, 128) + + def test_first_four_channels_are_all_ones_mask(self): + img = Image.new("RGB", (64, 64)) + vae = _make_fake_vae() + cond = encode_reference_image_to_condition( + image=img, vae=vae, width=64, height=64, device=torch.device("cpu"), dtype=torch.float32 + ) + mask = cond[:, :4] + # First-frame mask is all-ones at num_frames=1 (every position is the first frame). + assert torch.equal(mask, torch.ones_like(mask)) + + def test_returns_dtype(self): + img = Image.new("RGB", (64, 64)) + vae = _make_fake_vae() + cond = encode_reference_image_to_condition( + image=img, vae=vae, width=64, height=64, device=torch.device("cpu"), dtype=torch.bfloat16 + ) + assert cond.dtype == torch.bfloat16 From 5d5f8cb07c39525f9ff9702a9e00ee5c331923f8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 00:15:50 -0400 Subject: [PATCH 06/12] test(wan): add Phase 8 inpaint regression tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The denoise_mask wiring + RectifiedFlowInpaintExtension integration in wan_denoise.py was put in place during Phase 2/3 alongside the rest of the denoise loop. Phase 8 of the plan was about ensuring this path worked and is locked in by tests. Three new tests under TestWanDenoiseInpaint: 1. test_preserved_region_matches_init_exactly: builds a half/half mask (left = preserve, right = regenerate in user-side convention), runs full denoise with the synthetic zero-output transformer, and asserts the preserved half of the final latents equals the init exactly while the regenerated half does not. Pins the mask-inversion + per-step merge behavior. 2. test_inpaint_requires_init_latents: a mask without init latents must raise a clear ValueError — the merge has nothing to weld back to. 3. 
test_no_mask_path_is_unchanged: regression that adding the inpaint extension didn't perturb the non-inpaint codepath (with init latents + denoising_start=0.5 but no mask, the loop just runs img2img). Co-Authored-By: Claude Opus 4.7 (1M context) feat(frontend): add I2V_A14B to Wan variant zod enum + manager label Phase 7 added the I2V_A14B backend variant. The frontend's zod enum (features/nodes/types/common.ts:zWanVariantType) and the model manager's variant-label map (features/modelManagerV2/models.ts) were still on the two-variant list, so: - ModelIdentifierField inputs with ui_model_variant filters on Wan couldn't list I2V models. - The model manager UI showed a raw 'i2v_a14b' string instead of the human label. Phase 9 (full linear-view wiring — type guards, hooks, params slice, graph builder, tab UI) is in progress on a follow-up commit; this lands the two small enum fixes first so the I2V probe / install paths work correctly end-to-end with the existing FE. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../web/src/features/modelManagerV2/models.ts | 1 + .../web/src/features/nodes/types/common.ts | 2 +- tests/app/invocations/test_wan_denoise.py | 164 ++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) diff --git a/invokeai/frontend/web/src/features/modelManagerV2/models.ts b/invokeai/frontend/web/src/features/modelManagerV2/models.ts index 60adf6474f5..9928a2ce66a 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/models.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/models.ts @@ -262,6 +262,7 @@ export const MODEL_VARIANT_TO_LONG_NAME: Record = { generate: 'Qwen Image', edit: 'Qwen Image Edit', t2v_a14b: 'Wan 2.2 T2V A14B', + i2v_a14b: 'Wan 2.2 I2V A14B', ti2v_5b: 'Wan 2.2 TI2V 5B', qwen3_4b: 'Qwen3 4B', qwen3_8b: 'Qwen3 8B', diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index 24bc5d8b0f6..62055b76e49 100644 --- 
a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -167,7 +167,7 @@ export const zFluxVariantType = z.enum(['dev', 'dev_fill', 'schnell']); export const zFlux2VariantType = z.enum(['klein_4b', 'klein_4b_base', 'klein_9b', 'klein_9b_base']); export const zZImageVariantType = z.enum(['turbo', 'zbase']); const zQwenImageVariantType = z.enum(['generate', 'edit']); -const zWanVariantType = z.enum(['t2v_a14b', 'ti2v_5b']); +const zWanVariantType = z.enum(['t2v_a14b', 'i2v_a14b', 'ti2v_5b']); export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b', 'qwen3_06b']); export const zAnyModelVariant = z.union([ zModelVariantType, diff --git a/tests/app/invocations/test_wan_denoise.py b/tests/app/invocations/test_wan_denoise.py index e10fcbadc54..5baa86b5211 100644 --- a/tests/app/invocations/test_wan_denoise.py +++ b/tests/app/invocations/test_wan_denoise.py @@ -614,3 +614,167 @@ def test_dim_mismatch_raises(self, fake_model_root: Path) -> None: inv = self._make_inv_with_ref(ref_field, width=64, height=64) with pytest.raises(ValueError, match="must match denoise dimensions"): inv._run_diffusion(ctx) + + +class TestWanDenoiseInpaint: + """Phase 8: ``denoise_mask`` (inpaint) wiring via ``RectifiedFlowInpaintExtension``. + + User-side mask convention (matches Anima / Flux): 1.0 = preserve, + 0.0 = regenerate. After ``_prep_inpaint_mask`` inverts, the extension + sees: 0.0 = preserve, 1.0 = regenerate. + + With the synthetic zero-output transformer, the scheduler step is a + no-op (noise_pred=0 → latents unchanged). The init latents are placed + into the preserved regions at every step via the extension's merge + function; the regenerated regions stay as the original noise tensor + because the model never updates them. 
+ """ + + def _build_inpaint_context( + self, + transformer: _ZeroTransformer, + variant: WanVariantType, + model_root: Path, + init_latents: torch.Tensor, + mask: torch.Tensor, + ) -> MagicMock: + ctx = _build_context( + transformer, + variant=variant, + model_root=model_root, + pos_cond=_make_conditioning(), + neg_cond=None, + ) + + # tensors.load needs to return different tensors for the init-latents + # and the mask, dispatched by the name field. + def _load_tensor(name: str) -> torch.Tensor: + if name == "init": + return init_latents + if name == "mask": + return mask + raise KeyError(name) + + ctx.tensors.load.side_effect = _load_tensor + return ctx + + def test_preserved_region_matches_init_exactly(self, fake_model_root: Path) -> None: + from invokeai.app.invocations.fields import DenoiseMaskField, LatentsField + + transformer = _ZeroTransformer() + # 64x64 image -> 8x8 latents at scale 8 (T2V-A14B family). + # Init latents: fixed value 0.5 so the preserved region is detectable. + init_latents = torch.full((1, 16, 8, 8), 0.5) + # Mask: 8x8 spatial mask, half-1 (preserve left), half-0 (regenerate right). + # User-side convention: 1 = preserve, 0 = regenerate. 
+ mask = torch.zeros(1, 1, 8, 8) + mask[..., :, :4] = 1.0 # left half preserved + + ctx = self._build_inpaint_context( + transformer, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + init_latents=init_latents, + mask=mask, + ) + + inv = WanDenoiseInvocation( + id="test", + transformer=_wan_transformer_field(), + positive_conditioning=WanConditioningField(conditioning_name="pos"), + negative_conditioning=None, + latents=LatentsField(latents_name="init"), + denoise_mask=DenoiseMaskField(mask_name="mask", masked_latents_name=None, gradient=False), + width=64, + height=64, + steps=4, + guidance_scale=1.0, + denoising_start=0.0, + denoising_end=1.0, + seed=42, + ) + + out = inv._run_diffusion(ctx) # [B, C, H_lat, W_lat] + assert out.shape == (1, 16, 8, 8) + + # Preserved (left) half: must exactly match the init latents at t_prev=0 + # (final step's merge produces noised_init = noise*0 + 1*init = init). + assert torch.allclose(out[..., :, :4], torch.full_like(out[..., :, :4], 0.5)), ( + "Preserved region must equal init latents at the end of denoise" + ) + + # Regenerated (right) half: model never changed anything (zero transformer) + # so this region stays equal to the original noise, NOT to init. + # Assert it's *not* equal to init — concrete proof the regions are + # being handled separately. 
+ assert not torch.allclose(out[..., :, 4:], torch.full_like(out[..., :, 4:], 0.5)), ( + "Regenerated region should NOT equal init — extension must route it through the model path" + ) + + def test_inpaint_requires_init_latents(self, fake_model_root: Path) -> None: + """Providing a mask without init latents must raise — there's nothing + to merge back into the preserved regions.""" + from invokeai.app.invocations.fields import DenoiseMaskField + + transformer = _ZeroTransformer() + mask = torch.ones(1, 1, 8, 8) + ctx = self._build_inpaint_context( + transformer, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + init_latents=torch.zeros(1, 16, 8, 8), # unused + mask=mask, + ) + + inv = WanDenoiseInvocation( + id="test", + transformer=_wan_transformer_field(), + positive_conditioning=WanConditioningField(conditioning_name="pos"), + negative_conditioning=None, + latents=None, # missing — error + denoise_mask=DenoiseMaskField(mask_name="mask", masked_latents_name=None, gradient=False), + width=64, + height=64, + steps=2, + guidance_scale=1.0, + seed=42, + ) + + with pytest.raises(ValueError, match="img2img inpainting"): + inv._run_diffusion(ctx) + + def test_no_mask_path_is_unchanged(self, fake_model_root: Path) -> None: + """Without a denoise_mask, the loop behaves as before — sanity check + that adding the inpaint extension didn't introduce a regression on + the non-inpaint codepath.""" + from invokeai.app.invocations.fields import LatentsField + + transformer = _ZeroTransformer() + init_latents = torch.full((1, 16, 8, 8), 0.3) + ctx = self._build_inpaint_context( + transformer, + variant=WanVariantType.T2V_A14B, + model_root=fake_model_root, + init_latents=init_latents, + mask=torch.zeros(1, 1, 8, 8), # unused — no mask wired + ) + + inv = WanDenoiseInvocation( + id="test", + transformer=_wan_transformer_field(), + positive_conditioning=WanConditioningField(conditioning_name="pos"), + negative_conditioning=None, + 
latents=LatentsField(latents_name="init"), + denoise_mask=None, # no mask + width=64, + height=64, + steps=4, + guidance_scale=1.0, + denoising_start=0.5, # img2img-style partial denoise + denoising_end=1.0, + seed=42, + ) + + out = inv._run_diffusion(ctx) + assert out.shape == (1, 16, 8, 8) + assert torch.isfinite(out).all() From c595452a81d33c5f211ea7ebfef72b03d12dbf13 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 00:47:13 -0400 Subject: [PATCH 07/12] feat(wan): Phase 9 piece #1 - linear-view T2V txt2img graph builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the minimum frontend wiring needed to generate Wan 2.2 images from the linear view: - buildWanGraph.ts (new): text-to-image graph (model_loader → text_encoder × 2 → denoise → l2i). Diffusers main model only — transformer, VAE and UMT5 encoder all resolve from the same repo, so no Wan-specific params slice fields are required yet. CFG-skip branch when guidance_scale ≤ 1.0. - useEnqueueGenerate / useEnqueueCanvas dispatchers: route base === 'wan' to buildWanGraph. - graph/types.ts: add wan_l2i / wan_i2l / wan_denoise / wan_model_loader to the relevant node-type unions. - addTextToImage / addImageToImage: include wan_denoise / wan_l2i so width/height are wired correctly and the txt2img helper accepts the Wan l2i node. - isMainModelWithoutUnet: include wan_model_loader (Wan has no UNet, same as the other modern bases). - metadata.py: add wan_txt2img / wan_img2img / wan_inpaint to the generation_mode enum (img2img / inpaint pieces land next). - schema.ts: regenerated to pick up the metadata enum + new Wan invocations. Pieces left in Phase 9: params slice (standalone VAE / T5 / GGUF low-noise / LoRA / ref-image fields + selectors), img2img + I2V + inpaint branches in the graph builder, and Wan-specific UI components. 
Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): Phase 9 piece #2 - GGUF support and CFG-Low control in linear view Adds the three Wan-specific params + UI controls that gate GGUF workflows plus a separate low-noise CFG slider for A14B users. Params slice: - wanTransformerLowNoise (the second-expert GGUF for A14B) - wanComponentSource (Diffusers Wan model providing VAE + UMT5-XXL when the main is a GGUF) - wanGuidanceScaleLowNoise (optional separate CFG for the low-noise expert; null = fall back to the primary CFG) Plus a `selectIsWan` selector for accordion gating. UI components: - ParamWanModelSelects.tsx (Advanced accordion): two model pickers — Transformer (Low Noise) filtered to Wan GGUF mains, and VAE/Encoder Source filtered to Wan Diffusers mains. Mirrors the ParamQwenImageComponentSourceSelect structure. - ParamWanGuidanceScaleLowNoise.tsx (Generation accordion): slider + number input with an "auto" indicator when cleared. Default 3.5 matches the diffusers reference 4.0 / 3.0 split. Wiring: - Generation accordion: ParamWanGuidanceScaleLowNoise shown when base is wan, scheduler excluded for wan (same pattern as Anima/Qwen). - Advanced accordion: ParamWanModelSelects shown when base is wan, and Wan excluded from the SD-family VAE/CFG-rescale blocks. - buildWanGraph.ts: forwards the three new params to the model loader and denoise nodes (transformer_low_noise_model, component_source, guidance_scale_low_noise) and adds them to the graph metadata. Hooks/types: - useWanDiffusersModels + useWanGGUFModels in modelsByType.ts. - isWanDiffusersMainModelConfig + isWanGGUFMainModelConfig type guards. - Three new locale strings (wanComponentSource, wanTransformerLowNoise, wanGuidanceScaleLowNoise[Auto]). GGUF workflow now works end-to-end in the linear view: pick a Wan GGUF main, set Transformer (Low Noise) to the paired second-expert GGUF, set VAE/Encoder Source to any Diffusers Wan repo (TI2V-5B is convenient at ~12 GB) — generate produces an image. 
Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): UX polish on the Wan linear-view controls Bundles five small fixes applied during a usability review of the Wan linear-view section (piece #2): 1. **Filter Main vs Transformer (Low Noise) dropdowns by expert tag.** The Wan GGUF probe records each file's ``expert`` field (``"high"`` / ``"low"`` / ``"none"``) via filename heuristic. - ``MainModelPicker``: hides ``expert === 'low'`` Wan GGUFs so users can't accidentally wire a low-noise expert as the primary main. - Transformer (Low Noise) picker (``useWanGGUFLowNoiseModels``): shows ``expert === 'low'`` Wan GGUFs only. Diffusers Wan mains and TI2V-5B aren't affected — they don't carry the ``expert`` field on their config schema. The backend's auto-swap safety net stays in place. 2. **Match the primary CFG slider's range.** The Wan low-noise CFG slider was constrained to 1–10 while the primary CFG ranges 1–20. With the diffusers reference 4/3 split, the low-noise slider thumb sat noticeably further right than the primary — visually misleading. Both sliders now share the 1–20 range with marks at [1, 10, 20]. 3. **Label fits the form column.** "CFG (Low Noise)" → "CFG (Low)" so the slider fits cleanly next to its label instead of overlapping. 4. **Indicator state for the low-noise CFG slider.** Replaced the inline "(auto)" / "(same as cfg)" text — which kept overlapping the slider regardless of how short the label got — with an X-only reset button that's only visible when the user has set an explicit value. Absence of the X conveys auto/fallback state without any text overhang. 5. **Friendlier Transformer (Low Noise) placeholder.** "Second-expert GGUF for A14B (pair with the high-noise main)" → "Add for full detail" — concise nudge for users who haven't paired the second expert yet. Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): Phase 9 piece #3 - linear-view img2img branch Adds Wan 2.2 image-to-image to the linear view, mirroring the Qwen Image pattern. 
The mode switches on the canvas state — pure-prompt runs go through addTextToImage as before; canvas runs with an init image go through addImageToImage which wires a fresh wan_i2l (Image to Latents - Wan 2.2) node between the init image and the denoise's `latents` input, honoring the existing denoise_start slider. buildWanGraph: - Drops the txt2img-only guard, branches on generationMode. - img2img: spins up a wan_i2l node and hands it to addImageToImage alongside the existing denoise / l2i / modelLoader (as vaeSource). - inpaint / outpaint still fail loudly — pieces #4-#6. graphBuilderUtils.getDenoisingStartAndEnd: - Adds 'wan' to the simple-linear case (denoising_start = 1 - denoisingStrength). Note: Wan's flow-matching schedule is "sticky" on the init compared to SDXL — users will likely need denoisingStrength ≥ 0.7 to see substantial change, matching the user-found 0.15-0.3 denoising_start sweet spot from earlier img2img testing. We may revisit this with an exponent rescale (like FLUX uses) if the response curve feels off. addImageToImage: - Adds 'wan_i2l' to the i2l-node-type union so the Wan i2l can be threaded through the shared helper. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): add wan_denoise to addImageToImage/addInpaint/addOutpaint type checks Three sibling graph-helper utilities had the same modern-base list as addTextToImage did, and the buildWanGraph img2img branch tripped one of them at canvas-Generate time: error [generation]: Failed to build graph {name: 'Error', message: 'Wrong assertion encountered'} The else-branch in each helper assumes 'denoise_latents' (the SD1.5/SDXL legacy path) and asserts that — failing for any modern base not listed above the branch. addTextToImage was already updated in Phase 9 piece #1; this catches the parallel cases that the img2img/inpaint/outpaint flows go through. 
Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): Phase 9 piece #4 - linear-view inpaint and outpaint branches Wires Wan 2.2 inpaint and outpaint through the existing addInpaint / addOutpaint helpers. The backend's RectifiedFlowInpaintExtension was plumbed into wan_denoise.py back in Phase 8 (commit ab54617173); this just connects the FE. buildWanGraph: - generationMode === 'inpaint' → spin up a wan_i2l, call addInpaint with denoise + l2i + modelLoader (used as both vaeSource and modelLoader since the Wan model loader carries the VAE). - generationMode === 'outpaint' → parallel branch with addOutpaint. addInpaint: - i2l-node-type union now includes 'wan_i2l' (the addImageToImage and addOutpaint type unions already do — different union shapes). metadata.py: - generation_mode literal adds "wan_outpaint" alongside the existing wan_txt2img / wan_img2img / wan_inpaint entries. isMainModelWithoutUnet already includes wan_model_loader (Phase 9 piece #1), which covers create_gradient_mask when Wan is the main. Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): Phase 9 piece #5 - linear-view I2V branch (raster as reference image) Wan 2.2 I2V-A14B models condition on a reference image whose VAE-encoded latents are concatenated to the noise along the channel dim each step (in_channels=36 on the I2V transformer). In the linear view this maps cleanly onto the existing canvas raster layer: pick an I2V model, drag an image to raster, generate. buildWanGraph: - Fetch the modelConfig early so the variant gate (i2v_a14b vs the rest) can drive the branch shape instead of being a post-hoc check. - I2V + txt2img: fail loudly ("Switch to the canvas tab and drag an image to the raster layer"). I2V models won't produce useful output without a reference, and the backend would crash trying to concatenate a missing condition tensor. 
- I2V + img2img: pull the raster image via the canvas compositor, wire it through a wan_ref_image_encoder (which VAE-encodes it and builds the 4-mask + 16-latent condition tensor backend-side), then feed the result into denoise.ref_image. Denoise runs from fresh noise (denoising_start=0, no init_latents) — the ref image is cross-attention/concat conditioning, not a noise-trajectory anchor. - I2V + inpaint/outpaint: fail clearly. Combining ref-image conditioning with a denoise mask is conceptually possible but the backend interaction hasn't been validated end-to-end. metadata.py: - Adds "wan_i2v" to the generation_mode literal so the metadata field on I2V renders correctly. T2V flows (txt2img / img2img / inpaint / outpaint) are unchanged for non-I2V Wan variants (T2V-A14B and TI2V-5B). Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): enforce multiple-of-16 dimensions to match transformer patch grid Wan 2.2's transformer has ``patch_size=(1, 2, 2)``: it patch-embeds with stride 2 then un-patches by 2. Combined with the VAE's 8x spatial scale, canvas H/W must be a multiple of ``8 * 2 = 16`` — not just 8 — for the patch round-trip to land exactly. Otherwise the latents and noise prediction disagree by one in the spatial dim and the scheduler step fails: RuntimeError: The size of tensor a (147) must match the size of tensor b (146) at non-singleton dimension 3 (here latent_w=147 → patch_w=73 → un-patched_w=146 ≠ 147) This was silent for T2V at 1024x1024 (already a multiple of 16) but fired for I2V at non-multiple-of-16 canvas sizes. Fixes: - ``optimalDimension.getGridSize``: Wan moves from the default 8 case to the multiple-of-16 case (alongside flux / sd-3 / qwen-image / z-image which have the same patch arithmetic). The canvas bbox UI now snaps Wan dimensions to multiples of 16. - ``wan_denoise.py`` and ``wan_ref_image_encoder.py``: bump width/height ``multiple_of`` from 8 to 16. 
Defense-in-depth — workflow-editor users won't be able to send a non-16-aligned dim either. Existing backend tests (23 passing) still hold — 1024 is divisible by 16 so the test fixtures didn't exercise the off-by-one path. Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): show negative prompt box in Wan linear-view Wan was missing from SUPPORTS_NEGATIVE_PROMPT_BASE_MODELS, so the linear-view negative-prompt input was hidden even though the Wan denoise node already wires negative conditioning when CFG > 1 (buildWanGraph.ts:67-75). Adds 'wan' to the list. Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): Phase 9 piece #6 - Wan LoRA collection in linear view Adds Wan LoRA wiring to buildWanGraph, mirroring the Qwen Image pattern. The shared LoRASelect / LoRAList UI in the linear view already filters LoRAs by the selected main model's base, so Wan LoRAs surface automatically when a Wan main is picked — no UI changes needed. addWanLoRAs (new): - Filters state.loras.loras to enabled Wan LoRAs. - For each LoRA: spawns a ``lora_selector`` node and threads it through a single ``collect`` collector. - Routes the collector into a ``wan_lora_collection_loader`` which sits between modelLoader and denoise — modelLoader.transformer → loader, then loader.transformer → denoise (rerouting the original modelLoader → denoise edge). - Emits per-LoRA metadata so PNG metadata + workflow restore work. The dual-expert routing (high-noise vs low-noise vs untagged) is handled entirely on the backend by ``WanLoRACollectionLoader`` based on each LoRA's recorded ``expert`` tag (set by the probe from the filename heuristic in piece #5 of Phase 5). The FE just hands over the bag of LoRAs; no per-list FE plumbing needed. buildWanGraph: - Calls addWanLoRAs(state, g, denoise, modelLoader) after the base transformer edge is in place. The helper is a no-op when no Wan LoRAs are enabled, so it's safe to call unconditionally. 
Co-Authored-By: Claude Opus 4.7 (1M context) fix(wan): detect LoRA variant and filter by main model Wan 2.2 A14B (inner_dim=5120) and TI2V-5B (inner_dim=3072) LoRAs are not interchangeable — applying one against the wrong main model crashes the layer patcher with a tensor-shape error (e.g. A14B Lightning on TI2V-5B mains produced ``shape '[3072, 3072]' is invalid for input of size 26214400``). Probe Wan LoRAs' inner-dim at install time and record the family on a new ``variant`` field (``a14b`` / ``5b`` / null). The LoRA picker in the linear view hides incompatible variants when the user selects a main, and the graph builder filters any still-enabled mismatches at submit time with a warning. Untagged LoRAs (probe couldn't identify) pass through so they aren't silently hidden. Co-Authored-By: Claude Opus 4.7 (1M context) feat(wan): ref-image panel, GGUF readiness, and auto-default sources Wan 2.2 I2V now uses the global Reference Images panel (same UX as Qwen Image Edit and FLUX.2 Klein) instead of pulling the conditioning image from a canvas raster layer. Adds: - WanReferenceImageConfig zod type + isWanReferenceImageConfig guard; integrated into the ref-image discriminated union, settings panel, layer hooks, and validators. - 'wan' added to SUPPORTS_REF_IMAGES_BASE_MODELS, but the panel only shows for the i2v_a14b variant (T2V and TI2V-5B don't consume ref images, so the panel is hidden for them). - buildWanGraph I2V branch reads the first enabled wan_reference_image from refImagesSlice; the canvas-raster-as-ref path is removed. I2V now only supports txt2img mode (canvas img2img/inpaint/outpaint assert with a clear message). GGUF Wan readiness check: GGUF mains carry only the transformer, so the loader needs a Diffusers Component Source (or standalone VAE + UMT5-XXL encoder) to resolve the VAE and text encoder. Without one, enqueue is now blocked with a clear reason. 
The low-noise A14B partner expert remains optional (loader falls back to the high-noise expert when it's missing). Adds standalone Wan VAE and Wan T5 Encoder selectors to the Advanced accordion (Qwen pattern). Wires them as vae_model / wan_t5_encoder_model on the wan_model_loader node — backend priority is standalone > diffusers main > component source. Auto-default on Wan selection (so GGUF users don't have to fiddle with Advanced): when the new main is a Wan GGUF, fill the Component Source, standalone VAE, and standalone T5 encoder with first available matches if not already set. Component Source is matched by variant family (A14B GGUF prefers an A14B Diffusers; TI2V-5B prefers a TI2V-5B Diffusers) since the two families use different VAE channel counts (16 vs 48); within A14B, T2V and I2V share VAE/encoder so they're interchangeable as a source. Runs on every Wan selection (including Diffusers -> GGUF switches), only fills empty slots. Co-Authored-By: Claude Opus 4.7 (1M context) --- invokeai/app/invocations/metadata.py | 5 + invokeai/app/invocations/wan_denoise.py | 8 +- .../app/invocations/wan_ref_image_encoder.py | 6 +- .../model_records/model_records_base.py | 2 + .../backend/model_manager/configs/lora.py | 15 + invokeai/backend/model_manager/taxonomy.py | 19 ++ .../lora_conversions/wan_lora_constants.py | 61 ++++ invokeai/frontend/web/public/locales/en.json | 9 + .../listeners/modelSelected.ts | 118 ++++++++ .../components/RefImage/RefImageSettings.tsx | 8 +- .../controlLayers/hooks/addLayerHooks.ts | 14 +- .../controlLayers/store/paramsSlice.ts | 58 +++- .../controlLayers/store/refImagesSlice.ts | 9 +- .../src/features/controlLayers/store/types.ts | 25 ++ .../src/features/controlLayers/store/util.ts | 5 + .../controlLayers/store/validators.ts | 14 +- .../features/lora/components/LoRASelect.tsx | 19 ++ .../web/src/features/modelManagerV2/models.ts | 5 +- .../web/src/features/nodes/types/common.ts | 4 + .../util/graph/generation/addImageToImage.ts | 5 +- 
.../nodes/util/graph/generation/addInpaint.ts | 4 +- .../util/graph/generation/addOutpaint.ts | 3 +- .../util/graph/generation/addTextToImage.ts | 4 +- .../util/graph/generation/addWanLoRAs.ts | 132 +++++++++ .../util/graph/generation/buildWanGraph.ts | 277 ++++++++++++++++++ .../nodes/util/graph/graphBuilderUtils.ts | 4 +- .../src/features/nodes/util/graph/types.ts | 15 +- .../Advanced/ParamWanModelSelects.tsx | 246 ++++++++++++++++ .../Core/ParamWanGuidanceScaleLowNoise.tsx | 94 ++++++ .../parameters/util/optimalDimension.ts | 10 +- .../features/queue/hooks/useEnqueueCanvas.ts | 3 + .../queue/hooks/useEnqueueGenerate.ts | 3 + .../web/src/features/queue/store/readiness.ts | 26 ++ .../AdvancedSettingsAccordion.tsx | 12 +- .../GenerationSettingsAccordion.tsx | 7 +- .../MainModelPicker.tsx | 21 +- .../src/services/api/hooks/modelsByType.ts | 11 + .../frontend/web/src/services/api/schema.ts | 23 +- .../frontend/web/src/services/api/types.ts | 27 +- .../configs/test_wan_lora_config.py | 50 ++++ 40 files changed, 1345 insertions(+), 36 deletions(-) create mode 100644 invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts create mode 100644 invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts create mode 100644 invokeai/frontend/web/src/features/parameters/components/Advanced/ParamWanModelSelects.tsx create mode 100644 invokeai/frontend/web/src/features/parameters/components/Core/ParamWanGuidanceScaleLowNoise.tsx diff --git a/invokeai/app/invocations/metadata.py b/invokeai/app/invocations/metadata.py index da24d8802bb..c5acc6757d9 100644 --- a/invokeai/app/invocations/metadata.py +++ b/invokeai/app/invocations/metadata.py @@ -174,6 +174,11 @@ def invoke(self, context: InvocationContext) -> MetadataOutput: "anima_img2img", "anima_inpaint", "anima_outpaint", + "wan_txt2img", + "wan_img2img", + "wan_inpaint", + "wan_outpaint", + "wan_i2v", ] diff --git a/invokeai/app/invocations/wan_denoise.py 
b/invokeai/app/invocations/wan_denoise.py index 7b9a02ee835..ca15ddd373a 100644 --- a/invokeai/app/invocations/wan_denoise.py +++ b/invokeai/app/invocations/wan_denoise.py @@ -264,8 +264,12 @@ class WanDenoiseInvocation(BaseInvocation): "Ignored for TI2V-5B.", title="Guidance Scale (Low Noise)", ) - width: int = InputField(default=1024, multiple_of=8, description="Width of the generated image.") - height: int = InputField(default=1024, multiple_of=8, description="Height of the generated image.") + # Wan transformer has ``patch_size=(1, 2, 2)``: combined with the VAE's + # 8x spatial scale, generated H/W must be a multiple of 16 (not just 8) + # or the patch round-trip lands off-by-one and the scheduler step fails + # with a spatial-dim mismatch. + width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.") + height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.") steps: int = InputField(default=40, gt=0, description="Number of denoising steps.") seed: int = InputField(default=0, description="Randomness seed for reproducibility.") diff --git a/invokeai/app/invocations/wan_ref_image_encoder.py b/invokeai/app/invocations/wan_ref_image_encoder.py index 858bf25514c..cc1d6c90669 100644 --- a/invokeai/app/invocations/wan_ref_image_encoder.py +++ b/invokeai/app/invocations/wan_ref_image_encoder.py @@ -48,14 +48,16 @@ class WanRefImageEncoderInvocation(BaseInvocation): vae: VAEField = InputField( description=FieldDescriptions.vae, input=Input.Connection, title="VAE" ) + # Must match wan_denoise's width/height. multiple_of=16 (not 8) because + # Wan's transformer patch_size=(1, 2, 2) needs latent H/W to be even. 
width: int = InputField( default=1024, - multiple_of=8, + multiple_of=16, description="Width to resize the reference image to (must match denoise width).", ) height: int = InputField( default=1024, - multiple_of=8, + multiple_of=16, description="Height to resize the reference image to (must match denoise height).", ) diff --git a/invokeai/app/services/model_records/model_records_base.py b/invokeai/app/services/model_records/model_records_base.py index 3fedd8fb07c..4d5a9d102ca 100644 --- a/invokeai/app/services/model_records/model_records_base.py +++ b/invokeai/app/services/model_records/model_records_base.py @@ -33,6 +33,7 @@ Qwen3VariantType, QwenImageVariantType, SchedulerPredictionType, + WanLoRAVariantType, WanVariantType, ZImageVariantType, ) @@ -136,6 +137,7 @@ def validate_source_url(cls, v: Any) -> Optional[str]: | ZImageVariantType | QwenImageVariantType | WanVariantType + | WanLoRAVariantType | Qwen3VariantType ] = Field(description="The variant of the model.", default=None) prediction_type: Optional[SchedulerPredictionType] = Field( diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index ed3272fee96..4659f341de8 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -28,6 +28,7 @@ FluxLoRAFormat, ModelFormat, ModelType, + WanLoRAVariantType, ZImageVariantType, ) from invokeai.backend.model_manager.util.model_util import lora_token_vector_length @@ -39,6 +40,7 @@ ) from invokeai.backend.patches.lora_conversions.flux_control_lora_utils import is_state_dict_likely_flux_control from invokeai.backend.patches.lora_conversions.wan_lora_constants import ( + detect_wan_lora_variant, has_non_wan_architecture_keys, has_wan_kohya_keys, has_wan_peft_keys, @@ -965,6 +967,14 @@ class LoRA_LyCORIS_Wan_Config(LoRA_LyCORIS_Config_Base, Config_Base): "'low' targets the low-noise expert. 
None means the LoRA is expert-agnostic " "(TI2V-5B, or community LoRAs without explicit tagging) and is applied to both.", ) + variant: WanLoRAVariantType | None = Field( + default=None, + description="The Wan model family this LoRA targets, detected from its inner-dim " + "(5120 -> A14B, 3072 -> TI2V-5B). A14B LoRAs are incompatible with TI2V-5B mains " + "(and vice versa) — they crash with a shape mismatch in the layer patcher. The " + "linear-view graph builder filters LoRAs on variant when building the LoRA " + "collection. None means the LoRA's inner-dim couldn't be identified.", + ) @classmethod def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: @@ -1022,6 +1032,11 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) - elif any(s in name for s in ("low_noise", "low-noise", "lownoise")): instance.expert = "low" + # Auto-detect the model-family variant from inner_dim in the state + # dict. The override field skips this if the user has set it. + if instance.variant is None: + instance.variant = detect_wan_lora_variant(mod.load_state_dict()) + return instance diff --git a/invokeai/backend/model_manager/taxonomy.py b/invokeai/backend/model_manager/taxonomy.py index 23f818a1116..7f7b9f21c1e 100644 --- a/invokeai/backend/model_manager/taxonomy.py +++ b/invokeai/backend/model_manager/taxonomy.py @@ -191,6 +191,22 @@ class WanVariantType(str, Enum): """Wan 2.2 TI2V-5B - smaller single-transformer model with Wan2.2-VAE (48 latent channels).""" +class WanLoRAVariantType(str, Enum): + """Wan 2.2 LoRA variants, identifying which model family a LoRA targets. + + Detected from the LoRA's inner attention dim: A14B has ``inner_dim=5120``, + TI2V-5B has ``inner_dim=3072``. A14B and 5B LoRAs are NOT interchangeable — + applying one against the wrong main model crashes in the layer patcher + with a tensor-shape error. 
+ """ + + A14B = "a14b" + """Targets a Wan 2.2 A14B main (T2V or I2V, inner_dim=5120).""" + + Wan5B = "5b" + """Targets the Wan 2.2 TI2V-5B main (inner_dim=3072).""" + + class Qwen3VariantType(str, Enum): """Qwen3 text encoder variants based on model size.""" @@ -276,6 +292,7 @@ class FluxLoRAFormat(str, Enum): ZImageVariantType, QwenImageVariantType, WanVariantType, + WanLoRAVariantType, Qwen3VariantType, ] variant_type_adapter = TypeAdapter[ @@ -286,6 +303,7 @@ class FluxLoRAFormat(str, Enum): | ZImageVariantType | QwenImageVariantType | WanVariantType + | WanLoRAVariantType | Qwen3VariantType ]( ModelVariantType @@ -295,5 +313,6 @@ class FluxLoRAFormat(str, Enum): | ZImageVariantType | QwenImageVariantType | WanVariantType + | WanLoRAVariantType | Qwen3VariantType ) diff --git a/invokeai/backend/patches/lora_conversions/wan_lora_constants.py b/invokeai/backend/patches/lora_conversions/wan_lora_constants.py index ebf141f2d92..b539bcbec91 100644 --- a/invokeai/backend/patches/lora_conversions/wan_lora_constants.py +++ b/invokeai/backend/patches/lora_conversions/wan_lora_constants.py @@ -22,6 +22,8 @@ import re +from invokeai.backend.model_manager.taxonomy import WanLoRAVariantType + # Prefix for Wan transformer LoRA layers in the ModelPatchRaw layer dict. # Same convention as Anima / QwenImage — the LayerPatcher uses this prefix to # resolve patches against the loaded transformer's parameter paths. @@ -95,6 +97,65 @@ def has_wan_peft_keys(str_keys: list[str]) -> bool: return False +def detect_wan_lora_variant(state_dict: dict) -> WanLoRAVariantType | None: + """Inspect a Wan LoRA state dict and guess which model family it targets. + + A14B has inner_dim=5120; TI2V-5B has inner_dim=3072. Every transformer + block's ``attn1.to_q`` (or native ``self_attn.q``) LoRA pair has weights + shaped against the inner dim — ``lora_up.weight`` is ``[inner_dim, rank]`` + and ``lora_down.weight`` is ``[rank, inner_dim]``. The larger dim of + either is the inner dim. 
+ + Returns: + ``WanLoRAVariantType.A14B`` if inner_dim == 5120, + ``WanLoRAVariantType.Wan5B`` if inner_dim == 3072, + ``None`` if no recognisable attn weight is found or inner_dim is + ambiguous (e.g. LoRA that only patches FFN at non-standard rank). + """ + # Probe several common key shapes — diffusers PEFT (lora_A/lora_B), + # native Kohya naming (lora_up/lora_down), with or without a + # diffusion_model/transformer prefix, in diffusers or native attn + # naming. The first matching tensor is enough. + candidate_suffixes = ( + # diffusers PEFT + ".attn1.to_q.lora_A.weight", + ".attn1.to_q.lora_B.weight", + ".self_attn.q.lora_A.weight", + ".self_attn.q.lora_B.weight", + # native (Kohya) PEFT + ".attn1.to_q.lora_up.weight", + ".attn1.to_q.lora_down.weight", + ".self_attn.q.lora_up.weight", + ".self_attn.q.lora_down.weight", + ) + kohya_substrings = ( + "_attn1_to_q.lora_up.weight", + "_attn1_to_q.lora_down.weight", + "_self_attn_q.lora_up.weight", + "_self_attn_q.lora_down.weight", + ) + + for key, tensor in state_dict.items(): + if not isinstance(key, str): + continue + match_suffix = any(key.endswith(suffix) for suffix in candidate_suffixes) + match_kohya = any(needle in key for needle in kohya_substrings) + if not (match_suffix or match_kohya): + continue + shape = getattr(tensor, "shape", None) + if shape is None or len(shape) < 2: + continue + inner_dim = max(int(shape[0]), int(shape[1])) + if inner_dim == 5120: + return WanLoRAVariantType.A14B + if inner_dim == 3072: + return WanLoRAVariantType.Wan5B + # Any other inner_dim is uncharted — bail rather than guess. + return None + + return None + + def has_non_wan_architecture_keys(str_keys: list[str]) -> bool: """True if any key indicates a non-Wan architecture (Anima, Qwen, Flux, Z-Image). 
diff --git a/invokeai/frontend/web/public/locales/en.json b/invokeai/frontend/web/public/locales/en.json index 6685fe3bf2b..8ebb3899e28 100644 --- a/invokeai/frontend/web/public/locales/en.json +++ b/invokeai/frontend/web/public/locales/en.json @@ -1368,6 +1368,13 @@ "qwenImageQuantizationInt8": "8-bit (int8)", "qwenImageQuantizationNf4": "4-bit (nf4)", "wanT5Encoder": "Wan2.2 T5 Encoder", + "wanT5EncoderPlaceholder": "From VAE/Encoder Source", + "wanVae": "VAE", + "wanVaePlaceholder": "From VAE/Encoder Source", + "wanComponentSource": "VAE/Encoder Source (Diffusers)", + "wanComponentSourcePlaceholder": "GGUF Wan models require a Diffusers Wan source for VAE + UMT5-XXL", + "wanTransformerLowNoise": "Transformer (Low Noise)", + "wanTransformerLowNoisePlaceholder": "Add for full detail", "upcastAttention": "Upcast Attention", "uploadImage": "Upload Image", "urlOrLocalPath": "URL or Local Path", @@ -1669,6 +1676,7 @@ "noFlux2KleinVaeModelSelected": "No VAE selected. Non-diffusers FLUX.2 Klein models require a standalone VAE", "noFlux2KleinQwen3EncoderModelSelected": "No Qwen3 Encoder selected. 
Non-diffusers FLUX.2 Klein models require a standalone Qwen3 Encoder", "noQwenImageComponentSourceSelected": "GGUF Qwen Image models require a Diffusers Component Source for VAE/encoder", + "noWanComponentSourceSelected": "GGUF Wan 2.2 models require a Diffusers Component Source for VAE/encoder", "noZImageVaeSourceSelected": "No VAE source: Select VAE (FLUX) or Qwen3 Source model", "noZImageQwen3EncoderSourceSelected": "No Qwen3 Encoder source: Select Qwen3 Encoder or Qwen3 Source model", "noAnimaVaeModelSelected": "No Anima VAE model selected", @@ -1724,6 +1732,7 @@ "showOptionsPanel": "Show Side Panel (O or T)", "shift": "Shift", "shuffle": "Shuffle Seed", + "wanGuidanceScaleLowNoise": "CFG (Low)", "steps": "Steps", "strength": "Strength", "symmetry": "Symmetry", diff --git a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts index 20303fe0183..62f813db357 100644 --- a/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts +++ b/invokeai/frontend/web/src/app/store/middleware/listenerMiddleware/listeners/modelSelected.ts @@ -18,6 +18,9 @@ import { setZImageScheduler, syncedToOptimalDimension, vaeSelected, + wanComponentSourceSelected, + wanT5EncoderModelSelected, + wanVaeModelSelected, zImageQwen3EncoderModelSelected, zImageQwen3SourceModelSelected, zImageVaeModelSelected, @@ -37,6 +40,7 @@ import { isAspectRatioID, isFlux2ReferenceImageConfig, isQwenImageReferenceImageConfig, + isWanReferenceImageConfig, } from 'features/controlLayers/store/types'; import { initialFlux2ReferenceImage, @@ -44,6 +48,7 @@ import { initialFLUXRedux, initialIPAdapter, initialQwenImageReferenceImage, + initialWanReferenceImage, } from 'features/controlLayers/store/util'; import { SUPPORTS_REF_IMAGES_BASE_MODELS } from 'features/modelManagerV2/models'; import { zModelIdentifierField } from 
'features/nodes/types/common'; @@ -63,6 +68,9 @@ import { selectQwenVLEncoderModels, selectRegionalRefImageModels, selectT5EncoderModels, + selectWanDiffusersModels, + selectWanT5EncoderModels, + selectWanVAEModels, selectZImageDiffusersModels, } from 'services/api/hooks/modelsByType'; import type { FLUXKontextModelConfig, FLUXReduxModelConfig, IPAdapterModelConfig } from 'services/api/types'; @@ -321,6 +329,29 @@ export const addModelSelectedListener = (startAppListening: AppStartListening) = } } + // handle Wan 2.2 component source / standalone VAE / standalone T5 encoder - + // clear when switching away. (Auto-default happens unconditionally outside + // this block so it fires when switching between Wan variants too.) + const { + wanComponentSource: wanComponentSourceOnLeave, + wanVaeModel: wanVaeModelOnLeave, + wanT5EncoderModel: wanT5EncoderModelOnLeave, + } = state.params; + if (newBase !== 'wan') { + if (wanComponentSourceOnLeave) { + dispatch(wanComponentSourceSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + if (wanVaeModelOnLeave) { + dispatch(wanVaeModelSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + if (wanT5EncoderModelOnLeave) { + dispatch(wanT5EncoderModelSelected(null)); + modelsUpdatedDisabledOrCleared += 1; + } + } + if (newModel.base !== 'external' && SUPPORTS_REF_IMAGES_BASE_MODELS.includes(newModel.base)) { // Handle incompatible reference image models - switch to first compatible model, with some smart logic // to choose the best available model based on the new main model. @@ -377,6 +408,22 @@ export const addModelSelectedListener = (startAppListening: AppStartListening) = continue; } + if (newBase === 'wan') { + // Switching TO Wan - convert any non-wan configs to wan_reference_image. + // The Wan I2V graph builder consumes the first enabled ref image; T2V / + // TI2V variants ignore ref images entirely (matches Qwen-generate behavior). 
+ if (!isWanReferenceImageConfig(entity.config)) { + dispatch( + refImageConfigChanged({ + id: entity.id, + config: { ...initialWanReferenceImage }, + }) + ); + modelsUpdatedDisabledOrCleared += 1; + } + continue; + } + if (isFlux2ReferenceImageConfig(entity.config)) { // Switching AWAY from FLUX.2 - convert flux2_reference_image to the appropriate config type let newConfig; @@ -425,6 +472,29 @@ export const addModelSelectedListener = (startAppListening: AppStartListening) = continue; } + if (isWanReferenceImageConfig(entity.config)) { + // Switching AWAY from Wan - convert to the appropriate config type for the new base. + let newConfig; + if (newGlobalRefImageModel) { + const parsedModel = zModelIdentifierField.parse(newGlobalRefImageModel); + if (newModel.base === 'flux' && newModel.name.toLowerCase().includes('kontext')) { + newConfig = { ...initialFluxKontextReferenceImage, model: parsedModel }; + } else if (newGlobalRefImageModel.type === 'flux_redux') { + newConfig = { ...initialFLUXRedux, model: parsedModel }; + } else { + newConfig = { ...initialIPAdapter, model: parsedModel }; + if (parsedModel.base === 'flux') { + newConfig.clipVisionModel = 'ViT-L'; + } + } + } else { + newConfig = { ...initialIPAdapter }; + } + dispatch(refImageConfigChanged({ id: entity.id, config: newConfig })); + modelsUpdatedDisabledOrCleared += 1; + continue; + } + // Standard handling for non-flux2 configs const shouldUpdateModel = (entity.config.model && entity.config.model.base !== newBase) || @@ -480,6 +550,54 @@ export const addModelSelectedListener = (startAppListening: AppStartListening) = } } + // Wan 2.2: auto-default Component Source / standalone VAE / standalone T5 encoder + // when the new model is Wan. Runs on every Wan selection (including same-base + // switches like Diffusers Wan → GGUF Wan) so the user doesn't have to dig into + // Advanced when picking a GGUF main. 
Only sets fields that are currently empty + // and only does it for GGUF mains — Diffusers mains carry everything themselves. + if (newBase === 'wan') { + const modelConfigsResult = selectModelConfigsQuery(state); + const newModelConfig = modelConfigsResult.data + ? modelConfigsAdapterSelectors.selectById(modelConfigsResult.data, newModel.key) + : null; + const isNewModelGGUF = newModelConfig?.type === 'main' && newModelConfig.format === 'gguf_quantized'; + if (isNewModelGGUF) { + const { wanComponentSource, wanVaeModel, wanT5EncoderModel } = state.params; + // Match component source by variant family — A14B (t2v_a14b/i2v_a14b) and + // TI2V-5B use different VAEs (16-ch vs 48-ch); a mismatched component source + // would silently load the wrong VAE and produce broken images. The standalone + // VAE / encoder configs don't carry variant info, so those still go first-match. + const newVariant = + newModelConfig && 'variant' in newModelConfig && typeof newModelConfig.variant === 'string' + ? newModelConfig.variant + : null; + const a14bFamily = newVariant === 't2v_a14b' || newVariant === 'i2v_a14b'; + if (!wanComponentSource) { + const availableWanDiffusers = selectWanDiffusersModels(state); + const matchingFamily = availableWanDiffusers.find((m) => { + const v = 'variant' in m && typeof m.variant === 'string' ? m.variant : null; + return a14bFamily ? v === 't2v_a14b' || v === 'i2v_a14b' : v === newVariant; + }); + const diffusersModel = matchingFamily ?? 
availableWanDiffusers[0]; + if (diffusersModel) { + dispatch(wanComponentSourceSelected(zModelIdentifierField.parse(diffusersModel))); + } + } + if (!wanVaeModel) { + const vae = selectWanVAEModels(state)[0]; + if (vae) { + dispatch(wanVaeModelSelected(zModelIdentifierField.parse(vae))); + } + } + if (!wanT5EncoderModel) { + const encoder = selectWanT5EncoderModels(state)[0]; + if (encoder) { + dispatch(wanT5EncoderModelSelected(zModelIdentifierField.parse(encoder))); + } + } + } + } + // Handle FLUX.2 Klein model changes within the same base (different variants need different encoders) // Clear the Qwen3 encoder only when switching between different Klein variants // (e.g., klein_4b needs qwen3_4b, klein_9b needs qwen3_8b) diff --git a/invokeai/frontend/web/src/features/controlLayers/components/RefImage/RefImageSettings.tsx b/invokeai/frontend/web/src/features/controlLayers/components/RefImage/RefImageSettings.tsx index 54b345361d5..3edf9594b79 100644 --- a/invokeai/frontend/web/src/features/controlLayers/components/RefImage/RefImageSettings.tsx +++ b/invokeai/frontend/web/src/features/controlLayers/components/RefImage/RefImageSettings.tsx @@ -39,6 +39,7 @@ import { isFLUXReduxConfig, isIPAdapterConfig, isQwenImageReferenceImageConfig, + isWanReferenceImageConfig, } from 'features/controlLayers/store/types'; import type { SetGlobalReferenceImageDndTargetData } from 'features/dnd/dnd'; import { setGlobalReferenceImageDndTarget } from 'features/dnd/dnd'; @@ -129,9 +130,12 @@ const RefImageSettingsContent = memo(() => { const isFLUX = useAppSelector(selectIsFLUX); const isExternalModel = !!mainModelConfig && isExternalApiModelConfig(mainModelConfig); - // FLUX.2 Klein, Qwen Image Edit and external API models do not require a ref image model selection. + // FLUX.2 Klein, Qwen Image Edit, Wan 2.2 and external API models do not require a ref image model selection. 
const showModelSelector = - !isFlux2ReferenceImageConfig(config) && !isQwenImageReferenceImageConfig(config) && !isExternalModel; + !isFlux2ReferenceImageConfig(config) && + !isQwenImageReferenceImageConfig(config) && + !isWanReferenceImageConfig(config) && + !isExternalModel; return ( diff --git a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts index 2027ff41741..10603191df6 100644 --- a/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts +++ b/invokeai/frontend/web/src/features/controlLayers/hooks/addLayerHooks.ts @@ -32,6 +32,7 @@ import type { QwenImageReferenceImageConfig, RegionalGuidanceIPAdapterConfig, T2IAdapterConfig, + WanReferenceImageConfig, } from 'features/controlLayers/store/types'; import { initialControlNet, @@ -41,6 +42,7 @@ import { initialQwenImageReferenceImage, initialRegionalGuidanceIPAdapter, initialT2IAdapter, + initialWanReferenceImage, } from 'features/controlLayers/store/util'; import { zModelIdentifierField } from 'features/nodes/types/common'; import { useCallback } from 'react'; @@ -80,7 +82,12 @@ export const selectDefaultControlAdapter = createSelector( export const getDefaultRefImageConfig = ( getState: AppGetState -): IPAdapterConfig | FluxKontextReferenceImageConfig | Flux2ReferenceImageConfig | QwenImageReferenceImageConfig => { +): + | IPAdapterConfig + | FluxKontextReferenceImageConfig + | Flux2ReferenceImageConfig + | QwenImageReferenceImageConfig + | WanReferenceImageConfig => { const state = getState(); const mainModelConfig = selectMainModelConfig(state); @@ -98,6 +105,11 @@ export const getDefaultRefImageConfig = ( return deepClone(initialQwenImageReferenceImage); } + // Wan 2.2 I2V uses the main model's own VAE - no adapter model needed + if (base === 'wan') { + return deepClone(initialWanReferenceImage); + } + if (base === 'flux' && mainModelConfig?.name?.toLowerCase().includes('kontext')) { const 
config = deepClone(initialFluxKontextReferenceImage); config.model = zModelIdentifierField.parse(mainModelConfig); diff --git a/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts b/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts index a5200ef1ff8..21fecaf6437 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts @@ -291,6 +291,37 @@ const slice = createSlice({ qwenImageShiftChanged: (state, action: PayloadAction) => { state.qwenImageShift = action.payload; }, + wanTransformerLowNoiseSelected: (state, action: PayloadAction) => { + const result = zParamsState.shape.wanTransformerLowNoise.safeParse(action.payload); + if (!result.success) { + return; + } + state.wanTransformerLowNoise = result.data; + }, + wanComponentSourceSelected: (state, action: PayloadAction) => { + const result = zParamsState.shape.wanComponentSource.safeParse(action.payload); + if (!result.success) { + return; + } + state.wanComponentSource = result.data; + }, + wanVaeModelSelected: (state, action: PayloadAction) => { + const result = zParamsState.shape.wanVaeModel.safeParse(action.payload); + if (!result.success) { + return; + } + state.wanVaeModel = result.data; + }, + wanT5EncoderModelSelected: (state, action: PayloadAction<{ key: string; name: string; base: string } | null>) => { + const result = zParamsState.shape.wanT5EncoderModel.safeParse(action.payload); + if (!result.success) { + return; + } + state.wanT5EncoderModel = result.data; + }, + wanGuidanceScaleLowNoiseChanged: (state, action: PayloadAction) => { + state.wanGuidanceScaleLowNoise = action.payload; + }, vaePrecisionChanged: (state, action: PayloadAction) => { state.vaePrecision = action.payload; }, @@ -610,6 +641,11 @@ const resetState = (state: ParamsState): ParamsState => { newState.qwenImageQwenVLEncoderModel = oldState.qwenImageQwenVLEncoderModel; newState.qwenImageQuantization = 
oldState.qwenImageQuantization; newState.qwenImageShift = oldState.qwenImageShift; + newState.wanTransformerLowNoise = oldState.wanTransformerLowNoise; + newState.wanComponentSource = oldState.wanComponentSource; + newState.wanVaeModel = oldState.wanVaeModel; + newState.wanT5EncoderModel = oldState.wanT5EncoderModel; + newState.wanGuidanceScaleLowNoise = oldState.wanGuidanceScaleLowNoise; return newState; }; @@ -662,6 +698,11 @@ export const { qwenImageQwenVLEncoderModelSelected, qwenImageQuantizationChanged, qwenImageShiftChanged, + wanTransformerLowNoiseSelected, + wanComponentSourceSelected, + wanVaeModelSelected, + wanT5EncoderModelSelected, + wanGuidanceScaleLowNoiseChanged, setClipSkip, shouldUseCpuNoiseChanged, setColorCompensation, @@ -752,6 +793,7 @@ export const selectIsAnima = createParamsSelector((params) => params.model?.base export const selectIsFlux2 = createParamsSelector((params) => params.model?.base === 'flux2'); export const selectIsExternal = createParamsSelector((params) => params.model?.base === 'external'); export const selectIsQwenImage = createParamsSelector((params) => params.model?.base === 'qwen-image'); +export const selectIsWan = createParamsSelector((params) => params.model?.base === 'wan'); export const selectIsFluxKontext = createParamsSelector((params) => { if (params.model?.base === 'flux' && params.model?.name.toLowerCase().includes('kontext')) { return true; @@ -783,6 +825,11 @@ export const selectQwenImageVaeModel = createParamsSelector((params) => params.q export const selectQwenImageQwenVLEncoderModel = createParamsSelector((params) => params.qwenImageQwenVLEncoderModel); export const selectQwenImageQuantization = createParamsSelector((params) => params.qwenImageQuantization); export const selectQwenImageShift = createParamsSelector((params) => params.qwenImageShift); +export const selectWanTransformerLowNoise = createParamsSelector((params) => params.wanTransformerLowNoise); +export const selectWanComponentSource = 
createParamsSelector((params) => params.wanComponentSource); +export const selectWanVaeModel = createParamsSelector((params) => params.wanVaeModel); +export const selectWanT5EncoderModel = createParamsSelector((params) => params.wanT5EncoderModel); +export const selectWanGuidanceScaleLowNoise = createParamsSelector((params) => params.wanGuidanceScaleLowNoise); export const selectCFGScale = createParamsSelector((params) => params.cfgScale); export const selectGuidance = createParamsSelector((params) => params.guidance); @@ -842,7 +889,16 @@ export const selectModelSupportsRefImages = createSelector(selectModel, selectMo if (model.base === 'external') { return false; } - return SUPPORTS_REF_IMAGES_BASE_MODELS.includes(model.base); + if (!SUPPORTS_REF_IMAGES_BASE_MODELS.includes(model.base)) { + return false; + } + // Wan: only the I2V variant of A14B consumes a reference image. T2V and + // TI2V-5B ignore ref images, so hide the panel for those. + if (model.base === 'wan') { + const variant = modelConfig && 'variant' in modelConfig ? 
modelConfig.variant : null; + return variant === 'i2v_a14b'; + } + return true; }); export const selectModelSupportsOptimizedDenoising = createSelector( selectModel, diff --git a/invokeai/frontend/web/src/features/controlLayers/store/refImagesSlice.ts b/invokeai/frontend/web/src/features/controlLayers/store/refImagesSlice.ts index b7026b586a8..6c364e51e88 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/refImagesSlice.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/refImagesSlice.ts @@ -23,6 +23,7 @@ import { isFLUXReduxConfig, isIPAdapterConfig, isQwenImageReferenceImageConfig, + isWanReferenceImageConfig, zRefImagesState, } from './types'; import { getReferenceImageState, initialFluxKontextReferenceImage, initialFLUXRedux, initialIPAdapter } from './util'; @@ -144,8 +145,12 @@ const slice = createSlice({ return; } - // FLUX.2 and Qwen Image Edit reference images don't have a model field - they use built-in support - if (isFlux2ReferenceImageConfig(entity.config) || isQwenImageReferenceImageConfig(entity.config)) { + // FLUX.2, Qwen Image Edit and Wan reference images don't have a model field - they use built-in support + if ( + isFlux2ReferenceImageConfig(entity.config) || + isQwenImageReferenceImageConfig(entity.config) || + isWanReferenceImageConfig(entity.config) + ) { return; } diff --git a/invokeai/frontend/web/src/features/controlLayers/store/types.ts b/invokeai/frontend/web/src/features/controlLayers/store/types.ts index 7a7ebeade71..99f20ec4e78 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/types.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/types.ts @@ -395,6 +395,15 @@ const zQwenImageReferenceImageConfig = z.object({ }); export type QwenImageReferenceImageConfig = z.infer; +// Wan 2.2 I2V uses the model's own VAE to encode a single reference image - +// no separate adapter model needed. Only consumed by the I2V variant of Wan +// 2.2 (A14B). 
T2V / TI2V variants ignore the ref image at graph build time. +const zWanReferenceImageConfig = z.object({ + type: z.literal('wan_reference_image'), + image: zCroppableImageWithDims.nullable(), +}); +export type WanReferenceImageConfig = z.infer; + const zCanvasEntityBase = z.object({ id: zId, name: zName, @@ -411,6 +420,7 @@ export const zRefImageState = z.object({ zFluxKontextReferenceImageConfig, zFlux2ReferenceImageConfig, zQwenImageReferenceImageConfig, + zWanReferenceImageConfig, ]), }); export type RefImageState = z.infer; @@ -432,6 +442,9 @@ export const isQwenImageReferenceImageConfig = ( config: RefImageState['config'] ): config is QwenImageReferenceImageConfig => config.type === 'qwen_image_reference_image'; +export const isWanReferenceImageConfig = (config: RefImageState['config']): config is WanReferenceImageConfig => + config.type === 'wan_reference_image'; + const zFillStyle = z.enum(['solid', 'grid', 'crosshatch', 'diagonal', 'horizontal', 'vertical']); export type FillStyle = z.infer; export const isFillStyle = (v: unknown): v is FillStyle => zFillStyle.safeParse(v).success; @@ -820,6 +833,13 @@ export const zParamsState = z.object({ qwenImageQwenVLEncoderModel: zModelIdentifierField.nullable(), // Optional: Standalone Qwen2.5-VL encoder qwenImageQuantization: z.enum(['none', 'int8', 'nf4']), // BitsAndBytes quantization for Qwen VL encoder qwenImageShift: z.number().nullable(), // Sigma schedule shift override (e.g. 3.0 for Lightning LoRAs) + // Wan 2.2 model components — A14B GGUF needs a paired second-expert transformer + // plus a Diffusers source for VAE/T5 unless standalone VAE/encoder models are wired. 
+ wanTransformerLowNoise: zParameterModel.nullable(), // A14B GGUF only: second-expert transformer + wanComponentSource: zParameterModel.nullable(), // Diffusers Wan model providing VAE + UMT5-XXL + wanVaeModel: zParameterVAEModel.nullable(), // Optional: Standalone Wan VAE checkpoint + wanT5EncoderModel: zModelIdentifierField.nullable(), // Optional: Standalone UMT5-XXL encoder + wanGuidanceScaleLowNoise: z.number().nullable(), // Optional: separate CFG for low-noise expert (A14B). null = same as primary // Z-Image Seed Variance Enhancer settings zImageSeedVarianceEnabled: z.boolean(), zImageSeedVarianceStrength: z.number().min(0).max(2), @@ -905,6 +925,11 @@ export const getInitialParamsState = (): ParamsState => ({ qwenImageQwenVLEncoderModel: null, qwenImageQuantization: 'none' as const, qwenImageShift: null, + wanTransformerLowNoise: null, + wanComponentSource: null, + wanVaeModel: null, + wanT5EncoderModel: null, + wanGuidanceScaleLowNoise: null, zImageSeedVarianceEnabled: false, zImageSeedVarianceStrength: 0.1, zImageSeedVarianceRandomizePercent: 50, diff --git a/invokeai/frontend/web/src/features/controlLayers/store/util.ts b/invokeai/frontend/web/src/features/controlLayers/store/util.ts index c8cb49dde3f..9f0fd779e70 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/util.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/util.ts @@ -21,6 +21,7 @@ import type { RegionalGuidanceIPAdapterConfig, RgbColor, T2IAdapterConfig, + WanReferenceImageConfig, ZImageControlConfig, } from 'features/controlLayers/store/types'; import type { ImageDTO } from 'services/api/types'; @@ -122,6 +123,10 @@ export const initialQwenImageReferenceImage: QwenImageReferenceImageConfig = { type: 'qwen_image_reference_image', image: null, }; +export const initialWanReferenceImage: WanReferenceImageConfig = { + type: 'wan_reference_image', + image: null, +}; export const initialT2IAdapter: T2IAdapterConfig = { type: 't2i_adapter', model: null, diff --git 
a/invokeai/frontend/web/src/features/controlLayers/store/validators.ts b/invokeai/frontend/web/src/features/controlLayers/store/validators.ts index db5ad4f7662..c1ea2b4797c 100644 --- a/invokeai/frontend/web/src/features/controlLayers/store/validators.ts +++ b/invokeai/frontend/web/src/features/controlLayers/store/validators.ts @@ -147,8 +147,12 @@ export const getGlobalReferenceImageWarnings = ( const { config } = entity; - // FLUX.2 and Qwen Image Edit reference images don't require a model - it's built-in - if (config.type !== 'flux2_reference_image' && config.type !== 'qwen_image_reference_image') { + // FLUX.2, Qwen Image Edit and Wan reference images don't require a model - it's built-in + if ( + config.type !== 'flux2_reference_image' && + config.type !== 'qwen_image_reference_image' && + config.type !== 'wan_reference_image' + ) { if (!('model' in config) || !config.model) { // No model selected warnings.push(WARNINGS.IP_ADAPTER_NO_MODEL_SELECTED); @@ -159,8 +163,10 @@ export const getGlobalReferenceImageWarnings = ( } if (!entity.config.image) { - // No image selected - for Qwen Image Edit, an image is optional (txt2img works without one) - if (config.type !== 'qwen_image_reference_image') { + // No image selected - for Qwen Image Edit and Wan, an image is optional at the + // entity level. Wan I2V *requires* one but enforcement happens at graph-build + // time so the warning doesn't fire on T2V/TI2V variants that ignore ref images. 
+ if (config.type !== 'qwen_image_reference_image' && config.type !== 'wan_reference_image') { warnings.push(WARNINGS.IP_ADAPTER_NO_IMAGE_SELECTED); } } diff --git a/invokeai/frontend/web/src/features/lora/components/LoRASelect.tsx b/invokeai/frontend/web/src/features/lora/components/LoRASelect.tsx index 2d043c9c816..5394bb67a65 100644 --- a/invokeai/frontend/web/src/features/lora/components/LoRASelect.tsx +++ b/invokeai/frontend/web/src/features/lora/components/LoRASelect.tsx @@ -44,6 +44,25 @@ const LoRASelect = () => { ) { return model.variant === currentMainModelConfig.variant; } + // For Wan: A14B (t2v_a14b/i2v_a14b) and TI2V-5B have different inner + // dims (5120 vs 3072) — applying the wrong variant crashes the layer + // patcher. LoRAs whose variant couldn't be detected (null) are kept + // so we don't silently hide ambiguous ones. + if ( + currentMainModelConfig?.base === 'wan' && + 'variant' in currentMainModelConfig && + currentMainModelConfig.variant && + 'variant' in model && + model.variant + ) { + const expected = + currentMainModelConfig.variant === 't2v_a14b' || currentMainModelConfig.variant === 'i2v_a14b' + ? 'a14b' + : currentMainModelConfig.variant === 'ti2v_5b' + ? 
'5b' + : null; + return expected === null || model.variant === expected; + } return true; }); }, [modelConfigs, currentBaseModel, currentMainModelConfig]); diff --git a/invokeai/frontend/web/src/features/modelManagerV2/models.ts b/invokeai/frontend/web/src/features/modelManagerV2/models.ts index 9928a2ce66a..a154aa67ed7 100644 --- a/invokeai/frontend/web/src/features/modelManagerV2/models.ts +++ b/invokeai/frontend/web/src/features/modelManagerV2/models.ts @@ -264,6 +264,8 @@ export const MODEL_VARIANT_TO_LONG_NAME: Record = { t2v_a14b: 'Wan 2.2 T2V A14B', i2v_a14b: 'Wan 2.2 I2V A14B', ti2v_5b: 'Wan 2.2 TI2V 5B', + a14b: 'Wan 2.2 A14B LoRA', + '5b': 'Wan 2.2 5B LoRA', qwen3_4b: 'Qwen3 4B', qwen3_8b: 'Qwen3 8B', qwen3_06b: 'Qwen3 0.6B', @@ -292,7 +294,7 @@ export const MODEL_FORMAT_TO_LONG_NAME: Record = { export const SUPPORTS_OPTIMIZED_DENOISING_BASE_MODELS: BaseModelType[] = ['flux', 'sd-3', 'z-image']; -export const SUPPORTS_REF_IMAGES_BASE_MODELS: BaseModelType[] = ['sd-1', 'sdxl', 'flux', 'flux2', 'qwen-image']; +export const SUPPORTS_REF_IMAGES_BASE_MODELS: BaseModelType[] = ['sd-1', 'sdxl', 'flux', 'flux2', 'qwen-image', 'wan']; export const SUPPORTS_NEGATIVE_PROMPT_BASE_MODELS: BaseModelType[] = [ 'sd-1', @@ -303,4 +305,5 @@ export const SUPPORTS_NEGATIVE_PROMPT_BASE_MODELS: BaseModelType[] = [ 'sd-3', 'z-image', 'anima', + 'wan', ]; diff --git a/invokeai/frontend/web/src/features/nodes/types/common.ts b/invokeai/frontend/web/src/features/nodes/types/common.ts index 62055b76e49..62e4aad69f2 100644 --- a/invokeai/frontend/web/src/features/nodes/types/common.ts +++ b/invokeai/frontend/web/src/features/nodes/types/common.ts @@ -168,6 +168,9 @@ export const zFlux2VariantType = z.enum(['klein_4b', 'klein_4b_base', 'klein_9b' export const zZImageVariantType = z.enum(['turbo', 'zbase']); const zQwenImageVariantType = z.enum(['generate', 'edit']); const zWanVariantType = z.enum(['t2v_a14b', 'i2v_a14b', 'ti2v_5b']); +/** Wan LoRA variant — identifies which model 
FAMILY (inner_dim) a LoRA + * targets. A14B = inner_dim 5120 (both T2V and I2V), 5B = inner_dim 3072. */ +const zWanLoRAVariantType = z.enum(['a14b', '5b']); export const zQwen3VariantType = z.enum(['qwen3_4b', 'qwen3_8b', 'qwen3_06b']); export const zAnyModelVariant = z.union([ zModelVariantType, @@ -177,6 +180,7 @@ export const zAnyModelVariant = z.union([ zZImageVariantType, zQwenImageVariantType, zWanVariantType, + zWanLoRAVariantType, zQwen3VariantType, ]); export type AnyModelVariant = z.infer; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts index f17ff970f27..103c139b723 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addImageToImage.ts @@ -30,6 +30,7 @@ type AddImageToImageArg = { | 'qwen_image_i2l' | 'z_image_i2l' | 'anima_i2l' + | 'wan_i2l' >; noise?: Invocation<'noise'>; denoise: Invocation; @@ -56,6 +57,7 @@ export const addImageToImage = async ({ | 'qwen_image_l2i' | 'z_image_l2i' | 'anima_l2i' + | 'wan_l2i' > > => { const { denoising_start, denoising_end } = getDenoisingStartAndEnd(state); @@ -71,7 +73,8 @@ export const addImageToImage = async ({ denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || denoise.type === 'z_image_denoise' || - denoise.type === 'anima_denoise' + denoise.type === 'anima_denoise' || + denoise.type === 'wan_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts index fa01db67e60..03aa0f78a60 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addInpaint.ts @@ -33,6 +33,7 
@@ type AddInpaintArg = { | 'qwen_image_i2l' | 'z_image_i2l' | 'anima_i2l' + | 'wan_i2l' >; noise?: Invocation<'noise'>; denoise: Invocation; @@ -69,7 +70,8 @@ export const addInpaint = async ({ denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || denoise.type === 'z_image_denoise' || - denoise.type === 'anima_denoise' + denoise.type === 'anima_denoise' || + denoise.type === 'wan_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts index 0c57087eaad..79d38075e35 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addOutpaint.ts @@ -62,7 +62,8 @@ export const addOutpaint = async ({ denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || denoise.type === 'z_image_denoise' || - denoise.type === 'anima_denoise' + denoise.type === 'anima_denoise' || + denoise.type === 'wan_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts index 06ece522da5..9e5d8aec82e 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addTextToImage.ts @@ -31,6 +31,7 @@ export const addTextToImage = ({ | 'qwen_image_l2i' | 'z_image_l2i' | 'anima_l2i' + | 'wan_l2i' > => { denoise.denoising_start = 0; denoise.denoising_end = 1; @@ -44,7 +45,8 @@ export const addTextToImage = ({ denoise.type === 'flux2_denoise' || denoise.type === 'sd3_denoise' || denoise.type === 'z_image_denoise' || - denoise.type === 'anima_denoise' + denoise.type === 'anima_denoise' || 
+ denoise.type === 'wan_denoise' ) { denoise.width = scaledSize.width; denoise.height = scaledSize.height; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts new file mode 100644 index 00000000000..9b7bacccff5 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts @@ -0,0 +1,132 @@ +import { logger } from 'app/logging/logger'; +import type { RootState } from 'app/store/store'; +import { getPrefixedId } from 'features/controlLayers/konva/util'; +import { fetchModelConfigWithTypeGuard } from 'features/metadata/util/modelFetchingHelpers'; +import { zModelIdentifierField } from 'features/nodes/types/common'; +import type { Graph } from 'features/nodes/util/graph/generation/Graph'; +import type { Invocation, MainModelConfig, S } from 'services/api/types'; +import { isWanLoRAModelConfig } from 'services/api/types'; + +const log = logger('system'); + +/** Map a Wan main-model variant onto the LoRA-variant string used by the + * probe. A14B (both T2V and I2V) uses inner_dim=5120 → "a14b". TI2V-5B + * uses inner_dim=3072 → "5b". */ +const mainVariantToLoRAVariant = (mainVariant: string | null | undefined): 'a14b' | '5b' | null => { + if (mainVariant === 't2v_a14b' || mainVariant === 'i2v_a14b') { + return 'a14b'; + } + if (mainVariant === 'ti2v_5b') { + return '5b'; + } + return null; +}; + +/** + * Add Wan 2.2 LoRA wiring to the graph between the model loader and the + * denoise node. + * + * Each enabled Wan LoRA becomes a ``lora_selector`` feeding a ``collect`` + * node, which fans into a ``wan_lora_collection_loader``. 
The collection + * loader rewrites the model loader's transformer output into a + * ``WanTransformerField`` with the appropriate ``loras`` / + * ``loras_low_noise`` lists populated based on each LoRA's recorded + * ``expert`` tag — high-noise LoRAs land on the primary list, low-noise + * LoRAs on ``loras_low_noise``, and untagged LoRAs are applied to both + * experts. The dual-expert routing happens entirely on the backend; the + * FE just hands the loader the bag of LoRAs. + * + * Variant filter: each LoRA's full config carries a ``variant`` field + * (``"a14b"`` / ``"5b"`` / null) set by the backend probe from the LoRA's + * inner-dim. A14B LoRAs have 5120-dim weights and can't be reshaped to + * fit a TI2V-5B main (3072-dim) — the layer patcher would crash with a + * tensor-size error. We fetch each LoRA's config and skip mismatches, + * logging a warning so the user can tell why a LoRA they enabled didn't + * take effect. + */ +export const addWanLoRAs = async ( + state: RootState, + g: Graph, + denoise: Invocation<'wan_denoise'>, + modelLoader: Invocation<'wan_model_loader'>, + mainConfig: MainModelConfig +): Promise => { + // MainModelConfig is the union of all main-config schemas; ``variant`` is + // only present on the discriminated members (Wan, FLUX, ZImage, etc.). + // Read it defensively rather than relying on TypeScript narrowing through + // a typed parameter. + const mainVariant = 'variant' in mainConfig ? ((mainConfig as { variant?: string | null }).variant ?? null) : null; + const expectedLoRAVariant = mainVariantToLoRAVariant(mainVariant); + const candidateLoRAs = state.loras.loras.filter((l) => l.isEnabled && l.model.base === 'wan'); + + if (candidateLoRAs.length === 0) { + return; + } + + // Fetch each LoRA's config and filter by variant compatibility. LoRAs + // with ``variant === null`` are kept (the probe couldn't identify them; + // best to try rather than silently drop). 
+ const compatibleLoRAs: typeof candidateLoRAs = []; + for (const lora of candidateLoRAs) { + try { + const cfg = await fetchModelConfigWithTypeGuard(lora.model.key, isWanLoRAModelConfig); + const loraVariant = cfg.variant ?? null; + if (loraVariant !== null && expectedLoRAVariant !== null && loraVariant !== expectedLoRAVariant) { + log.warn( + { lora: lora.model.name, loraVariant, mainVariant }, + `Skipping Wan LoRA "${lora.model.name}" — its variant (${loraVariant}) is incompatible with ` + + `the selected main model variant (${mainVariant}). ` + + `A14B and TI2V-5B have different inner dims and LoRA weights aren't interchangeable.` + ); + continue; + } + compatibleLoRAs.push(lora); + } catch (e) { + // If the config can't be fetched, fall back to including the LoRA — + // the backend will produce a clearer error if it really doesn't fit. + log.warn({ lora: lora.model.name, error: String(e) }, `Failed to read variant for Wan LoRA "${lora.model.name}"`); + compatibleLoRAs.push(lora); + } + } + + if (compatibleLoRAs.length === 0) { + return; + } + + const loraMetadata: S['LoRAMetadataField'][] = []; + + const loraCollector = g.addNode({ + id: getPrefixedId('lora_collector'), + type: 'collect', + }); + const loraCollectionLoader = g.addNode({ + type: 'wan_lora_collection_loader', + id: getPrefixedId('wan_lora_collection_loader'), + }); + + g.addEdge(loraCollector, 'collection', loraCollectionLoader, 'loras'); + g.addEdge(modelLoader, 'transformer', loraCollectionLoader, 'transformer'); + g.deleteEdgesTo(denoise, ['transformer']); + g.addEdge(loraCollectionLoader, 'transformer', denoise, 'transformer'); + + for (const lora of compatibleLoRAs) { + const { weight } = lora; + const parsedModel = zModelIdentifierField.parse(lora.model); + + const loraSelector = g.addNode({ + type: 'lora_selector', + id: getPrefixedId('lora_selector'), + lora: parsedModel, + weight, + }); + + loraMetadata.push({ + model: parsedModel, + weight, + }); + + g.addEdge(loraSelector, 'lora', 
loraCollector, 'item'); + } + + g.upsertMetadata({ loras: loraMetadata }); +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts new file mode 100644 index 00000000000..74b1da57571 --- /dev/null +++ b/invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts @@ -0,0 +1,277 @@ +import { logger } from 'app/logging/logger'; +import { getPrefixedId } from 'features/controlLayers/konva/util'; +import { selectMainModelConfig, selectParamsSlice } from 'features/controlLayers/store/paramsSlice'; +import { selectRefImagesSlice } from 'features/controlLayers/store/refImagesSlice'; +import { selectCanvasMetadata } from 'features/controlLayers/store/selectors'; +import { isWanReferenceImageConfig } from 'features/controlLayers/store/types'; +import { getGlobalReferenceImageWarnings } from 'features/controlLayers/store/validators'; +import { fetchModelConfigWithTypeGuard } from 'features/metadata/util/modelFetchingHelpers'; +import { zImageField } from 'features/nodes/types/common'; +import { addImageToImage } from 'features/nodes/util/graph/generation/addImageToImage'; +import { addInpaint } from 'features/nodes/util/graph/generation/addInpaint'; +import { addNSFWChecker } from 'features/nodes/util/graph/generation/addNSFWChecker'; +import { addOutpaint } from 'features/nodes/util/graph/generation/addOutpaint'; +import { addTextToImage } from 'features/nodes/util/graph/generation/addTextToImage'; +import { addWanLoRAs } from 'features/nodes/util/graph/generation/addWanLoRAs'; +import { addWatermarker } from 'features/nodes/util/graph/generation/addWatermarker'; +import { Graph } from 'features/nodes/util/graph/generation/Graph'; +import { selectCanvasOutputFields, selectPresetModifiedPrompts } from 'features/nodes/util/graph/graphBuilderUtils'; +import type { GraphBuilderArg, GraphBuilderReturn, ImageOutputNodes } from 
'features/nodes/util/graph/types'; +import { selectActiveTab } from 'features/ui/store/uiSelectors'; +import type { Invocation } from 'services/api/types'; +import { isNonRefinerMainModelConfig } from 'services/api/types'; +import type { Equals } from 'tsafe'; +import { assert } from 'tsafe'; + +const log = logger('system'); + +/** + * Build a graph for Wan 2.2 image generation. + * + * Optional component selections from the params slice (low-noise expert, + * component source, standalone VAE / T5 encoder) are forwarded to + * wan_model_loader alongside the main model. Supported flows: + * - txt2img, img2img, inpaint and outpaint (canvas modes add a wan_i2l node) + * - I2V (i2v_a14b variant only): txt2img plus a reference image encoded + * by wan_ref_image_encoder and wired into the denoise node + * - LoRAs, inserted between the model loader and denoise by addWanLoRAs + * - Optional NSFW checker and watermarker applied to the output image + */ +export const buildWanGraph = async (arg: GraphBuilderArg): Promise => { + const { generationMode, state, manager } = arg; + + log.debug({ generationMode, manager: manager?.id }, 'Building Wan 2.2 graph'); + + const model = selectMainModelConfig(state); + assert(model, 'No model selected'); + assert(model.base === 'wan', 'Selected model is not a Wan model'); + + // Fetch the full config early so we can branch on variant. I2V flows + // encode a reference image with wan_ref_image_encoder instead of using + // wan_i2l, so the variant has to be known before we choose a graph + // shape — not after.
+ const modelConfig = await fetchModelConfigWithTypeGuard(model.key, isNonRefinerMainModelConfig); + assert(modelConfig.base === 'wan'); + const isI2V = modelConfig.variant === 'i2v_a14b'; + + const params = selectParamsSlice(state); + const { cfgScale: cfg_scale, steps } = params; + const prompts = selectPresetModifiedPrompts(state); + + const g = new Graph(getPrefixedId('wan_graph')); + + const modelLoader = g.addNode({ + type: 'wan_model_loader', + id: getPrefixedId('wan_model_loader'), + model, + transformer_low_noise_model: params.wanTransformerLowNoise ?? undefined, + component_source: params.wanComponentSource ?? undefined, + vae_model: params.wanVaeModel ?? undefined, + wan_t5_encoder_model: params.wanT5EncoderModel ?? undefined, + }); + + const positivePrompt = g.addNode({ + id: getPrefixedId('positive_prompt'), + type: 'string', + }); + const posCond = g.addNode({ + type: 'wan_text_encoder', + id: getPrefixedId('pos_prompt'), + }); + + // CFG is mathematically inactive at scale 1.0 — skip the negative branch + // entirely so each step runs only one forward pass. + const useCfg = cfg_scale > 1; + const negCond = useCfg + ? g.addNode({ + type: 'wan_text_encoder', + id: getPrefixedId('neg_prompt'), + prompt: prompts.negative || ' ', + }) + : null; + + const seed = g.addNode({ + id: getPrefixedId('seed'), + type: 'integer', + }); + + const denoise = g.addNode({ + type: 'wan_denoise', + id: getPrefixedId('denoise_latents'), + guidance_scale: cfg_scale, + // The denoise node treats values < 1.0 (including the FE's default 0) as + // "fall back to the primary guidance_scale". Forward null/undefined when + // the user hasn't set an explicit low-noise CFG so the backend handles it. + guidance_scale_low_noise: params.wanGuidanceScaleLowNoise ?? 
undefined, + steps, + }); + + const l2i = g.addNode({ + type: 'wan_l2i', + id: getPrefixedId('l2i'), + }); + + g.addEdge(modelLoader, 'transformer', denoise, 'transformer'); + g.addEdge(modelLoader, 'wan_t5_encoder', posCond, 'wan_t5_encoder'); + g.addEdge(modelLoader, 'vae', l2i, 'vae'); + + g.addEdge(positivePrompt, 'value', posCond, 'prompt'); + g.addEdge(posCond, 'conditioning', denoise, 'positive_conditioning'); + + if (negCond) { + g.addEdge(modelLoader, 'wan_t5_encoder', negCond, 'wan_t5_encoder'); + g.addEdge(negCond, 'conditioning', denoise, 'negative_conditioning'); + } + + g.addEdge(seed, 'value', denoise, 'seed'); + g.addEdge(denoise, 'latents', l2i, 'latents'); + + // Wan LoRAs (high-noise, low-noise, and untagged). The collection loader + // is inserted between modelLoader and denoise; both expert routing and + // dual-list population happen on the backend based on each LoRA's + // recorded ``expert`` tag. The helper also filters out variant-incompatible + // LoRAs (e.g. A14B Lightning on a TI2V-5B main) so the layer patcher + // doesn't crash on a shape mismatch. + await addWanLoRAs(state, g, denoise, modelLoader, modelConfig); + + g.upsertMetadata({ + cfg_scale, + negative_prompt: prompts.negative, + model: Graph.getModelMetadataField(modelConfig), + steps, + wan_transformer_low_noise: params.wanTransformerLowNoise, + wan_component_source: params.wanComponentSource, + wan_vae_model: params.wanVaeModel, + wan_t5_encoder_model: params.wanT5EncoderModel, + wan_guidance_scale_low_noise: params.wanGuidanceScaleLowNoise, + }); + g.addEdgeToMetadata(seed, 'value', 'seed'); + g.addEdgeToMetadata(positivePrompt, 'value', 'positive_prompt'); + + let canvasOutput: Invocation = l2i; + + // I2V variants take a reference image from the global Reference Images + // panel (same UX as Qwen Image Edit / FLUX.2 Klein). 
The image is encoded + // by the model's own VAE and concatenated to the noise latents along the + // channel dim each step (transformer in_channels=36 on I2V). Canvas modes + // (img2img/inpaint/outpaint) don't apply to I2V — the ref image fully + // replaces what a raster layer used to provide. + if (isI2V) { + assert( + generationMode === 'txt2img', + 'Wan 2.2 I2V only supports text-to-image with a reference image. ' + + 'Use a T2V or TI2V model for canvas img2img / inpaint / outpaint.' + ); + + const wanRefEntity = selectRefImagesSlice(state).entities.find( + (entity) => + entity.isEnabled && + isWanReferenceImageConfig(entity.config) && + entity.config.image !== null && + getGlobalReferenceImageWarnings(entity, modelConfig).length === 0 + ); + assert( + wanRefEntity && isWanReferenceImageConfig(wanRefEntity.config) && wanRefEntity.config.image, + 'Wan 2.2 I2V requires a reference image. Add one in the Reference Images panel.' + ); + + canvasOutput = addTextToImage({ g, state, denoise, l2i }); + const refImageField = zImageField.parse( + wanRefEntity.config.image.crop?.image ?? 
wanRefEntity.config.image.original.image + ); + const refEncoder = g.addNode({ + type: 'wan_ref_image_encoder', + id: getPrefixedId('wan_ref_encoder'), + image: refImageField, + width: denoise.width, + height: denoise.height, + }); + g.addEdge(modelLoader, 'vae', refEncoder, 'vae'); + g.addEdge(refEncoder, 'ref_image', denoise, 'ref_image'); + + g.upsertMetadata({ generation_mode: 'wan_i2v' }); + } else if (generationMode === 'txt2img') { + canvasOutput = addTextToImage({ + g, + state, + denoise, + l2i, + }); + g.upsertMetadata({ generation_mode: 'wan_txt2img' }); + } else if (generationMode === 'img2img') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'wan_i2l', + id: getPrefixedId('wan_i2l'), + }); + canvasOutput = await addImageToImage({ + g, + state, + manager, + denoise, + l2i, + i2l, + vaeSource: modelLoader, + }); + g.upsertMetadata({ generation_mode: 'wan_img2img' }); + } else if (generationMode === 'inpaint') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'wan_i2l', + id: getPrefixedId('wan_i2l'), + }); + canvasOutput = await addInpaint({ + g, + state, + manager, + l2i, + i2l, + denoise, + vaeSource: modelLoader, + modelLoader, + seed, + }); + g.upsertMetadata({ generation_mode: 'wan_inpaint' }); + } else if (generationMode === 'outpaint') { + assert(manager !== null); + const i2l = g.addNode({ + type: 'wan_i2l', + id: getPrefixedId('wan_i2l'), + }); + canvasOutput = await addOutpaint({ + g, + state, + manager, + l2i, + i2l, + denoise, + vaeSource: modelLoader, + modelLoader, + seed, + }); + g.upsertMetadata({ generation_mode: 'wan_outpaint' }); + } else { + assert>(false); + } + + if (state.system.shouldUseNSFWChecker) { + canvasOutput = addNSFWChecker(g, canvasOutput); + } + if (state.system.shouldUseWatermarker) { + canvasOutput = addWatermarker(g, canvasOutput); + } + + g.updateNode(canvasOutput, selectCanvasOutputFields(state)); + + if (selectActiveTab(state) === 'canvas') { + 
g.upsertMetadata(selectCanvasMetadata(state)); + } + + g.setMetadataReceivingNode(canvasOutput); + + return { + g, + seed, + positivePrompt, + }; +}; diff --git a/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts b/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts index 28aa74db5ec..9d5f165ef78 100644 --- a/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts +++ b/invokeai/frontend/web/src/features/nodes/util/graph/graphBuilderUtils.ts @@ -217,7 +217,8 @@ export const isMainModelWithoutUnet = (modelLoader: Invocation { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const value = useAppSelector(selectWanTransformerLowNoise); + const [modelConfigs, { isLoading }] = useWanGGUFLowNoiseModels(); + + const _onChange = useCallback( + (model: MainModelConfig | null) => { + if (model) { + dispatch(wanTransformerLowNoiseSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(wanTransformerLowNoiseSelected(null)); + } + }, + [dispatch] + ); + + const { + options, + value: comboValue, + onChange, + noOptionsMessage, + } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: value, + isLoading, + }); + + return ( + + {t('modelManager.wanTransformerLowNoise')} + + + ); +}); + +ParamWanTransformerLowNoiseSelect.displayName = 'ParamWanTransformerLowNoiseSelect'; + +/** + * Wan 2.2 Component Source Select + * + * Picks a Diffusers Wan model whose VAE and UMT5-XXL encoder will be extracted + * for the workflow. Required when the main Wan model is a GGUF (since GGUF + * mains are transformer-only). Ignored for Diffusers mains, which carry their + * own VAE and encoder. 
+ */ +const ParamWanComponentSourceSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const value = useAppSelector(selectWanComponentSource); + const [modelConfigs, { isLoading }] = useWanDiffusersModels(); + + const _onChange = useCallback( + (model: MainModelConfig | null) => { + if (model) { + dispatch(wanComponentSourceSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(wanComponentSourceSelected(null)); + } + }, + [dispatch] + ); + + const { + options, + value: comboValue, + onChange, + noOptionsMessage, + } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: value, + isLoading, + }); + + return ( + + {t('modelManager.wanComponentSource')} + + + ); +}); + +ParamWanComponentSourceSelect.displayName = 'ParamWanComponentSourceSelect'; + +/** + * Wan 2.2 Standalone VAE Select + * + * Selects a standalone Wan VAE checkpoint. When set, this overrides the VAE + * provided by the Component Source (or the main Diffusers model). + */ +const ParamWanVaeModelSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const vaeModel = useAppSelector(selectWanVaeModel); + const [modelConfigs, { isLoading }] = useWanVAEModels(); + + const _onChange = useCallback( + (model: VAEModelConfig | null) => { + if (model) { + dispatch(wanVaeModelSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(wanVaeModelSelected(null)); + } + }, + [dispatch] + ); + + const { options, value, onChange, noOptionsMessage } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: vaeModel, + isLoading, + }); + + return ( + + {t('modelManager.wanVae')} + + + ); +}); + +ParamWanVaeModelSelect.displayName = 'ParamWanVaeModelSelect'; + +/** + * Wan 2.2 Standalone UMT5-XXL Encoder Select + * + * Selects a standalone UMT5-XXL encoder. When set, this overrides the encoder + * provided by the Component Source (or the main Diffusers model). 
+ */ +const ParamWanT5EncoderModelSelect = memo(() => { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + const encoderModel = useAppSelector(selectWanT5EncoderModel); + const [modelConfigs, { isLoading }] = useWanT5EncoderModels(); + + const _onChange = useCallback( + (model: WanT5EncoderModelConfig | null) => { + if (model) { + dispatch(wanT5EncoderModelSelected(zModelIdentifierField.parse(model))); + } else { + dispatch(wanT5EncoderModelSelected(null)); + } + }, + [dispatch] + ); + + const { options, value, onChange, noOptionsMessage } = useModelCombobox({ + modelConfigs, + onChange: _onChange, + selectedModel: encoderModel, + isLoading, + }); + + return ( + + {t('modelManager.wanT5Encoder')} + + + ); +}); + +ParamWanT5EncoderModelSelect.displayName = 'ParamWanT5EncoderModelSelect'; + +/** + * Combined Wan 2.2 component selectors (low-noise transformer + standalone + * VAE + standalone T5 encoder + Component Source). + * + * Only relevant for GGUF workflows. Diffusers Wan mains have everything + * built in; TI2V-5B is a single-expert model with no low-noise pair. Showing + * these always is fine since they're optional — but the AdvancedSettingsAccordion + * still gates the render on `isWan` so they don't pollute other tabs. 
+ */ +const ParamWanModelSelects = () => { + return ( + <> + + + + + + ); +}; + +export default memo(ParamWanModelSelects); diff --git a/invokeai/frontend/web/src/features/parameters/components/Core/ParamWanGuidanceScaleLowNoise.tsx b/invokeai/frontend/web/src/features/parameters/components/Core/ParamWanGuidanceScaleLowNoise.tsx new file mode 100644 index 00000000000..5d3f7b3e34a --- /dev/null +++ b/invokeai/frontend/web/src/features/parameters/components/Core/ParamWanGuidanceScaleLowNoise.tsx @@ -0,0 +1,94 @@ +import { CompositeNumberInput, CompositeSlider, FormControl, FormLabel, IconButton } from '@invoke-ai/ui-library'; +import { useAppDispatch, useAppSelector } from 'app/store/storeHooks'; +import { + selectWanGuidanceScaleLowNoise, + wanGuidanceScaleLowNoiseChanged, +} from 'features/controlLayers/store/paramsSlice'; +import type React from 'react'; +import { memo, useCallback } from 'react'; +import { useTranslation } from 'react-i18next'; +import { PiXBold } from 'react-icons/pi'; + +// Match the primary ParamCFGScale's range so the slider thumb position is +// visually comparable between the two CFG sliders at the same numeric value +// (e.g. CFG=5 and CFG-Low=3 should look correct relative to each other). +const CONSTRAINTS = { + initial: 3.5, + sliderMin: 1, + sliderMax: 20, + numberInputMin: 1, + numberInputMax: 200, + fineStep: 0.1, + coarseStep: 0.5, +}; + +const MARKS = [CONSTRAINTS.sliderMin, Math.floor(CONSTRAINTS.sliderMax / 2), CONSTRAINTS.sliderMax]; + +/** + * Wan 2.2 Guidance Scale (Low Noise) + * + * Optional separate CFG for the A14B low-noise expert. When null (cleared), + * the denoise node falls back to the primary guidance_scale. Ignored for + * TI2V-5B (single-expert). + * + * Diffusers reference defaults for A14B: primary 4.0 / low-noise 3.0 — i.e. + * a slightly lower CFG on the detail-pass expert produces less over-sharpened + * output. 
+ */ +const ParamWanGuidanceScaleLowNoise = () => { + const { t } = useTranslation(); + const value = useAppSelector(selectWanGuidanceScaleLowNoise); + const dispatch = useAppDispatch(); + + const onChange = useCallback((v: number) => dispatch(wanGuidanceScaleLowNoiseChanged(v)), [dispatch]); + const onReset = useCallback( + (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + dispatch(wanGuidanceScaleLowNoiseChanged(null)); + }, + [dispatch] + ); + + const displayValue = value ?? CONSTRAINTS.initial; + + return ( + + + {t('parameters.wanGuidanceScaleLowNoise')}{' '} + {value !== null && ( + } + onClick={onReset} + minW={4} + h={4} + /> + )} + + + + + ); +}; + +export default memo(ParamWanGuidanceScaleLowNoise); diff --git a/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts b/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts index 2ac59a32e2b..4b2263db2f4 100644 --- a/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts +++ b/invokeai/frontend/web/src/features/parameters/util/optimalDimension.ts @@ -63,9 +63,16 @@ export const isInSDXLTrainingDimensions = (width: number, height: number): boole /** * Gets the grid size for a given base model. For Flux, the grid size is 16, otherwise it is 8. * - sd-1, sd-2, sdxl, anima: 8 - * - flux, sd-3, qwen-image, z-image: 16 + * - flux, sd-3, qwen-image, z-image, wan: 16 * - cogview4: 32 * - default: 8 + * + * Wan 2.2's transformer has `patch_size=(1, 2, 2)`: it patch-embeds with + * stride 2 then un-patches by 2. Combined with the VAE's 8x spatial scale, + * canvas H/W must be a multiple of `8 * 2 = 16`; otherwise the patch + * round-trip produces an off-by-one size and the scheduler step fails with a + * spatial-dim mismatch between latents and noise prediction. 
+ * * @param base The base model * @returns The grid size for the model, defaulting to 8 */ @@ -77,6 +84,7 @@ export const getGridSize = (base?: BaseModelType | null): number => { case 'flux2': case 'sd-3': case 'qwen-image': + case 'wan': case 'z-image': return 16; case 'sd-1': diff --git a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts index 68e1e9a382e..a4f74e22860 100644 --- a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts +++ b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueCanvas.ts @@ -17,6 +17,7 @@ import { buildQwenImageGraph } from 'features/nodes/util/graph/generation/buildQ import { buildSD1Graph } from 'features/nodes/util/graph/generation/buildSD1Graph'; import { buildSD3Graph } from 'features/nodes/util/graph/generation/buildSD3Graph'; import { buildSDXLGraph } from 'features/nodes/util/graph/generation/buildSDXLGraph'; +import { buildWanGraph } from 'features/nodes/util/graph/generation/buildWanGraph'; import { buildZImageGraph } from 'features/nodes/util/graph/generation/buildZImageGraph'; import { selectCanvasDestination } from 'features/nodes/util/graph/graphBuilderUtils'; import type { GraphBuilderArg } from 'features/nodes/util/graph/types'; @@ -69,6 +70,8 @@ const enqueueCanvas = async (store: AppStore, canvasManager: CanvasManager, prep return await buildExternalGraph(graphBuilderArg); case 'anima': return await buildAnimaGraph(graphBuilderArg); + case 'wan': + return await buildWanGraph(graphBuilderArg); default: assert(false, `No graph builders for base ${base}`); } diff --git a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts index 54b37e1b95e..17d062b86c0 100644 --- a/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts +++ b/invokeai/frontend/web/src/features/queue/hooks/useEnqueueGenerate.ts @@ -15,6 +15,7 @@ 
import { buildQwenImageGraph } from 'features/nodes/util/graph/generation/buildQ import { buildSD1Graph } from 'features/nodes/util/graph/generation/buildSD1Graph'; import { buildSD3Graph } from 'features/nodes/util/graph/generation/buildSD3Graph'; import { buildSDXLGraph } from 'features/nodes/util/graph/generation/buildSDXLGraph'; +import { buildWanGraph } from 'features/nodes/util/graph/generation/buildWanGraph'; import { buildZImageGraph } from 'features/nodes/util/graph/generation/buildZImageGraph'; import type { GraphBuilderArg } from 'features/nodes/util/graph/types'; import { UnsupportedGenerationModeError } from 'features/nodes/util/graph/types'; @@ -62,6 +63,8 @@ const enqueueGenerate = async (store: AppStore, prepend: boolean) => { return await buildExternalGraph(graphBuilderArg); case 'anima': return await buildAnimaGraph(graphBuilderArg); + case 'wan': + return await buildWanGraph(graphBuilderArg); default: assert(false, `No graph builders for base ${base}`); } diff --git a/invokeai/frontend/web/src/features/queue/store/readiness.ts b/invokeai/frontend/web/src/features/queue/store/readiness.ts index 230fa3348d6..84b7906018b 100644 --- a/invokeai/frontend/web/src/features/queue/store/readiness.ts +++ b/invokeai/frontend/web/src/features/queue/store/readiness.ts @@ -311,6 +311,19 @@ export const getReasonsWhyCannotEnqueueGenerateTab = (arg: { } } + if (model?.base === 'wan' && model.format === 'gguf_quantized') { + // GGUF Wan mains carry only the transformer; VAE + UMT5-XXL encoder must + // come from either standalone models or the Component Source (Diffusers). + // The low-noise A14B partner expert is optional — if omitted, the loader + // will use the high-noise expert for the whole schedule (lower quality + // but still produces an image). 
+ const hasVaeSource = params.wanVaeModel !== null || params.wanComponentSource !== null; + const hasEncoderSource = params.wanT5EncoderModel !== null || params.wanComponentSource !== null; + if (!hasVaeSource || !hasEncoderSource) { + reasons.push({ content: i18n.t('parameters.invoke.noWanComponentSourceSelected') }); + } + } + if (model?.base === 'z-image') { // Check if VAE source is available (either separate VAE or Qwen3 Source) const hasVaeSource = params.zImageVaeModel !== null || params.zImageQwen3SourceModel !== null; @@ -774,6 +787,19 @@ export const getReasonsWhyCannotEnqueueCanvasTab = (arg: { } } + if (model?.base === 'wan' && model.format === 'gguf_quantized') { + // GGUF Wan mains carry only the transformer; VAE + UMT5-XXL encoder must + // come from either standalone models or the Component Source (Diffusers). + // The low-noise A14B partner expert is optional — if omitted, the loader + // will use the high-noise expert for the whole schedule (lower quality + // but still produces an image). 
+ const hasVaeSource = params.wanVaeModel !== null || params.wanComponentSource !== null; + const hasEncoderSource = params.wanT5EncoderModel !== null || params.wanComponentSource !== null; + if (!hasVaeSource || !hasEncoderSource) { + reasons.push({ content: i18n.t('parameters.invoke.noWanComponentSourceSelected') }); + } + } + if (model?.base === 'z-image') { // Check if VAE source is available (either separate VAE or Qwen3 Source) const hasVaeSource = params.zImageVaeModel !== null || params.zImageQwen3SourceModel !== null; diff --git a/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx b/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx index bfb69b945c8..312c9b71df9 100644 --- a/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx +++ b/invokeai/frontend/web/src/features/settingsAccordions/components/AdvancedSettingsAccordion/AdvancedSettingsAccordion.tsx @@ -10,6 +10,7 @@ import { selectIsFlux2, selectIsQwenImage, selectIsSD3, + selectIsWan, selectIsZImage, selectParamsSlice, selectVAEKey, @@ -24,6 +25,7 @@ import ParamFlux2KleinModelSelect from 'features/parameters/components/Advanced/ import ParamQwenImageComponentSourceSelect from 'features/parameters/components/Advanced/ParamQwenImageComponentSourceSelect'; import ParamQwenImageQuantization from 'features/parameters/components/Advanced/ParamQwenImageQuantization'; import ParamT5EncoderModelSelect from 'features/parameters/components/Advanced/ParamT5EncoderModelSelect'; +import ParamWanModelSelects from 'features/parameters/components/Advanced/ParamWanModelSelects'; import ParamZImageQwen3VaeModelSelect from 'features/parameters/components/Advanced/ParamZImageQwen3VaeModelSelect'; import ParamSeamlessXAxis from 'features/parameters/components/Seamless/ParamSeamlessXAxis'; import ParamSeamlessYAxis 
from 'features/parameters/components/Seamless/ParamSeamlessYAxis'; @@ -54,6 +56,7 @@ export const AdvancedSettingsAccordion = memo(() => { const isExternal = useAppSelector(selectIsExternal); const isQwenImage = useAppSelector(selectIsQwenImage); const isAnima = useAppSelector(selectIsAnima); + const isWan = useAppSelector(selectIsWan); const selectBadges = useMemo( () => @@ -107,13 +110,13 @@ export const AdvancedSettingsAccordion = memo(() => { return ( - {!isZImage && !isAnima && !isFlux2 && !isQwenImage && ( + {!isZImage && !isAnima && !isFlux2 && !isQwenImage && !isWan && ( {isFLUX ? : } {!isFLUX && !isSD3 && } )} - {!isFLUX && !isFlux2 && !isSD3 && !isZImage && !isQwenImage && !isAnima && ( + {!isFLUX && !isFlux2 && !isSD3 && !isZImage && !isQwenImage && !isAnima && !isWan && ( <> @@ -166,6 +169,11 @@ export const AdvancedSettingsAccordion = memo(() => { )} + {isWan && ( + + + + )} ); diff --git a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx index 220008a38b0..2ec05cd46d8 100644 --- a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx +++ b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/GenerationSettingsAccordion.tsx @@ -13,6 +13,7 @@ import { selectIsFlux2, selectIsQwenImage, selectIsSD3, + selectIsWan, selectIsZImage, selectModelSupportsGuidance, selectModelSupportsSteps, @@ -29,6 +30,7 @@ import ParamGuidance from 'features/parameters/components/Core/ParamGuidance'; import ParamQwenImageShift from 'features/parameters/components/Core/ParamQwenImageShift'; import ParamScheduler from 'features/parameters/components/Core/ParamScheduler'; import ParamSteps from 'features/parameters/components/Core/ParamSteps'; +import 
ParamWanGuidanceScaleLowNoise from 'features/parameters/components/Core/ParamWanGuidanceScaleLowNoise'; import ParamZImageScheduler from 'features/parameters/components/Core/ParamZImageScheduler'; import ParamZImageShift from 'features/parameters/components/Core/ParamZImageShift'; import ParamZImageSeedVarianceSettings from 'features/parameters/components/SeedVariance/ParamZImageSeedVarianceSettings'; @@ -55,6 +57,7 @@ export const GenerationSettingsAccordion = memo(() => { const isExternal = useAppSelector(selectIsExternal); const isQwenImage = useAppSelector(selectIsQwenImage); const isAnima = useAppSelector(selectIsAnima); + const isWan = useAppSelector(selectIsWan); const fluxDypePreset = useAppSelector(selectFluxDypePreset); const modelSupportsGuidance = useAppSelector(selectModelSupportsGuidance); const modelSupportsSteps = useAppSelector(selectModelSupportsSteps); @@ -104,7 +107,8 @@ export const GenerationSettingsAccordion = memo(() => { !isCogView4 && !isZImage && !isQwenImage && - !isAnima && } + !isAnima && + !isWan && } {!isExternal && (isFLUX || isFlux2) && } {!isExternal && isZImage && } {!isExternal && isAnima && } @@ -114,6 +118,7 @@ export const GenerationSettingsAccordion = memo(() => { )} {!isExternal && !isFLUX && !isFlux2 && } + {!isExternal && isWan && } {!isExternal && isZImage && } {!isExternal && isQwenImage && } {!isExternal && isFLUX && } diff --git a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/MainModelPicker.tsx b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/MainModelPicker.tsx index 66f76dcd153..134ab5f1e62 100644 --- a/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/MainModelPicker.tsx +++ b/invokeai/frontend/web/src/features/settingsAccordions/components/GenerationSettingsAccordion/MainModelPicker.tsx @@ -17,7 +17,26 @@ export const MainModelPicker = memo(() => { const { t } = useTranslation(); 
const dispatch = useAppDispatch(); const activeTab = useAppSelector(selectActiveTab); - const [modelConfigs] = useMainModels(); + const [allModelConfigs] = useMainModels(); + // Low-noise Wan GGUFs belong in the Transformer (Low Noise) slot of the + // Wan advanced section, not as a primary main. Filter them out of the main + // model dropdown so users can't accidentally wire them backwards. + const modelConfigs = useMemo( + () => + allModelConfigs.filter((c) => { + if ( + c.type === 'main' && + c.base === 'wan' && + c.format === 'gguf_quantized' && + 'expert' in c && + c.expert === 'low' + ) { + return false; + } + return true; + }), + [allModelConfigs] + ); const selectedModelConfig = useSelectedModelConfig(); const onChange = useCallback( (modelConfig: AnyModelConfigWithExternal) => { diff --git a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts index bd1ac088138..73e65b984f7 100644 --- a/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts +++ b/invokeai/frontend/web/src/services/api/hooks/modelsByType.ts @@ -37,6 +37,10 @@ import { isTextLLMModelConfig, isTIModelConfig, isVAEModelConfigOrSubmodel, + isWanDiffusersMainModelConfig, + isWanGGUFLowNoiseMainModelConfig, + isWanT5EncoderModelConfig, + isWanVAEModelConfig, isZImageDiffusersMainModelConfig, } from 'services/api/types'; @@ -111,6 +115,10 @@ export const useQwenImageDiffusersModels = () => buildModelsHook(isQwenImageDiff export const useQwenImageVAEModels = () => buildModelsHook(isQwenImageVAEModelConfig)(); export const useQwenVLEncoderModels = () => buildModelsHook(isQwenVLEncoderModelConfig)(); export const useQwen3EncoderModels = () => buildModelsHook(isQwen3EncoderModelConfig)(); +export const useWanDiffusersModels = () => buildModelsHook(isWanDiffusersMainModelConfig)(); +export const useWanGGUFLowNoiseModels = () => buildModelsHook(isWanGGUFLowNoiseMainModelConfig)(); +export const useWanVAEModels = () => 
buildModelsHook(isWanVAEModelConfig)(); +export const useWanT5EncoderModels = () => buildModelsHook(isWanT5EncoderModelConfig)(); export const useGlobalReferenceImageModels = buildModelsHook( (config) => isIPAdapterModelConfig(config) || isFluxReduxModelConfig(config) || isFluxKontextModelConfig(config) ); @@ -154,5 +162,8 @@ export const selectFlux2DiffusersModels = buildModelsSelector(isFlux2DiffusersMa export const selectFluxVAEModels = buildModelsSelector(isFluxVAEModelConfig); export const selectAnimaVAEModels = buildModelsSelector(isAnimaVAEModelConfig); export const selectT5EncoderModels = buildModelsSelector(isT5EncoderModelConfigOrSubmodel); +export const selectWanDiffusersModels = buildModelsSelector(isWanDiffusersMainModelConfig); +export const selectWanVAEModels = buildModelsSelector(isWanVAEModelConfig); +export const selectWanT5EncoderModels = buildModelsSelector(isWanT5EncoderModelConfig); export const useTextLLMModels = () => buildModelsHook(isTextLLMModelConfig)(); export const useLlavaModels = () => buildModelsHook(isLLaVAModelConfig)(); diff --git a/invokeai/frontend/web/src/services/api/schema.ts b/invokeai/frontend/web/src/services/api/schema.ts index 76894ce9e4b..4935b590620 100644 --- a/invokeai/frontend/web/src/services/api/schema.ts +++ b/invokeai/frontend/web/src/services/api/schema.ts @@ -7543,7 +7543,7 @@ export type components = { * @description The generation mode that output this image * @default null */ - generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint" | "flux_txt2img" | "flux_img2img" | "flux_inpaint" | "flux_outpaint" | "flux2_txt2img" | "flux2_img2img" | "flux2_inpaint" | "flux2_outpaint" | "sd3_txt2img" | "sd3_img2img" | "sd3_inpaint" | "sd3_outpaint" | "cogview4_txt2img" | "cogview4_img2img" | "cogview4_inpaint" | "cogview4_outpaint" | "z_image_txt2img" | "z_image_img2img" | "z_image_inpaint" | "z_image_outpaint" | "qwen_image_txt2img" | 
"qwen_image_img2img" | "qwen_image_inpaint" | "qwen_image_outpaint" | "anima_txt2img" | "anima_img2img" | "anima_inpaint" | "anima_outpaint") | null; + generation_mode?: ("txt2img" | "img2img" | "inpaint" | "outpaint" | "sdxl_txt2img" | "sdxl_img2img" | "sdxl_inpaint" | "sdxl_outpaint" | "flux_txt2img" | "flux_img2img" | "flux_inpaint" | "flux_outpaint" | "flux2_txt2img" | "flux2_img2img" | "flux2_inpaint" | "flux2_outpaint" | "sd3_txt2img" | "sd3_img2img" | "sd3_inpaint" | "sd3_outpaint" | "cogview4_txt2img" | "cogview4_img2img" | "cogview4_inpaint" | "cogview4_outpaint" | "z_image_txt2img" | "z_image_img2img" | "z_image_inpaint" | "z_image_outpaint" | "qwen_image_txt2img" | "qwen_image_img2img" | "qwen_image_inpaint" | "qwen_image_outpaint" | "anima_txt2img" | "anima_img2img" | "anima_inpaint" | "anima_outpaint" | "wan_txt2img" | "wan_img2img" | "wan_inpaint" | "wan_outpaint" | "wan_i2v") | null; /** * Positive Prompt * @description The positive prompt parameter @@ -19117,6 +19117,8 @@ export type components = { * @description For Wan 2.2 A14B dual-expert LoRAs: 'high' targets the high-noise expert, 'low' targets the low-noise expert. None means the LoRA is expert-agnostic (TI2V-5B, or community LoRAs without explicit tagging) and is applied to both. */ expert: ("high" | "low") | null; + /** @description The Wan model family this LoRA targets, detected from its inner-dim (5120 -> A14B, 3072 -> TI2V-5B). A14B LoRAs are incompatible with TI2V-5B mains (and vice versa) — they crash with a shape mismatch in the layer patcher. The linear-view graph builder filters LoRAs on variant when building the LoRA collection. None means the LoRA's inner-dim couldn't be identified. */ + variant: components["schemas"]["WanLoRAVariantType"] | null; }; /** * LoRA_LyCORIS_ZImage_Config @@ -24059,7 +24061,7 @@ export type components = { * Variant * @description The variant of the model. 
*/ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["WanLoRAVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** @description The prediction type of the model. */ prediction_type?: components["schemas"]["SchedulerPredictionType"] | null; /** @@ -28676,7 +28678,7 @@ export type components = { type: components["schemas"]["ModelType"]; format?: components["schemas"]["ModelFormat"] | null; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["WanLoRAVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** * Is Installed * @default false @@ -28721,7 +28723,7 @@ export type components = { type: 
components["schemas"]["ModelType"]; format?: components["schemas"]["ModelFormat"] | null; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["WanLoRAVariantType"] | components["schemas"]["Qwen3VariantType"] | null; /** * Is Installed * @default false @@ -29241,7 +29243,7 @@ export type components = { path_or_prefix: string; model_type: components["schemas"]["ModelType"]; /** Variant */ - variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["Qwen3VariantType"] | null; + variant?: components["schemas"]["ModelVariantType"] | components["schemas"]["ClipVariantType"] | components["schemas"]["FluxVariantType"] | components["schemas"]["Flux2VariantType"] | components["schemas"]["ZImageVariantType"] | components["schemas"]["QwenImageVariantType"] | components["schemas"]["WanVariantType"] | components["schemas"]["WanLoRAVariantType"] | components["schemas"]["Qwen3VariantType"] | null; }; /** * Subtract Integers @@ -32429,6 +32431,17 @@ export type components = { */ type: "wan_lora_loader_output"; }; + /** + * 
WanLoRAVariantType + * @description Wan 2.2 LoRA variants, identifying which model family a LoRA targets. + * + * Detected from the LoRA's inner attention dim: A14B has ``inner_dim=5120``, + * TI2V-5B has ``inner_dim=3072``. A14B and 5B LoRAs are NOT interchangeable — + * applying one against the wrong main model crashes in the layer patcher + * with a tensor-shape error. + * @enum {string} + */ + WanLoRAVariantType: "a14b" | "5b"; /** * Main Model - Wan 2.2 * @description Loads a Wan 2.2 model, outputting its submodels. diff --git a/invokeai/frontend/web/src/services/api/types.ts b/invokeai/frontend/web/src/services/api/types.ts index 962765603ee..cc205010edf 100644 --- a/invokeai/frontend/web/src/services/api/types.ts +++ b/invokeai/frontend/web/src/services/api/types.ts @@ -101,6 +101,7 @@ type FLUX2ModelConfig = Extract; export type LoRAModelConfig = Extract; +type WanLoRAModelConfig = Extract; export type VAEModelConfig = Extract; export type ControlNetModelConfig = Extract; export type IPAdapterModelConfig = Extract; @@ -117,7 +118,7 @@ export type T5EncoderBnbQuantizedLlmInt8bModelConfig = Extract< >; export type Qwen3EncoderModelConfig = Extract; export type QwenVLEncoderModelConfig = Extract; -type WanT5EncoderModelConfig = Extract; +export type WanT5EncoderModelConfig = Extract; export type SpandrelImageToImageModelConfig = Extract; export type CheckpointModelConfig = Extract; export type CLIPVisionModelConfig = Extract; @@ -322,6 +323,13 @@ export const isQwenImageVAEModelConfig = ( ); }; +export const isWanVAEModelConfig = (config: AnyModelConfig, excludeSubmodels?: boolean): config is VAEModelConfig => { + return ( + (config.type === 'vae' || (!excludeSubmodels && config.type === 'main' && checkSubmodels(['vae'], config))) && + config.base === 'wan' + ); +}; + export const isControlNetModelConfig = (config: AnyModelConfig): config is ControlNetModelConfig => { return config.type === 'controlnet'; }; @@ -491,6 +499,23 @@ export const 
isQwenImageDiffusersMainModelConfig = (config: AnyModelConfig): con return config.type === 'main' && config.base === 'qwen-image' && config.format === 'diffusers'; }; +export const isWanDiffusersMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => { + return config.type === 'main' && config.base === 'wan' && config.format === 'diffusers'; +}; + +/** Wan GGUF main models marked as the low-noise expert (the second half + * of the A14B MoE pair). Suitable for the Transformer (Low Noise) picker; + * MainModelPicker inlines the same check to hide them from the main dropdown. */ +export const isWanGGUFLowNoiseMainModelConfig = (config: AnyModelConfig): config is MainModelConfig => { + return ( + config.type === 'main' && config.base === 'wan' && config.format === 'gguf_quantized' && config.expert === 'low' + ); +}; + +export const isWanLoRAModelConfig = (config: AnyModelConfig): config is WanLoRAModelConfig => { + return config.type === 'lora' && config.base === 'wan'; +}; + export const isTIModelConfig = (config: AnyModelConfig): config is MainModelConfig => { return config.type === 'embedding'; }; diff --git a/tests/backend/model_manager/configs/test_wan_lora_config.py b/tests/backend/model_manager/configs/test_wan_lora_config.py index dce64616165..fb1e125a37e 100644 --- a/tests/backend/model_manager/configs/test_wan_lora_config.py +++ b/tests/backend/model_manager/configs/test_wan_lora_config.py @@ -179,6 +179,13 @@ def _wan_kohya_sd(self) -> dict: "lora_unet_blocks_0_attn1_to_q.lora_up.weight": _t((5120, 128)), } + def _wan_ti2v5b_sd(self) -> dict: + """A TI2V-5B LoRA — inner_dim 3072, not 5120.""" + return { + "transformer.blocks.0.attn1.to_q.lora_A.weight": _t((64, 3072)), + "transformer.blocks.0.attn1.to_q.lora_B.weight": _t((3072, 64)), + } + def test_accepts_diffusers_wan(self): with TemporaryDirectory() as tmp: f = Path(tmp) / "my-wan-lora.safetensors" @@ -190,6 +197,7 @@ def test_accepts_diffusers_wan(self): assert cfg.base == BaseModelType.Wan 
assert cfg.format == ModelFormat.LyCORIS assert cfg.expert is None + assert cfg.variant == "a14b" # 5120-dim state dict def test_accepts_native_wan(self): with TemporaryDirectory() as tmp: @@ -253,6 +261,48 @@ def test_expert_none_for_untagged_filename(self): ) assert cfg.expert is None + def test_variant_detected_as_5b_when_inner_dim_3072(self): + """TI2V-5B LoRAs have inner_dim 3072. Detector must classify them as + '5b' so the FE filter doesn't route them to an A14B main and crash.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "ti2v5b-lora.safetensors" + f.touch() + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_ti2v5b_sd()), + _overrides(f, "ti2v5b"), + ) + assert cfg.base == BaseModelType.Wan + assert cfg.variant == "5b" + + def test_variant_none_when_unrecognised_inner_dim(self): + """A future Wan family or a LoRA touching only ffn at non-attn dims + should map to variant=None rather than mis-classify.""" + with TemporaryDirectory() as tmp: + f = Path(tmp) / "future-wan.safetensors" + f.touch() + # Only an ffn LoRA — no attn weight to read inner_dim from. + # Also a non-5120, non-3072 dim that would otherwise mis-classify. + sd = { + "transformer.blocks.0.ffn.net.0.proj.lora_A.weight": _t((128, 4096)), + "transformer.blocks.0.ffn.net.0.proj.lora_B.weight": _t((11008, 128)), + } + cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "future")) + assert cfg.variant is None + + def test_explicit_variant_override_wins(self): + with TemporaryDirectory() as tmp: + f = Path(tmp) / "manual.safetensors" + f.touch() + overrides = _overrides(f, "manual") + overrides["variant"] = "5b" + # State dict is 5120-dim (auto-detect would say "a14b") but the + # explicit override should stick. 
+ cfg = LoRA_LyCORIS_Wan_Config.from_model_on_disk( + _make_mod(f, self._wan_diffusers_sd()), + overrides, + ) + assert cfg.variant == "5b" + def test_rejects_anima_lora(self): with TemporaryDirectory() as tmp: f = Path(tmp) / "anima.safetensors" From 727eff94bce57126a38444e2cec50eff43e72f81 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 21:14:24 -0400 Subject: [PATCH 08/12] feat(wan): add Wan 2.2 starter models and bundle Wan 2.2 starter pack (selected when the user picks the Wan 2.2 bundle) brings up the minimal-cost path to running A14B T2V end-to-end: - Standalone UMT5-XXL encoder and A14B VAE (so GGUF mains don't need a full Diffusers download for their VAE/encoder sources). - T2V A14B Q4_K_M and Q8_0 GGUF expert pairs (high + low noise). - T2V Lightning V1.1 Seko rank-64 LoRA pair (4-step inference). Additional Wan 2.2 starter models browseable from the model manager: - Full Diffusers T2V A14B, I2V A14B, and TI2V-5B. - I2V A14B Q4_K_M and Q8_0 GGUF expert pairs + Lightning V1 LoRA pair. - TI2V-5B Q4_K_M and Q8_0 GGUFs + the 48-channel TI2V-5B VAE. Each "high noise" GGUF lists its low-noise partner plus the shared VAE and UMT5-XXL encoder as dependencies, so installing one of them pulls in everything the loader needs. QuantStack's HighNoise/LowNoise file naming and lightx2v's high_noise_model/low_noise_model.safetensors are both picked up by the existing filename heuristic in the GGUF probe. Co-Authored-By: Claude Opus 4.7 (1M context) docs(wan): add Wan 2.2 hardware requirements Adds Wan 2.2 A14B (T2V/I2V) and TI2V-5B rows to the hardware requirements table with rough VRAM/RAM guidance per quantization. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../docs/start-here/system-requirements.mdx | 4 +- .../backend/model_manager/starter_models.py | 260 ++++++++++++++++++ 2 files changed, 263 insertions(+), 1 deletion(-) diff --git a/docs/src/content/docs/start-here/system-requirements.mdx b/docs/src/content/docs/start-here/system-requirements.mdx index 114698ce158..5eff2bc427a 100644 --- a/docs/src/content/docs/start-here/system-requirements.mdx +++ b/docs/src/content/docs/start-here/system-requirements.mdx @@ -2,7 +2,7 @@ title: Hardware Requirements sidebar: order: 1 -lastUpdated: 2026-02-18 +lastUpdated: 2026-05-11 --- import { Tabs, TabItem, Steps } from '@astrojs/starlight/components' @@ -28,6 +28,8 @@ The requirements below are rough guidelines for best performance. GPUs with less | FLUX.2 Klein 4B | 1024x1024 | Nvidia 30xx+ | 12GB | 16GB | FP8 works with 8GB+; Diffusers + encoder | | FLUX.2 Klein 9B | 1024x1024 | Nvidia 40xx | 24GB | 32GB | FP8 works with 12GB+; Diffusers + encoder | | Z-Image Turbo | 1024x1024 | Nvidia 20xx+ | 8GB | 16GB | Q4_K 8GB; Q8/BF16 16GB+ | +| Wan 2.2 A14B (T2V/I2V) | 1280x720 | Nvidia 30xx+ | 12GB | 32GB | Dual-expert MoE; Q4_K_M 12GB; Q8 18GB+; Diffusers requires 32GB+ | +| Wan 2.2 TI2V-5B | 1280x720 | Nvidia 20xx+ | 8GB | 16GB | Single transformer; Q4_K_M 6GB+; Q8 8GB+; Diffusers 12GB+ | :::tip[`tmpfs` on Linux] If your temporary directory is mounted as a `tmpfs`, ensure it has sufficient space. 
diff --git a/invokeai/backend/model_manager/starter_models.py b/invokeai/backend/model_manager/starter_models.py index 306b1482344..4acd60e0de1 100644 --- a/invokeai/backend/model_manager/starter_models.py +++ b/invokeai/backend/model_manager/starter_models.py @@ -15,6 +15,7 @@ ModelFormat, ModelType, QwenImageVariantType, + WanVariantType, ) @@ -1299,6 +1300,229 @@ def _gemini_3_resolution_presets( default_settings=ExternalApiModelDefaultSettings(width=1328, height=1328, num_images=1), panel_schema=ExternalModelPanelSchema(image=[{"name": "dimensions"}]), ) +# region Wan 2.2 (local) +# Shared components — all Wan 2.2 variants use the UMT5-XXL text encoder. A14B +# (both T2V and I2V) uses a 16-channel VAE; TI2V-5B uses a 48-channel VAE. The +# two VAEs are not interchangeable. +wan_22_t5_encoder = StarterModel( + name="Wan T5 Encoder (UMT5-XXL)", + base=BaseModelType.Any, + source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::text_encoder+tokenizer", + description="UMT5-XXL text encoder used by all Wan 2.2 variants (T2V/I2V A14B and TI2V-5B). " + "Required when running a GGUF Wan main without a Diffusers Component Source. (~11GB)", + type=ModelType.WanT5Encoder, + format=ModelFormat.WanT5Encoder, +) + +wan_22_a14b_vae = StarterModel( + name="Wan 2.2 A14B VAE", + base=BaseModelType.Wan, + source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::vae/diffusion_pytorch_model.safetensors", + description="Wan 2.2 A14B VAE (16-channel). Shared between T2V and I2V A14B variants. " + "Not interchangeable with the TI2V-5B VAE. (~250MB)", + type=ModelType.VAE, + format=ModelFormat.Checkpoint, +) + +wan_22_5b_vae = StarterModel( + name="Wan 2.2 TI2V-5B VAE", + base=BaseModelType.Wan, + source="Wan-AI/Wan2.2-TI2V-5B-Diffusers::vae/diffusion_pytorch_model.safetensors", + description="Wan 2.2 TI2V-5B VAE (48-channel). Required for the TI2V-5B model family. " + "Not interchangeable with the A14B VAE. 
(~400MB)", + type=ModelType.VAE, + format=ModelFormat.Checkpoint, +) + +# T2V A14B — full Diffusers + GGUF expert pairs (Q4_K_M and Q8_0). +# The high-noise GGUF is the "main" entry the user picks; the low-noise GGUF +# is wired as the partner expert via the Advanced panel. Each high-noise entry +# lists its low-noise partner plus the shared VAE/encoder as dependencies so +# the bundle/dependency installer pulls everything together. +wan_22_t2v_a14b_diffusers = StarterModel( + name="Wan 2.2 T2V A14B (Diffusers)", + base=BaseModelType.Wan, + source="Wan-AI/Wan2.2-T2V-A14B-Diffusers", + description="Full Diffusers Wan 2.2 T2V A14B model — both expert transformers, VAE, and UMT5-XXL " + "encoder in a single folder. No additional components needed. (~80GB)", + type=ModelType.Main, + format=ModelFormat.Diffusers, + variant=WanVariantType.T2V_A14B, +) + +wan_22_t2v_a14b_low_gguf_q4_k_m = StarterModel( + name="Wan 2.2 T2V A14B Low Noise (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-T2V-A14B-GGUF/resolve/main/LowNoise/Wan2.2-T2V-A14B-LowNoise-Q4_K_M.gguf", + description="Wan 2.2 T2V A14B low-noise expert transformer (Q4_K_M). Paired with the high-noise " + "expert; selected via the Advanced 'Transformer (Low Noise)' field. (~9.7GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.T2V_A14B, +) + +wan_22_t2v_a14b_gguf_q4_k_m = StarterModel( + name="Wan 2.2 T2V A14B High Noise (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-T2V-A14B-GGUF/resolve/main/HighNoise/Wan2.2-T2V-A14B-HighNoise-Q4_K_M.gguf", + description="Wan 2.2 T2V A14B high-noise expert transformer (Q4_K_M). Pick this as the main model; " + "the low-noise partner is wired in Advanced. Good quality/size balance. 
(~9.7GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.T2V_A14B, + dependencies=[wan_22_a14b_vae, wan_22_t5_encoder, wan_22_t2v_a14b_low_gguf_q4_k_m], +) + +wan_22_t2v_a14b_low_gguf_q8_0 = StarterModel( + name="Wan 2.2 T2V A14B Low Noise (Q8_0)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-T2V-A14B-GGUF/resolve/main/LowNoise/Wan2.2-T2V-A14B-LowNoise-Q8_0.gguf", + description="Wan 2.2 T2V A14B low-noise expert transformer (Q8_0). Highest quality quantization. (~15.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.T2V_A14B, +) + +wan_22_t2v_a14b_gguf_q8_0 = StarterModel( + name="Wan 2.2 T2V A14B High Noise (Q8_0)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-T2V-A14B-GGUF/resolve/main/HighNoise/Wan2.2-T2V-A14B-HighNoise-Q8_0.gguf", + description="Wan 2.2 T2V A14B high-noise expert transformer (Q8_0). Pick as the main; pair with the " + "low-noise Q8_0 partner in Advanced. Highest quality quantization. (~15.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.T2V_A14B, + dependencies=[wan_22_a14b_vae, wan_22_t5_encoder, wan_22_t2v_a14b_low_gguf_q8_0], +) + +# T2V Lightning LoRAs — V1.1 Seko rank-64 pair (4-step inference). +wan_22_t2v_lightning_high = StarterModel( + name="Wan 2.2 T2V Lightning High Noise (4-step, V1.1)", + base=BaseModelType.Wan, + source="https://huggingface.co/lightx2v/Wan2.2-Lightning/resolve/main/Wan2.2-T2V-A14B-4steps-lora-rank64-Seko-V1.1/high_noise_model.safetensors", + description="Lightning distillation LoRA for the Wan 2.2 T2V A14B high-noise expert — enables " + "4-step generation. Use together with the low-noise variant. 
Settings: Steps=4, CFG=1.", + type=ModelType.LoRA, +) + +wan_22_t2v_lightning_low = StarterModel( + name="Wan 2.2 T2V Lightning Low Noise (4-step, V1.1)", + base=BaseModelType.Wan, + source="https://huggingface.co/lightx2v/Wan2.2-Lightning/resolve/main/Wan2.2-T2V-A14B-4steps-lora-rank64-Seko-V1.1/low_noise_model.safetensors", + description="Lightning distillation LoRA for the Wan 2.2 T2V A14B low-noise expert — enables " + "4-step generation. Use together with the high-noise variant. Settings: Steps=4, CFG=1.", + type=ModelType.LoRA, +) + +# I2V A14B — full Diffusers + GGUF expert pairs (Q4_K_M and Q8_0). +wan_22_i2v_a14b_diffusers = StarterModel( + name="Wan 2.2 I2V A14B (Diffusers)", + base=BaseModelType.Wan, + source="Wan-AI/Wan2.2-I2V-A14B-Diffusers", + description="Full Diffusers Wan 2.2 I2V A14B model — both expert transformers, VAE, and UMT5-XXL " + "encoder. Use the Reference Images panel to provide the conditioning image. (~80GB)", + type=ModelType.Main, + format=ModelFormat.Diffusers, + variant=WanVariantType.I2V_A14B, +) + +wan_22_i2v_a14b_low_gguf_q4_k_m = StarterModel( + name="Wan 2.2 I2V A14B Low Noise (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/resolve/main/LowNoise/Wan2.2-I2V-A14B-LowNoise-Q4_K_M.gguf", + description="Wan 2.2 I2V A14B low-noise expert transformer (Q4_K_M). (~9.7GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.I2V_A14B, +) + +wan_22_i2v_a14b_gguf_q4_k_m = StarterModel( + name="Wan 2.2 I2V A14B High Noise (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/resolve/main/HighNoise/Wan2.2-I2V-A14B-HighNoise-Q4_K_M.gguf", + description="Wan 2.2 I2V A14B high-noise expert transformer (Q4_K_M). Pick as the main; pair with " + "the low-noise partner in Advanced. Use the Reference Images panel for the conditioning image. 
(~9.7GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.I2V_A14B, + dependencies=[wan_22_a14b_vae, wan_22_t5_encoder, wan_22_i2v_a14b_low_gguf_q4_k_m], +) + +wan_22_i2v_a14b_low_gguf_q8_0 = StarterModel( + name="Wan 2.2 I2V A14B Low Noise (Q8_0)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/resolve/main/LowNoise/Wan2.2-I2V-A14B-LowNoise-Q8_0.gguf", + description="Wan 2.2 I2V A14B low-noise expert transformer (Q8_0). Highest quality quantization. (~15.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.I2V_A14B, +) + +wan_22_i2v_a14b_gguf_q8_0 = StarterModel( + name="Wan 2.2 I2V A14B High Noise (Q8_0)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-I2V-A14B-GGUF/resolve/main/HighNoise/Wan2.2-I2V-A14B-HighNoise-Q8_0.gguf", + description="Wan 2.2 I2V A14B high-noise expert transformer (Q8_0). Highest quality quantization. (~15.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.I2V_A14B, + dependencies=[wan_22_a14b_vae, wan_22_t5_encoder, wan_22_i2v_a14b_low_gguf_q8_0], +) + +# I2V Lightning LoRAs — Seko rank-64 pair (4-step inference). Currently only V1. +wan_22_i2v_lightning_high = StarterModel( + name="Wan 2.2 I2V Lightning High Noise (4-step, V1)", + base=BaseModelType.Wan, + source="https://huggingface.co/lightx2v/Wan2.2-Lightning/resolve/main/Wan2.2-I2V-A14B-4steps-lora-rank64-Seko-V1/high_noise_model.safetensors", + description="Lightning distillation LoRA for the Wan 2.2 I2V A14B high-noise expert — enables " + "4-step image-to-image generation. Use together with the low-noise variant. 
Settings: Steps=4, CFG=1.", + type=ModelType.LoRA, +) + +wan_22_i2v_lightning_low = StarterModel( + name="Wan 2.2 I2V Lightning Low Noise (4-step, V1)", + base=BaseModelType.Wan, + source="https://huggingface.co/lightx2v/Wan2.2-Lightning/resolve/main/Wan2.2-I2V-A14B-4steps-lora-rank64-Seko-V1/low_noise_model.safetensors", + description="Lightning distillation LoRA for the Wan 2.2 I2V A14B low-noise expert — enables " + "4-step image-to-image generation. Use together with the high-noise variant. Settings: Steps=4, CFG=1.", + type=ModelType.LoRA, +) + +# TI2V-5B — single-transformer model (no expert pair). Uses its own 48-channel VAE. +wan_22_ti2v_5b_diffusers = StarterModel( + name="Wan 2.2 TI2V-5B (Diffusers)", + base=BaseModelType.Wan, + source="Wan-AI/Wan2.2-TI2V-5B-Diffusers", + description="Full Diffusers Wan 2.2 TI2V-5B model — single 5B transformer, 48-channel VAE, and " + "UMT5-XXL encoder. Smaller and faster than A14B; runs on consumer GPUs. (~20GB)", + type=ModelType.Main, + format=ModelFormat.Diffusers, + variant=WanVariantType.TI2V_5B, +) + +wan_22_ti2v_5b_gguf_q4_k_m = StarterModel( + name="Wan 2.2 TI2V-5B (Q4_K_M)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-TI2V-5B-GGUF/resolve/main/Wan2.2-TI2V-5B-Q4_K_M.gguf", + description="Wan 2.2 TI2V-5B transformer (Q4_K_M). Single-expert model — no low-noise partner needed. (~3.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.TI2V_5B, + dependencies=[wan_22_5b_vae, wan_22_t5_encoder], +) + +wan_22_ti2v_5b_gguf_q8_0 = StarterModel( + name="Wan 2.2 TI2V-5B (Q8_0)", + base=BaseModelType.Wan, + source="https://huggingface.co/QuantStack/Wan2.2-TI2V-5B-GGUF/resolve/main/Wan2.2-TI2V-5B-Q8_0.gguf", + description="Wan 2.2 TI2V-5B transformer (Q8_0). Highest quality quantization. 
(~5.4GB)", + type=ModelType.Main, + format=ModelFormat.GGUFQuantized, + variant=WanVariantType.TI2V_5B, + dependencies=[wan_22_5b_vae, wan_22_t5_encoder], +) +# endregion + alibabacloud_wan26_t2i = StarterModel( name="Wan 2.6 Text-to-Image", base=BaseModelType.External, @@ -1672,6 +1896,26 @@ def _gemini_3_resolution_presets( z_image_qwen3_encoder_quantized, z_image_controlnet_union, z_image_controlnet_tile, + wan_22_t5_encoder, + wan_22_a14b_vae, + wan_22_5b_vae, + wan_22_t2v_a14b_diffusers, + wan_22_t2v_a14b_low_gguf_q4_k_m, + wan_22_t2v_a14b_gguf_q4_k_m, + wan_22_t2v_a14b_low_gguf_q8_0, + wan_22_t2v_a14b_gguf_q8_0, + wan_22_t2v_lightning_high, + wan_22_t2v_lightning_low, + wan_22_i2v_a14b_diffusers, + wan_22_i2v_a14b_low_gguf_q4_k_m, + wan_22_i2v_a14b_gguf_q4_k_m, + wan_22_i2v_a14b_low_gguf_q8_0, + wan_22_i2v_a14b_gguf_q8_0, + wan_22_i2v_lightning_high, + wan_22_i2v_lightning_low, + wan_22_ti2v_5b_diffusers, + wan_22_ti2v_5b_gguf_q4_k_m, + wan_22_ti2v_5b_gguf_q8_0, gemini_flash_image, gemini_pro_image_preview, gemini_3_1_flash_image_preview, @@ -1781,6 +2025,21 @@ def _gemini_3_resolution_presets( t5_base_encoder, ] +# Wan 2.2 starter bundle — A14B T2V at Q4_K_M and Q8_0 (high+low expert pairs), +# plus the Lightning LoRA pair for 4-step inference, plus the standalone A14B +# VAE and UMT5-XXL encoder so GGUF mains have everything they need without +# the user pulling a full Diffusers Wan. 
+wan_bundle: list[StarterModel] = [ + wan_22_t5_encoder, + wan_22_a14b_vae, + wan_22_t2v_a14b_gguf_q4_k_m, + wan_22_t2v_a14b_low_gguf_q4_k_m, + wan_22_t2v_a14b_gguf_q8_0, + wan_22_t2v_a14b_low_gguf_q8_0, + wan_22_t2v_lightning_high, + wan_22_t2v_lightning_low, +] + STARTER_BUNDLES: dict[str, StarterModelBundle] = { BaseModelType.StableDiffusion1: StarterModelBundle(name="Stable Diffusion 1.5", models=sd1_bundle), BaseModelType.StableDiffusionXL: StarterModelBundle(name="SDXL", models=sdxl_bundle), @@ -1789,6 +2048,7 @@ def _gemini_3_resolution_presets( BaseModelType.ZImage: StarterModelBundle(name="Z-Image Turbo", models=zimage_bundle), BaseModelType.QwenImage: StarterModelBundle(name="Qwen Image", models=qwen_image_bundle), BaseModelType.Anima: StarterModelBundle(name="Anima", models=anima_bundle), + BaseModelType.Wan: StarterModelBundle(name="Wan 2.2", models=wan_bundle), } assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models" From 6c076c529d871849ef34528d0264cb40f938d1ba Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 21:45:50 -0400 Subject: [PATCH 09/12] fix(wan): recall low-noise transformer, component source, and standalone VAE/T5 Wan-specific metadata fields embedded by the graph builder (wan_transformer_low_noise, wan_component_source, wan_vae_model, wan_t5_encoder_model, wan_guidance_scale_low_noise) had no recall handlers in features/metadata/parsing.tsx, so recalling an image's parameters would leave these fields empty. Adds a handler for each that dispatches the matching paramsSlice action and renders a row in the metadata viewer. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../web/src/features/metadata/parsing.tsx | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/invokeai/frontend/web/src/features/metadata/parsing.tsx b/invokeai/frontend/web/src/features/metadata/parsing.tsx index c5a31d03a34..9731a4e3b85 100644 --- a/invokeai/frontend/web/src/features/metadata/parsing.tsx +++ b/invokeai/frontend/web/src/features/metadata/parsing.tsx @@ -58,6 +58,11 @@ import { setZImageSeedVarianceStrength, setZImageShift, vaeSelected, + wanComponentSourceSelected, + wanGuidanceScaleLowNoiseChanged, + wanT5EncoderModelSelected, + wanTransformerLowNoiseSelected, + wanVaeModelSelected, widthChanged, zImageQwen3EncoderModelSelected, zImageQwen3SourceModelSelected, @@ -845,6 +850,133 @@ const QwenImageShift: SingleMetadataHandler = { }; //#endregion QwenImageShift +//#region WanTransformerLowNoise +const WanTransformerLowNoise: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'WanTransformerLowNoise', + parse: (metadata, _store) => { + const raw = getProperty(metadata, 'wan_transformer_low_noise'); + // Reject when the key is absent so the handler is not rendered for non-Wan images + if (raw === undefined) { + return Promise.reject(); + } + if (raw === null) { + return Promise.resolve(null); + } + return Promise.resolve(zModelIdentifierField.parse(raw)); + }, + recall: (value, store) => { + store.dispatch(wanTransformerLowNoiseSelected(value)); + }, + i18nKey: 'modelManager.wanTransformerLowNoise', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion WanTransformerLowNoise + +//#region WanComponentSource +const WanComponentSource: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'WanComponentSource', + parse: (metadata, _store) => { + const raw = getProperty(metadata, 'wan_component_source'); + if (raw === undefined) { + return Promise.reject(); + } + if (raw === null) { + return 
Promise.resolve(null); + } + return Promise.resolve(zModelIdentifierField.parse(raw)); + }, + recall: (value, store) => { + store.dispatch(wanComponentSourceSelected(value)); + }, + i18nKey: 'modelManager.wanComponentSource', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion WanComponentSource + +//#region WanVaeModel +const WanVaeModel: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'WanVaeModel', + parse: (metadata, _store) => { + const raw = getProperty(metadata, 'wan_vae_model'); + if (raw === undefined) { + return Promise.reject(); + } + if (raw === null) { + return Promise.resolve(null); + } + return Promise.resolve(zModelIdentifierField.parse(raw)); + }, + recall: (value, store) => { + store.dispatch(wanVaeModelSelected(value)); + }, + i18nKey: 'modelManager.wanVae', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion WanVaeModel + +//#region WanT5EncoderModel +const WanT5EncoderModel: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'WanT5EncoderModel', + parse: (metadata, _store) => { + const raw = getProperty(metadata, 'wan_t5_encoder_model'); + if (raw === undefined) { + return Promise.reject(); + } + if (raw === null) { + return Promise.resolve(null); + } + return Promise.resolve(zModelIdentifierField.parse(raw)); + }, + recall: (value, store) => { + store.dispatch(wanT5EncoderModelSelected(value)); + }, + i18nKey: 'modelManager.wanT5Encoder', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion WanT5EncoderModel + +//#region WanGuidanceScaleLowNoise +const WanGuidanceScaleLowNoise: SingleMetadataHandler = { + [SingleMetadataKey]: true, + type: 'WanGuidanceScaleLowNoise', + parse: (metadata, _store) => { + const raw = getProperty(metadata, 'wan_guidance_scale_low_noise'); + if (raw === undefined) { + return Promise.reject(); 
+ } + if (raw === null) { + return Promise.resolve(null); + } + const parsed = z.number().parse(raw); + return Promise.resolve(parsed); + }, + recall: (value, store) => { + store.dispatch(wanGuidanceScaleLowNoiseChanged(value)); + }, + i18nKey: 'parameters.wanGuidanceScaleLowNoise', + LabelComponent: MetadataLabel, + ValueComponent: ({ value }: SingleMetadataValueProps) => ( + + ), +}; +//#endregion WanGuidanceScaleLowNoise + //#region ZImageShift const ZImageShift: SingleMetadataHandler = { [SingleMetadataKey]: true, @@ -1649,6 +1781,11 @@ export const ImageMetadataHandlers = { QwenImageQwenVLEncoderModel, QwenImageQuantization, QwenImageShift, + WanTransformerLowNoise, + WanComponentSource, + WanVaeModel, + WanT5EncoderModel, + WanGuidanceScaleLowNoise, ZImageShift, LoRAs, CanvasLayers, From 2d1add894f331e9d58795495fdbe0298ae045ece Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 22:28:08 -0400 Subject: [PATCH 10/12] feat(wan): add default Wan 2.2 T2V and I2V workflows Ships two default workflows in the library, tagged so they appear in "Browse Workflows" under the wan2.2 / text to image / image to image tags: - Text to Image - Wan 2.2: full T2V/TI2V-5B graph (model loader, positive + negative encoders, denoise, l2i). Exposes the five model slots, prompts, steps, dual CFG, and dimensions. - Image to Image - Wan 2.2: I2V A14B graph that adds a wan_ref_image_encoder. Exposes the reference image input plus the standard fields. Both follow default-workflow rules: IDs prefixed with default_, meta.category = "default", and no references to user-installed resources. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../Wan 2.2 Image to Image.json | 305 ++++++++++++++++++ .../Wan 2.2 Text to Image.json | 264 +++++++++++++++ 2 files changed, 569 insertions(+) create mode 100644 invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Image to Image.json create mode 100644 invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Text to Image.json diff --git a/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Image to Image.json b/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Image to Image.json new file mode 100644 index 00000000000..7975e2c0a56 --- /dev/null +++ b/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Image to Image.json @@ -0,0 +1,305 @@ +{ + "id": "default_wan22_i2v_c2d5e1b3-3e4f-5b6c-af7d-8e9f0a1b2c3e", + "name": "Image to Image - Wan 2.2", + "author": "InvokeAI", + "description": "Image-to-image generation with Wan 2.2 I2V A14B. The reference image is VAE-encoded and concatenated to the noise latents each step (the I2V transformer has in_channels=36). Drop a reference image into the 'Reference Image' input, then invoke. Only the I2V A14B variant is supported — T2V and TI2V-5B don't consume reference images.", + "version": "1.0.0", + "contact": "", + "tags": "wan2.2, image to image", + "notes": "Prerequisite model downloads: a Wan 2.2 I2V A14B main (Diffusers or GGUF expert pair). For GGUF mains, also install the Component Source (Diffusers Wan I2V) OR the standalone Wan VAE + UMT5-XXL encoder. Wan 2.2 I2V was trained for video — at single-frame inference it tends to anchor strongly to the reference. 
Recommended settings: 30-40 steps and CFG 5-7 (or 4 steps and CFG 1 with the Wan I2V Lightning LoRA pair).", + "exposedFields": [ + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "transformer_low_noise_model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "component_source" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "vae_model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "wan_t5_encoder_model" + }, + { + "nodeId": "7a6edc2d-f38e-a0c1-e27a-8f9dcbb20fce", + "fieldName": "image" + }, + { + "nodeId": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "fieldName": "prompt" + }, + { + "nodeId": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "fieldName": "prompt" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "steps" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "guidance_scale" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "guidance_scale_low_noise" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "width" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "height" + } + ], + "meta": { + "version": "3.0.0", + "category": "default" + }, + "nodes": [ + { + "id": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "type": "invocation", + "data": { + "id": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "type": "wan_model_loader", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "model": { "name": "model", "label": "" }, + "transformer_low_noise_model": { "name": "transformer_low_noise_model", "label": "" }, + "vae_model": { "name": "vae_model", "label": "" }, + "wan_t5_encoder_model": { "name": "wan_t5_encoder_model", "label": "" }, + "component_source": { "name": "component_source", 
"label": "" } + } + }, + "position": { "x": 200, "y": 0 } + }, + { + "id": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "type": "invocation", + "data": { + "id": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "type": "wan_text_encoder", + "version": "1.0.0", + "label": "Positive Prompt", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "prompt": { "name": "prompt", "label": "", "value": "" }, + "wan_t5_encoder": { "name": "wan_t5_encoder", "label": "" } + } + }, + "position": { "x": 700, "y": -200 } + }, + { + "id": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "type": "invocation", + "data": { + "id": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "type": "wan_text_encoder", + "version": "1.0.0", + "label": "Negative Prompt", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "prompt": { "name": "prompt", "label": "", "value": " " }, + "wan_t5_encoder": { "name": "wan_t5_encoder", "label": "" } + } + }, + "position": { "x": 700, "y": 100 } + }, + { + "id": "7a6edc2d-f38e-a0c1-e27a-8f9dcbb20fce", + "type": "invocation", + "data": { + "id": "7a6edc2d-f38e-a0c1-e27a-8f9dcbb20fce", + "type": "wan_ref_image_encoder", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "image": { "name": "image", "label": "Reference Image" }, + "vae": { "name": "vae", "label": "" }, + "width": { "name": "width", "label": "", "value": 1024 }, + "height": { "name": "height", "label": "", "value": 1024 } + } + }, + "position": { "x": 700, "y": 300 } + }, + { + "id": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "type": "invocation", + "data": { + "id": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "type": "rand_int", + "version": "1.0.1", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "low": { "name": "low", "label": "", "value": 0 }, + "high": { "name": "high", "label": "", "value": 
2147483647 } + } + }, + "position": { "x": 700, "y": 550 } + }, + { + "id": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "type": "invocation", + "data": { + "id": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "type": "wan_denoise", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "transformer": { "name": "transformer", "label": "" }, + "positive_conditioning": { "name": "positive_conditioning", "label": "" }, + "negative_conditioning": { "name": "negative_conditioning", "label": "" }, + "ref_image": { "name": "ref_image", "label": "" }, + "latents": { "name": "latents", "label": "" }, + "denoise_mask": { "name": "denoise_mask", "label": "" }, + "denoising_start": { "name": "denoising_start", "label": "", "value": 0 }, + "denoising_end": { "name": "denoising_end", "label": "", "value": 1 }, + "add_noise": { "name": "add_noise", "label": "", "value": true }, + "guidance_scale": { "name": "guidance_scale", "label": "CFG", "value": 5.0 }, + "guidance_scale_low_noise": { "name": "guidance_scale_low_noise", "label": "CFG (Low)" }, + "width": { "name": "width", "label": "", "value": 1024 }, + "height": { "name": "height", "label": "", "value": 1024 }, + "steps": { "name": "steps", "label": "", "value": 30 }, + "seed": { "name": "seed", "label": "", "value": 0 } + } + }, + "position": { "x": 1100, "y": -50 } + }, + { + "id": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "type": "invocation", + "data": { + "id": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "type": "wan_l2i", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": false, + "useCache": true, + "inputs": { + "board": { "name": "board", "label": "" }, + "metadata": { "name": "metadata", "label": "" }, + "latents": { "name": "latents", "label": "" }, + "vae": { "name": "vae", "label": "" } + } + }, + "position": { "x": 1550, "y": -50 } + } + ], + "edges": [ + { + "id": "edge-loader-transformer-denoise", + 
"type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "transformer", + "targetHandle": "transformer" + }, + { + "id": "edge-loader-t5-pos", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "sourceHandle": "wan_t5_encoder", + "targetHandle": "wan_t5_encoder" + }, + { + "id": "edge-loader-t5-neg", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "sourceHandle": "wan_t5_encoder", + "targetHandle": "wan_t5_encoder" + }, + { + "id": "edge-loader-vae-l2i", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "sourceHandle": "vae", + "targetHandle": "vae" + }, + { + "id": "edge-loader-vae-refenc", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "7a6edc2d-f38e-a0c1-e27a-8f9dcbb20fce", + "sourceHandle": "vae", + "targetHandle": "vae" + }, + { + "id": "edge-pos-cond-denoise", + "type": "default", + "source": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "conditioning", + "targetHandle": "positive_conditioning" + }, + { + "id": "edge-neg-cond-denoise", + "type": "default", + "source": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "conditioning", + "targetHandle": "negative_conditioning" + }, + { + "id": "edge-refenc-refimage-denoise", + "type": "default", + "source": "7a6edc2d-f38e-a0c1-e27a-8f9dcbb20fce", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "ref_image", + "targetHandle": "ref_image" + }, + { + "id": "edge-rand-seed-denoise", + "type": "default", + "source": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": 
"value", + "targetHandle": "seed" + }, + { + "id": "edge-denoise-latents-l2i", + "type": "default", + "source": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "target": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "sourceHandle": "latents", + "targetHandle": "latents" + } + ] +} diff --git a/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Text to Image.json b/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Text to Image.json new file mode 100644 index 00000000000..3fc9395c232 --- /dev/null +++ b/invokeai/app/services/workflow_records/default_workflows/Wan 2.2 Text to Image.json @@ -0,0 +1,264 @@ +{ + "id": "default_wan22_t2v_b1c4f0a2-2d3e-4a5b-9f6c-7d8e0a1b2c3d", + "name": "Text to Image - Wan 2.2", + "author": "InvokeAI", + "description": "Text-to-image generation with Wan 2.2 (T2V A14B or TI2V-5B). For A14B GGUFs, wire the second-expert transformer into 'Transformer (Low Noise)' and pick a Diffusers Wan as the Component Source (or use standalone VAE + UMT5-XXL encoder). TI2V-5B is a single-transformer model — leave the low-noise slot empty.", + "version": "1.0.0", + "contact": "", + "tags": "wan2.2, text to image", + "notes": "Prerequisite model downloads: a Wan 2.2 main model (Diffusers or GGUF). For GGUF mains, also install the Component Source (Diffusers Wan) OR the standalone Wan VAE + UMT5-XXL encoder. The Wan 2.2 starter bundle in the Model Manager pulls everything you need for T2V A14B Q4_K_M/Q8_0. 
Recommended settings: 30-40 steps and CFG 5-7 (or 4 steps and CFG 1 with the Wan Lightning LoRA pair).", + "exposedFields": [ + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "transformer_low_noise_model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "component_source" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "vae_model" + }, + { + "nodeId": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "fieldName": "wan_t5_encoder_model" + }, + { + "nodeId": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "fieldName": "prompt" + }, + { + "nodeId": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "fieldName": "prompt" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "steps" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "guidance_scale" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "guidance_scale_low_noise" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "width" + }, + { + "nodeId": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "fieldName": "height" + } + ], + "meta": { + "version": "3.0.0", + "category": "default" + }, + "nodes": [ + { + "id": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "type": "invocation", + "data": { + "id": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "type": "wan_model_loader", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "model": { "name": "model", "label": "" }, + "transformer_low_noise_model": { "name": "transformer_low_noise_model", "label": "" }, + "vae_model": { "name": "vae_model", "label": "" }, + "wan_t5_encoder_model": { "name": "wan_t5_encoder_model", "label": "" }, + "component_source": { "name": "component_source", "label": "" } + } + }, + "position": { "x": 200, "y": 0 } + }, + { + "id": 
"2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "type": "invocation", + "data": { + "id": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "type": "wan_text_encoder", + "version": "1.0.0", + "label": "Positive Prompt", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "prompt": { "name": "prompt", "label": "", "value": "a cat" }, + "wan_t5_encoder": { "name": "wan_t5_encoder", "label": "" } + } + }, + "position": { "x": 700, "y": -200 } + }, + { + "id": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "type": "invocation", + "data": { + "id": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "type": "wan_text_encoder", + "version": "1.0.0", + "label": "Negative Prompt", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "prompt": { "name": "prompt", "label": "", "value": " " }, + "wan_t5_encoder": { "name": "wan_t5_encoder", "label": "" } + } + }, + "position": { "x": 700, "y": 100 } + }, + { + "id": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "type": "invocation", + "data": { + "id": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "type": "rand_int", + "version": "1.0.1", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": false, + "inputs": { + "low": { "name": "low", "label": "", "value": 0 }, + "high": { "name": "high", "label": "", "value": 2147483647 } + } + }, + "position": { "x": 700, "y": 400 } + }, + { + "id": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "type": "invocation", + "data": { + "id": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "type": "wan_denoise", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": true, + "useCache": true, + "inputs": { + "transformer": { "name": "transformer", "label": "" }, + "positive_conditioning": { "name": "positive_conditioning", "label": "" }, + "negative_conditioning": { "name": "negative_conditioning", "label": "" }, + "ref_image": { "name": "ref_image", "label": "" }, + "latents": { "name": 
"latents", "label": "" }, + "denoise_mask": { "name": "denoise_mask", "label": "" }, + "denoising_start": { "name": "denoising_start", "label": "", "value": 0 }, + "denoising_end": { "name": "denoising_end", "label": "", "value": 1 }, + "add_noise": { "name": "add_noise", "label": "", "value": true }, + "guidance_scale": { "name": "guidance_scale", "label": "CFG", "value": 5.0 }, + "guidance_scale_low_noise": { "name": "guidance_scale_low_noise", "label": "CFG (Low)" }, + "width": { "name": "width", "label": "", "value": 1024 }, + "height": { "name": "height", "label": "", "value": 1024 }, + "steps": { "name": "steps", "label": "", "value": 30 }, + "seed": { "name": "seed", "label": "", "value": 0 } + } + }, + "position": { "x": 1100, "y": -50 } + }, + { + "id": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "type": "invocation", + "data": { + "id": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "type": "wan_l2i", + "version": "1.0.0", + "label": "", + "notes": "", + "isOpen": true, + "isIntermediate": false, + "useCache": true, + "inputs": { + "board": { "name": "board", "label": "" }, + "metadata": { "name": "metadata", "label": "" }, + "latents": { "name": "latents", "label": "" }, + "vae": { "name": "vae", "label": "" } + } + }, + "position": { "x": 1550, "y": -50 } + } + ], + "edges": [ + { + "id": "edge-loader-transformer-denoise", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "transformer", + "targetHandle": "transformer" + }, + { + "id": "edge-loader-t5-pos", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "sourceHandle": "wan_t5_encoder", + "targetHandle": "wan_t5_encoder" + }, + { + "id": "edge-loader-t5-neg", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "sourceHandle": "wan_t5_encoder", + "targetHandle": 
"wan_t5_encoder" + }, + { + "id": "edge-loader-vae-l2i", + "type": "default", + "source": "1a0e6c7b-9d2f-4b3c-8e1a-2f3d4c5b6a7e", + "target": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "sourceHandle": "vae", + "targetHandle": "vae" + }, + { + "id": "edge-pos-cond-denoise", + "type": "default", + "source": "2b1f7d8c-ae3f-5c4d-9f2b-3a4e5d6c7b8f", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "conditioning", + "targetHandle": "positive_conditioning" + }, + { + "id": "edge-neg-cond-denoise", + "type": "default", + "source": "3c2a8e9d-bf4a-6d5e-af3c-4b5f6e7d8c9a", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "conditioning", + "targetHandle": "negative_conditioning" + }, + { + "id": "edge-rand-seed-denoise", + "type": "default", + "source": "5e4cab0b-d16c-8faf-c05e-6d7baf90ebbc", + "target": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "sourceHandle": "value", + "targetHandle": "seed" + }, + { + "id": "edge-denoise-latents-l2i", + "type": "default", + "source": "4d3b9faf-c05b-7e6f-bf4d-5c6a7f8e9dab", + "target": "6f5dcb1c-e27d-9fb0-d16f-7e8cbaa1fcbd", + "sourceHandle": "latents", + "targetHandle": "latents" + } + ] +} From 0344f4ba746c162d3e47be60bd68bee01c482b73 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 11 May 2026 23:07:14 -0400 Subject: [PATCH 11/12] chore(docs): remove old planning doc --- USER_ISOLATION_IMPLEMENTATION.md | 169 ---------- WAN_2_2_IMPLEMENTATION.md | 561 ------------------------------- 2 files changed, 730 deletions(-) delete mode 100644 USER_ISOLATION_IMPLEMENTATION.md delete mode 100644 WAN_2_2_IMPLEMENTATION.md diff --git a/USER_ISOLATION_IMPLEMENTATION.md b/USER_ISOLATION_IMPLEMENTATION.md deleted file mode 100644 index 324c40db562..00000000000 --- a/USER_ISOLATION_IMPLEMENTATION.md +++ /dev/null @@ -1,169 +0,0 @@ -# User Isolation Implementation Summary - -This document describes the implementation of user isolation features in the InvokeAI session queue and processing system to address 
issues identified in the enhancement request. - -## Issues Addressed - -### 1. Cross-User Image/Preview Visibility -**Problem:** When two users are logged in simultaneously and one initiates a generation, the generation preview shows up in both users' browsers and the generated image gets saved to both users' image boards. - -**Solution:** Implemented socket-level event filtering based on user authentication: - -#### Backend Changes (`invokeai/app/api/sockets.py`): -- Added socket authentication middleware in `_handle_connect()` method -- Extracts JWT token from socket auth data or HTTP headers -- Verifies token using existing `verify_token()` function -- Stores `user_id` and `is_admin` in socket session for later use -- Modified `_handle_queue_event()` to filter events by user: - - For `QueueItemEventBase` events, only emit to: - - The user who owns the queue item (`user_id` matches) - - Admin users (`is_admin` is True) - - For general queue events, emit to all subscribers - -#### Event System Changes (`invokeai/app/services/events/events_common.py`): -- Added `user_id` field to `QueueItemEventBase` class -- Updated all event builders to include `user_id` from queue items: - - `InvocationStartedEvent.build()` - - `InvocationProgressEvent.build()` - - `InvocationCompleteEvent.build()` - - `InvocationErrorEvent.build()` - - `QueueItemStatusChangedEvent.build()` - -### 2. Batch Field Values Privacy -**Problem:** Users can see batch field values from generation processes launched by other users. 
- -**Solution:** Implemented field value sanitization at the API level: - -#### API Router Changes (`invokeai/app/api/routers/session_queue.py`): -- Created `sanitize_queue_item_for_user()` helper function - - Clears `field_values` for non-admin users viewing other users' items - - Admins and item owners can see all field values -- Updated endpoints to require authentication and sanitize responses: - - `list_all_queue_items()` - Added `CurrentUser` dependency - - `get_queue_items_by_item_ids()` - Added `CurrentUser` dependency - - `get_queue_item()` - Added `CurrentUser` dependency - -### 3. Queue Updates Across Browser Windows -**Problem:** When the job queue tab is open in multiple browsers and a generation is begun in one browser window, the queue does not update in the other window. - -**Status:** This issue is likely resolved by the socket authentication and event filtering changes. The existing socket subscription mechanism (`subscribe_queue` event) already supports multiple connections per user. Testing is required to confirm this works correctly with the new authentication flow. - -### 4. User Information Display -**Problem:** Queue table lacks user identification, making it difficult to know who launched which job. 
- -**Solution:** Added user information to queue items and UI: - -#### Database Layer (`invokeai/app/services/session_queue/session_queue_sqlite.py`): -- Updated SQL queries to JOIN with `users` table -- Modified methods to fetch user information: - - `get_queue_item()` - Now selects `display_name` and `email` from users table - - `dequeue()` - Includes user info - - `get_next()` - Includes user info - - `get_current()` - Includes user info - - `list_all_queue_items()` - Includes user info - -#### Data Model Changes (`invokeai/app/services/session_queue/session_queue_common.py`): -- Added optional fields to `SessionQueueItem`: - - `user_display_name: Optional[str]` - Display name from users table - - `user_email: Optional[str]` - Email from users table - - Note: `user_id` field already existed from Migration 25 - -#### Frontend UI Changes: -- **Constants** (`constants.ts`): Added `user: '8rem'` column width -- **Header** (`QueueListHeader.tsx`): Added "User" column header -- **Item Component** (`QueueItemComponent.tsx`): - - Added logic to display user information (display_name → email → user_id) - - Added user column to queue item row - - Added tooltip with full username on hover - - Added "Hidden for privacy" message when field_values are null for non-owned items -- **Localization** (`en.json`): Added translations: - - `"user": "User"` - - `"fieldValuesHidden": "Hidden for privacy"` - -## Security Considerations - -### Token Verification -- Tokens are verified using the existing `verify_token()` function from `invokeai.app.services.auth.token_service` -- Invalid or missing tokens default to "system" user with non-admin privileges -- Socket connections without valid tokens are still accepted for backward compatibility but have limited access - -### Data Privacy -- Field values are only visible to: - - The user who created the queue item - - Admin users -- Non-admin users viewing other users' queue items see "Hidden for privacy" instead of field values - -### Admin 
Privileges -- Admin users can see all queue events and field values across all users -- Admin status is determined from the JWT token's `is_admin` field - -## Migration Notes - -No database migration is required. The changes leverage: -- Existing `user_id` column in `session_queue` table (added in Migration 25) -- Existing `users` table (added in Migration 25) -- SQL LEFT JOINs to fetch user information (gracefully handles missing user records) - -## Testing Requirements - -### Backend Testing -1. **Socket Authentication:** - - Verify valid tokens are accepted and user context is stored - - Verify invalid tokens default to system user - - Verify expired tokens are rejected - -2. **Event Filtering:** - - User A should only receive events for their own queue items - - Admin users should receive all events - - Non-admin users should not receive events from other users - -3. **Field Value Sanitization:** - - Non-admin users should see null field_values for other users' items - - Admins should see all field values - - Users should see their own field values - -### Frontend Testing -1. **UI Display:** - - User column should display in queue list - - Display name should be shown when available - - Email should be shown as fallback when display name is missing - - User ID should be shown when both display name and email are missing - - Tooltip should show full username on hover - -2. **Field Values Display:** - - "Hidden for privacy" message should appear when viewing other users' items - - Own items should show field values normally - -3. **Multi-Browser Testing:** - - Open queue tab in two browsers with different users - - Start generation in one browser - - Verify other browser doesn't see the preview/progress - - Verify admin user can see all generations - -### Integration Testing -1. Multi-user scenarios with simultaneous generations -2. Queue updates across multiple browser windows -3. Admin vs. non-admin privilege differentiation -4. 
Socket reconnection handling - -## Known Limitations - -1. **TypeScript Types:** - - The OpenAPI schema needs to be regenerated to include new fields - - Run: `cd invokeai/frontend/web && python ../../../scripts/generate_openapi_schema.py | pnpm typegen` - -2. **Backward Compatibility:** - - System user ("system") entries will not have display name or email - - Existing queue items from before Migration 25 will have user_id="system" - -3. **Socket.IO Session Storage:** - - Socket.IO's in-memory session storage may not persist across server restarts - - Consider implementing persistent session storage if needed for production - -## Future Enhancements - -1. Add user filtering to queue list (show only my items vs. all items) -2. Add permission system for queue management operations (cancel, retry, delete) -3. Implement queue item ownership transfer for administrative purposes -4. Add audit logging for queue operations with user attribution -5. Consider implementing user-specific queue limits or quotas diff --git a/WAN_2_2_IMPLEMENTATION.md b/WAN_2_2_IMPLEMENTATION.md deleted file mode 100644 index 1ef71daaa85..00000000000 --- a/WAN_2_2_IMPLEMENTATION.md +++ /dev/null @@ -1,561 +0,0 @@ -# Wan 2.2 Image Generation — Implementation Plan - -**Branch:** `lstein/feature/wan-image-2-2` -**Status:** Planning -**Owner:** Lincoln Stein - -## 0. Naming and Ground Rules - -- New base: `BaseModelType.Wan = "wan"` (single base for both A14B and TI2V-5B; variants distinguish them). -- Backend module path: `invokeai/backend/wan/` (mirrors `invokeai/backend/anima/`, `invokeai/backend/flux/`). -- Invocations: prefix `wan_*` (e.g. `wan_model_loader`, `wan_text_encoder`, `wan_denoise`, `wan_lora_loader`, `wan_image_to_latents`, `wan_latents_to_image`, `wan_controlnet`, `wan_ref_image`). 
-- Submodel layout (per Diffusers `WanPipeline` / `WanImageToVideoPipeline`): `transformer/` (A14B has both `transformer/` and `transformer_2/`), `text_encoder/` (UMT5-XXL), `tokenizer/`, `vae/`, `scheduler/`. -- Diffusers 0.37.0 already in `pyproject.toml` and exposes `WanPipeline`, `WanImageToVideoPipeline`, `WanTransformer3DModel`, `AutoencoderKLWan`. **No diffusers bump required.** - -## 1. Model Architecture Reality Check (verified against Diffusers 0.37.0) - -These shape and signature facts shape every later design decision: - -- `WanTransformer3DModel.__init__` defaults: `patch_size=(1,2,2)`, `text_dim=4096` (UMT5-XXL hidden), `in_channels=16`, `num_layers=40`, `num_attention_heads=40`, `attention_head_dim=128`. So a `text_dim` of 4096 is the strongest UMT5-XXL fingerprint. -- `WanTransformer3DModel.forward(hidden_states, timestep, encoder_hidden_states, encoder_hidden_states_image=None, ...)` — text via `encoder_hidden_states`, optional CLIP image embedding via `encoder_hidden_states_image` (this is the I2V path; we will not feed it for pure T2I but **will** for "reference image at frame 1"). -- `WanPipeline.__call__(prompt, ..., num_frames, guidance_scale, guidance_scale_2, ...)` — Diffusers already handles the two-expert swap when `transformer_2` is loaded; `guidance_scale` is for the high-noise expert and `guidance_scale_2` is for the low-noise expert. -- `AutoencoderKLWan.__init__` default: `z_dim=16`, `scale_factor_temporal=4`, `scale_factor_spatial=8`. **Standard Wan VAE used by A14B.** -- TI2V-5B uses a larger Wan2.2-VAE with `z_dim=48`. Latent channels are the strongest discriminator on disk. -- For `num_frames=1`, the temporal patch dimension collapses, but Wan still expects `[B, C, T=1, H, W]` 5D tensors. Latents-to-image will need to squeeze T just like Anima already does. -- A14B carries **two transformers** (high-noise + low-noise expert), shipped as separate `transformer/` and `transformer_2/` subfolders. 
Each is ~14B params — drives every VRAM and quantization decision. - -## 2. Phasing Summary - -| Phase | Goal | Independent? | -|---|---|---| -| 0 | Probe + taxonomy + base type | foundational (gate for all others) | -| 1 | Diffusers-format MVP T2I (TI2V-5B first) | depends on 0 | -| 2 | A14B dual-expert loader + denoise hooks + **Low VRAM mode** | depends on 1 | -| 3 | Standalone VAE + UMT5-XXL encoder configs | depends on 0; can run parallel to 1/2 | -| 4 | GGUF transformer (single-file) — both experts | depends on 2, 3 | -| 5 | LoRA (single + dual-expert pairing) | depends on 2 | -| 6 | ControlNet | depends on 2 | -| 7 | Reference image (frame-1 I2V conditioning) | depends on 2 | -| 8 | Inpaint | depends on 2 (uses `RectifiedFlowInpaintExtension`) | -| 9 | Frontend wiring (model picker, params slice, graph builder) | depends on 1 minimum | -| 10 | Starter models, docs | last | - -Phases 5–8 can all run in parallel after Phase 2 lands. Phase 4 is the largest single unit of work. - ---- - -## VRAM Targets and the Low VRAM Mode - -Dev hardware: 16 GB VRAM card. Most InvokeAI users are at 16 GB or below, so the low-VRAM path is mandatory regardless. - -| Config | Active VRAM (transformer only) | Verdict | -|---|---|---| -| TI2V-5B @ bf16 | ~10 GB | Comfortable native fit | -| A14B @ bf16 (one expert resident) | ~28 GB per expert | Won't fit; needs CPU offload | -| A14B @ Q8 GGUF (one expert) | ~14 GB | Tight; possible with offload of encoder/VAE | -| A14B @ Q4_K_M GGUF (one expert) | ~7 GB | Comfortable; realistic 16 GB path | - -UMT5-XXL is ~5B params (~10 GB bf16) but only encodes once before denoise — it gets moved off GPU before the transformer runs. - -**Low VRAM mode** (revised in Phase 2 implementation): InvokeAI's model cache already exposes partial loading via `InvokeAIAppConfig.enable_partial_loading` (default `True`). When a model exceeds the VRAM budget the cache loads what fits and streams the rest from RAM per forward pass. 
Combined with `_ExpertSwapper` (which keeps only one expert locked at a time, freeing the other for cache eviction), the A14B-at-bf16-on-16-GB scenario is **already handled by existing infrastructure** — no `low_vram` field on `wan_denoise` is required. Users with less VRAM than the model size get the slow-but-functional path automatically. - ---- - -## Phase 0 — Foundation (taxonomy, base type, FE enum, probe scaffolding) - -### Backend changes - -- `invokeai/backend/model_manager/taxonomy.py` - - Add `Wan = "wan"` to `BaseModelType`. - - Add `class WanVariantType(str, Enum)` with `T2V_A14B = "t2v_a14b"` and `TI2V_5B = "ti2v_5b"`. - - Add `WanVariantType` to the `AnyVariant` union and to `variant_type_adapter`. -- `invokeai/backend/model_manager/configs/main.py` - - Add `MainModelDefaultSettings.from_base(BaseModelType.Wan, variant=...)`: A14B → `cls(steps=40, cfg_scale=4.0, width=1024, height=1024)`; TI2V-5B → `cls(steps=30, cfg_scale=5.0, width=1024, height=1024)`. Tune later. -- `invokeai/app/util/step_callback.py` - - Add `BaseModelType.Wan` branch. 16-channel projection matrix for A14B; TI2V-5B's 48-channel preview is a TODO (non-blocking). -- `invokeai/app/services/shared/sqlite_migrator/migrations/migration_NN.py` - - New migration to widen any base-model enum constraint (mirror Anima's `migration_26.py`). Bump `model_records_schema_version`. - -### Frontend changes - -- `invokeai/frontend/web/src/features/nodes/types/common.ts` — add `'wan'` to `zBaseModelType`/`zMainModelBase`; add `zWanVariantType`; include in `zAnyModelVariant`. -- `invokeai/frontend/web/src/features/parameters/types/constants.ts` — add `wan` to `CLIP_SKIP_MAP` with `maxClip=0`. -- `invokeai/frontend/web/src/features/nodes/types/constants.ts` — add `WanMainModelField` colour entry. - -### Decisions - -- **One base for both, or split?** One base (`Wan`) with two variants. They share text encoder (UMT5-XXL) and pipeline ergonomics. Splitting would double FE selectors for marginal gain. 
-- **Naming**: prefer `wan` over `wan-image` — Wan 2.3 is coming. - -### Test surface - -- `tests/backend/model_manager/configs/test_main_config.py` — migration adds new enum, existing rows still validate. - ---- - -## Phase 1 — Diffusers Pipeline MVP (TI2V-5B, T2I single-frame) - -Start with TI2V-5B because it's smaller (single transformer ~5B, fits ~16 GB), avoids the dual-expert complication, and validates the encoder/VAE/denoise path before adding the MoE layer. - -### Probe / config - -- `invokeai/backend/model_manager/configs/main.py` - - Add `Main_Diffusers_Wan_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base)`: - - `base: Literal[BaseModelType.Wan]`, `variant: WanVariantType`. - - `from_model_on_disk` accepts class names `{"WanPipeline", "WanImageToVideoPipeline", "WanTransformer3DModel"}`. - - Variant detection: load `transformer/config.json`; if `in_channels` indicates 48-ch latents → TI2V-5B; if 16-ch and a sibling `transformer_2/` exists → A14B. Filename heuristic fallback. - - `has_dual_expert: bool` field set at probe time. -- `invokeai/backend/model_manager/configs/factory.py` — add `Main_Diffusers_Wan_Config` to `AnyModelConfig` union. - -### Loader - -- `invokeai/backend/model_manager/load/model_loaders/wan.py` (new). Mirror `qwen_image.py`. Initial scope: TI2V-5B only. - - Transformer: `WanTransformer3DModel.from_pretrained(model_path / "transformer", torch_dtype=bfloat16)`. - - VAE: `AutoencoderKLWan.from_pretrained(model_path / "vae", torch_dtype=bfloat16)`. - - Text encoder: standard `T5EncoderModel` / `T5TokenizerFast` from `text_encoder/` and `tokenizer/`. **Verify `model_type` in config.json — if `umt5`, use `UMT5EncoderModel` from transformers.** - -### Invocation nodes (TI2V-5B only) - -- `wan_model_loader.py` — outputs `transformer: TransformerField`, `vae: VAEField`, `text_encoder: WanTextEncoderField`. -- `model.py` — add `class WanTextEncoderField(BaseModel)` with `tokenizer`, `text_encoder`, `loras`. 
-- `wan_text_encoder.py` — runs UMT5-XXL, returns `WanConditioningField`. Output `WanConditioning` dataclass: `prompt_embeds: [seq_len, 4096]` + `prompt_attention_mask`. Add `WanConditioningInfo` to `invokeai/backend/stable_diffusion/diffusion/conditioning_data.py`. -- `fields.py` — add `WanConditioningField` and `wan_model` field-description string. -- `wan_image_to_latents.py` — VAE encode, mirroring `qwen_image_image_to_latents.py`. Wan VAE expects 5D `[B,3,1,H,W]`. -- `wan_latents_to_image.py` — VAE decode, squeeze T. -- `wan_denoise.py` — heart of the work for this phase. - -### Denoise loop design - -**Decision: bypass `WanPipeline.__call__` and drive the loop ourselves (Option A).** Same as every other InvokeAI backend — keeps LoRA / ControlNet / inpaint plumbing consistent. - -For Phase 1 (single transformer): -- `invokeai/backend/wan/sampling_utils.py` — `get_noise(...)` returning 5D `[1, z_dim, 1, H/8, W/8]`; a `WanScheduler` (start with `FlowMatchEulerDiscreteScheduler` from Diffusers). -- Pseudocode: - ```python - latents = get_noise(...) or noised init - for t in timesteps: - noise_pred_cond = transformer(latents, t, prompt_embeds, ...) - if cfg: - noise_pred_uncond = transformer(latents, t, neg_embeds, ...) - noise_pred = noise_pred_uncond + scale * (cond - uncond) - latents = scheduler.step(noise_pred, t, latents) - step_callback(...) - return latents - ``` -- Reuse `RectifiedFlowInpaintExtension` from `invokeai.backend.rectified_flow.rectified_flow_inpaint_extension`. - -### Open questions - -- Does `WanPipeline` use `FlowMatchEulerDiscreteScheduler`? Confirm against `Wan-AI/Wan2.2-TI2V-5B-Diffusers/scheduler/scheduler_config.json`. -- New `WanT5EncoderConfig` rather than reuse of `T5Encoder_T5Encoder_Config`? **Yes** — UMT5-XXL is not bit-compatible with T5-XXL. See Phase 3. -- Does `WanTransformer3DModel` accept attention mask through `attention_kwargs`? 
- -### Test surface - -- `tests/app/invocations/test_wan_text_encoder.py` — output shape sanity. -- `tests/app/invocations/test_wan_denoise.py` (slow, gated by `INVOKEAI_HEAVY_TESTS=1`) — 4-step denoise on TI2V-5B at 256x256, assert non-NaN. -- `tests/backend/model_manager/configs/test_wan_config.py` — variant detection. - -### Files touched in Phase 1 - -- `invokeai/backend/model_manager/taxonomy.py` -- `invokeai/backend/model_manager/configs/main.py` -- `invokeai/backend/model_manager/configs/factory.py` -- `invokeai/backend/model_manager/load/model_loaders/wan.py` (new) -- `invokeai/app/invocations/wan_model_loader.py` (new) -- `invokeai/app/invocations/wan_text_encoder.py` (new) -- `invokeai/app/invocations/wan_denoise.py` (new) -- `invokeai/app/invocations/wan_image_to_latents.py`, `wan_latents_to_image.py` (new) -- `invokeai/backend/wan/__init__.py`, `sampling_utils.py`, `conditioning_data.py` (new) - ---- - -## Phase 2 — Dual-Expert MoE (Wan2.2-T2V-A14B) + Low VRAM Mode - -### MoE detail - -Wan 2.2 A14B runs two `WanTransformer3DModel` instances. `WanPipeline` swaps based on a noise threshold. `boundary_ratio` (default 0.875) lives on the scheduler config — the high-noise expert handles the first 12.5% of denoising, low-noise handles the rest. - -### Loader changes - -- Extend `SubModelType` with `Transformer2 = "transformer_2"`. Cleanest path: each expert is its own cacheable entity, `apply_smart_model_patches` LoRAs each independently, matches Diffusers folder layout. Mirror in FE `common.ts`. - -### TransformerField split - -- New `WanTransformerField`: - ```python - class WanTransformerField(BaseModel): - transformer_high: ModelIdentifierField - transformer_low: ModelIdentifierField | None - loras_high: List[LoRAField] = [] - loras_low: List[LoRAField] = [] - boundary_ratio: float = 0.875 - ``` - In `invokeai/app/invocations/model.py`. Single explicit place where MoE-ness is encoded. -- `wan_model_loader.py` populates both. 
TI2V-5B leaves `transformer_low` as `None`. - -### Denoise loop changes - -- `wan_denoise.py`: - ```python - with ExitStack() as exit_stack: - _, transformer_high = exit_stack.enter_context(context.models.load(field.transformer_high).model_on_device()) - transformer_low = None - if field.transformer_low is not None: - _, transformer_low = exit_stack.enter_context(context.models.load(field.transformer_low).model_on_device()) - apply_loras(transformer_high, field.loras_high) - if transformer_low: apply_loras(transformer_low, field.loras_low) - - for i, t in enumerate(timesteps): - model = transformer_high - if transformer_low is not None and (t / t_max) < boundary_ratio: - model = transformer_low - noise_pred = model(...) - # ... - ``` - -### VRAM strategy (default mode) - -- Both experts in **system RAM** (~28 GB at bf16; cheap in 2026). -- Only the active expert on **GPU**. Boundary crossing once per denoise → ~2s CPU↔GPU transfer overhead. -- Implementation: re-enter `model_on_device()` for the other expert after boundary crossing. - -### Low VRAM mode (new — needed for 16 GB dev card and most users) - -- New `low_vram: bool` field on `wan_denoise` (also a global setting). -- Mode A (default): RAM-resident, GPU-juggle on boundary as above. -- Mode B (low VRAM): wrap each transformer with `enable_model_cpu_offload()` semantics — model stays on CPU, individual layers move to GPU on forward call. Slow (~minutes/step at bf16, but seconds/step at Q4 GGUF). Let users render even when full active-expert won't fit. -- Mode B also useful for keeping the text encoder CPU-resident the whole time on tight VRAM. - -### Dual-expert LoRA pairing - -- Community releases ship paired files: `xxx_high_noise.safetensors` + `xxx_low_noise.safetensors`. -- New `wan_lora_loader` accepts either single LoRA (auto-applied to both, with quality warning) or explicit `lora_high` + `lora_low` pair. 
-- Probe identifies each as `LoRA_LyCORIS_Wan_Config(base=Wan)` with optional `expert: Literal["high","low"] | None` from filename heuristic (`"high_noise"`/`"low_noise"` substring). - -### Files touched in Phase 2 - -- `invokeai/backend/model_manager/taxonomy.py` (add `Transformer2`) -- `invokeai/backend/model_manager/load/model_loaders/wan.py` -- `invokeai/app/invocations/model.py` (add `WanTransformerField`) -- `invokeai/app/invocations/wan_model_loader.py` (extend for dual) -- `invokeai/app/invocations/wan_denoise.py` (MoE swap + low VRAM mode) -- `invokeai/app/invocations/wan_lora_loader.py` -- `invokeai/frontend/web/src/features/nodes/types/common.ts` (Transformer2) - -### Open questions - -- `boundary_ratio` units in actual `scheduler_config.json` — timestep fraction vs sigma threshold? Read source of truth from disk. -- Expose `boundary_ratio` as advanced UI control? **Yes** — useful for experimentation, default from config. -- Expose `cfg_scale_low_noise` separately from `cfg_scale`? **Yes** as advanced override; default both to same value. - -### Test surface - -- Mock dual-expert load + boundary crossing: ensure correct expert called at each step. Fake transformer that records calls. -- Low VRAM mode smoke test against TI2V-5B (single-expert), confirm output matches default mode. - ---- - -## Phase 3 — Standalone VAE + UMT5-XXL Encoder Configs - -Makes the GGUF flow possible by allowing users to install only encoder + VAE + quantized transformer. - -### VAE configs - -- `invokeai/backend/model_manager/configs/vae.py` - - `_is_wan_vae(state_dict)`: 5D conv weights and `decoder.conv_in.weight.shape[1] in {16, 48}`. - - `VAE_Checkpoint_Wan_Config(Checkpoint_Config_Base, Config_Base)` with `base=Wan`, `latent_channels: Literal[16, 48]`. Detect via `state_dict["decoder.conv_in.weight"].shape[1]`. - - Update `_validate_looks_like_vae` to exclude Wan VAEs (mirror Qwen Image / FLUX.2 exclusion at lines 113-118). 
- - `VAE_Diffusers_Wan_Config` for diffusers-format Wan VAE (`AutoencoderKLWan`). -- `factory.py` — add both new VAE configs to `AnyModelConfig`. - -### VAE loader - -- `wan.py` — register `(base=Wan, type=VAE, format=Checkpoint)` and `format=Diffusers`. -- For checkpoint: build `AutoencoderKLWan(z_dim=...)` based on detected latent channels, then `model.load_state_dict(sd, assign=True)`. **VAE in fp16 is broken — use bf16** (FluxVAELoader pattern). - -### UMT5-XXL encoder - -- `invokeai/backend/model_manager/configs/wan_t5_encoder.py` (new) — `WanT5Encoder_Diffusers_Config` and `WanT5Encoder_Checkpoint_Config`. -- New config class **rather than reuse** of `T5Encoder_T5Encoder_Config`: - - UMT5-XXL has `model_type: "umt5"` in transformers. - - Different vocabulary — InvokeAI shouldn't let users wire a FLUX T5 into the Wan slot. -- New `ModelType.WanT5Encoder = "wan_t5_encoder"` and `ModelFormat.WanT5Encoder = "wan_t5_encoder"`. Add to taxonomy + FE enum. - -### Standalone-encoder loader - -- New class in `wan.py`: `(base=Any, type=WanT5Encoder, format=...)`. Loads `UMT5EncoderModel` for TextEncoder, `T5TokenizerFast` for Tokenizer. Mirror `T5EncoderLoader` in `flux.py:426-505`. - -### Component-source loader pattern - -- `wan_model_loader.py` follows `qwen_image_model_loader.py` pattern: optional standalone `vae_model` and `wan_t5_encoder_model` inputs override main model's submodels. Required when main model is single-file GGUF. - -### Files touched in Phase 3 - -- `invokeai/backend/model_manager/configs/vae.py` -- `invokeai/backend/model_manager/configs/wan_t5_encoder.py` (new) -- `invokeai/backend/model_manager/configs/factory.py` -- `invokeai/backend/model_manager/load/model_loaders/wan.py` -- `invokeai/backend/model_manager/taxonomy.py` -- FE: `isWanVAEModelConfig`, `isWanT5EncoderModelConfig` type guards in `services/api/types.ts`; `useWanVAEModels`, `useWanT5EncoderModels` hooks in `services/api/hooks/modelsByType.ts`. 
- -### Open questions - -- A14B and TI2V-5B ship the same UMT5-XXL `text_encoder/`? Verify; if yes, one encoder config covers both. - ---- - -## Phase 4 — GGUF Quantization for Both Experts - -Highest user impact: brings Wan 2.2 A14B onto consumer hardware. - -### Probe / config - -- `invokeai/backend/model_manager/configs/main.py` - - `Main_GGUF_Wan_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base)` with `base=Wan`, `format=GGUFQuantized`, `variant: WanVariantType`, `expert: Literal["high","low","none"] = "none"`. - - Detection: GGML tensors + Wan-specific keys (`blocks.0.attn1.to_q.weight`, `attn2.to_k.weight` shape `[head_dim*heads, 4096]` for UMT5 cross-attn). - - Expert from filename: `"high_noise"` / `"low_noise"` substring; fall back to `"none"`. **User must confirm** when ambiguous. - -### Loader - -- `wan.py` — `(base=Wan, type=Main, format=GGUFQuantized)`. Mirror `QwenImageGGUFCheckpointModel`: - 1. `gguf_sd_loader(model_path, compute_dtype=bfloat16)` - 2. Strip ComfyUI prefixes (`model.diffusion_model.`, `diffusion_model.`). - 3. Auto-detect arch (count `blocks.X.` keys → `num_layers`; `attn1.to_q.weight.shape[0]` → hidden dim). - 4. `with accelerate.init_empty_weights(): model = WanTransformer3DModel(**inferred_config)` - 5. `model.load_state_dict(sd, strict=False, assign=True)`. -- A14B's two GGUFs: same registration handles both — file alone is the unit, called twice by `wan_model_loader` invocation. - -### Pairing in the model loader invocation - -- UI sketch: - ``` - Transformer (High Noise) [GGUF or Diffusers] - Transformer (Low Noise) [GGUF or Diffusers, optional — empty for TI2V-5B] - Component Source [Diffusers, optional — for VAE/encoder] - Standalone VAE [optional] - Standalone Wan T5 Encoder [optional] - Low VRAM mode [bool] - ``` -- Low Noise field hidden on FE when High Noise variant is TI2V-5B. 
- -### Files touched in Phase 4 - -- `invokeai/backend/model_manager/configs/main.py` -- `invokeai/backend/model_manager/configs/factory.py` -- `invokeai/backend/model_manager/load/model_loaders/wan.py` -- `invokeai/app/invocations/wan_model_loader.py` (extend pickers) - -### Open questions - -- Reference GGUFs: `city96/Wan2.2-T2V-A14B-gguf`, `QuantStack/Wan2.2-TI2V-5B-GGUF`. Verify key naming matches Diffusers' `WanTransformer3DModel` exactly. -- If only one of the two A14B experts is GGUF'd, fall back to bf16 for the other (mixed quant within one denoise loop). Loader supports this — each transformer slot has independent format. - ---- - -## Phase 5 — LoRA - -### Probe / config - -- `invokeai/backend/model_manager/configs/lora.py` - - `_is_wan_lora(state_dict)`: keys like `blocks.0.attn1.to_q.lora_A.weight` / `lora_unet_blocks_0_attn1_to_q.lora_down.weight` / `transformer.blocks.0.attn1.to_q.lora_A.weight`. Exclude clashes with Anima (`cross_attn`/`self_attn`) and FLUX (`double_blocks`, `single_blocks`). - - `LoRA_LyCORIS_Wan_Config(LoRA_LyCORIS_Config_Base, Config_Base)` with `base=Wan`, optional `expert: Literal["high","low"] | None`. - - Register in `factory.py`. - -### LoRA conversion - -- `invokeai/backend/patches/lora_conversions/wan_lora_constants.py` (new) — `WAN_LORA_TRANSFORMER_PREFIX = "lora_transformer-"`. -- `invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py` (new) — handle three formats: - - **Kohya**: `lora_unet_blocks_X_...` → diffusers `blocks.X....` - - **Diffusers PEFT**: `transformer.blocks.X.attn1.to_q.lora_A.weight` → strip `transformer.` prefix. - - **Native diffusion_model**: `diffusion_model.blocks.X....` → strip prefix. -- Start from `qwen_image_lora_conversion_utils.py` and adjust prefixes/key-renaming. - -### Loader integration - -- `invokeai/backend/model_manager/load/model_loaders/lora.py` — add `BaseModelType.Wan` branch calling `lora_model_from_wan_state_dict(state_dict, alpha=None)`. 
- -### Invocation node - -- `invokeai/app/invocations/wan_lora_loader.py`: - - Single LoRA mode (default): one picker, auto-applied to both experts. - - Dual LoRA mode: two pickers (high / low). Validates bases are both Wan and at most one of each `expert`. - - Mirrors `AnimaLoRALoaderInvocation` + `AnimaLoRACollectionLoader`. -- Output: `WanLoRALoaderOutput` containing the `WanTransformerField` with updated `loras_high` / `loras_low`. - -### Denoise integration - -- `wan_denoise.py` — when entering each transformer's `model_on_device()` context, apply `LayerPatcher.apply_smart_model_patches(model=transformer_high, patches=loras_high_iter, prefix=WAN_LORA_TRANSFORMER_PREFIX, ...)`. Pattern from `flux_denoise.py:434-443`. - -### Files touched in Phase 5 - -- `invokeai/backend/model_manager/configs/lora.py` -- `invokeai/backend/model_manager/configs/factory.py` -- `invokeai/backend/model_manager/load/model_loaders/lora.py` -- `invokeai/backend/patches/lora_conversions/wan_lora_constants.py` (new) -- `invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py` (new) -- `invokeai/app/invocations/wan_lora_loader.py` (new) -- `invokeai/app/invocations/wan_denoise.py` - ---- - -## Phase 6 — ControlNet - -Wan ControlNet ecosystem **less mature** than FLUX. Common community models target Wan2.1, with Wan2.2 ports trickling out. Treat with thrash risk. - -### Approach - -- `invokeai/backend/wan/controlnet/` mirroring `invokeai/backend/flux/controlnet/`. Two state-dict identifiers initially: - - **InstantX-style**: `controlnet_x_embedder.` / `controlnet_blocks.` + `blocks.X.attn1.*` transformer keys. - - **Diffusers Wan ControlNet** (if/when one exists): `WanControlNetModel`-style. -- Configs: `ControlNet_Checkpoint_Wan_Config`, `ControlNet_Diffusers_Wan_Config` in `invokeai/backend/model_manager/configs/controlnet.py`. -- Loader: extend `wan.py`. 
-- Extension: `invokeai/backend/wan/extensions/wan_controlnet_extension.py` — callable taking control-image, returning per-block residuals. Pattern from `flux/extensions/instantx_controlnet_extension.py`. -- Invocation: `invokeai/app/invocations/wan_controlnet.py` — defines `WanControlNetField` and picker node. -- Denoise: `wan_denoise.py` accepts `control: WanControlNetField | list[WanControlNetField] | None`. - -### Risks - -- If community ControlNet weights only target one expert, need conditional injection. Defer until reference model in hand. -- ControlNet may want a separate VAE-encoded conditioning image (FLUX denoise pattern). -- **Gate on ecosystem maturity**: ship v1 without ControlNet if Wan2.2-native models aren't ready; add as v2. - ---- - -## Phase 7 — Reference Image (Frame-1 I2V Conditioning) - -Wan 2.2's I2V variant takes an image and produces a video starting from it. At `num_frames=1`, becomes a reference image — analogous to FLUX Kontext. - -### Decision: Path B — CLIP-vision conditioning via `encoder_hidden_states_image` - -`WanTransformer3DModel.forward` accepts `encoder_hidden_states_image: Optional[Tensor]`. I2V pipeline preprocesses the ref image through CLIP-vision and feeds those features. We do the same with stock `CLIPVisionModelWithProjection` (already in `invokeai/backend/model_manager/load/model_loaders/clip_vision.py`). - -Treats ref-image as conditioning rather than a different model. Simpler UI, no extra 30 GB checkpoint to install. Same approach as FLUX Kontext (`invokeai/backend/flux/extensions/kontext_extension.py`). - -### Implementation - -- `invokeai/backend/wan/extensions/wan_ref_image_extension.py` — encodes via CLIP vision, produces `image_embeds` for `encoder_hidden_states_image`. -- `wan_denoise.py` accepts `ref_image: WanRefImageConditioningField | None`. - -### Open questions - -- Wan2.2-T2V-A14B's `transformer/config.json` likely has `image_dim=None` (text-only); I2V variant has `image_dim != None`. 
**Ref-image path only works on I2V variants.** Either ship I2V as separate variant or detect and reject gracefully. Add `WanVariantType.I2V_A14B = "i2v_a14b"` if shipping. Probe via `transformer/config.json::image_dim`. - ---- - -## Phase 8 — Inpaint - -Inpaint = image-to-image with denoise mask. `RectifiedFlowInpaintExtension` already handles this for Anima and FLUX. Wan's flow-matching scheduler is mathematically identical; reuse should be straightforward. - -### Implementation - -- `wan_denoise.py` accepts `denoise_mask: DenoiseMaskField | None`. -- Reuse `RectifiedFlowInpaintExtension` from `invokeai.backend.rectified_flow.rectified_flow_inpaint_extension`. Anima needed `AnimaInpaintExtension` for shifted timesteps; for Wan, check if the scheduler shift introduces the same issue. If yes, subclass. - -### Files touched in Phase 8 - -- `invokeai/app/invocations/wan_denoise.py` (mask branch) -- Possibly `invokeai/backend/wan/wan_inpaint_extension.py` - ---- - -## Phase 9 — Frontend Wiring - -### Type definitions - -- `invokeai/frontend/web/src/services/api/types.ts` — `isWanMainModelConfig`, `isWanLoRAModelConfig`, `isWanVAEModelConfig`, `isWanT5EncoderModelConfig`, `isWanControlNetModelConfig`. Mirror Anima/Qwen Image at lines 286-322. -- `invokeai/frontend/web/src/services/api/hooks/modelsByType.ts` — `useWanMainModels`, `useWanVAEModels`, `useWanT5EncoderModels`, `useWanLoRAModels`, `useWanControlNetModels`. Mirror lines 105-113. - -### Params slice - -- `invokeai/frontend/web/src/features/controlLayers/store/paramsSlice.ts` - - Selectors: `selectWanVaeModel`, `selectWanT5EncoderModel`, `selectWanScheduler`, `selectWanBoundaryRatio`, `selectWanLowVramMode`. Anima sets the precedent. - - State: `wanVaeModel`, `wanT5EncoderModel`, etc. - -### Graph builder - -- `invokeai/frontend/web/src/features/nodes/util/graph/generation/buildWanGraph.ts` (new). Mirror `buildAnimaGraph.ts`. Differences: - - Two transformer pickers when variant is A14B. 
- - Dual-expert LoRA collection node. - - Separate VAE / WanT5Encoder pickers (GGUF requires them). - - Low VRAM toggle. -- `invokeai/frontend/web/src/features/nodes/util/graph/generation/addWanLoRAs.ts` (new). -- `invokeai/frontend/web/src/features/nodes/util/graph/types.ts` — register Wan in `GraphBuilderArg`. -- Graph dispatcher (`buildGenerationTabGraph.ts`) — add `case 'wan'`. - -### UI - -- ModelPicker, ControlLayer toolbox iterate over `BaseModelType` so adding `'wan'` should propagate. Audit `ModelPicker.tsx` for hardcoded base lists. - ---- - -## Phase 10 — Starter Models, Migration, Docs - -### Starter models - -- `invokeai/backend/model_manager/starter_models.py` — append `# region Wan` block: - ```python - wan_t5_encoder = StarterModel(name="Wan T5 Encoder (UMT5-XXL)", - base=BaseModelType.Any, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::text_encoder+tokenizer", - type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder, ...) - wan_vae = StarterModel(name="Wan VAE", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers::vae/diffusion_pytorch_model.safetensors", - type=ModelType.VAE, format=ModelFormat.Checkpoint, ...) - wan_vae_2_2 = StarterModel(name="Wan2.2 VAE", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B-Diffusers::vae/...", - type=ModelType.VAE, ...) - wan_t2v_a14b = StarterModel(name="Wan 2.2 T2V A14B", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-T2V-A14B-Diffusers", - type=ModelType.Main, variant=WanVariantType.T2V_A14B, ...) - wan_t2v_a14b_high_q4 = StarterModel(name="Wan 2.2 T2V A14B High Noise (Q4_K_M)", - base=BaseModelType.Wan, - source="https://huggingface.co/city96/Wan2.2-T2V-A14B-gguf/resolve/main/wan2.2-t2v-a14b-high-noise-Q4_K_M.gguf", - ..., dependencies=[wan_t5_encoder, wan_vae]) - wan_t2v_a14b_low_q4 = ... - wan_ti2v_5b = StarterModel(name="Wan 2.2 TI2V 5B", - base=BaseModelType.Wan, source="Wan-AI/Wan2.2-TI2V-5B-Diffusers", - variant=WanVariantType.TI2V_5B, ...) 
- ``` -- Verify each `source` URL exists before merge. - -### DB migration - -- New `migration_NN.py` only if `BaseModelType` Enum constraint rejects unknown values. Inspect `migration_26.py` (Anima) for pattern. - -### Docs - -- Update `docs/` (model support tables, getting-started for Wan). - ---- - -## Risk Register - -| # | Risk / Unknown | Mitigation | -|---|---|---| -| 1 | `WanPipeline` Option A bypass — enough hooks? | Source confirms `WanTransformer3DModel.forward` is callable directly. Low risk. | -| 2 | Dual-expert VRAM blowup | Default RAM-resident + GPU-juggle on boundary. Low VRAM mode covers tighter cases. GGUF Q4 → ~7 GB/expert. Document expectations. | -| 3 | GGUF availability for both A14B experts | `city96/Wan2.2-T2V-A14B-gguf` advertises both. Verify before Phase 4. Mixed-quant denoise as fallback. | -| 4 | UMT5-XXL vs T5-XXL distinction | Strict probe via `model_type`. Separate `WanT5Encoder` type prevents cross-wiring. | -| 5 | Wan ControlNet ecosystem maturity | Phase 6 may slip — ship v1 without if Wan2.2-native models not ready, ControlNet as v2. | -| 6 | Single-frame inference is OOD | Empirically fine. Document as known property. | -| 7 | Boundary ratio variability | Read from `scheduler/scheduler_config.json::boundary_ratio` per-model. Default 0.875. | -| 8 | TI2V-5B's 48-channel VAE | Probe both 16/48 in `_is_wan_vae`. Denoise loop reads `z_dim` from VAE config, doesn't hardcode. | -| 9 | DB enum widening | Standard migration template (Anima's `migration_26.py`). Low risk. | -| 10 | Diffusers' modular `Wan22Blocks`/`WanModularPipeline` — use it? | No. Modular = extra moving part. Stick to `WanPipeline`/`WanTransformer3DModel`. | -| 11 | FE vitest tests for new base type | Mostly automatic via zod enum; audit `*.test.ts` mentioning `'anima'`. | -| 12 | Step preview latents for Wan | Reuse FLUX 16-channel matrix for A14B. 
TI2V-5B's 48-channel: degraded preview (slice 16) until proper RGB factors generated via `scripts/generate_vae_linear_approximation.py`. | - ---- - -## Recommended Working Cadence - -1. Phases 0 + 1 (TI2V-5B Diffusers MVP) — one PR, foundational, no user-visible features but unblocks everything. -2. Phase 2 (A14B dual-expert + Low VRAM mode) — second PR, first user-visible feature. -3. Phase 3 (standalone components) — third PR, parallelizable with Phase 2. -4. Phase 4 (GGUF) — fourth PR, the big VRAM win. -5. Phase 5 (LoRA) — fifth PR. -6. Phases 6, 7, 8 in parallel — small targeted PRs. -7. Phase 9 (FE) tracks each backend phase. -8. Phase 10 (starters) gates final release. - -Total: ~4–6 weeks focused work. Schedule risk concentrated on Phase 6 (ControlNet) and Phase 4 (GGUF arch verification). From 945d45572126fa3d07fe6798c0a4df6677a391c3 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 12 May 2026 06:47:41 -0400 Subject: [PATCH 12/12] chore(backend): ruff --- invokeai/app/invocations/wan_denoise.py | 33 ++++---------- .../app/invocations/wan_image_to_latents.py | 4 +- .../app/invocations/wan_latents_to_image.py | 4 +- invokeai/app/invocations/wan_lora_loader.py | 1 - invokeai/app/invocations/wan_model_loader.py | 13 ++---- .../app/invocations/wan_ref_image_encoder.py | 12 ++--- invokeai/app/invocations/wan_text_encoder.py | 4 +- .../backend/model_manager/configs/factory.py | 2 +- .../backend/model_manager/configs/lora.py | 10 ++--- invokeai/backend/model_manager/configs/vae.py | 3 +- .../model_manager/load/model_loaders/lora.py | 2 +- .../model_manager/load/model_loaders/wan.py | 18 +++----- .../lora_conversions/anima_lora_constants.py | 4 +- .../lora_conversions/wan_lora_constants.py | 6 +-- .../wan_lora_conversion_utils.py | 11 ++--- .../wan/extensions/wan_ref_image_extension.py | 30 +++---------- tests/app/invocations/test_wan_denoise.py | 36 ++++----------- .../invocations/test_wan_expert_swapper.py | 7 ++- 
tests/app/invocations/test_wan_lora_loader.py | 37 ++++------------ .../configs/test_wan_lora_config.py | 35 +++++---------- .../test_wan_lora_probe_independence.py | 6 +-- .../configs/test_wan_t5_encoder_config.py | 8 +--- .../test_wan_lora_conversion_utils.py | 44 ++++--------------- tests/backend/wan/test_sampling_utils.py | 38 ++++++++++------ 24 files changed, 114 insertions(+), 254 deletions(-) diff --git a/invokeai/app/invocations/wan_denoise.py b/invokeai/app/invocations/wan_denoise.py index ca15ddd373a..7c5ae551234 100644 --- a/invokeai/app/invocations/wan_denoise.py +++ b/invokeai/app/invocations/wan_denoise.py @@ -66,9 +66,7 @@ def _resolve_variant(context: InvocationContext, transformer_field: WanTransform config = context.models.get_config(transformer_field.transformer) variant = getattr(config, "variant", None) if not isinstance(variant, WanVariantType): - raise ValueError( - f"Could not determine Wan variant from model {config.name!r}: variant is {variant!r}." - ) + raise ValueError(f"Could not determine Wan variant from model {config.name!r}: variant is {variant!r}.") return variant @@ -155,12 +153,8 @@ def get(self, label: str) -> Any: # Apply LoRA patches for this expert. GGUF transformers need sidecar # patching since direct patching of GGMLTensors isn't supported. 
- lora_factory = ( - self._high_lora_factory if label == self.HIGH else self._low_lora_factory - ) - is_quantized = ( - self._high_is_quantized if label == self.HIGH else self._low_is_quantized - ) + lora_factory = self._high_lora_factory if label == self.HIGH else self._low_lora_factory + is_quantized = self._high_is_quantized if label == self.HIGH else self._low_is_quantized lora_ctx: Any | None = None if lora_factory is not None: lora_ctx = LayerPatcher.apply_smart_model_patches( @@ -294,9 +288,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: scheduler = self._build_scheduler(context, device) - pos_cond = self._load_conditioning( - context, self.positive_conditioning, device=device, dtype=inference_dtype - ) + pos_cond = self._load_conditioning(context, self.positive_conditioning, device=device, dtype=inference_dtype) do_cfg = self.guidance_scale != 1.0 and self.negative_conditioning is not None neg_cond: WanConditioningInfo | None = None if do_cfg: @@ -322,9 +314,8 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: f"Reference-image dimensions ({self.ref_image.width}x{self.ref_image.height}) must " f"match denoise dimensions ({self.width}x{self.height})." ) - ref_condition = ( - context.tensors.load(self.ref_image.condition_tensor_name) - .to(device=device, dtype=inference_dtype) + ref_condition = context.tensors.load(self.ref_image.condition_tensor_name).to( + device=device, dtype=inference_dtype ) # Schedule timesteps. set_timesteps populates scheduler.timesteps and @@ -427,9 +418,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: # (default 1000). 
Diffusers' WanPipeline computes: # boundary_timestep = boundary_ratio * num_train_timesteps num_train_timesteps = int(scheduler.config.num_train_timesteps) - boundary_timestep = ( - self.transformer.boundary_ratio * num_train_timesteps if low_model is not None else None - ) + boundary_timestep = self.transformer.boundary_ratio * num_train_timesteps if low_model is not None else None # LoRA wiring. The high-noise expert uses ``transformer.loras``; the # low-noise expert uses ``transformer.loras_low_noise``, falling back @@ -440,9 +429,7 @@ def _run_diffusion(self, context: InvocationContext) -> torch.Tensor: low_loras = self.transformer.loras_low_noise or self.transformer.loras high_config = context.models.get_config(high_model) high_is_quantized = high_config.format == ModelFormat.GGUFQuantized - low_is_quantized = ( - low_config.format == ModelFormat.GGUFQuantized if low_config is not None else False - ) + low_is_quantized = low_config.format == ModelFormat.GGUFQuantized if low_config is not None else False def high_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: return self._lora_iterator(context, high_loras) @@ -515,9 +502,7 @@ def low_lora_factory() -> Iterable[Tuple[ModelPatchRaw, float]]: if inpaint_extension is not None: sigma_prev = float(sigmas[step_idx + 1]) latents_4d = latents.squeeze(2) - latents_4d = inpaint_extension.merge_intermediate_latents_with_init_latents( - latents_4d, sigma_prev - ) + latents_4d = inpaint_extension.merge_intermediate_latents_with_init_latents(latents_4d, sigma_prev) latents = latents_4d.unsqueeze(2) step_callback( diff --git a/invokeai/app/invocations/wan_image_to_latents.py b/invokeai/app/invocations/wan_image_to_latents.py index 7903f650e66..d5827110d0a 100644 --- a/invokeai/app/invocations/wan_image_to_latents.py +++ b/invokeai/app/invocations/wan_image_to_latents.py @@ -52,9 +52,7 @@ class WanImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard): @staticmethod def vae_encode(vae_info: LoadedModel, 
image_tensor: torch.Tensor) -> torch.Tensor: if not isinstance(vae_info.model, AutoencoderKLWan): - raise TypeError( - f"Expected AutoencoderKLWan for Wan VAE, got {type(vae_info.model).__name__}." - ) + raise TypeError(f"Expected AutoencoderKLWan for Wan VAE, got {type(vae_info.model).__name__}.") estimated_working_memory = estimate_vae_working_memory_flux( operation="encode", diff --git a/invokeai/app/invocations/wan_latents_to_image.py b/invokeai/app/invocations/wan_latents_to_image.py index 866d1cc8abc..049959646c1 100644 --- a/invokeai/app/invocations/wan_latents_to_image.py +++ b/invokeai/app/invocations/wan_latents_to_image.py @@ -51,9 +51,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput: vae_info = context.models.load(self.vae.vae) if not isinstance(vae_info.model, AutoencoderKLWan): - raise TypeError( - f"Expected AutoencoderKLWan for Wan VAE, got {type(vae_info.model).__name__}." - ) + raise TypeError(f"Expected AutoencoderKLWan for Wan VAE, got {type(vae_info.model).__name__}.") estimated_working_memory = estimate_vae_working_memory_flux( operation="decode", diff --git a/invokeai/app/invocations/wan_lora_loader.py b/invokeai/app/invocations/wan_lora_loader.py index 4cf6e1dfe2b..66034685e00 100644 --- a/invokeai/app/invocations/wan_lora_loader.py +++ b/invokeai/app/invocations/wan_lora_loader.py @@ -12,7 +12,6 @@ from invokeai.app.services.shared.invocation_context import InvocationContext from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType - # Target option for routing a LoRA to one or both Wan A14B expert lists. 
# # - ``auto``: read the LoRA config's ``expert`` field (set by the probe / from diff --git a/invokeai/app/invocations/wan_model_loader.py b/invokeai/app/invocations/wan_model_loader.py index dc063d5ba21..a4d986d8aa3 100644 --- a/invokeai/app/invocations/wan_model_loader.py +++ b/invokeai/app/invocations/wan_model_loader.py @@ -136,9 +136,7 @@ def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: if main_is_diffusers: transformer = self.model.model_copy(update={"submodel_type": SubModelType.Transformer}) if getattr(main_config, "has_dual_expert", False): - transformer_low_noise = self.model.model_copy( - update={"submodel_type": SubModelType.Transformer2} - ) + transformer_low_noise = self.model.model_copy(update={"submodel_type": SubModelType.Transformer2}) recorded = getattr(main_config, "boundary_ratio", None) if recorded is not None: boundary_ratio = float(recorded) @@ -153,9 +151,7 @@ def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: f"'Transformer (Low Noise)' must be a GGUF-format Wan model. " f"'{low_config.name}' is in {low_config.format.value} format." ) - low_id = self.transformer_low_noise_model.model_copy( - update={"submodel_type": SubModelType.Transformer} - ) + low_id = self.transformer_low_noise_model.model_copy(update={"submodel_type": SubModelType.Transformer}) low_expert = getattr(low_config, "expert", "none") # Make sure 'transformer' is the high-noise expert and @@ -172,10 +168,7 @@ def invoke(self, context: InvocationContext) -> WanModelLoaderOutput: # A14B without a paired low-noise GGUF will produce degraded # quality (only the high-noise expert runs). Warn but don't # abort — TI2V-5B GGUFs are single-expert and totally fine. - if ( - getattr(main_config, "variant", None) - and main_config.variant.value == "t2v_a14b" - ): + if getattr(main_config, "variant", None) and main_config.variant.value == "t2v_a14b": context.logger.warning( "A14B GGUF main was provided without a paired 'Transformer (Low Noise)'. 
" "Only the high-noise expert will run; image quality will be reduced." diff --git a/invokeai/app/invocations/wan_ref_image_encoder.py b/invokeai/app/invocations/wan_ref_image_encoder.py index cc1d6c90669..98a609ed99b 100644 --- a/invokeai/app/invocations/wan_ref_image_encoder.py +++ b/invokeai/app/invocations/wan_ref_image_encoder.py @@ -45,9 +45,7 @@ class WanRefImageEncoderInvocation(BaseInvocation): """ image: ImageField = InputField(description="Reference image to condition on.") - vae: VAEField = InputField( - description=FieldDescriptions.vae, input=Input.Connection, title="VAE" - ) + vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection, title="VAE") # Must match wan_denoise's width/height. multiple_of=16 (not 8) because # Wan's transformer patch_size=(1, 2, 2) needs latent H/W to be even. width: int = InputField( @@ -71,9 +69,7 @@ def invoke(self, context: InvocationContext) -> WanRefImageOutput: with vae_info.model_on_device() as (_, vae): if not isinstance(vae, AutoencoderKLWan): - raise TypeError( - f"Reference-image encoder requires AutoencoderKLWan, got {type(vae).__name__}." 
- ) + raise TypeError(f"Reference-image encoder requires AutoencoderKLWan, got {type(vae).__name__}.") context.util.signal_progress("VAE-encoding reference image") condition = encode_reference_image_to_condition( image=pil_image, @@ -86,6 +82,4 @@ def invoke(self, context: InvocationContext) -> WanRefImageOutput: condition = condition.detach().to("cpu") name = context.tensors.save(tensor=condition) - return WanRefImageOutput.build( - condition_tensor_name=name, width=self.width, height=self.height - ) + return WanRefImageOutput.build(condition_tensor_name=name, width=self.width, height=self.height) diff --git a/invokeai/app/invocations/wan_text_encoder.py b/invokeai/app/invocations/wan_text_encoder.py index 9cbb15476e0..396819d5434 100644 --- a/invokeai/app/invocations/wan_text_encoder.py +++ b/invokeai/app/invocations/wan_text_encoder.py @@ -56,9 +56,7 @@ def invoke(self, context: InvocationContext) -> WanConditioningOutput: attention_mask = attention_mask.detach().to("cpu") if attention_mask is not None else None conditioning_data = ConditioningFieldData( - conditionings=[ - WanConditioningInfo(prompt_embeds=prompt_embeds, prompt_attention_mask=attention_mask) - ] + conditionings=[WanConditioningInfo(prompt_embeds=prompt_embeds, prompt_attention_mask=attention_mask)] ) conditioning_name = context.conditioning.save(conditioning_data) return WanConditioningOutput.build(conditioning_name) diff --git a/invokeai/backend/model_manager/configs/factory.py b/invokeai/backend/model_manager/configs/factory.py index aae06328782..feedafe6c38 100644 --- a/invokeai/backend/model_manager/configs/factory.py +++ b/invokeai/backend/model_manager/configs/factory.py @@ -105,7 +105,6 @@ ) from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config from invokeai.backend.model_manager.configs.text_llm import TextLLM_Diffusers_Config -from invokeai.backend.model_manager.configs.wan_t5_encoder import 
WanT5Encoder_WanT5Encoder_Config from invokeai.backend.model_manager.configs.textual_inversion import ( TI_File_SD1_Config, TI_File_SD2_Config, @@ -129,6 +128,7 @@ VAE_Diffusers_SDXL_Config, VAE_Diffusers_Wan_Config, ) +from invokeai.backend.model_manager.configs.wan_t5_encoder import WanT5Encoder_WanT5Encoder_Config from invokeai.backend.model_manager.model_on_disk import ModelOnDisk from invokeai.backend.model_manager.taxonomy import ( BaseModelType, diff --git a/invokeai/backend/model_manager/configs/lora.py b/invokeai/backend/model_manager/configs/lora.py index 4659f341de8..d0372ba3f14 100644 --- a/invokeai/backend/model_manager/configs/lora.py +++ b/invokeai/backend/model_manager/configs/lora.py @@ -907,10 +907,7 @@ def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None: state_dict = mod.load_state_dict() str_keys = [k for k in state_dict.keys() if isinstance(k, str)] - has_cosmos_keys = ( - has_cosmos_dit_kohya_keys_strict(str_keys) - or has_cosmos_dit_peft_keys_strict(str_keys) - ) + has_cosmos_keys = has_cosmos_dit_kohya_keys_strict(str_keys) or has_cosmos_dit_peft_keys_strict(str_keys) # Also check for LoRA/LoKR weight suffixes has_lora_suffix = state_dict_has_any_keys_ending_with( @@ -1009,9 +1006,8 @@ def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType: state_dict = mod.load_state_dict() str_keys = [k for k in state_dict.keys() if isinstance(k, str)] - if ( - (has_wan_kohya_keys(str_keys) or has_wan_peft_keys(str_keys)) - and not has_non_wan_architecture_keys(str_keys) + if (has_wan_kohya_keys(str_keys) or has_wan_peft_keys(str_keys)) and not has_non_wan_architecture_keys( + str_keys ): return BaseModelType.Wan diff --git a/invokeai/backend/model_manager/configs/vae.py b/invokeai/backend/model_manager/configs/vae.py index 49f272147d6..00b96c3c1ac 100644 --- a/invokeai/backend/model_manager/configs/vae.py +++ b/invokeai/backend/model_manager/configs/vae.py @@ -290,8 +290,7 @@ def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: 
dict[str, Any]) - latent_channels: int = z_dim if latent_channels == 16 and not _filename_suggests_wan(mod): raise NotAMatchError( - "16-channel AutoencoderKLWan VAE without 'wan' in filename — " - "deferring to Qwen Image VAE config." + "16-channel AutoencoderKLWan VAE without 'wan' in filename — deferring to Qwen Image VAE config." ) explicit = override_fields.pop("latent_channels", None) diff --git a/invokeai/backend/model_manager/load/model_loaders/lora.py b/invokeai/backend/model_manager/load/model_loaders/lora.py index 4e5d8a5d649..a38ad2acd71 100644 --- a/invokeai/backend/model_manager/load/model_loaders/lora.py +++ b/invokeai/backend/model_manager/load/model_loaders/lora.py @@ -22,7 +22,6 @@ SubModelType, ) from invokeai.backend.patches.lora_conversions.anima_lora_conversion_utils import lora_model_from_anima_state_dict -from invokeai.backend.patches.lora_conversions.wan_lora_conversion_utils import lora_model_from_wan_state_dict from invokeai.backend.patches.lora_conversions.flux_aitoolkit_lora_conversion_utils import ( is_state_dict_likely_in_flux_aitoolkit_format, lora_model_from_flux_aitoolkit_state_dict, @@ -63,6 +62,7 @@ ) from invokeai.backend.patches.lora_conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict from invokeai.backend.patches.lora_conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format +from invokeai.backend.patches.lora_conversions.wan_lora_conversion_utils import lora_model_from_wan_state_dict from invokeai.backend.patches.lora_conversions.z_image_lora_conversion_utils import lora_model_from_z_image_state_dict diff --git a/invokeai/backend/model_manager/load/model_loaders/wan.py b/invokeai/backend/model_manager/load/model_loaders/wan.py index 4824d61b8dd..f3bb7de7b61 100644 --- a/invokeai/backend/model_manager/load/model_loaders/wan.py +++ b/invokeai/backend/model_manager/load/model_loaders/wan.py @@ -163,10 +163,7 @@ def _unwrap_unquantized_to_compute_dtype(state_dict: dict) -> dict: """ 
unwrapped: dict = {} for key, value in state_dict.items(): - if ( - isinstance(value, GGMLTensor) - and value._ggml_quantization_type in TORCH_COMPATIBLE_QTYPES - ): + if isinstance(value, GGMLTensor) and value._ggml_quantization_type in TORCH_COMPATIBLE_QTYPES: # GGMLTensor.get_dequantized_tensor() already casts to compute_dtype. unwrapped[key] = value.get_dequantized_tensor() else: @@ -219,8 +216,7 @@ def _load_from_singlefile(self, config: Main_GGUF_Wan_Config) -> AnyModel: for prefix in ("model.diffusion_model.", "diffusion_model."): if any(isinstance(k, str) and k.startswith(prefix) for k in sd.keys()): sd = { - (k[len(prefix):] if isinstance(k, str) and k.startswith(prefix) else k): v - for k, v in sd.items() + (k[len(prefix) :] if isinstance(k, str) and k.startswith(prefix) else k): v for k, v in sd.items() } break @@ -308,9 +304,7 @@ def _load_from_singlefile(self, config: Main_GGUF_Wan_Config) -> AnyModel: return model -@ModelLoaderRegistry.register( - base=BaseModelType.Any, type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder -) +@ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.WanT5Encoder, format=ModelFormat.WanT5Encoder) class WanT5EncoderLoader(ModelLoader): """Loader for the standalone Wan UMT5-XXL encoder. @@ -348,8 +342,10 @@ def _load_model( # Prefer a sibling tokenizer/ directory; fall back to the encoder dir # itself, which is normal for "flat" downloads. 
- target = nested_tokenizer if nested_tokenizer.exists() else ( - nested_text_encoder if nested_text_encoder.exists() else root + target = ( + nested_tokenizer + if nested_tokenizer.exists() + else (nested_text_encoder if nested_text_encoder.exists() else root) ) return AutoTokenizer.from_pretrained(str(target), local_files_only=True) diff --git a/invokeai/backend/patches/lora_conversions/anima_lora_constants.py b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py index d3d4ac3bcd0..5a54de82e86 100644 --- a/invokeai/backend/patches/lora_conversions/anima_lora_constants.py +++ b/invokeai/backend/patches/lora_conversions/anima_lora_constants.py @@ -46,9 +46,7 @@ r"(?:cross|self)_attn[._](?:[qkv]_proj|output_proj))" ) -_KOHYA_ANIMA_STRICT_RE = re.compile( - r"lora_unet_(llm_adapter_)?blocks_\d+_" + _COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE -) +_KOHYA_ANIMA_STRICT_RE = re.compile(r"lora_unet_(llm_adapter_)?blocks_\d+_" + _COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE) _PEFT_ANIMA_STRICT_RE = re.compile( r"(diffusion_model|transformer|base_model\.model\.transformer)\.blocks\.\d+\." + _COSMOS_DIT_EXCLUSIVE_SUBCOMPONENTS_RE diff --git a/invokeai/backend/patches/lora_conversions/wan_lora_constants.py b/invokeai/backend/patches/lora_conversions/wan_lora_constants.py index b539bcbec91..c7a6859d6f0 100644 --- a/invokeai/backend/patches/lora_conversions/wan_lora_constants.py +++ b/invokeai/backend/patches/lora_conversions/wan_lora_constants.py @@ -73,12 +73,10 @@ # PEFT format: .blocks.. # Prefix may be empty, "transformer.", "diffusion_model.", or "base_model.model.transformer." _PEFT_WAN_DIFFUSERS_RE = re.compile( - r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\." - + _WAN_DIFFUSERS_SUBMODULES + r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\." + _WAN_DIFFUSERS_SUBMODULES ) _PEFT_WAN_NATIVE_RE = re.compile( - r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\." 
- + _WAN_NATIVE_SUBMODULES + r"(?:^|(?:diffusion_model|transformer|base_model\.model\.transformer)\.)blocks\.\d+\." + _WAN_NATIVE_SUBMODULES ) diff --git a/invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py b/invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py index 90507923168..5592572b246 100644 --- a/invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py +++ b/invokeai/backend/patches/lora_conversions/wan_lora_conversion_utils.py @@ -27,7 +27,6 @@ ) from invokeai.backend.patches.model_patch_raw import ModelPatchRaw - # Kohya layer-name regex: lora_unet_blocks__ _KOHYA_KEY_REGEX = re.compile(r"lora_unet_blocks_(\d+)_(.*)") @@ -105,9 +104,7 @@ ) -def lora_model_from_wan_state_dict( - state_dict: Dict[str, torch.Tensor], alpha: float | None = None -) -> ModelPatchRaw: +def lora_model_from_wan_state_dict(state_dict: Dict[str, torch.Tensor], alpha: float | None = None) -> ModelPatchRaw: """Convert any supported Wan LoRA state dict into a ``ModelPatchRaw``. Detects Kohya vs PEFT layouts and dispatches accordingly. Layer paths in @@ -174,7 +171,7 @@ def _strip_peft_prefix(layer_key: str) -> str: """Strip ``transformer.``, ``diffusion_model.``, ``base_model.model.transformer.`` if present.""" for prefix in _PEFT_PREFIXES_TO_STRIP: if layer_key.startswith(prefix): - return layer_key[len(prefix):] + return layer_key[len(prefix) :] return layer_key @@ -198,9 +195,7 @@ def _native_layer_path_to_diffusers(path: str) -> str | None: return augmented.rstrip(".") -def _normalize_lora_param_names( - layer_dict: dict[str, torch.Tensor], alpha: float | None -) -> dict[str, torch.Tensor]: +def _normalize_lora_param_names(layer_dict: dict[str, torch.Tensor], alpha: float | None) -> dict[str, torch.Tensor]: """Map PEFT-style ``lora_A``/``lora_B`` to ``lora_down``/``lora_up``. Kohya-style ``lora_down``/``lora_up`` pass through unchanged. 
diff --git a/invokeai/backend/wan/extensions/wan_ref_image_extension.py b/invokeai/backend/wan/extensions/wan_ref_image_extension.py index 0b98053b464..387e779cdd4 100644 --- a/invokeai/backend/wan/extensions/wan_ref_image_extension.py +++ b/invokeai/backend/wan/extensions/wan_ref_image_extension.py @@ -14,25 +14,21 @@ single-frame image generation). """ -from PIL import Image import torch import torchvision.transforms.functional as TF from diffusers.models.autoencoders import AutoencoderKLWan +from PIL import Image # Wan 2.2 VAE temporal scale factor — single frame still consumes a 4-position # slice of the mask tensor, which is why the mask contributes 4 channels. _WAN_VAE_TEMPORAL_SCALE = 4 -def preprocess_reference_image( - image: Image.Image, width: int, height: int -) -> torch.Tensor: +def preprocess_reference_image(image: Image.Image, width: int, height: int) -> torch.Tensor: """Resize a PIL image to (width, height) and return a normalised [-1, 1] tensor of shape ``[1, 3, 1, height, width]`` ready for ``AutoencoderKLWan.encode``.""" if width % 8 != 0 or height % 8 != 0: - raise ValueError( - f"Reference-image dimensions must be multiples of 8 (got {width}x{height})." - ) + raise ValueError(f"Reference-image dimensions must be multiples of 8 (got {width}x{height}).") resized = image.convert("RGB").resize((width, height), Image.LANCZOS) # [0, 1] CHW float tensor. pixel = TF.to_tensor(resized) @@ -61,9 +57,7 @@ def encode_reference_image_to_condition( expects. 
""" vae_dtype = next(iter(vae.parameters())).dtype - pixel = preprocess_reference_image(image, width=width, height=height).to( - device=device, dtype=vae_dtype - ) + pixel = preprocess_reference_image(image, width=width, height=height).to(device=device, dtype=vae_dtype) with torch.inference_mode(): encoded = vae.encode(pixel, return_dict=False)[0] @@ -72,16 +66,8 @@ def encode_reference_image_to_condition( # Normalise against the VAE's per-channel mean/std, matching diffusers' # ``WanImageToVideoPipeline.prepare_latents`` (lines 440-459). Note the # multiplication by 1/std == division by std. - latents_mean = ( - torch.tensor(vae.config.latents_mean) - .view(1, -1, 1, 1, 1) - .to(latents.device, latents.dtype) - ) - latents_std = ( - torch.tensor(vae.config.latents_std) - .view(1, -1, 1, 1, 1) - .to(latents.device, latents.dtype) - ) + latents_mean = torch.tensor(vae.config.latents_mean).view(1, -1, 1, 1, 1).to(latents.device, latents.dtype) + latents_std = torch.tensor(vae.config.latents_std).view(1, -1, 1, 1, 1).to(latents.device, latents.dtype) latent_condition = (latents - latents_mean) / latents_std latent_condition = latent_condition.to(dtype=dtype) @@ -90,8 +76,6 @@ def encode_reference_image_to_condition( # (i.e., conditioned). After the temporal-scale expansion the mask is # 4 channels of ones at [1, T_lat=1, H_lat, W_lat]. 
_, _, t_lat, h_lat, w_lat = latent_condition.shape - mask = torch.ones( - 1, _WAN_VAE_TEMPORAL_SCALE, t_lat, h_lat, w_lat, device=device, dtype=dtype - ) + mask = torch.ones(1, _WAN_VAE_TEMPORAL_SCALE, t_lat, h_lat, w_lat, device=device, dtype=dtype) return torch.cat([mask, latent_condition], dim=1) diff --git a/tests/app/invocations/test_wan_denoise.py b/tests/app/invocations/test_wan_denoise.py index 5baa86b5211..47e542d471d 100644 --- a/tests/app/invocations/test_wan_denoise.py +++ b/tests/app/invocations/test_wan_denoise.py @@ -183,9 +183,7 @@ def _force_cpu(monkeypatch): from invokeai.backend.util.devices import TorchDevice monkeypatch.setattr(TorchDevice, "choose_torch_device", classmethod(lambda cls: torch.device("cpu"))) - monkeypatch.setattr( - TorchDevice, "choose_bfloat16_safe_dtype", classmethod(lambda cls, device=None: torch.float32) - ) + monkeypatch.setattr(TorchDevice, "choose_bfloat16_safe_dtype", classmethod(lambda cls, device=None: torch.float32)) def _wan_transformer_field(*, dual: bool = False, boundary_ratio: float = 0.875) -> WanTransformerField: @@ -542,13 +540,9 @@ def test_ref_image_concatenated_to_36_channels(self, fake_model_root: Path) -> N # 4-ch first-frame mask + 16-ch VAE-encoded image latents. # At 64x64 → 8x8 latent spatial dims. 
condition = torch.zeros(1, 20, 1, 8, 8) - ctx = self._build_ctx_with_condition( - transformer, WanVariantType.I2V_A14B, fake_model_root, condition - ) + ctx = self._build_ctx_with_condition(transformer, WanVariantType.I2V_A14B, fake_model_root, condition) - ref_field = WanRefImageConditioningField( - condition_tensor_name="condition", width=64, height=64 - ) + ref_field = WanRefImageConditioningField(condition_tensor_name="condition", width=64, height=64) inv = self._make_inv_with_ref(ref_field) inv._run_diffusion(ctx) @@ -574,13 +568,9 @@ def test_variant_gate_rejects_ref_image_on_t2v(self, fake_model_root: Path) -> N """T2V_A14B + ref_image must raise — fast-fail before doing any work.""" transformer = _ZeroTransformer() condition = torch.zeros(1, 20, 1, 8, 8) - ctx = self._build_ctx_with_condition( - transformer, WanVariantType.T2V_A14B, fake_model_root, condition - ) + ctx = self._build_ctx_with_condition(transformer, WanVariantType.T2V_A14B, fake_model_root, condition) - ref_field = WanRefImageConditioningField( - condition_tensor_name="condition", width=64, height=64 - ) + ref_field = WanRefImageConditioningField(condition_tensor_name="condition", width=64, height=64) inv = self._make_inv_with_ref(ref_field) with pytest.raises(ValueError, match="only supported by the Wan 2.2 I2V variant"): inv._run_diffusion(ctx) @@ -589,13 +579,9 @@ def test_variant_gate_rejects_ref_image_on_ti2v(self, fake_model_root: Path) -> """TI2V-5B + ref_image must raise — TI2V uses a different image path.""" transformer = _ZeroTransformer() condition = torch.zeros(1, 20, 1, 8, 8) - ctx = self._build_ctx_with_condition( - transformer, WanVariantType.TI2V_5B, fake_model_root, condition - ) + ctx = self._build_ctx_with_condition(transformer, WanVariantType.TI2V_5B, fake_model_root, condition) - ref_field = WanRefImageConditioningField( - condition_tensor_name="condition", width=64, height=64 - ) + ref_field = WanRefImageConditioningField(condition_tensor_name="condition", width=64, 
height=64) inv = self._make_inv_with_ref(ref_field) with pytest.raises(ValueError, match="only supported by the Wan 2.2 I2V variant"): inv._run_diffusion(ctx) @@ -604,13 +590,9 @@ def test_dim_mismatch_raises(self, fake_model_root: Path) -> None: """If the encoder's width/height differ from denoise's, fail clearly.""" transformer = _ZeroTransformer() condition = torch.zeros(1, 20, 1, 8, 8) - ctx = self._build_ctx_with_condition( - transformer, WanVariantType.I2V_A14B, fake_model_root, condition - ) + ctx = self._build_ctx_with_condition(transformer, WanVariantType.I2V_A14B, fake_model_root, condition) - ref_field = WanRefImageConditioningField( - condition_tensor_name="condition", width=512, height=512 - ) + ref_field = WanRefImageConditioningField(condition_tensor_name="condition", width=512, height=512) inv = self._make_inv_with_ref(ref_field, width=64, height=64) with pytest.raises(ValueError, match="must match denoise dimensions"): inv._run_diffusion(ctx) diff --git a/tests/app/invocations/test_wan_expert_swapper.py b/tests/app/invocations/test_wan_expert_swapper.py index 3c7d5a94fa3..8238065aa65 100644 --- a/tests/app/invocations/test_wan_expert_swapper.py +++ b/tests/app/invocations/test_wan_expert_swapper.py @@ -14,8 +14,8 @@ LoRA factory at each step. """ -from typing import Iterable, Iterator, Tuple -from unittest.mock import MagicMock, patch +from typing import Iterable, Tuple +from unittest.mock import patch import torch import torch.nn as nn @@ -341,8 +341,7 @@ def test_lazy_load_per_swap_not_upfront(): low_lora_factory=_make_factory(log, "LOW"), ) assert ctx.models.load_calls == [], ( - "Swapper must not call models.load until get() is invoked — " - "see issue #7513 for cache-eviction rationale." + "Swapper must not call models.load until get() is invoked — see issue #7513 for cache-eviction rationale." ) # First get(HIGH): loads HIGH only. 
diff --git a/tests/app/invocations/test_wan_lora_loader.py b/tests/app/invocations/test_wan_lora_loader.py index 0e5f3e94d43..ce250eff86d 100644 --- a/tests/app/invocations/test_wan_lora_loader.py +++ b/tests/app/invocations/test_wan_lora_loader.py @@ -12,7 +12,6 @@ ) from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType - # -------------------------------------------------------------------------- # _resolve_target — pure function, no mocks needed. # -------------------------------------------------------------------------- @@ -77,27 +76,21 @@ def _make_context(lora_expert: str | None) -> MagicMock: class TestSingleLoaderRouting: def test_auto_untagged_goes_to_both(self): - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field() - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field()) out = inv.invoke(_make_context(lora_expert=None)) assert out.transformer is not None assert len(out.transformer.loras) == 1 assert len(out.transformer.loras_low_noise) == 1 def test_auto_high_tag_goes_to_primary_only(self): - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field() - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field()) out = inv.invoke(_make_context(lora_expert="high")) assert out.transformer is not None assert len(out.transformer.loras) == 1 assert len(out.transformer.loras_low_noise) == 0 def test_auto_low_tag_goes_to_low_only(self): - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field() - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field()) out = inv.invoke(_make_context(lora_expert="low")) assert out.transformer is not None assert len(out.transformer.loras) == 0 @@ -129,9 +122,7 @@ def test_weight_propagates(self): def 
test_unknown_lora_raises(self): ctx = _make_context(lora_expert=None) ctx.models.exists.return_value = False - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field() - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(), transformer=_make_transformer_field()) with pytest.raises(ValueError, match="Unknown lora"): inv.invoke(ctx) @@ -140,9 +131,7 @@ def test_duplicate_on_primary_raises(self): transformer = _make_transformer_field() transformer.loras.append(existing) - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer) with pytest.raises(ValueError, match="already applied to primary"): inv.invoke(_make_context(lora_expert="high")) @@ -151,9 +140,7 @@ def test_duplicate_on_low_noise_raises(self): transformer = _make_transformer_field() transformer.loras_low_noise.append(existing) - inv = WanLoRALoaderInvocation( - id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer - ) + inv = WanLoRALoaderInvocation(id="inv-1", lora=_make_lora_field(key="dup"), transformer=transformer) with pytest.raises(ValueError, match="already applied to low-noise"): inv.invoke(_make_context(lora_expert="low")) @@ -204,14 +191,10 @@ def get_config(field: ModelIdentifierField) -> MagicMock: def test_rejects_non_wan_base(self): wrong_base_lora = LoRAField( - lora=ModelIdentifierField( - key="not-wan", hash="h", name="n", base=BaseModelType.Flux, type=ModelType.LoRA - ), + lora=ModelIdentifierField(key="not-wan", hash="h", name="n", base=BaseModelType.Flux, type=ModelType.LoRA), weight=0.5, ) - inv = WanLoRACollectionLoader( - id="inv-1", loras=[wrong_base_lora], transformer=_make_transformer_field() - ) + inv = WanLoRACollectionLoader(id="inv-1", loras=[wrong_base_lora], transformer=_make_transformer_field()) ctx = MagicMock() 
ctx.models.exists.return_value = True with pytest.raises(ValueError, match="not Wan 2.2"): @@ -235,9 +218,7 @@ def test_skips_duplicates(self): assert len(out.transformer.loras) == 1 def test_no_loras_returns_clean_copy(self): - inv = WanLoRACollectionLoader( - id="inv-1", loras=None, transformer=_make_transformer_field() - ) + inv = WanLoRACollectionLoader(id="inv-1", loras=None, transformer=_make_transformer_field()) out = inv.invoke(MagicMock()) assert out.transformer is not None assert len(out.transformer.loras) == 0 diff --git a/tests/backend/model_manager/configs/test_wan_lora_config.py b/tests/backend/model_manager/configs/test_wan_lora_config.py index fb1e125a37e..43f55db06b2 100644 --- a/tests/backend/model_manager/configs/test_wan_lora_config.py +++ b/tests/backend/model_manager/configs/test_wan_lora_config.py @@ -132,24 +132,15 @@ class TestArchitectureGuards: @pytest.mark.parametrize( "label, keys", [ - ("anima_kohya_q_proj", - ["lora_unet_blocks_0_cross_attn_q_proj.lora_down.weight"]), - ("anima_peft_mlp", - ["transformer.blocks.0.mlp.layer1.lora_A.weight"]), - ("anima_peft_adaln", - ["transformer.blocks.0.adaln_modulation.linear.lora_A.weight"]), - ("anima_peft_self_attn_q_proj", - ["transformer.blocks.0.self_attn.q_proj.lora_A.weight"]), - ("qwen_image", - ["transformer_blocks.0.attn.to_q.lora_A.weight"]), - ("flux_kohya_double", - ["lora_unet_double_blocks_0_img_attn_qkv.lora_down.weight"]), - ("flux_kohya_single", - ["lora_unet_single_blocks_0_linear1.lora_down.weight"]), - ("flux_diffusers_single_transformer", - ["transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight"]), - ("z_image", - ["diffusion_model.layers.0.attn.to_q.lora_A.weight"]), + ("anima_kohya_q_proj", ["lora_unet_blocks_0_cross_attn_q_proj.lora_down.weight"]), + ("anima_peft_mlp", ["transformer.blocks.0.mlp.layer1.lora_A.weight"]), + ("anima_peft_adaln", ["transformer.blocks.0.adaln_modulation.linear.lora_A.weight"]), + ("anima_peft_self_attn_q_proj", 
["transformer.blocks.0.self_attn.q_proj.lora_A.weight"]), + ("qwen_image", ["transformer_blocks.0.attn.to_q.lora_A.weight"]), + ("flux_kohya_double", ["lora_unet_double_blocks_0_img_attn_qkv.lora_down.weight"]), + ("flux_kohya_single", ["lora_unet_single_blocks_0_linear1.lora_down.weight"]), + ("flux_diffusers_single_transformer", ["transformer.single_transformer_blocks.0.attn.to_q.lora_A.weight"]), + ("z_image", ["diffusion_model.layers.0.attn.to_q.lora_A.weight"]), ], ) def test_non_wan_archs_are_flagged(self, label: str, keys: list[str]): @@ -362,9 +353,7 @@ def test_anima_rejects_wan_native_lora(self): "diffusion_model.blocks.0.cross_attn.k.lora_B.weight": _t((5120, 128)), } with pytest.raises(NotAMatchError, match="Anima LoRA"): - LoRA_LyCORIS_Anima_Config.from_model_on_disk( - _make_mod(f, sd), _overrides(f, "wan-native-lora") - ) + LoRA_LyCORIS_Anima_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "wan-native-lora")) def test_wan_rejects_anima_lora(self): """Mirror direction: a real Anima LoRA must not be matched by Wan. 
@@ -380,6 +369,4 @@ def test_wan_rejects_anima_lora(self): "transformer.blocks.0.mlp.layer1.lora_B.weight": _t((4096, 128)), } with pytest.raises(NotAMatchError, match="Wan LoRA"): - LoRA_LyCORIS_Wan_Config.from_model_on_disk( - _make_mod(f, sd), _overrides(f, "anima-lora") - ) + LoRA_LyCORIS_Wan_Config.from_model_on_disk(_make_mod(f, sd), _overrides(f, "anima-lora")) diff --git a/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py b/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py index 6d54cb5401f..93fdf054639 100644 --- a/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py +++ b/tests/backend/model_manager/configs/test_wan_lora_probe_independence.py @@ -178,8 +178,7 @@ def test_wan_loras_only_match_wan(label: str, sd_builder) -> None: assert wan_ok, f"Wan probe must accept {label}; got {wan_result}" assert wan_result.base == BaseModelType.Wan assert not anima_ok, ( - f"Anima probe must reject {label} so probing is order-independent. " - f"Instead it accepted: {anima_result}" + f"Anima probe must reject {label} so probing is order-independent. Instead it accepted: {anima_result}" ) @@ -203,8 +202,7 @@ def test_anima_loras_only_match_anima(label: str, sd_builder) -> None: assert anima_ok, f"Anima probe must accept {label}; got {anima_result}" assert anima_result.base == BaseModelType.Anima assert not wan_ok, ( - f"Wan probe must reject {label} so probing is order-independent. " - f"Instead it accepted: {wan_result}" + f"Wan probe must reject {label} so probing is order-independent. 
Instead it accepted: {wan_result}" ) diff --git a/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py b/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py index 9fac29db374..4a5732bc10a 100644 --- a/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py +++ b/tests/backend/model_manager/configs/test_wan_t5_encoder_config.py @@ -71,9 +71,7 @@ def test_rejects_t5(self): _write_encoder_config(root / "config.json", "t5") with pytest.raises(NotAMatchError, match="not 'umt5'"): - WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( - _make_mod(root), _build_overrides(root, "t5-xxl") - ) + WanT5Encoder_WanT5Encoder_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "t5-xxl")) def test_rejects_full_pipeline(self): """A folder with model_index.json or transformer/ is a full pipeline, not an encoder.""" @@ -95,6 +93,4 @@ def test_rejects_missing_config(self): root.mkdir() with pytest.raises(NotAMatchError, match="no encoder config"): - WanT5Encoder_WanT5Encoder_Config.from_model_on_disk( - _make_mod(root), _build_overrides(root, "empty") - ) + WanT5Encoder_WanT5Encoder_Config.from_model_on_disk(_make_mod(root), _build_overrides(root, "empty")) diff --git a/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py b/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py index 21c79cd5c9a..f9cac4bd61b 100644 --- a/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py +++ b/tests/backend/patches/lora_conversions/test_wan_lora_conversion_utils.py @@ -66,9 +66,7 @@ def test_strip_diffusion_model_prefix(self): assert _strip_peft_prefix("diffusion_model.blocks.0.self_attn.q") == "blocks.0.self_attn.q" def test_strip_base_model_prefix(self): - assert _strip_peft_prefix( - "base_model.model.transformer.blocks.0.attn1.to_q" - ) == "blocks.0.attn1.to_q" + assert _strip_peft_prefix("base_model.model.transformer.blocks.0.attn1.to_q") == "blocks.0.attn1.to_q" def 
test_no_prefix_unchanged(self): assert _strip_peft_prefix("blocks.0.attn1.to_q") == "blocks.0.attn1.to_q" @@ -99,10 +97,7 @@ class TestLoRAModelFromStateDict: """End-to-end conversion: state dict -> ModelPatchRaw.""" def test_diffusers_peft_with_transformer_prefix(self): - sd = { - f"transformer.blocks.0.attn1.to_q.{k}": v - for k, v in _ab_pair(5120, 5120).items() - } + sd = {f"transformer.blocks.0.attn1.to_q.{k}": v for k, v in _ab_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) expected_key = f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" assert expected_key in patch.layers @@ -113,51 +108,33 @@ def test_diffusers_peft_bare(self): assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.5.attn2.to_k" in patch.layers def test_native_peft_diffusion_model_prefix(self): - sd = { - f"diffusion_model.blocks.0.self_attn.q.{k}": v - for k, v in _ab_pair(5120, 5120).items() - } + sd = {f"diffusion_model.blocks.0.self_attn.q.{k}": v for k, v in _ab_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) # native self_attn.q must be rewritten to attn1.to_q assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers def test_native_peft_cross_attn_to_attn2(self): - sd = { - f"diffusion_model.blocks.3.cross_attn.o.{k}": v - for k, v in _ab_pair(5120, 5120).items() - } + sd = {f"diffusion_model.blocks.3.cross_attn.o.{k}": v for k, v in _ab_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.3.attn2.to_out.0" in patch.layers def test_native_peft_ffn_to_diffusers(self): - sd = { - f"diffusion_model.blocks.0.ffn.0.{k}": v - for k, v in _ab_pair(5120, 13824).items() - } + sd = {f"diffusion_model.blocks.0.ffn.0.{k}": v for k, v in _ab_pair(5120, 13824).items()} patch = lora_model_from_wan_state_dict(sd) assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.ffn.net.0.proj" in patch.layers def test_kohya_diffusers_naming(self): - sd = { - f"lora_unet_blocks_0_attn1_to_q.{k}": v - for k, 
v in _down_up_pair(5120, 5120).items() - } + sd = {f"lora_unet_blocks_0_attn1_to_q.{k}": v for k, v in _down_up_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers def test_kohya_native_naming(self): - sd = { - f"lora_unet_blocks_0_self_attn_q.{k}": v - for k, v in _down_up_pair(5120, 5120).items() - } + sd = {f"lora_unet_blocks_0_self_attn_q.{k}": v for k, v in _down_up_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.attn1.to_q" in patch.layers def test_kohya_ffn_native_naming(self): - sd = { - f"lora_unet_blocks_0_ffn_0.{k}": v - for k, v in _down_up_pair(5120, 13824).items() - } + sd = {f"lora_unet_blocks_0_ffn_0.{k}": v for k, v in _down_up_pair(5120, 13824).items()} patch = lora_model_from_wan_state_dict(sd) assert f"{WAN_LORA_TRANSFORMER_PREFIX}blocks.0.ffn.net.0.proj" in patch.layers @@ -189,10 +166,7 @@ def test_alpha_override_propagates(self): assert layer is not None def test_unknown_kohya_submodule_is_skipped_silently(self): - sd = { - f"lora_unet_blocks_0_unknown_thing.{k}": v - for k, v in _down_up_pair(5120, 5120).items() - } + sd = {f"lora_unet_blocks_0_unknown_thing.{k}": v for k, v in _down_up_pair(5120, 5120).items()} patch = lora_model_from_wan_state_dict(sd) assert len(patch.layers) == 0 diff --git a/tests/backend/wan/test_sampling_utils.py b/tests/backend/wan/test_sampling_utils.py index a1d27a53f7e..ec52f357a87 100644 --- a/tests/backend/wan/test_sampling_utils.py +++ b/tests/backend/wan/test_sampling_utils.py @@ -53,7 +53,22 @@ def test_ti2v_shape_at_1024(self) -> None: assert noise.shape == (1, 48, 1, 64, 64) def test_seed_is_deterministic(self) -> None: - kwargs = dict( + kwargs = { + "batch_size": 1, + "latent_channels": 16, + "height": 256, + "width": 256, + "spatial_scale_factor": 8, + "device": torch.device("cpu"), + "dtype": torch.float32, + "seed": 123, + } + a = 
make_noise(**kwargs) + b = make_noise(**kwargs) + assert torch.allclose(a, b) + + def test_seed_changes_output(self) -> None: + a = make_noise( batch_size=1, latent_channels=16, height=256, @@ -61,19 +76,16 @@ def test_seed_is_deterministic(self) -> None: spatial_scale_factor=8, device=torch.device("cpu"), dtype=torch.float32, - seed=123, - ) - a = make_noise(**kwargs) - b = make_noise(**kwargs) - assert torch.allclose(a, b) - - def test_seed_changes_output(self) -> None: - a = make_noise( - batch_size=1, latent_channels=16, height=256, width=256, spatial_scale_factor=8, - device=torch.device("cpu"), dtype=torch.float32, seed=1, + seed=1, ) b = make_noise( - batch_size=1, latent_channels=16, height=256, width=256, spatial_scale_factor=8, - device=torch.device("cpu"), dtype=torch.float32, seed=2, + batch_size=1, + latent_channels=16, + height=256, + width=256, + spatial_scale_factor=8, + device=torch.device("cpu"), + dtype=torch.float32, + seed=2, ) assert not torch.allclose(a, b)