huggingface · HuggingFaceInfra · Apr 9, 2026
diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py
diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py
diff --git a/src/huggingface_hub/inference/_generated/types/audio_classification.py b/src/huggingface_hub/inference/_generated/types/audio_classification.py
@@ -17,7 +17,7 @@ class AudioClassificationParameters(BaseInferenceType):
 
     function_to_apply: Optional["AudioClassificationOutputTransform"] = None
     """The function to apply to the model outputs in order to retrieve the scores."""
-    top_k: int | None = None
+    top_k: Optional[int] = None
     """When specified, limits the output to the top K most probable classes."""
 
 
@@ -29,7 +29,7 @@ class AudioClassificationInput(BaseInferenceType):
     """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
     also provide the audio data as a raw bytes payload.
     """
-    parameters: AudioClassificationParameters | None = None
+    parameters: Optional[AudioClassificationParameters] = None
     """Additional inference parameters for Audio Classification"""
 
 

diff --git a/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py b/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Literal, Union
+from typing import Literal, Optional, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -15,17 +15,17 @@
 class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
     """Parametrization of the text generation process"""
 
-    do_sample: bool | None = None
+    do_sample: Optional[bool] = None
     """Whether to use sampling instead of greedy decoding when generating new tokens."""
-    early_stopping: Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"] | None = None
+    early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
     """Controls the stopping condition for beam-based methods."""
-    epsilon_cutoff: float | None = None
+    epsilon_cutoff: Optional[float] = None
     """If set to float strictly between 0 and 1, only tokens with a conditional probability
     greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
     3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
     Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
     """
-    eta_cutoff: float | None = None
+    eta_cutoff: Optional[float] = None
     """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
     float strictly between 0 and 1, a token is only considered if it is greater than either
     eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
@@ -34,50 +34,50 @@ class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
     See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
     for more details.
     """
-    max_length: int | None = None
+    max_length: Optional[int] = None
     """The maximum length (in tokens) of the generated text, including the input."""
-    max_new_tokens: int | None = None
+    max_new_tokens: Optional[int] = None
     """The maximum number of tokens to generate. Takes precedence over max_length."""
-    min_length: int | None = None
+    min_length: Optional[int] = None
     """The minimum length (in tokens) of the generated text, including the input."""
-    min_new_tokens: int | None = None
+    min_new_tokens: Optional[int] = None
     """The minimum number of tokens to generate. Takes precedence over min_length."""
-    num_beam_groups: int | None = None
+    num_beam_groups: Optional[int] = None
     """Number of groups to divide num_beams into in order to ensure diversity among different
     groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
     """
-    num_beams: int | None = None
+    num_beams: Optional[int] = None
     """Number of beams to use for beam search."""
-    penalty_alpha: float | None = None
+    penalty_alpha: Optional[float] = None
     """The value balances the model confidence and the degeneration penalty in contrastive
     search decoding.
     """
-    temperature: float | None = None
+    temperature: Optional[float] = None
     """The value used to modulate the next token probabilities."""
-    top_k: int | None = None
+    top_k: Optional[int] = None
     """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
-    top_p: float | None = None
+    top_p: Optional[float] = None
     """If set to float < 1, only the smallest set of most probable tokens with probabilities
     that add up to top_p or higher are kept for generation.
     """
-    typical_p: float | None = None
+    typical_p: Optional[float] = None
     """Local typicality measures how similar the conditional probability of predicting a target
     token next is to the expected conditional probability of predicting a random token next,
     given the partial text already generated. If set to float < 1, the smallest set of the
     most locally typical tokens with probabilities that add up to typical_p or higher are
     kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
     """
-    use_cache: bool | None = None
+    use_cache: Optional[bool] = None
     """Whether the model should use the past last key/values attentions to speed up decoding"""
 
 
 @dataclass_with_extra
 class AutomaticSpeechRecognitionParameters(BaseInferenceType):
     """Additional inference parameters for Automatic Speech Recognition"""
 
-    generation_parameters: AutomaticSpeechRecognitionGenerationParameters | None = None
+    generation_parameters: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
     """Parametrization of the text generation process"""
-    return_timestamps: bool | None = None
+    return_timestamps: Optional[bool] = None
     """Whether to output corresponding timestamps with the generated text"""
 
 
@@ -89,7 +89,7 @@ class AutomaticSpeechRecognitionInput(BaseInferenceType):
     """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
     also provide the audio data as a raw bytes payload.
     """
-    parameters: AutomaticSpeechRecognitionParameters | None = None
+    parameters: Optional[AutomaticSpeechRecognitionParameters] = None
     """Additional inference parameters for Automatic Speech Recognition"""
 
 
@@ -107,7 +107,7 @@ class AutomaticSpeechRecognitionOutput(BaseInferenceType):
 
     text: str
     """The recognized text."""
-    chunks: list[AutomaticSpeechRecognitionOutputChunk] | None = None
+    chunks: Optional[list[AutomaticSpeechRecognitionOutputChunk]] = None
     """When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
     the model.
     """
diff --git a/src/huggingface_hub/inference/_generated/types/depth_estimation.py b/src/huggingface_hub/inference/_generated/types/depth_estimation.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any
+from typing import Any, Optional
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -14,7 +14,7 @@ class DepthEstimationInput(BaseInferenceType):
 
     inputs: Any
     """The input image data"""
-    parameters: dict[str, Any] | None = None
+    parameters: Optional[dict[str, Any]] = None
     """Additional inference parameters for Depth Estimation"""
 
 

diff --git a/src/huggingface_hub/inference/_generated/types/document_question_answering.py b/src/huggingface_hub/inference/_generated/types/document_question_answering.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any
+from typing import Any, Optional, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -22,31 +22,31 @@ class DocumentQuestionAnsweringInputData(BaseInferenceType):
 class DocumentQuestionAnsweringParameters(BaseInferenceType):
     """Additional inference parameters for Document Question Answering"""
 
-    doc_stride: int | None = None
+    doc_stride: Optional[int] = None
     """If the words in the document are too long to fit with the question for the model, it will
     be split in several chunks with some overlap. This argument controls the size of that
     overlap.
     """
-    handle_impossible_answer: bool | None = None
+    handle_impossible_answer: Optional[bool] = None
     """Whether to accept impossible as an answer"""
-    lang: str | None = None
+    lang: Optional[str] = None
     """Language to use while running OCR. Defaults to english."""
-    max_answer_len: int | None = None
+    max_answer_len: Optional[int] = None
     """The maximum length of predicted answers (e.g., only answers with a shorter length are
     considered).
     """
-    max_question_len: int | None = None
+    max_question_len: Optional[int] = None
     """The maximum length of the question after tokenization. It will be truncated if needed."""
-    max_seq_len: int | None = None
+    max_seq_len: Optional[int] = None
     """The maximum length of the total sentence (context + question) in tokens of each chunk
     passed to the model. The context will be split in several chunks (using doc_stride as
     overlap) if needed.
     """
-    top_k: int | None = None
+    top_k: Optional[int] = None
     """The number of answers to return (will be chosen by order of likelihood). Can return less
     than top_k answers if there are not enough options available within the context.
     """
-    word_boxes: list[list[float] | str] | None = None
+    word_boxes: Optional[list[Union[list[float], str]]] = None
     """A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
     skip the OCR step and use the provided bounding boxes instead.
     """
@@ -58,7 +58,7 @@ class DocumentQuestionAnsweringInput(BaseInferenceType):
 
     inputs: DocumentQuestionAnsweringInputData
     """One (document, question) pair to answer"""
-    parameters: DocumentQuestionAnsweringParameters | None = None
+    parameters: Optional[DocumentQuestionAnsweringParameters] = None
     """Additional inference parameters for Document Question Answering"""
 
 

diff --git a/src/huggingface_hub/inference/_generated/types/feature_extraction.py b/src/huggingface_hub/inference/_generated/types/feature_extraction.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Literal, Optional
+from typing import Literal, Optional, Union
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -19,10 +19,10 @@ class FeatureExtractionInput(BaseInferenceType):
     https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
     """
 
-    inputs: list[str] | str
+    inputs: Union[list[str], str]
     """The text or list of texts to embed."""
-    normalize: bool | None = None
-    prompt_name: str | None = None
+    normalize: Optional[bool] = None
+    prompt_name: Optional[str] = None
     """The name of the prompt that should be used for encoding. If not set, no prompt
     will be applied.
     Must be a key in the `sentence-transformers` configuration `prompts` dictionary.
@@ -32,5 +32,5 @@ class FeatureExtractionInput(BaseInferenceType):
     "query: What is the capital of France?" because the prompt text will be prepended before
     any text to encode.
     """
-    truncate: bool | None = None
+    truncate: Optional[bool] = None
     truncation_direction: Optional["FeatureExtractionInputTruncationDirection"] = None
diff --git a/src/huggingface_hub/inference/_generated/types/fill_mask.py b/src/huggingface_hub/inference/_generated/types/fill_mask.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any
+from typing import Any, Optional
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -12,13 +12,13 @@
 class FillMaskParameters(BaseInferenceType):
     """Additional inference parameters for Fill Mask"""
 
-    targets: list[str] | None = None
+    targets: Optional[list[str]] = None
     """When passed, the model will limit the scores to the passed targets instead of looking up
     in the whole vocabulary. If the provided targets are not in the model vocab, they will be
     tokenized and the first resulting token will be used (with a warning, and that might be
     slower).
     """
-    top_k: int | None = None
+    top_k: Optional[int] = None
     """When passed, overrides the number of predictions to return."""
 
 
@@ -28,7 +28,7 @@ class FillMaskInput(BaseInferenceType):
 
     inputs: str
     """The text with masked tokens"""
-    parameters: FillMaskParameters | None = None
+    parameters: Optional[FillMaskParameters] = None
     """Additional inference parameters for Fill Mask"""
 
 
@@ -43,5 +43,5 @@ class FillMaskOutputElement(BaseInferenceType):
     token: int
     """The predicted token id (to replace the masked one)."""
     token_str: Any
-    fill_mask_output_token_str: str | None = None
+    fill_mask_output_token_str: Optional[str] = None
     """The predicted token (to replace the masked one)."""
diff --git a/src/huggingface_hub/inference/_generated/types/image_classification.py b/src/huggingface_hub/inference/_generated/types/image_classification.py
@@ -17,7 +17,7 @@ class ImageClassificationParameters(BaseInferenceType):
 
     function_to_apply: Optional["ImageClassificationOutputTransform"] = None
     """The function to apply to the model outputs in order to retrieve the scores."""
-    top_k: int | None = None
+    top_k: Optional[int] = None
     """When specified, limits the output to the top K most probable classes."""
 
 
@@ -29,7 +29,7 @@ class ImageClassificationInput(BaseInferenceType):
     """The input image data as a base64-encoded string. If no `parameters` are provided, you can
     also provide the image data as a raw bytes payload.
     """
-    parameters: ImageClassificationParameters | None = None
+    parameters: Optional[ImageClassificationParameters] = None
     """Additional inference parameters for Image Classification"""
 
 

diff --git a/src/huggingface_hub/inference/_generated/types/image_segmentation.py b/src/huggingface_hub/inference/_generated/types/image_segmentation.py
@@ -15,13 +15,13 @@
 class ImageSegmentationParameters(BaseInferenceType):
     """Additional inference parameters for Image Segmentation"""
 
-    mask_threshold: float | None = None
+    mask_threshold: Optional[float] = None
     """Threshold to use when turning the predicted masks into binary values."""
-    overlap_mask_area_threshold: float | None = None
+    overlap_mask_area_threshold: Optional[float] = None
     """Mask overlap threshold to eliminate small, disconnected segments."""
     subtask: Optional["ImageSegmentationSubtask"] = None
     """Segmentation task to be performed, depending on model capabilities."""
-    threshold: float | None = None
+    threshold: Optional[float] = None
     """Probability threshold to filter out predicted masks."""
 
 
@@ -33,7 +33,7 @@ class ImageSegmentationInput(BaseInferenceType):
     """The input image data as a base64-encoded string. If no `parameters` are provided, you can
     also provide the image data as a raw bytes payload.
     """
-    parameters: ImageSegmentationParameters | None = None
+    parameters: Optional[ImageSegmentationParameters] = None
     """Additional inference parameters for Image Segmentation"""
 
 
@@ -47,5 +47,5 @@ class ImageSegmentationOutputElement(BaseInferenceType):
     """The label of the predicted segment."""
     mask: str
     """The corresponding mask as a black-and-white image (base64-encoded)."""
-    score: float | None = None
+    score: Optional[float] = None
     """The score or confidence degree the model has."""
diff --git a/src/huggingface_hub/inference/_generated/types/image_text_to_image.py b/src/huggingface_hub/inference/_generated/types/image_text_to_image.py
@@ -3,7 +3,7 @@
 # See:
 #   - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 #   - specs:  https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from typing import Any
+from typing import Any, Optional
 
 from .base import BaseInferenceType, dataclass_with_extra
 
@@ -22,23 +22,23 @@ class ImageTextToImageTargetSize(BaseInferenceType):
 class ImageTextToImageParameters(BaseInferenceType):
     """Additional inference parameters for Image Text To Image"""
 
-    guidance_scale: float | None = None
+    guidance_scale: Optional[float] = None
     """For diffusion models. A higher guidance scale value encourages the model to generate
     images closely linked to the text prompt at the expense of lower image quality.
     """
-    negative_prompt: str | None = None
+    negative_prompt: Optional[str] = None
     """One prompt to guide what NOT to include in image generation."""
-    num_inference_steps: int | None = None
+    num_inference_steps: Optional[int] = None
     """For diffusion models. The number of denoising steps. More denoising steps usually lead to
     a higher quality image at the expense of slower inference.
     """
-    prompt: str | None = None
+    prompt: Optional[str] = None
     """The text prompt to guide the image generation. Either this or inputs (image) must be
     provided.
     """
-    seed: int | None = None
+    seed: Optional[int] = None
     """Seed for the random number generator."""
-    target_size: ImageTextToImageTargetSize | None = None
+    target_size: Optional[ImageTextToImageTargetSize] = None
     """The size in pixels of the output image. This parameter is only supported by some
     providers and for specific models. It will be ignored when unsupported.
     """
@@ -50,12 +50,12 @@ class ImageTextToImageInput(BaseInferenceType):
     must be provided, or both.
     """
 
-    inputs: str | None = None
+    inputs: Optional[str] = None
     """The input image data as a base64-encoded string. If no `parameters` are provided, you can
     also provide the image data as a raw bytes payload. Either this or prompt must be
     provided.
     """
-    parameters: ImageTextToImageParameters | None = None
+    parameters: Optional[ImageTextToImageParameters] = None
     """Additional inference parameters for Image Text To Image"""