From efb7d16884f79dcc26f6f2704592512fcf5ea155 Mon Sep 17 00:00:00 2001 From: Mokzu Date: Mon, 24 Nov 2025 18:48:57 -0800 Subject: [PATCH 1/3] init --- docs/source/en/guides/inference.md | 62 +++++++++---------- src/huggingface_hub/inference/_client.py | 2 +- .../inference/_generated/_async_client.py | 2 +- .../inference/_providers/__init__.py | 6 ++ .../inference/_providers/_common.py | 1 + .../inference/_providers/mokzu.py | 33 ++++++++++ tests/test_inference_providers.py | 14 +++++ 7 files changed, 87 insertions(+), 33 deletions(-) create mode 100644 src/huggingface_hub/inference/_providers/mokzu.py diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md index 172f7bed80..1ffdd7a5f1 100644 --- a/docs/source/en/guides/inference.md +++ b/docs/source/en/guides/inference.md @@ -192,37 +192,37 @@ For more details, refer to the [Inference Providers pricing documentation](https [`InferenceClient`]'s goal is to provide the easiest interface to run inference on Hugging Face models, on any provider. It has a simple API that supports the most common tasks. 
Here is a table showing which providers support which tasks: -| Task | Black Forest Labs | Cerebras | Clarifai | Cohere | fal-ai | Featherless AI | Fireworks AI | Groq | HF Inference | Hyperbolic | Nebius AI Studio | Novita AI | Nscale | OVHcloud AI Endpoints | Public AI | Replicate | Sambanova | Scaleway | Together | Wavespeed | Zai | -| --------------------------------------------------- | ----------------- | -------- | -------- | ------ | ------ | -------------- | ------------ | ---- | ------------ | ---------- | ---------------- | --------- | ------ | -------- | ---------- | --------- | --------- | --------- | -------- | --------- | ---- | -| [`~InferenceClient.audio_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.audio_to_audio`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.automatic_speech_recognition`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.chat_completion`] | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | -| [`~InferenceClient.document_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.feature_extraction`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | -| [`~InferenceClient.fill_mask`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.image_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.image_segmentation`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.image_to_image`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| [`~InferenceClient.image_to_video`] | ❌ | ❌ | ❌ | ❌ 
| ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | -| [`~InferenceClient.image_to_text`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.object_detection`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.sentence_similarity`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.summarization`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.table_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.text_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.text_generation`] | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | -| [`~InferenceClient.text_to_image`] | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| [`~InferenceClient.text_to_speech`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.text_to_video`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | -| [`~InferenceClient.tabular_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.tabular_regression`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.token_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.translation`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| 
[`~InferenceClient.visual_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.zero_shot_image_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | -| [`~InferenceClient.zero_shot_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| Task | Black Forest Labs | Cerebras | Clarifai | Cohere | fal-ai | Featherless AI | Fireworks AI | Groq | HF Inference | Hyperbolic | Mokzu | Nebius AI Studio | Novita AI | Nscale | OVHcloud AI Endpoints | Public AI | Replicate | Sambanova | Scaleway | Together | Wavespeed | Zai | +| --------------------------------------------------- | ----------------- | -------- | -------- | ------ | ------ | -------------- | ------------ | ---- | ------------ | ---------- | ----- | ---------------- | --------- | ------ | -------- | ---------- | --------- | --------- | --------- | -------- | --------- | ---- | +| [`~InferenceClient.audio_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.audio_to_audio`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.automatic_speech_recognition`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.chat_completion`] | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | +| [`~InferenceClient.document_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.feature_extraction`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | +| [`~InferenceClient.fill_mask`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.image_classification`] | 
❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.image_segmentation`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.image_to_image`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | +| [`~InferenceClient.image_to_video`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | +| [`~InferenceClient.image_to_text`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.object_detection`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.sentence_similarity`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.summarization`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.table_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.text_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.text_generation`] | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | +| [`~InferenceClient.text_to_image`] | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | +| [`~InferenceClient.text_to_speech`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.text_to_video`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | +| [`~InferenceClient.tabular_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ 
| ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.tabular_regression`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.token_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.translation`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.visual_question_answering`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.zero_shot_image_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.zero_shot_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | > [!TIP] > Check out the [Tasks](https://huggingface.co/tasks) page to learn more about each task. diff --git a/src/huggingface_hub/inference/_client.py b/src/huggingface_hub/inference/_client.py index 791259e039..d52fdb5b5e 100644 --- a/src/huggingface_hub/inference/_client.py +++ b/src/huggingface_hub/inference/_client.py @@ -135,7 +135,7 @@ class InferenceClient: Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2 arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL. provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`. + Name of the provider to use for inference. 
Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"mokzu"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`. Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers. If model is a URL or `base_url` is passed, then `provider` is not used. token (`str`, *optional*): diff --git a/src/huggingface_hub/inference/_generated/_async_client.py b/src/huggingface_hub/inference/_generated/_async_client.py index feab3a230e..7c40959549 100644 --- a/src/huggingface_hub/inference/_generated/_async_client.py +++ b/src/huggingface_hub/inference/_generated/_async_client.py @@ -126,7 +126,7 @@ class AsyncInferenceClient: Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2 arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL. provider (`str`, *optional*): - Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`. + Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"mokzu"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`. 
Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers. If model is a URL or `base_url` is passed, then `provider` is not used. token (`str`, *optional*): diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py index d8092c1daa..16854f53de 100644 --- a/src/huggingface_hub/inference/_providers/__init__.py +++ b/src/huggingface_hub/inference/_providers/__init__.py @@ -29,6 +29,7 @@ HFInferenceTask, ) from .hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask +from .mokzu import MokzuTextToVideoTask, MokzuImageToVideoTask from .nebius import ( NebiusConversationalTask, NebiusFeatureExtractionTask, @@ -73,6 +74,7 @@ "groq", "hf-inference", "hyperbolic", + "mokzu", "nebius", "novita", "nscale", @@ -156,6 +158,10 @@ "conversational": HyperbolicTextGenerationTask("conversational"), "text-generation": HyperbolicTextGenerationTask("text-generation"), }, + "mokzu": { + "text-to-video": MokzuTextToVideoTask(), + "image-to-video": MokzuImageToVideoTask(), + }, "nebius": { "text-to-image": NebiusTextToImageTask(), "conversational": NebiusConversationalTask(), diff --git a/src/huggingface_hub/inference/_providers/_common.py b/src/huggingface_hub/inference/_providers/_common.py index 4a8bda9ea8..3611667938 100644 --- a/src/huggingface_hub/inference/_providers/_common.py +++ b/src/huggingface_hub/inference/_providers/_common.py @@ -30,6 +30,7 @@ "groq": {}, "hf-inference": {}, "hyperbolic": {}, + "mokzu": {}, "nebius": {}, "nscale": {}, "ovhcloud": {}, diff --git a/src/huggingface_hub/inference/_providers/mokzu.py b/src/huggingface_hub/inference/_providers/mokzu.py new file mode 100644 index 0000000000..79585ea597 --- /dev/null +++ b/src/huggingface_hub/inference/_providers/mokzu.py @@ -0,0 +1,33 @@ +from ._common import TaskProviderHelper +from typing import Any, Dict, Optional, Union + +class 
MokzuTextToVideoTask(TaskProviderHelper): + def __init__(self): + super().__init__(provider="mokzu", base_url="https://api.mokzu.com/v1", task="text-to-video") + + def _prepare_route(self, mapped_model: str, api_key: str) -> str: + return f"{self.base_url}/{self.task}" + + def _prepare_payload_as_dict( + self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping + ) -> Optional[dict]: + return {"prompt": inputs, **filter_none(parameters)} + + def get_response(self, response: Union[Dict, bytes], request_params: Optional[Dict] = None) -> Any:: + return response["video_url"] if isinstance(response, dict) else {"video_url": ""} + +class MokzuImageToVideoTask(TaskProviderHelper): + def __init__(self): + super().__init__(provider="mokzu", base_url="https://api.mokzu.com/v1", task="image-to-video") + + def _prepare_route(self, mapped_model: str, api_key: str) -> str: + return f"{self.base_url}/{self.task}" + + def _prepare_payload_as_dict( + self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping + ) -> Optional[dict]: + encoded = base64.b64encode(inputs).decode("utf-8") + return {"file": encoded, **filter_none(parameters)} + + def get_response(self, response: Union[Dict, bytes], request_params: Optional[Dict] = None) -> Any:: + return response["video_url"] if isinstance(response, dict) else {"video_url": ""} diff --git a/tests/test_inference_providers.py b/tests/test_inference_providers.py index 54cb7b150d..971f3ff6d0 100644 --- a/tests/test_inference_providers.py +++ b/tests/test_inference_providers.py @@ -42,6 +42,7 @@ HFInferenceTask, ) from huggingface_hub.inference._providers.hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask +from huggingface_hub.inference._providers.mokzu import MokzuTextToVideoTask, MokzuImageToVideoTask from huggingface_hub.inference._providers.nebius import NebiusFeatureExtractionTask, NebiusTextToImageTask from huggingface_hub.inference._providers.novita import 
NovitaConversationalTask, NovitaTextGenerationTask from huggingface_hub.inference._providers.nscale import NscaleConversationalTask, NscaleTextToImageTask @@ -1210,6 +1211,19 @@ def test_text_to_image_get_response(self): assert response == dummy_image +class TestMokzuProvider: + def test_mokzu_text_to_video_payload(): + helper = MokzuTextToVideoTask() + helper.task = "text-to-video" + payload = helper._prepare_payload_as_dict("Hello world", {}, "mokzu") + assert payload == {"prompt": "Hello world"} + + def test_mokzu_image_to_video_payload(): + helper = MokzuImageToVideoTask() + helper.task = "image-to-video" + payload = helper._prepare_payload_as_dict(b"binarydata", {}, "mokzu") + assert "file" in payload + class TestNebiusProvider: def test_prepare_route_text_to_image(self): helper = NebiusTextToImageTask() From df9835eac933615b85926393bcf39d13840db92c Mon Sep 17 00:00:00 2001 From: Mokzu Date: Wed, 14 Jan 2026 09:01:11 -0800 Subject: [PATCH 2/3] Add Mokzu provider for image-to-video inference --- .../inference/_providers/__init__.py | 3 +- .../inference/_providers/mokzu.py | 59 ++++++++++------ tests/test_inference_providers.py | 68 ++++++++++++++++--- 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/src/huggingface_hub/inference/_providers/__init__.py b/src/huggingface_hub/inference/_providers/__init__.py index 16854f53de..0bfbc4255c 100644 --- a/src/huggingface_hub/inference/_providers/__init__.py +++ b/src/huggingface_hub/inference/_providers/__init__.py @@ -29,7 +29,7 @@ HFInferenceTask, ) from .hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask -from .mokzu import MokzuTextToVideoTask, MokzuImageToVideoTask +from .mokzu import MokzuImageToVideoTask from .nebius import ( NebiusConversationalTask, NebiusFeatureExtractionTask, @@ -159,7 +159,6 @@ "text-generation": HyperbolicTextGenerationTask("text-generation"), }, "mokzu": { - "text-to-video": MokzuTextToVideoTask(), "image-to-video": MokzuImageToVideoTask(), }, 
"nebius": { diff --git a/src/huggingface_hub/inference/_providers/mokzu.py b/src/huggingface_hub/inference/_providers/mokzu.py index 79585ea597..6e2a528fb4 100644 --- a/src/huggingface_hub/inference/_providers/mokzu.py +++ b/src/huggingface_hub/inference/_providers/mokzu.py @@ -1,33 +1,48 @@ -from ._common import TaskProviderHelper -from typing import Any, Dict, Optional, Union +import base64 +from typing import Any, Optional, Union -class MokzuTextToVideoTask(TaskProviderHelper): - def __init__(self): - super().__init__(provider="mokzu", base_url="https://api.mokzu.com/v1", task="text-to-video") - - def _prepare_route(self, mapped_model: str, api_key: str) -> str: - return f"{self.base_url}/{self.task}" - - def _prepare_payload_as_dict( - self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping - ) -> Optional[dict]: - return {"prompt": inputs, **filter_none(parameters)} +from huggingface_hub.hf_api import InferenceProviderMapping +from huggingface_hub.inference._common import RequestParameters, _as_dict, _as_url +from huggingface_hub.inference._providers._common import TaskProviderHelper, filter_none - def get_response(self, response: Union[Dict, bytes], request_params: Optional[Dict] = None) -> Any:: - return response["video_url"] if isinstance(response, dict) else {"video_url": ""} class MokzuImageToVideoTask(TaskProviderHelper): def __init__(self): - super().__init__(provider="mokzu", base_url="https://api.mokzu.com/v1", task="image-to-video") + super().__init__(provider="mokzu", base_url="https://api.mokzu.com", task="image-to-video") def _prepare_route(self, mapped_model: str, api_key: str) -> str: - return f"{self.base_url}/{self.task}" + return "/v1/image-to-video" def _prepare_payload_as_dict( self, inputs: Any, parameters: dict, provider_mapping_info: InferenceProviderMapping ) -> Optional[dict]: - encoded = base64.b64encode(inputs).decode("utf-8") - return {"file": encoded, **filter_none(parameters)} - - def 
get_response(self, response: Union[Dict, bytes], request_params: Optional[Dict] = None) -> Any:: - return response["video_url"] if isinstance(response, dict) else {"video_url": ""} + # Inputs can be bytes (image data) or dict with image and prompt + if isinstance(inputs, bytes): + encoded = base64.b64encode(inputs).decode("utf-8") + payload = {"image": encoded, **filter_none(parameters)} + elif isinstance(inputs, dict): + # For dict input, expect 'image' (bytes or base64) and optional 'prompt' + image_data = inputs.get("image", "") + if isinstance(image_data, bytes): + image_data = base64.b64encode(image_data).decode("utf-8") + payload = { + "image": image_data, + "prompt": inputs.get("prompt", ""), + **filter_none(parameters) + } + else: + # Assume string (base64 or URL) + payload = {"image": inputs, **filter_none(parameters)} + + # Ensure prompt exists + if "prompt" not in payload: + payload["prompt"] = parameters.get("prompt", "") + + return payload + + def get_response(self, response: Union[bytes, dict], request_params: Optional[RequestParameters] = None) -> Any: + response_dict = _as_dict(response) + video_url = response_dict.get("video_url", "") + if video_url: + return _as_url(video_url, default_mime_type="video/mp4") + raise ValueError("No video_url in response") diff --git a/tests/test_inference_providers.py b/tests/test_inference_providers.py index 971f3ff6d0..72708df380 100644 --- a/tests/test_inference_providers.py +++ b/tests/test_inference_providers.py @@ -42,7 +42,7 @@ HFInferenceTask, ) from huggingface_hub.inference._providers.hyperbolic import HyperbolicTextGenerationTask, HyperbolicTextToImageTask -from huggingface_hub.inference._providers.mokzu import MokzuTextToVideoTask, MokzuImageToVideoTask +from huggingface_hub.inference._providers.mokzu import MokzuImageToVideoTask from huggingface_hub.inference._providers.nebius import NebiusFeatureExtractionTask, NebiusTextToImageTask from huggingface_hub.inference._providers.novita import 
NovitaConversationalTask, NovitaTextGenerationTask from huggingface_hub.inference._providers.nscale import NscaleConversationalTask, NscaleTextToImageTask @@ -1212,17 +1212,65 @@ def test_text_to_image_get_response(self): class TestMokzuProvider: - def test_mokzu_text_to_video_payload(): - helper = MokzuTextToVideoTask() - helper.task = "text-to-video" - payload = helper._prepare_payload_as_dict("Hello world", {}, "mokzu") - assert payload == {"prompt": "Hello world"} + def test_mokzu_image_to_video_payload_bytes(self): + helper = MokzuImageToVideoTask() + mapping_info = InferenceProviderMapping( + provider="mokzu", + hf_model_id="mokzu/image-to-video", + providerId="mokzu/image-to-video", + task="image-to-video", + status="live", + ) + payload = helper._prepare_payload_as_dict(b"binaryimagedata", {"prompt": "a cat walking"}, mapping_info) + assert "image" in payload + assert payload["image"] == base64.b64encode(b"binaryimagedata").decode("utf-8") + assert payload["prompt"] == "a cat walking" + + def test_mokzu_image_to_video_payload_dict(self): + helper = MokzuImageToVideoTask() + mapping_info = InferenceProviderMapping( + provider="mokzu", + hf_model_id="mokzu/image-to-video", + providerId="mokzu/image-to-video", + task="image-to-video", + status="live", + ) + payload = helper._prepare_payload_as_dict( + {"image": b"binaryimagedata", "prompt": "a dog running"}, + {"duration": 3}, + mapping_info + ) + assert payload["image"] == base64.b64encode(b"binaryimagedata").decode("utf-8") + assert payload["prompt"] == "a dog running" + assert payload["duration"] == 3 - def test_mokzu_image_to_video_payload(): + def test_mokzu_image_to_video_payload_base64(self): + helper = MokzuImageToVideoTask() + mapping_info = InferenceProviderMapping( + provider="mokzu", + hf_model_id="mokzu/image-to-video", + providerId="mokzu/image-to-video", + task="image-to-video", + status="live", + ) + base64_img = base64.b64encode(b"testimage").decode("utf-8") + payload = 
helper._prepare_payload_as_dict(base64_img, {"prompt": "test"}, mapping_info) + assert payload["image"] == base64_img + assert payload["prompt"] == "test" + + def test_mokzu_image_to_video_route(self): + helper = MokzuImageToVideoTask() + assert helper._prepare_route("mokzu/image-to-video", "api_key") == "/v1/image-to-video" + + def test_mokzu_image_to_video_response(self): + helper = MokzuImageToVideoTask() + response = helper.get_response({"video_url": "https://mokzu.com/videos/output.mp4"}) + assert response == "https://mokzu.com/videos/output.mp4" + + def test_mokzu_image_to_video_response_error(self): helper = MokzuImageToVideoTask() - helper.task = "image-to-video" - payload = helper._prepare_payload_as_dict(b"binarydata", {}, "mokzu") - assert "file" in payload + with pytest.raises(ValueError, match="No video_url in response"): + helper.get_response({"error": "failed"}) class TestNebiusProvider: def test_prepare_route_text_to_image(self): From c40f477f0b96ae219460b09d14e317c22b445002 Mon Sep 17 00:00:00 2001 From: Mokzu Date: Wed, 14 Jan 2026 09:06:04 -0800 Subject: [PATCH 3/3] Add Mokzu provider for image-to-video inference --- docs/source/en/guides/inference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/guides/inference.md b/docs/source/en/guides/inference.md index 1ffdd7a5f1..28a5df3e76 100644 --- a/docs/source/en/guides/inference.md +++ b/docs/source/en/guides/inference.md @@ -214,7 +214,7 @@ For more details, refer to the [Inference Providers pricing documentation](https | [`~InferenceClient.text_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | [`~InferenceClient.text_generation`] | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | [`~InferenceClient.text_to_image`] | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | -| [`~InferenceClient.text_to_speech`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | 
❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | +| [`~InferenceClient.text_to_speech`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | | [`~InferenceClient.text_to_video`] | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | | [`~InferenceClient.tabular_classification`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | [`~InferenceClient.tabular_regression`] | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |