From 0b7c88f406aea9c5534d9d2e6d3e6deb7ca0a947 Mon Sep 17 00:00:00 2001 From: Mark Ericksen Date: Mon, 25 May 2026 14:01:52 -0600 Subject: [PATCH] Cloudflare Workers AI with Kimi K2.6 verified --- .env.example | 4 +- README.md | 38 ++++++++++ test/chat_models/chat_open_ai_test.exs | 99 ++++++++++++++++++++++++++ 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 0914155b..0b936d1d 100644 --- a/.env.example +++ b/.env.example @@ -12,4 +12,6 @@ export VERTEX_API_KEY="YOUR_VERTEX_API_KEY" export VERTEX_API_ENDPOINT="YOUR_VERTEX_API_ENDPOINT" export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" export XAI_API_KEY="YOUR_XAI_API_KEY" -export AWS_BEARER_TOKEN_BEDROCK="YOUR_AWS_BEDROCK_BEARER_TOKEN" \ No newline at end of file +export AWS_BEARER_TOKEN_BEDROCK="YOUR_AWS_BEDROCK_BEARER_TOKEN" +export CLOUDFLARE_ACCOUNT_ID="YOUR_CLOUDFLARE_ACCOUNT_ID" +export CLOUDFLARE_API_TOKEN="YOUR_CLOUDFLARE_API_TOKEN" \ No newline at end of file diff --git a/README.md b/README.md index 2b42004b..4ba45324 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ Elixir LangChain enables Elixir applications to integrate AI services and self-h - **AWS Bedrock Mantle** - OpenAI-compatible gateway for third-party models hosted on Bedrock (Moonshot Kimi K2 family, OpenAI gpt-oss, and many others) - **OpenAI ChatGPT** - GPT models via the Chat Completions API - **OpenAI Responses API** - OpenAI's newer Responses API with WebSocket transport support +- **Cloudflare Workers AI** - OpenAI-compatible gateway via `ChatOpenAI` (e.g. Moonshot Kimi K2.6 and other Workers AI models) - **xAI Grok** - Grok-4, Grok-3-mini, Grok-4 Heavy (multi-agent), and more - **Google Gemini** - Gemini AI models - **Google Vertex AI** - Google's enterprise AI offering @@ -285,6 +286,43 @@ For example, if a locally running service provided that feature, the following c |> LLMChain.run() ``` +### Cloudflare Workers AI + +Cloudflare Workers AI exposes an OpenAI-compatible `/chat/completions` endpoint, so it works through `ChatOpenAI` by overriding the `endpoint` and supplying a Cloudflare API token. Any model in the Workers AI catalog (e.g. `@cf/moonshotai/kimi-k2.6`) can be used this way, including with streaming and tool calling. + +```elixir +alias LangChain.ChatModels.ChatOpenAI +alias LangChain.Chains.LLMChain +alias LangChain.Message + +account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID") +api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN") + +endpoint = + "https://api.cloudflare.com/client/v4/accounts/#{account_id}/ai/v1/chat/completions" + +{:ok, chat} = + ChatOpenAI.new(%{ + endpoint: endpoint, + api_key: api_key, + model: "@cf/moonshotai/kimi-k2.6", + temperature: 0, + seed: 0, + stream: false + }) + +{:ok, updated_chain} = + %{llm: chat} + |> LLMChain.new!() + |> LLMChain.add_messages([ + Message.new_system!("You answer with a single word."), + Message.new_user!("Reply with the single word: PONG") + ]) + |> LLMChain.run() +``` + +Streaming and tool calling work the same as with native OpenAI: set `stream: true` and add tools via `LLMChain.add_tools/2`. + ### Bumblebee Chat Support Bumblebee hosted chat models are supported. There is built-in support for Llama 2, Mistral, and Zephyr models. diff --git a/test/chat_models/chat_open_ai_test.exs b/test/chat_models/chat_open_ai_test.exs index 23d1e732..2fbc1ac2 100644 --- a/test/chat_models/chat_open_ai_test.exs +++ b/test/chat_models/chat_open_ai_test.exs @@ -1248,6 +1248,105 @@ defmodule LangChain.ChatModels.ChatOpenAITest do assert message.role == :assistant assert message.index == 0 end + + @tag live_call: true, live_cloudflare: true + test "supports Cloudflare Workers AI (OpenAI-compatible) with Kimi K2.6" do + # https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/ + account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID") + api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN") + + endpoint = + "https://api.cloudflare.com/client/v4/accounts/#{account_id}" <> + "/ai/v1/chat/completions" + + {:ok, chat} = + ChatOpenAI.new(%{ + endpoint: endpoint, + api_key: api_key, + model: "@cf/moonshotai/kimi-k2.6", + temperature: 0, + seed: 0, + stream: false + }) + + {:ok, [message]} = + ChatOpenAI.call( + chat, + [ + Message.new_system!("You answer with a single word."), + Message.new_user!("Reply with the single word: PONG") + ], + [] + ) + + assert message.role == :assistant + assert ContentPart.parts_to_string(message.content) =~ ~r/PONG/i + end + + @tag live_call: true, live_cloudflare: true + test "Cloudflare Kimi K2.6 streamed tool call assembles into a complete ToolCall", + %{weather: weather} do + account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID") + api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN") + + endpoint = + "https://api.cloudflare.com/client/v4/accounts/#{account_id}" <> + "/ai/v1/chat/completions" + + handler = %{ + on_llm_new_delta: fn %LLMChain{} = _chain, deltas -> + send(self(), deltas) + end, + on_message_processed: fn _chain, message -> + send(self(), {:test_message_processed, message}) + end + } + + model = + ChatOpenAI.new!(%{ + endpoint: endpoint, + api_key: api_key, + model: "@cf/moonshotai/kimi-k2.6", + temperature: 0, + seed: 0, + stream: true + }) + + original_chain = + %{llm: model} + |> LLMChain.new!() + |> LLMChain.add_callback(handler) + |> LLMChain.add_tools([weather]) + |> LLMChain.add_messages([ + Message.new_user!("What is the weather like in Moab, Utah? Use the tool.") + ]) + + {:ok, updated_chain} = LLMChain.run(original_chain) + + # The model should have streamed back an assistant message containing + # the get_weather tool call with both required arguments parsed. + assert %Message{role: :assistant} = updated_chain.last_message + assert [%ToolCall{} = call] = updated_chain.last_message.tool_calls + assert call.name == "get_weather" + assert call.type == :function + assert call.status == :complete + assert is_map(call.arguments) + assert call.arguments["city"] =~ ~r/moab/i + assert call.arguments["state"] =~ ~r/ut/i + + assert_received {:test_message_processed, %Message{} = processed} + assert processed == updated_chain.last_message + + # Collect every delta the streaming callback sent us and confirm that + # merging them produces the same final message LLMChain assembled. + # This is the real streaming-pipeline test: SSE frame decoding + + # incremental tool_call argument accumulation + merge. + deltas = collect_messages() |> List.flatten() + assert length(deltas) > 0, "expected at least one streamed delta, got none" + + {:ok, delta_merged_chain} = LLMChain.apply_deltas(original_chain, deltas) + assert delta_merged_chain.last_message == updated_chain.last_message + end end describe "use in LLMChain" do