From 0b7c88f406aea9c5534d9d2e6d3e6deb7ca0a947 Mon Sep 17 00:00:00 2001
From: Mark Ericksen <brainlid@gmail.com>
Date: Mon, 25 May 2026 14:01:52 -0600
Subject: [PATCH] Cloudflare Workers AI with Kimi K2.6 verified

---
 .env.example                           |  4 +-
 README.md                              | 38 ++++++++++
 test/chat_models/chat_open_ai_test.exs | 99 ++++++++++++++++++++++++++
 3 files changed, 140 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index 0914155b..0b936d1d 100644
--- a/.env.example
+++ b/.env.example
@@ -12,4 +12,6 @@ export VERTEX_API_KEY="YOUR_VERTEX_API_KEY"
 export VERTEX_API_ENDPOINT="YOUR_VERTEX_API_ENDPOINT"
 export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY"
 export XAI_API_KEY="YOUR_XAI_API_KEY"
-export AWS_BEARER_TOKEN_BEDROCK="YOUR_AWS_BEDROCK_BEARER_TOKEN"
\ No newline at end of file
+export AWS_BEARER_TOKEN_BEDROCK="YOUR_AWS_BEDROCK_BEARER_TOKEN"
+export CLOUDFLARE_ACCOUNT_ID="YOUR_CLOUDFLARE_ACCOUNT_ID"
+export CLOUDFLARE_API_TOKEN="YOUR_CLOUDFLARE_API_TOKEN"
\ No newline at end of file
diff --git a/README.md b/README.md
index 2b42004b..4ba45324 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ Elixir LangChain enables Elixir applications to integrate AI services and self-h
 - **AWS Bedrock Mantle** - OpenAI-compatible gateway for third-party models hosted on Bedrock (Moonshot Kimi K2 family, OpenAI gpt-oss, and many others)
 - **OpenAI ChatGPT** - GPT models via the Chat Completions API
 - **OpenAI Responses API** - OpenAI's newer Responses API with WebSocket transport support
+- **Cloudflare Workers AI** - OpenAI-compatible gateway via `ChatOpenAI` (e.g. Moonshot Kimi K2.6 and other Workers AI models)
 - **xAI Grok** - Grok-4, Grok-3-mini, Grok-4 Heavy (multi-agent), and more
 - **Google Gemini** - Gemini AI models
 - **Google Vertex AI** - Google's enterprise AI offering
@@ -285,6 +286,43 @@ For example, if a locally running service provided that feature, the following c
   |> LLMChain.run()
 ```
 
+### Cloudflare Workers AI
+
+Cloudflare Workers AI exposes an OpenAI-compatible `/chat/completions` endpoint, so it works through `ChatOpenAI` by overriding the `endpoint` and supplying a Cloudflare API token. Any model in the Workers AI catalog (e.g. `@cf/moonshotai/kimi-k2.6`) can be used this way, including with streaming and tool calling.
+
+```elixir
+alias LangChain.ChatModels.ChatOpenAI
+alias LangChain.Chains.LLMChain
+alias LangChain.Message
+
+account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID")
+api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN")
+
+endpoint =
+  "https://api.cloudflare.com/client/v4/accounts/#{account_id}/ai/v1/chat/completions"
+
+{:ok, chat} =
+  ChatOpenAI.new(%{
+    endpoint: endpoint,
+    api_key: api_key,
+    model: "@cf/moonshotai/kimi-k2.6",
+    temperature: 0,
+    seed: 0,
+    stream: false
+  })
+
+{:ok, updated_chain} =
+  %{llm: chat}
+  |> LLMChain.new!()
+  |> LLMChain.add_messages([
+    Message.new_system!("You answer with a single word."),
+    Message.new_user!("Reply with the single word: PONG")
+  ])
+  |> LLMChain.run()
+```
+
+Streaming and tool calling work the same as with native OpenAI: set `stream: true` and add tools via `LLMChain.add_tools/2`.
+
 ### Bumblebee Chat Support
 
 Bumblebee hosted chat models are supported. There is built-in support for Llama 2, Mistral, and Zephyr models.
diff --git a/test/chat_models/chat_open_ai_test.exs b/test/chat_models/chat_open_ai_test.exs
index 23d1e732..2fbc1ac2 100644
--- a/test/chat_models/chat_open_ai_test.exs
+++ b/test/chat_models/chat_open_ai_test.exs
@@ -1248,6 +1248,105 @@ defmodule LangChain.ChatModels.ChatOpenAITest do
       assert message.role == :assistant
       assert message.index == 0
     end
+
+    @tag live_call: true, live_cloudflare: true
+    test "supports Cloudflare Workers AI (OpenAI-compatible) with Kimi K2.6" do
+      # https://developers.cloudflare.com/workers-ai/configuration/open-ai-compatibility/
+      account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID")
+      api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN")
+
+      endpoint =
+        "https://api.cloudflare.com/client/v4/accounts/#{account_id}" <>
+          "/ai/v1/chat/completions"
+
+      {:ok, chat} =
+        ChatOpenAI.new(%{
+          endpoint: endpoint,
+          api_key: api_key,
+          model: "@cf/moonshotai/kimi-k2.6",
+          temperature: 0,
+          seed: 0,
+          stream: false
+        })
+
+      {:ok, [message]} =
+        ChatOpenAI.call(
+          chat,
+          [
+            Message.new_system!("You answer with a single word."),
+            Message.new_user!("Reply with the single word: PONG")
+          ],
+          []
+        )
+
+      assert message.role == :assistant
+      assert ContentPart.parts_to_string(message.content) =~ ~r/PONG/i
+    end
+
+    @tag live_call: true, live_cloudflare: true
+    test "Cloudflare Kimi K2.6 streamed tool call assembles into a complete ToolCall",
+         %{weather: weather} do
+      account_id = System.fetch_env!("CLOUDFLARE_ACCOUNT_ID")
+      api_key = System.fetch_env!("CLOUDFLARE_API_TOKEN")
+
+      endpoint =
+        "https://api.cloudflare.com/client/v4/accounts/#{account_id}" <>
+          "/ai/v1/chat/completions"
+
+      handler = %{
+        on_llm_new_delta: fn %LLMChain{} = _chain, deltas ->
+          send(self(), deltas)
+        end,
+        on_message_processed: fn _chain, message ->
+          send(self(), {:test_message_processed, message})
+        end
+      }
+
+      model =
+        ChatOpenAI.new!(%{
+          endpoint: endpoint,
+          api_key: api_key,
+          model: "@cf/moonshotai/kimi-k2.6",
+          temperature: 0,
+          seed: 0,
+          stream: true
+        })
+
+      original_chain =
+        %{llm: model}
+        |> LLMChain.new!()
+        |> LLMChain.add_callback(handler)
+        |> LLMChain.add_tools([weather])
+        |> LLMChain.add_messages([
+          Message.new_user!("What is the weather like in Moab, Utah? Use the tool.")
+        ])
+
+      {:ok, updated_chain} = LLMChain.run(original_chain)
+
+      # The model should have streamed back an assistant message containing
+      # the get_weather tool call with both required arguments parsed.
+      assert %Message{role: :assistant} = updated_chain.last_message
+      assert [%ToolCall{} = call] = updated_chain.last_message.tool_calls
+      assert call.name == "get_weather"
+      assert call.type == :function
+      assert call.status == :complete
+      assert is_map(call.arguments)
+      assert call.arguments["city"] =~ ~r/moab/i
+      assert call.arguments["state"] =~ ~r/ut/i
+
+      assert_received {:test_message_processed, %Message{} = processed}
+      assert processed == updated_chain.last_message
+
+      # Collect every delta the streaming callback sent us and confirm that
+      # merging them produces the same final message LLMChain assembled.
+      # This is the real streaming-pipeline test: SSE frame decoding +
+      # incremental tool_call argument accumulation + merge.
+      deltas = collect_messages() |> List.flatten()
+      assert length(deltas) > 0, "expected at least one streamed delta, got none"
+
+      {:ok, delta_merged_chain} = LLMChain.apply_deltas(original_chain, deltas)
+      assert delta_merged_chain.last_message == updated_chain.last_message
+    end
   end
 
   describe "use in LLMChain" do