-
-
Notifications
You must be signed in to change notification settings - Fork 1.3k
feat(inference): add native Anthropic (Claude) provider #2890
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4d93f44
bfa70bc
fa1cdfe
8956104
ea2e8c6
f0e8526
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,108 @@ | ||
| import { beforeEach, describe, expect, it, vi } from "vitest"; | ||
| import { z } from "zod"; | ||
|
|
||
| // Mock the Anthropic SDK: default export is a class exposing messages.create. | ||
| const createMock = vi.fn(); | ||
| vi.mock("@anthropic-ai/sdk", () => ({ | ||
| default: class { | ||
| messages = { create: createMock }; | ||
| // eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
| constructor(public opts: any) {} | ||
| }, | ||
| })); | ||
|
|
||
| import { AnthropicInferenceClient } from "./inference"; | ||
|
|
||
| function makeClient(overrides = {}) { | ||
| return new AnthropicInferenceClient({ | ||
| apiKey: "test-key", | ||
| textModel: "gpt-4.1-mini", | ||
| imageModel: "gpt-4o-mini", | ||
| maxOutputTokens: 100, | ||
| outputSchema: "structured", | ||
| ...overrides, | ||
| }); | ||
| } | ||
|
|
||
| beforeEach(() => { | ||
| createMock.mockReset(); | ||
| createMock.mockResolvedValue({ | ||
| content: [{ type: "text", text: '{"tags":["a"]}' }], | ||
| usage: { input_tokens: 10, output_tokens: 5 }, | ||
| }); | ||
| }); | ||
|
|
||
| describe("AnthropicInferenceClient text inference", () => { | ||
| it("substitutes the Claude default when the model is the OpenAI default", async () => { | ||
| const client = makeClient(); | ||
| await client.inferFromText("hi", { schema: null }); | ||
| expect(createMock.mock.calls[0][0].model).toBe("claude-haiku-4-5"); | ||
| }); | ||
|
|
||
| it("preserves an explicitly configured Claude model", async () => { | ||
| const client = makeClient({ textModel: "claude-sonnet-4-6" }); | ||
| await client.inferFromText("hi", { schema: null }); | ||
| expect(createMock.mock.calls[0][0].model).toBe("claude-sonnet-4-6"); | ||
| }); | ||
|
|
||
| it("sends max_tokens and the user message, and returns text + summed tokens", async () => { | ||
| const client = makeClient(); | ||
| const res = await client.inferFromText("hello", { schema: null }); | ||
| const body = createMock.mock.calls[0][0]; | ||
| expect(body.max_tokens).toBe(100); | ||
| expect(body.messages).toEqual([{ role: "user", content: "hello" }]); | ||
| expect(res.response).toBe('{"tags":["a"]}'); | ||
| expect(res.totalTokens).toBe(15); | ||
| }); | ||
|
|
||
| it("attaches output_config json_schema in structured mode when a schema is given", async () => { | ||
| const client = makeClient(); | ||
| await client.inferFromText("hi", { | ||
| schema: z.object({ tags: z.array(z.string()) }), | ||
| }); | ||
| const body = createMock.mock.calls[0][0]; | ||
| expect(body.output_config.format.type).toBe("json_schema"); | ||
| expect(body.output_config.format.schema).toBeTypeOf("object"); | ||
| }); | ||
|
|
||
| it("omits output_config in plain mode", async () => { | ||
| const client = makeClient({ outputSchema: "plain" }); | ||
| await client.inferFromText("hi", { | ||
| schema: z.object({ tags: z.array(z.string()) }), | ||
| }); | ||
| expect(createMock.mock.calls[0][0].output_config).toBeUndefined(); | ||
| }); | ||
|
|
||
| it("omits output_config when structured mode has no schema (e.g. summarization)", async () => { | ||
| const client = makeClient(); | ||
| await client.inferFromText("summarize", { schema: null }); | ||
| expect(createMock.mock.calls[0][0].output_config).toBeUndefined(); | ||
| }); | ||
| }); | ||
|
|
||
| describe("AnthropicInferenceClient image inference", () => { | ||
| it("builds a base64 image content block with the given media type", async () => { | ||
| const client = makeClient({ outputSchema: "plain" }); | ||
| await client.inferFromImage("describe", "image/png", "BASE64DATA", { | ||
| schema: null, | ||
| }); | ||
| const body = createMock.mock.calls[0][0]; | ||
| expect(body.model).toBe("claude-haiku-4-5"); | ||
| expect(body.messages[0].content).toEqual([ | ||
| { type: "text", text: "describe" }, | ||
| { | ||
| type: "image", | ||
| source: { type: "base64", media_type: "image/png", data: "BASE64DATA" }, | ||
| }, | ||
| ]); | ||
| }); | ||
| }); | ||
|
|
||
| describe("AnthropicInferenceClient embeddings", () => { | ||
| it("rejects with a clear unsupported error", async () => { | ||
| const client = makeClient(); | ||
| await expect(client.generateEmbeddingFromText(["x"])).rejects.toThrow( | ||
| /does not provide an embeddings API/, | ||
| ); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| import Anthropic from "@anthropic-ai/sdk"; | ||
| import { Ollama } from "ollama"; | ||
| import OpenAI from "openai"; | ||
| import { zodResponseFormat } from "openai/helpers/zod"; | ||
|
|
@@ -157,13 +158,63 @@ export class InferenceClientFactory { | |
| return OpenAIInferenceClient.fromConfig(); | ||
| } | ||
|
|
||
| if (serverConfig.inference.anthropicApiKey) { | ||
| return AnthropicInferenceClient.fromConfig(); | ||
| } | ||
|
|
||
| if (serverConfig.inference.ollamaBaseUrl) { | ||
| return OllamaInferenceClient.fromConfig(); | ||
| } | ||
| return null; | ||
| } | ||
| } | ||
|
|
||
| const ANTHROPIC_DEFAULT_MODEL = "claude-haiku-4-5"; | ||
| const OPENAI_DEFAULT_TEXT_MODEL = "gpt-4.1-mini"; | ||
| const OPENAI_DEFAULT_IMAGE_MODEL = "gpt-4o-mini"; | ||
|
|
||
| // If the configured model is still Karakeep's global OpenAI default, fall back to | ||
| // a Claude model so that a zero-config Anthropic setup works (and we never send a | ||
| // gpt-* id to Anthropic, which would 404). | ||
| function resolveAnthropicModel(model: string, openAIDefault: string): string { | ||
| if (model === openAIDefault) { | ||
| logger.info( | ||
| `[inference] No Claude model set for the Anthropic provider; defaulting to ${ANTHROPIC_DEFAULT_MODEL}. Set INFERENCE_TEXT_MODEL/INFERENCE_IMAGE_MODEL to override.`, | ||
| ); | ||
| return ANTHROPIC_DEFAULT_MODEL; | ||
| } | ||
| return model; | ||
| } | ||
|
Comment on lines
+179
to
+187
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Prompt To Fix With AI: This is a comment left during a code review.
Path: packages/shared/inference.ts
Line: 179-187
Comment:
**Ambiguous log message: text vs. image model substitution indistinguishable**
`resolveAnthropicModel` is called for both `textModel` and `imageModel`, but the log message always says "No Claude model set … Set INFERENCE_TEXT_MODEL/INFERENCE_IMAGE_MODEL to override." — it never identifies which model slot triggered the substitution. When both are defaulted, users see the same message twice with no way to tell them apart. Passing a `slot` identifier (e.g. `"text"` / `"image"`) would make the message actionable.
How can I resolve this? If you propose a fix, please make it concise. Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time! |
||
|
|
||
| // Anthropic has no json_object mode. We use native Structured Outputs | ||
| // (output_config.format) whenever a schema is supplied and the mode wants JSON. | ||
| function buildAnthropicOutputConfig( | ||
| // eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
| schema: z.ZodSchema<any> | null, | ||
| outputSchema: "structured" | "json" | "plain", | ||
| ) { | ||
| if (!schema || outputSchema === "plain") { | ||
| return undefined; | ||
| } | ||
| return { | ||
| format: { | ||
| type: "json_schema" as const, | ||
| schema: z.toJSONSchema(schema), | ||
| }, | ||
| }; | ||
| } | ||
|
Comment on lines
+191
to
+205
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Prompt To Fix With AI: This is a comment left during a code review.
Path: packages/shared/inference.ts
Line: 191-205
Comment:
**Silent no-op in `json` mode without a schema**
`buildAnthropicOutputConfig` returns `undefined` when `outputSchema === "json"` and `schema === null`. This silently drops JSON enforcement: Anthropic gets a plain-text request instead. The OpenAI path always sends `{ type: "json_object" }` in `json` mode regardless of whether a schema is supplied, so any caller that sets `INFERENCE_OUTPUT_SCHEMA=json` without a schema (which is valid OpenAI usage) will receive unstructured text from Anthropic and likely fail to parse it. At minimum a warning should be logged, or the condition should be tightened to make the gap explicit.
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| function extractAnthropicText(message: Anthropic.Message): string { | ||
| const text = message.content | ||
| .filter((b): b is Anthropic.TextBlock => b.type === "text") | ||
| .map((b) => b.text) | ||
| .join(""); | ||
| if (!text) { | ||
| throw new Error(`Got no text content from Anthropic`); | ||
| } | ||
| return text; | ||
| } | ||
|
|
||
| export class OpenAIInferenceClient implements InferenceClient { | ||
| openAI: OpenAI; | ||
| private config: OpenAIInferenceConfig; | ||
|
|
@@ -316,6 +367,134 @@ export class OpenAIInferenceClient implements InferenceClient { | |
| } | ||
| } | ||
|
|
||
| export interface AnthropicInferenceConfig { | ||
| apiKey: string; | ||
| baseURL?: string; | ||
| textModel: string; | ||
| imageModel: string; | ||
| maxOutputTokens: number; | ||
| outputSchema: "structured" | "json" | "plain"; | ||
| } | ||
|
|
||
| export class AnthropicInferenceClient implements InferenceClient { | ||
| private anthropic: Anthropic; | ||
| private config: AnthropicInferenceConfig; | ||
| private textModel: string; | ||
| private imageModel: string; | ||
|
|
||
| constructor(config: AnthropicInferenceConfig) { | ||
| this.config = config; | ||
| this.textModel = resolveAnthropicModel( | ||
| config.textModel, | ||
| OPENAI_DEFAULT_TEXT_MODEL, | ||
| ); | ||
| this.imageModel = resolveAnthropicModel( | ||
| config.imageModel, | ||
| OPENAI_DEFAULT_IMAGE_MODEL, | ||
| ); | ||
| this.anthropic = new Anthropic({ | ||
| apiKey: config.apiKey, | ||
| baseURL: config.baseURL, | ||
| }); | ||
| } | ||
|
Comment on lines
+393
to
+399
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Prompt To Fix With AI: This is a comment left during a code review.
Path: packages/shared/inference.ts
Line: 393-399
Comment:
**No configurable timeout for the Anthropic client**
`OpenAIInferenceClient` reads `OPENAI_TIMEOUT_SEC` and passes it as `timeout` to the `OpenAI` constructor. The `AnthropicInferenceClient` passes no timeout to `new Anthropic({...})`, so the SDK's built-in default (~10 minutes) is always used, ignoring any user-configured timeout expectation. Consider adding an `ANTHROPIC_TIMEOUT_SEC` env var (or re-using a generic `INFERENCE_TIMEOUT_SEC`) and wiring it through `AnthropicInferenceConfig`.
How can I resolve this? If you propose a fix, please make it concise. |
||
|
|
||
| static fromConfig(): AnthropicInferenceClient { | ||
| return new AnthropicInferenceClient({ | ||
| apiKey: serverConfig.inference.anthropicApiKey!, | ||
| baseURL: serverConfig.inference.anthropicBaseUrl, | ||
| textModel: serverConfig.inference.textModel, | ||
| imageModel: serverConfig.inference.imageModel, | ||
| maxOutputTokens: serverConfig.inference.maxOutputTokens, | ||
| outputSchema: serverConfig.inference.outputSchema, | ||
| }); | ||
| } | ||
|
|
||
| async inferFromText( | ||
| prompt: string, | ||
| _opts: Partial<InferenceOptions>, | ||
| ): Promise<InferenceResponse> { | ||
| const optsWithDefaults: InferenceOptions = { | ||
| ...defaultInferenceOptions, | ||
| ..._opts, | ||
| }; | ||
| const outputConfig = buildAnthropicOutputConfig( | ||
| optsWithDefaults.schema, | ||
| this.config.outputSchema, | ||
| ); | ||
| const message = await this.anthropic.messages.create( | ||
| { | ||
| model: this.textModel, | ||
| max_tokens: this.config.maxOutputTokens, | ||
| messages: [{ role: "user", content: prompt }], | ||
| ...(outputConfig ? { output_config: outputConfig } : {}), | ||
| }, | ||
| { signal: optsWithDefaults.abortSignal }, | ||
| ); | ||
| return { | ||
| response: extractAnthropicText(message), | ||
| totalTokens: | ||
| (message.usage.input_tokens ?? 0) + (message.usage.output_tokens ?? 0), | ||
| }; | ||
| } | ||
|
|
||
| async inferFromImage( | ||
| prompt: string, | ||
| contentType: string, | ||
| image: string, | ||
| _opts: Partial<InferenceOptions>, | ||
| ): Promise<InferenceResponse> { | ||
| const optsWithDefaults: InferenceOptions = { | ||
| ...defaultInferenceOptions, | ||
| ..._opts, | ||
| }; | ||
| const outputConfig = buildAnthropicOutputConfig( | ||
| optsWithDefaults.schema, | ||
| this.config.outputSchema, | ||
| ); | ||
| const message = await this.anthropic.messages.create( | ||
| { | ||
| model: this.imageModel, | ||
| max_tokens: this.config.maxOutputTokens, | ||
| messages: [ | ||
| { | ||
| role: "user", | ||
| content: [ | ||
| { type: "text", text: prompt }, | ||
| { | ||
| type: "image", | ||
| source: { | ||
| type: "base64", | ||
| media_type: contentType as | ||
| | "image/jpeg" | ||
| | "image/png" | ||
| | "image/gif" | ||
| | "image/webp", | ||
| data: image, | ||
| }, | ||
| }, | ||
| ], | ||
| }, | ||
| ], | ||
| ...(outputConfig ? { output_config: outputConfig } : {}), | ||
| }, | ||
| { signal: optsWithDefaults.abortSignal }, | ||
| ); | ||
| return { | ||
| response: extractAnthropicText(message), | ||
| totalTokens: | ||
| (message.usage.input_tokens ?? 0) + (message.usage.output_tokens ?? 0), | ||
| }; | ||
| } | ||
|
|
||
| generateEmbeddingFromText(_inputs: string[]): Promise<EmbeddingResponse> { | ||
| return Promise.reject( | ||
| new Error( | ||
| "Anthropic does not provide an embeddings API. Configure a separate embedding provider (e.g. OpenAI or Ollama) for semantic search.", | ||
| ), | ||
| ); | ||
| } | ||
| } | ||
|
|
||
| export interface OllamaInferenceConfig { | ||
| baseUrl: string; | ||
| textModel: string; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`makeClient()` defaults `textModel: "gpt-4.1-mini"` and `imageModel: "gpt-4o-mini"`. The tests exercise the substitution path (good), but the helper's name and intent could mislead a future contributor into thinking it simulates a misconfigured OpenAI client rather than an Anthropic client with OpenAI-defaulted models. A brief comment explaining why the OpenAI defaults are intentional here would improve clarity. Prompt To Fix With AI
Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!