Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions docs/docs/03-configuration/02-different-ai-providers.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,31 @@ OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# INFERENCE_IMAGE_MODEL=gpt-4o-mini
```

## Anthropic (Claude)

Karakeep has a native Anthropic provider (it does **not** go through Anthropic's
OpenAI-compatibility endpoint), which means structured tagging output is enforced
by the API.

```
ANTHROPIC_API_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxx
# Optional: point at a gateway/proxy instead of api.anthropic.com
# ANTHROPIC_BASE_URL=https://your-gateway.example.com
# Optional: set the model explicitly (defaults to claude-haiku-4-5 when unset)
INFERENCE_TEXT_MODEL=claude-haiku-4-5
INFERENCE_IMAGE_MODEL=claude-haiku-4-5
```

Notes:

- The Anthropic provider is used when `ANTHROPIC_API_KEY` is set and
`OPENAI_API_KEY` is not. If both are set, OpenAI takes precedence — unset `OPENAI_API_KEY` to use Claude. Anthropic in turn takes precedence over Ollama.
- The default model is `claude-haiku-4-5` (cheap and fast, suited to
high-volume auto-tagging). Set `INFERENCE_TEXT_MODEL` / `INFERENCE_IMAGE_MODEL`
to any Claude model (e.g. `claude-sonnet-4-6`) to override.
- **Embeddings are not supported** by Anthropic. Semantic search needs a
separate embedding provider (OpenAI or Ollama).

## Ollama

Ollama is a local LLM provider that you can use to run your own LLM server. You'll need to pass Ollama's address to Karakeep and ensure that it's accessible from within the Karakeep container (e.g. no localhost addresses).
Expand Down
9 changes: 8 additions & 1 deletion packages/shared/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ const allEnv = z.object({
TURNSTILE_SITE_KEY: z.string().optional(),
TURNSTILE_SECRET_KEY: z.string().optional(),
OPENAI_API_KEY: z.string().optional(),
ANTHROPIC_API_KEY: z.string().optional(),
ANTHROPIC_BASE_URL: z.string().url().optional(),
OPENAI_BASE_URL: z.string().url().optional(),
OPENAI_PROXY_URL: z.string().url().optional(),
OPENAI_TIMEOUT_SEC: z.coerce.number().positive().optional(),
Expand Down Expand Up @@ -316,11 +318,16 @@ const serverConfigSchema = allEnv.transform((val, ctx) => {
: undefined,
},
inference: {
isConfigured: !!val.OPENAI_API_KEY || !!val.OLLAMA_BASE_URL,
isConfigured:
!!val.OPENAI_API_KEY ||
!!val.ANTHROPIC_API_KEY ||
!!val.OLLAMA_BASE_URL,
numWorkers: val.INFERENCE_NUM_WORKERS,
jobTimeoutSec: val.INFERENCE_JOB_TIMEOUT_SEC,
fetchTimeoutSec: val.INFERENCE_FETCH_TIMEOUT_SEC,
openAIApiKey: val.OPENAI_API_KEY,
anthropicApiKey: val.ANTHROPIC_API_KEY,
anthropicBaseUrl: val.ANTHROPIC_BASE_URL,
openAIBaseUrl: val.OPENAI_BASE_URL,
openAIProxyUrl: val.OPENAI_PROXY_URL,
openAITimeoutSec: val.OPENAI_TIMEOUT_SEC,
Expand Down
108 changes: 108 additions & 0 deletions packages/shared/inference.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { z } from "zod";

// Mock the Anthropic SDK: default export is a class exposing messages.create.
// `createMock` stands in for `messages.create`, so every test can inspect the
// request body passed to it and control the resolved response.
// NOTE(review): Vitest documents `vi.mock` calls as hoisted above imports; this
// is safe here because `createMock` is only read by the class field initializer,
// i.e. when the mocked client is instantiated, not when the factory runs.
const createMock = vi.fn();
vi.mock("@anthropic-ai/sdk", () => ({
default: class {
// Same shape the client under test uses: `anthropic.messages.create(...)`.
messages = { create: createMock };
// eslint-disable-next-line @typescript-eslint/no-explicit-any
constructor(public opts: any) {}
},
}));

import { AnthropicInferenceClient } from "./inference";

/**
 * Builds an AnthropicInferenceClient for tests.
 *
 * The baseline config deliberately uses the OpenAI default model ids so the
 * tests can observe the fallback to the Claude default; any field can be
 * replaced through `overrides`.
 */
function makeClient(overrides = {}) {
  const baseConfig = {
    apiKey: "test-key",
    textModel: "gpt-4.1-mini",
    imageModel: "gpt-4o-mini",
    maxOutputTokens: 100,
    outputSchema: "structured" as const,
  };
  return new AnthropicInferenceClient({ ...baseConfig, ...overrides });
}

// Before every test: clear recorded calls and make the mocked
// `messages.create` resolve with a single text block plus token usage.
beforeEach(() => {
  const cannedMessage = {
    content: [{ type: "text", text: '{"tags":["a"]}' }],
    usage: { input_tokens: 10, output_tokens: 5 },
  };
  createMock.mockReset();
  createMock.mockResolvedValue(cannedMessage);
});

describe("AnthropicInferenceClient text inference", () => {
  // Request body handed to the mocked `messages.create` on its first call.
  const firstRequestBody = () => createMock.mock.calls[0][0];
  // Fresh tagging schema for the cases that exercise structured output.
  const tagsSchema = () => z.object({ tags: z.array(z.string()) });

  it("substitutes the Claude default when the model is the OpenAI default", async () => {
    await makeClient().inferFromText("hi", { schema: null });
    expect(firstRequestBody().model).toBe("claude-haiku-4-5");
  });

  it("preserves an explicitly configured Claude model", async () => {
    const sonnetClient = makeClient({ textModel: "claude-sonnet-4-6" });
    await sonnetClient.inferFromText("hi", { schema: null });
    expect(firstRequestBody().model).toBe("claude-sonnet-4-6");
  });

  it("sends max_tokens and the user message, and returns text + summed tokens", async () => {
    const result = await makeClient().inferFromText("hello", { schema: null });
    const request = firstRequestBody();
    expect(request.max_tokens).toBe(100);
    expect(request.messages).toEqual([{ role: "user", content: "hello" }]);
    expect(result.response).toBe('{"tags":["a"]}');
    // 10 input + 5 output tokens from the canned usage.
    expect(result.totalTokens).toBe(15);
  });

  it("attaches output_config json_schema in structured mode when a schema is given", async () => {
    await makeClient().inferFromText("hi", { schema: tagsSchema() });
    const { format } = firstRequestBody().output_config;
    expect(format.type).toBe("json_schema");
    expect(format.schema).toBeTypeOf("object");
  });

  it("omits output_config in plain mode", async () => {
    const plainClient = makeClient({ outputSchema: "plain" });
    await plainClient.inferFromText("hi", { schema: tagsSchema() });
    expect(firstRequestBody().output_config).toBeUndefined();
  });

  it("omits output_config when structured mode has no schema (e.g. summarization)", async () => {
    await makeClient().inferFromText("summarize", { schema: null });
    expect(firstRequestBody().output_config).toBeUndefined();
  });
});

describe("AnthropicInferenceClient image inference", () => {
  it("builds a base64 image content block with the given media type", async () => {
    // Content blocks expected in the single user message: prompt first, image second.
    const expectedContent = [
      { type: "text", text: "describe" },
      {
        type: "image",
        source: { type: "base64", media_type: "image/png", data: "BASE64DATA" },
      },
    ];

    const plainClient = makeClient({ outputSchema: "plain" });
    await plainClient.inferFromImage("describe", "image/png", "BASE64DATA", {
      schema: null,
    });

    const request = createMock.mock.calls[0][0];
    expect(request.model).toBe("claude-haiku-4-5");
    expect(request.messages[0].content).toEqual(expectedContent);
  });
});

describe("AnthropicInferenceClient embeddings", () => {
  it("rejects with a clear unsupported error", async () => {
    // The call must reject (not throw synchronously) with the explanatory message.
    const pending = makeClient().generateEmbeddingFromText(["x"]);
    await expect(pending).rejects.toThrow(/does not provide an embeddings API/);
  });
});
179 changes: 179 additions & 0 deletions packages/shared/inference.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import Anthropic from "@anthropic-ai/sdk";
import { Ollama } from "ollama";
import OpenAI from "openai";
import { zodResponseFormat } from "openai/helpers/zod";
Expand Down Expand Up @@ -157,13 +158,63 @@ export class InferenceClientFactory {
return OpenAIInferenceClient.fromConfig();
}

if (serverConfig.inference.anthropicApiKey) {
return AnthropicInferenceClient.fromConfig();
}

if (serverConfig.inference.ollamaBaseUrl) {
return OllamaInferenceClient.fromConfig();
}
return null;
}
}

// Claude model used when no Claude-specific model has been configured.
const ANTHROPIC_DEFAULT_MODEL = "claude-haiku-4-5";
// Sentinel values compared against the configured text/image model to detect
// "still on the OpenAI default".
// NOTE(review): presumably these must stay in sync with the INFERENCE_TEXT_MODEL /
// INFERENCE_IMAGE_MODEL defaults declared in the shared config — confirm.
const OPENAI_DEFAULT_TEXT_MODEL = "gpt-4.1-mini";
const OPENAI_DEFAULT_IMAGE_MODEL = "gpt-4o-mini";

/**
 * Picks the model id to send to Anthropic.
 *
 * When the configured model is still Karakeep's global OpenAI default, a Claude
 * model is substituted so that a zero-config Anthropic setup works (and a gpt-*
 * id is never sent to Anthropic, which would 404). Any other value is treated
 * as an explicit choice and returned untouched.
 *
 * @param model The model id taken from the inference config.
 * @param openAIDefault The OpenAI default id that marks "not configured".
 * @returns `model`, or the Claude default when `model` equals `openAIDefault`.
 */
function resolveAnthropicModel(model: string, openAIDefault: string): string {
  const isExplicitlyConfigured = model !== openAIDefault;
  if (isExplicitlyConfigured) {
    return model;
  }

  // Surface the substitution in the logs so operators know which model is in use.
  logger.info(
    `[inference] No Claude model set for the Anthropic provider; defaulting to ${ANTHROPIC_DEFAULT_MODEL}. Set INFERENCE_TEXT_MODEL/INFERENCE_IMAGE_MODEL to override.`,
  );
  return ANTHROPIC_DEFAULT_MODEL;
}

/**
 * Builds the `output_config` request field for Anthropic's native Structured
 * Outputs.
 *
 * Anthropic has no json_object mode, so both the "structured" and "json"
 * output modes map to a JSON-schema format whenever a schema is supplied.
 *
 * @param schema Zod schema describing the expected response, or null when the
 *   caller wants free-form text.
 * @param outputSchema Configured output mode; "plain" disables structured output.
 * @returns The `output_config` value, or undefined when nothing should be sent.
 */
function buildAnthropicOutputConfig(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  schema: z.ZodSchema<any> | null,
  outputSchema: "structured" | "json" | "plain",
) {
  const wantsJson = outputSchema !== "plain";
  if (schema && wantsJson) {
    const format = {
      type: "json_schema" as const,
      schema: z.toJSONSchema(schema),
    };
    return { format };
  }
  return undefined;
}

/**
 * Concatenates the text of every `text` content block in an Anthropic message,
 * in order, ignoring blocks of any other type.
 *
 * @param message The message returned by `messages.create`.
 * @returns The joined text.
 * @throws Error when the message yields no text at all (no text blocks, or
 *   only empty ones).
 */
function extractAnthropicText(message: Anthropic.Message): string {
  let combined = "";
  for (const block of message.content) {
    if (block.type === "text") {
      combined += block.text;
    }
  }

  if (combined === "") {
    throw new Error(`Got no text content from Anthropic`);
  }
  return combined;
}

export class OpenAIInferenceClient implements InferenceClient {
openAI: OpenAI;
private config: OpenAIInferenceConfig;
Expand Down Expand Up @@ -316,6 +367,134 @@ export class OpenAIInferenceClient implements InferenceClient {
}
}

/** Settings accepted by the AnthropicInferenceClient constructor. */
export interface AnthropicInferenceConfig {
// API key handed to the Anthropic SDK client.
apiKey: string;
// Optional base URL handed to the Anthropic SDK client (e.g. a gateway/proxy).
baseURL?: string;
// Model id for text inference; replaced by the Claude default when it equals
// the OpenAI default text model.
textModel: string;
// Model id for image inference; replaced by the Claude default when it equals
// the OpenAI default image model.
imageModel: string;
// Sent as `max_tokens` on every request.
maxOutputTokens: number;
// Output mode: "plain" never sends `output_config`; the other modes send a
// JSON-schema format whenever the caller supplies a schema.
outputSchema: "structured" | "json" | "plain";
}

/** Image media types Anthropic accepts for base64 image content blocks. */
const ANTHROPIC_IMAGE_MEDIA_TYPES = [
  "image/jpeg",
  "image/png",
  "image/gif",
  "image/webp",
] as const;

type AnthropicImageMediaType = (typeof ANTHROPIC_IMAGE_MEDIA_TYPES)[number];

/** Narrows an arbitrary content type to one Anthropic accepts for images. */
function isAnthropicImageMediaType(
  contentType: string,
): contentType is AnthropicImageMediaType {
  return (ANTHROPIC_IMAGE_MEDIA_TYPES as readonly string[]).includes(
    contentType,
  );
}

/**
 * InferenceClient backed by Anthropic's native Messages API.
 *
 * Text and image prompts are sent as a single user message. When a schema is
 * supplied and the configured output mode is not "plain", the request carries
 * a JSON-schema `output_config`. Embeddings are not available from this
 * provider.
 */
export class AnthropicInferenceClient implements InferenceClient {
  private readonly anthropic: Anthropic;
  private readonly config: AnthropicInferenceConfig;
  private readonly textModel: string;
  private readonly imageModel: string;

  /**
   * @param config Client settings. Model ids that still equal the OpenAI
   *   defaults are replaced with the Claude default model.
   */
  constructor(config: AnthropicInferenceConfig) {
    this.config = config;
    this.textModel = resolveAnthropicModel(
      config.textModel,
      OPENAI_DEFAULT_TEXT_MODEL,
    );
    this.imageModel = resolveAnthropicModel(
      config.imageModel,
      OPENAI_DEFAULT_IMAGE_MODEL,
    );
    this.anthropic = new Anthropic({
      apiKey: config.apiKey,
      baseURL: config.baseURL,
    });
  }

  /**
   * Builds a client from the global server config.
   *
   * @throws Error when no Anthropic API key is configured.
   */
  static fromConfig(): AnthropicInferenceClient {
    const apiKey = serverConfig.inference.anthropicApiKey;
    if (!apiKey) {
      // Fail loudly instead of constructing a client with an undefined key.
      throw new Error(
        "ANTHROPIC_API_KEY must be set to use the Anthropic inference provider",
      );
    }
    return new AnthropicInferenceClient({
      apiKey,
      baseURL: serverConfig.inference.anthropicBaseUrl,
      textModel: serverConfig.inference.textModel,
      imageModel: serverConfig.inference.imageModel,
      maxOutputTokens: serverConfig.inference.maxOutputTokens,
      outputSchema: serverConfig.inference.outputSchema,
    });
  }

  /**
   * Runs a text prompt against the configured text model.
   *
   * @param prompt Prompt sent as the content of a single user message.
   * @param _opts Optional overrides (schema, abort signal) merged over the
   *   default inference options.
   * @returns The concatenated response text and the sum of input and output tokens.
   */
  async inferFromText(
    prompt: string,
    _opts: Partial<InferenceOptions>,
  ): Promise<InferenceResponse> {
    return this.createMessage(this.textModel, prompt, _opts);
  }

  /**
   * Runs a prompt plus one base64-encoded image against the configured image model.
   *
   * @param prompt Text instruction placed before the image.
   * @param contentType Media type of the image; must be one of image/jpeg,
   *   image/png, image/gif or image/webp.
   * @param image Base64-encoded image data.
   * @param _opts Optional overrides (schema, abort signal) merged over the
   *   default inference options.
   * @returns The concatenated response text and the sum of input and output tokens.
   * @throws Error when `contentType` is not a supported image media type.
   */
  async inferFromImage(
    prompt: string,
    contentType: string,
    image: string,
    _opts: Partial<InferenceOptions>,
  ): Promise<InferenceResponse> {
    // Validate at runtime rather than asserting the type: an unsupported media
    // type gets a clear local error instead of an opaque API failure.
    if (!isAnthropicImageMediaType(contentType)) {
      throw new Error(
        `Unsupported image content type for Anthropic: ${contentType}. Supported types: ${ANTHROPIC_IMAGE_MEDIA_TYPES.join(", ")}.`,
      );
    }
    return this.createMessage(
      this.imageModel,
      [
        { type: "text", text: prompt },
        {
          type: "image",
          source: {
            type: "base64",
            media_type: contentType,
            data: image,
          },
        },
      ],
      _opts,
    );
  }

  /**
   * Always rejects: this provider has no embeddings endpoint.
   *
   * @returns A promise rejected with an explanatory Error.
   */
  generateEmbeddingFromText(_inputs: string[]): Promise<EmbeddingResponse> {
    return Promise.reject(
      new Error(
        "Anthropic does not provide an embeddings API. Configure a separate embedding provider (e.g. OpenAI or Ollama) for semantic search.",
      ),
    );
  }

  /** Sends one user message to `model` and maps the reply to an InferenceResponse. */
  private async createMessage(
    model: string,
    content: Anthropic.MessageParam["content"],
    opts: Partial<InferenceOptions>,
  ): Promise<InferenceResponse> {
    const optsWithDefaults: InferenceOptions = {
      ...defaultInferenceOptions,
      ...opts,
    };
    const outputConfig = buildAnthropicOutputConfig(
      optsWithDefaults.schema,
      this.config.outputSchema,
    );
    const message = await this.anthropic.messages.create(
      {
        model,
        max_tokens: this.config.maxOutputTokens,
        messages: [{ role: "user", content }],
        // Only include the key when structured output is wanted, so plain
        // requests carry no `output_config` property at all.
        ...(outputConfig ? { output_config: outputConfig } : {}),
      },
      { signal: optsWithDefaults.abortSignal },
    );
    return {
      response: extractAnthropicText(message),
      totalTokens:
        (message.usage.input_tokens ?? 0) + (message.usage.output_tokens ?? 0),
    };
  }
}

export interface OllamaInferenceConfig {
baseUrl: string;
textModel: string;
Expand Down
1 change: 1 addition & 0 deletions packages/shared/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"private": true,
"type": "module",
"dependencies": {
"@anthropic-ai/sdk": "^0.104.1",
"@aws-sdk/client-s3": "^3.1014.0",
"glob": "^11.0.0",
"html-to-text": "^9.0.5",
Expand Down
Loading
Loading