diff --git a/.github/workflows/reusable-test.yml b/.github/workflows/reusable-test.yml index db3b83fb84..27fc05a5f1 100644 --- a/.github/workflows/reusable-test.yml +++ b/.github/workflows/reusable-test.yml @@ -114,3 +114,60 @@ jobs: - name: Run tests run: npm test + + transport-runtime: + name: "@arcjet/transport proxy (${{ matrix.runtime }} ${{ matrix.bun-version || matrix.deno-version }})" + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + # Bun's native-fetch proxy support is exercised here (not under Node). + # `HTTPS_PROXY` is set at process startup (by the npm script), which + # Bun honors back to 1.3.0, the project's minimum supported Bun. + - runtime: bun + script: test-runtime-bun + bun-version: 1.3.0 + - runtime: bun + script: test-runtime-bun + bun-version: latest + - runtime: deno + script: test-runtime-deno + deno-version: lts + - runtime: deno + script: test-runtime-deno + deno-version: latest + permissions: + contents: read + steps: + - uses: step-security/harden-runner@a5ad31d6a139d249332a2605b85202e8c0b78450 # v2.19.1 + with: + allowed-endpoints: > + api.github.com:443 + deno.com:443 + dl.deno.land:443 + github.com:443 + nodejs.org:443 + objects.githubusercontent.com:443 + registry.npmjs.org:443 + release-assets.githubusercontent.com:443 + disable-sudo-and-containers: true + egress-policy: block + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0 + with: + node-version: 22 + - uses: oven-sh/setup-bun@3d267786b128fe76c2f16a390aa2448b815359f3 # v2.1.2 + if: matrix.runtime == 'bun' + with: + bun-version: ${{ matrix.bun-version }} + - uses: denoland/setup-deno@e95548e56dfa95d4e1a28d6f422fafe75c4c26fb # v2.0.3 + if: matrix.runtime == 'deno' + with: + deno-version: ${{ matrix.deno-version }} + # Build the whole workspace first so `@arcjet/transport`'s dependencies + # (`@arcjet/env`, 
`@arcjet/logger`) are available to the runtime test. + - run: npm ci && npm run build + - run: npm run ${{ matrix.script }} --workspace=@arcjet/transport diff --git a/arcjet-guard/README.md b/arcjet-guard/README.md index 1aa38e7278..592e904d64 100644 --- a/arcjet-guard/README.md +++ b/arcjet-guard/README.md @@ -413,6 +413,33 @@ See the [docs](https://docs.arcjet.com/mcp-server) for setup instructions. You can also manage sites and keys with the CLI: `npx @arcjet/cli`. +## Proxy support + +The standard proxy environment variables (`HTTP_PROXY` and `HTTPS_PROXY`, while +respecting `NO_PROXY`) are auto-detected, making it possible to connect to the +Arcjet API through a proxy such as [Squid](https://www.squid-cache.org/). When a +proxy is in use and `ARCJET_LOG_LEVEL` is `info` or `debug`, a line is logged at startup; the proxy +URL itself is not logged, since it can contain credentials. How the request is +actually proxied depends on the runtime: + +- **Node.js** — uses the HTTP/2 transport; when a proxy is detected, requests + are routed through it over HTTP/1.1 using the built-in proxy support of the + Node.js HTTP agent, otherwise made directly over HTTP/2. +- **Bun** — uses the HTTP/2 transport directly, but its Node HTTP agent doesn't + support proxying, so when a proxy is detected it falls back to the fetch-based + transport and Bun's `fetch` performs the proxying natively. +- **Deno** — the runtime's `fetch` performs the proxying natively. +- **Cloudflare Workers** and other edge runtimes don't support outbound proxy + environment variables, so no proxy is used. + +`NO_PROXY` accepts a comma- or space-separated list of host suffixes, each with +an optional leading `.` or `*.` and an optional `:port`, plus `*` to bypass the +proxy for every host. Entries are matched as host names; IP/CIDR ranges (such as +`10.0.0.0/8`) are not supported, the same as +[curl](https://curl.se/docs/manpage.html#--noproxy).
On Bun and Deno the +runtime's `fetch` applies `NO_PROXY` itself, so its exact semantics are the +runtime's. + ## Runtime support | Runtime | Minimum version | diff --git a/arcjet-guard/package.json b/arcjet-guard/package.json index 3f0bdd1df6..1dade33304 100644 --- a/arcjet-guard/package.json +++ b/arcjet-guard/package.json @@ -27,8 +27,8 @@ "exports": { ".": { "bun": { - "types": "./dist/node.d.ts", - "import": "./dist/node.js" + "types": "./dist/bun.d.ts", + "import": "./dist/bun.js" }, "edge-light": { "types": "./dist/fetch.d.ts", @@ -38,6 +38,10 @@ "types": "./dist/fetch.d.ts", "import": "./dist/fetch.js" }, + "deno": { + "types": "./dist/fetch.d.ts", + "import": "./dist/fetch.js" + }, "node": { "types": "./dist/node.d.ts", "import": "./dist/node.js" @@ -51,6 +55,10 @@ "types": "./dist/node.d.ts", "import": "./dist/node.js" }, + "./bun": { + "types": "./dist/bun.d.ts", + "import": "./dist/bun.js" + }, "./fetch": { "types": "./dist/fetch.d.ts", "import": "./dist/fetch.js" diff --git a/arcjet-guard/src/bun.test.ts b/arcjet-guard/src/bun.test.ts new file mode 100644 index 0000000000..10a9049df2 --- /dev/null +++ b/arcjet-guard/src/bun.test.ts @@ -0,0 +1,43 @@ +import assert from "node:assert/strict"; +import { describe, test } from "node:test"; + +import { + launchArcjet, + createTransport, + tokenBucket, + fixedWindow, + slidingWindow, + detectPromptInjection, + localDetectSensitiveInfo, + defineCustomRule, + launchArcjetWithTransport, +} from "./bun.ts"; + +describe("bun entrypoint", () => { + test("launchArcjet is exported as a function", () => { + assert.equal(typeof launchArcjet, "function"); + }); + + test("createTransport is re-exported", () => { + assert.equal(typeof createTransport, "function"); + }); + + test("rule factories are re-exported", () => { + assert.equal(typeof tokenBucket, "function"); + assert.equal(typeof fixedWindow, "function"); + assert.equal(typeof slidingWindow, "function"); + assert.equal(typeof detectPromptInjection, "function"); + 
assert.equal(typeof localDetectSensitiveInfo, "function"); + assert.equal(typeof defineCustomRule, "function"); + }); + + test("launchArcjetWithTransport is re-exported", () => { + assert.equal(typeof launchArcjetWithTransport, "function"); + }); + + test("launchArcjet returns an object with .guard()", () => { + const arcjet = launchArcjet({ key: "ajkey_test" }); + + assert.equal(typeof arcjet.guard, "function"); + }); +}); diff --git a/arcjet-guard/src/bun.ts b/arcjet-guard/src/bun.ts new file mode 100644 index 0000000000..36bb46d30d --- /dev/null +++ b/arcjet-guard/src/bun.ts @@ -0,0 +1,195 @@ +/** + * `@arcjet/guard/bun` — Bun entrypoint. + * + * Bun resolves the `"."` export here. Uses HTTP/2 via `node:http2` + * (`@connectrpc/connect-node`) for optimal performance with long-lived + * connections and optimistic pre-connect — Bun's `fetch` does not support + * HTTP/2 ({@link https://github.com/oven-sh/bun/issues/7194}). When a proxy is + * configured, it falls back to the fetch transport so Bun's native `fetch` + * performs the proxying. + * + * **Lifecycle:** Create the client once at module scope and reuse it. + * The underlying HTTP/2 transport maintains a persistent connection; + * creating a new client per request wastes that connection. + * + * @example + * ```ts + * import { launchArcjet, tokenBucket, detectPromptInjection } from "@arcjet/guard"; + * + * // Create the client once at module scope + * const arcjet = launchArcjet({ key: "ajkey_..." 
}); + * + * // Configure reusable rules (also at module scope) + * const limitRule = tokenBucket({ bucket: "user-tokens", refillRate: 10, intervalSeconds: 60, maxTokens: 100 }); + * const piRule = detectPromptInjection(); + * + * // Per request — create rule inputs each time + * const rl = limitRule({ key: userId, requested: tokenCount }); + * const decision = await arcjet.guard({ + * label: "tools.weather", + * rules: [rl, piRule(userMessage)], + * }); + * + * // Overall decision + * if (decision.conclusion === "DENY") { + * console.log(decision.reason); // "RATE_LIMIT", "PROMPT_INJECTION", etc. + * } + * + * // Check for errors (fail-open — errors don't cause denials) + * if (decision.hasError()) { + * console.warn("At least one rule errored"); + * } + * + * // Per-rule results + * for (const result of decision.results) { + * console.log(result.type, result.conclusion); + * } + * + * // From a RuleWithInput — result for this specific submission + * const r = rl.result(decision); + * if (r) { + * console.log(r.remainingTokens, r.maxTokens); + * } + * + * // From a RuleWithConfig — first denied result across all submissions + * const denied = limitRule.deniedResult(decision); + * if (denied) { + * console.log(denied.remainingTokens); // 0 + * } + * ``` + * + * Apart from outbound proxy detection — which reads the standard proxy + * environment variables and `ARCJET_LOG_LEVEL` — `@arcjet/guard` never reads + * environment variables directly. All other configuration must be passed explicitly via `launchArcjet()` options, `.guard()`, or rule inputs.
+ * + * @packageDocumentation + */ + +export { + // Types + type Conclusion, + type Reason, + type Mode, + type RuleResult, + type RuleResultTokenBucket, + type RuleResultFixedWindow, + type RuleResultSlidingWindow, + type RuleResultPromptInjection, + type RuleResultSensitiveInfo, + type RuleResultCustom, + type RuleResultNotRun, + type RuleResultError, + type RuleResultUnknown, + type Decision, + type DecisionAllow, + type DecisionDeny, + type DecisionBase, + type RuleWithInput, + type RuleWithConfig, + type GuardOptions, + type LaunchOptions, + type ArcjetGuard, + + // Rule config types + type TokenBucketConfig, + type TokenBucketInput, + type FixedWindowConfig, + type FixedWindowInput, + type SlidingWindowConfig, + type SlidingWindowInput, + type DetectPromptInjectionConfig, + type LocalDetectSensitiveInfoConfig, + type SensitiveInfoEntityType, + type LocalCustomConfig, + type LocalCustomInput, + + // Rule factories + tokenBucket, + fixedWindow, + slidingWindow, + detectPromptInjection, + localDetectSensitiveInfo, + defineCustomRule, + + // Transport-agnostic factory + launchArcjetWithTransport, + + // Internal + _launchWithTransportFactory, +} from "./index.ts"; + +import { _launchWithTransportFactory } from "./index.ts"; +import type { LaunchOptions, ArcjetGuard } from "./index.ts"; +import { createTransport } from "./transport-bun.ts"; + +/** + * Create an Arcjet guard client using the Bun transport. + * + * Connects over HTTP/2 by default, falling back to a fetch-based transport when + * a proxy is configured so Bun's native `fetch` performs the proxying. + * + * Connect to the Arcjet MCP server at `https://api.arcjet.com/mcp` to manage + * sites, retrieve SDK keys, and more. Learn more at + * {@link https://docs.arcjet.com/mcp-server}. + * + * **Create once, reuse everywhere.** The returned client holds a + * persistent HTTP/2 connection that is optimistically pre-connected. 
+ * Wrapping this in a function that creates a new client per request + * defeats connection reuse and adds latency. + * + * Three lifetimes to keep in mind: + * 1. **Client** (`launchArcjet`) — create once at module scope. + * 2. **Rule config** (`tokenBucket(...)`) — create once at module scope (recommended). + * 3. **Rule input** (`limitRule({ key })`) — create per request / tool call. + * + * @example + * ```ts + * import { launchArcjet, tokenBucket, detectPromptInjection } from "@arcjet/guard"; + * + * // Create the client once at module scope + * const arcjet = launchArcjet({ key: "ajkey_..." }); + * + * // Configure reusable rules (also at module scope) + * const limitRule = tokenBucket({ bucket: "user-tokens", refillRate: 10, intervalSeconds: 60, maxTokens: 100 }); + * const piRule = detectPromptInjection(); + * + * // Per request — create rule inputs each time + * const rl = limitRule({ key: userId, requested: tokenCount }); + * const decision = await arcjet.guard({ + * label: "tools.weather", + * rules: [rl, piRule(userMessage)], + * }); + * + * // Overall decision + * if (decision.conclusion === "DENY") { + * console.log(decision.reason); // "RATE_LIMIT", "PROMPT_INJECTION", etc. 
+ * } + * + * // Check for errors (fail-open — errors don't cause denials) + * if (decision.hasError()) { + * console.warn("At least one rule errored"); + * } + * + * // Per-rule results + * for (const result of decision.results) { + * console.log(result.type, result.conclusion); + * } + * + * // From a RuleWithInput — result for this specific submission + * const r = rl.result(decision); + * if (r) { + * console.log(r.remainingTokens, r.maxTokens); + * } + * + * // From a RuleWithConfig — first denied result across all submissions + * const denied = limitRule.deniedResult(decision); + * if (denied) { + * console.log(denied.remainingTokens); // 0 + * } + * ``` + */ +export function launchArcjet(options: LaunchOptions): ArcjetGuard { + return _launchWithTransportFactory(createTransport, options); +} + +export { createTransport } from "./transport-bun.ts"; diff --git a/arcjet-guard/src/detect-proxy.test.ts b/arcjet-guard/src/detect-proxy.test.ts new file mode 100644 index 0000000000..0f637b2575 --- /dev/null +++ b/arcjet-guard/src/detect-proxy.test.ts @@ -0,0 +1,209 @@ +import assert from "node:assert/strict"; +import { describe, test } from "node:test"; + +import { detectProxy } from "./detect-proxy.ts"; + +// Run `detectProxy` with the given environment while capturing (and silencing) +// the startup log line, returning the resolved proxy and whether it logged. 
+function detect( + baseUrl: string, + proxyEnv: Record<string, string | undefined>, +): { proxy: string | undefined; logged: boolean } { + const original = console.info; + let logged = false; + console.info = (): void => { + logged = true; + }; + + try { + return { proxy: detectProxy(new URL(baseUrl), proxyEnv), logged }; + } finally { + console.info = original; + } +} + +describe("detectProxy", () => { + test("returns undefined and does not log without a proxy", () => { + const { proxy, logged } = detect("https://decide.arcjet.com", {}); + + assert.equal(proxy, undefined); + assert.equal(logged, false); + }); + + test("throws on an invalid base URL", () => { + // `detectProxy` takes a parsed `URL`, so an invalid base URL surfaces when + // the caller constructs it (here, via the `detect` helper) rather than + // being swallowed — matching how `createTransport` parses up front. + assert.throws(() => detect("not a url", {}), /Invalid URL/); + }); + + test("resolves the proxy for HTTPS and HTTP targets", () => { + assert.equal( + detect("https://decide.arcjet.com", { + HTTPS_PROXY: "http://proxy.example.com:3128", + }).proxy, + "http://proxy.example.com:3128", + ); + + assert.equal( + detect("http://decide.arcjet.com", { + HTTP_PROXY: "http://proxy.example.com:3128", + }).proxy, + "http://proxy.example.com:3128", + ); + }); + + test("logs once when a proxy is in use and the level allows it", () => { + assert.equal( + detect("https://decide.arcjet.com", { + HTTPS_PROXY: "http://proxy.example.com:3128", + ARCJET_LOG_LEVEL: "info", + }).logged, + true, + ); + }); + + test("does not log by default (level below `info`)", () => { + // Matches @arcjet/transport, whose default `warn` level hides this line.
+ assert.equal( + detect("https://decide.arcjet.com", { + HTTPS_PROXY: "http://proxy.example.com:3128", + }).logged, + false, + ); + assert.equal( + detect("https://decide.arcjet.com", { + HTTPS_PROXY: "http://proxy.example.com:3128", + ARCJET_LOG_LEVEL: "warn", + }).logged, + false, + ); + }); + + test("prefers the lowercase proxy variable", () => { + assert.equal( + detect("http://api.example.com/", { + http_proxy: "http://lower.example.com:3128", + HTTP_PROXY: "http://upper.example.com:3128", + }).proxy, + "http://lower.example.com:3128", + ); + }); + + test("never logs the proxy URL or its credentials", () => { + const messages: unknown[] = []; + const original = console.info; + console.info = (...values: unknown[]): void => { + messages.push(...values); + }; + + try { + detectProxy(new URL("https://decide.arcjet.com"), { + HTTPS_PROXY: "http://user:secret@proxy.example.com:3128", + ARCJET_LOG_LEVEL: "info", + }); + } finally { + console.info = original; + } + + // Only the fixed message is logged — never the proxy URL, so credentials + // and host can't leak. Asserting the exact output is stronger than checking + // for substrings. 
+ assert.deepEqual(messages, ["Connecting to the Arcjet API through a proxy"]); + }); + + test("honors `NO_PROXY`", () => { + const proxy = "http://proxy.example.com:3128"; + + // [NO_PROXY, base URL, expected to be bypassed] + const cases: Array<[string, string, boolean]> = [ + ["*", "http://api.example.com:8080/", true], + ["api.example.com", "http://api.example.com:8080/", true], + ["example.com", "http://api.example.com:8080/", true], + ["other.com", "http://api.example.com:8080/", false], + ["api.example.com:8080", "http://api.example.com:8080/", true], + ["api.example.com:9999", "http://api.example.com:8080/", false], + [".example.com", "http://api.example.com:8080/", true], + ["*.example.com", "http://api.example.com:8080/", true], + [",other.com", "http://api.example.com:8080/", false], + [".", "http://api.example.com:8080/", false], + ["foo:bar", "http://api.example.com:8080/", false], + ["api.example.com:80", "http://api.example.com/", true], + ["api.example.com:443", "https://api.example.com/", true], + // IPv6 hosts, written with or without brackets and with or without a port. + ["::1", "http://[::1]:8080/", true], + ["[::1]", "http://[::1]:8080/", true], + ["[::1]:8080", "http://[::1]:8080/", true], + ["[::1]:9999", "http://[::1]:8080/", false], + ["::1", "http://[::2]:8080/", false], + ]; + + for (const [noProxy, baseUrl, bypassed] of cases) { + assert.equal( + detect(baseUrl, { + HTTP_PROXY: proxy, + HTTPS_PROXY: proxy, + NO_PROXY: noProxy, + }).proxy, + bypassed ? undefined : proxy, + `NO_PROXY=${noProxy} for ${baseUrl}`, + ); + } + }); + + test("returns undefined when reading the environment throws", () => { + // Simulate a runtime that gates environment access behind a permission + // (e.g. Deno without `--allow-env`), where reading a variable throws. 
+ const throwing = new Proxy<Record<string, string | undefined>>( + {}, + { + get(): never { + throw new Error("permission denied"); + }, + }, + ); + + const { proxy, logged } = detect("https://decide.arcjet.com", throwing); + + assert.equal(proxy, undefined); + assert.equal(logged, false); + }); + + test("ignores uppercase `HTTP_PROXY` under CGI (httpoxy)", () => { + // With `REQUEST_METHOD` set (a CGI environment), uppercase `HTTP_PROXY` — + // which an inbound `Proxy` header can populate — is ignored for HTTP. + assert.equal( + detect("http://api.example.com/", { + HTTP_PROXY: "http://attacker.example.com:3128", + REQUEST_METHOD: "GET", + }).proxy, + undefined, + ); + + // Lowercase `http_proxy` is still honored under CGI. + assert.equal( + detect("http://api.example.com/", { + http_proxy: "http://proxy.example.com:3128", + REQUEST_METHOD: "GET", + }).proxy, + "http://proxy.example.com:3128", + ); + + // HTTPS targets are unaffected (no header maps to `HTTPS_PROXY`). + assert.equal( + detect("https://api.example.com/", { + HTTPS_PROXY: "http://proxy.example.com:3128", + REQUEST_METHOD: "GET", + }).proxy, + "http://proxy.example.com:3128", + ); + + // Without `REQUEST_METHOD`, uppercase `HTTP_PROXY` is honored as usual. + assert.equal( + detect("http://api.example.com/", { + HTTP_PROXY: "http://proxy.example.com:3128", + }).proxy, + "http://proxy.example.com:3128", + ); + }); +}); diff --git a/arcjet-guard/src/detect-proxy.ts b/arcjet-guard/src/detect-proxy.ts new file mode 100644 index 0000000000..5f09cd9bcf --- /dev/null +++ b/arcjet-guard/src/detect-proxy.ts @@ -0,0 +1,248 @@ +/** + * Outbound proxy detection shared by the `@arcjet/guard` transports. + * + * Resolves the proxy (if any) that applies to a base URL from the standard + * proxy environment variables (`HTTP_PROXY`/`HTTPS_PROXY`, respecting + * `NO_PROXY`) and logs a single line at startup when one is in use. The proxy + * URL itself is never logged, since it can contain credentials.
+ * + * @packageDocumentation + */ + +/** Map of environment variables used to detect an outbound proxy. */ +export type ProxyEnvironment = Record<string, string | undefined>; + +/** + * Detect the proxy that applies to a URL and log a line when one is found. + * + * Standard proxy environment variables (`HTTP_PROXY` and `HTTPS_PROXY`, + * respecting `NO_PROXY`) are auto-detected. When a proxy applies, a single line + * is logged at startup so it is easy to know one is in use; the proxy URL itself + * is not logged, since it can contain credentials. + * + * Takes an already-parsed `URL` so callers that also need it (e.g. to pick an + * HTTP vs HTTPS agent) don't parse the base URL twice. + * + * @param url URL that requests will be made to. + * @param proxyEnv Environment variables to inspect (defaults to the current + * runtime's environment when available). + * @returns Proxy URL that applies to `url`, or `undefined` when none does. + */ +export function detectProxy( + url: URL, + proxyEnv: ProxyEnvironment | undefined = currentEnvironment(), +): string | undefined { + if (proxyEnv === undefined) { + return undefined; + } + + let proxyUrl: string | undefined; + try { + proxyUrl = proxyForUrl(url, proxyEnv); + } catch { + // Reading proxy environment variables can throw on runtimes that gate + // environment access behind a permission (e.g. Deno without `--allow-env`). + // Treat that as "no proxy" rather than failing transport creation. + return undefined; + } + + if (typeof proxyUrl === "string") { + // Log a line at startup so it is easy to know when a proxy is being used. + // We deliberately do not log the proxy URL itself: it can contain + // credentials, and not logging it is simpler and safer than redacting it. + // + // Gate on `ARCJET_LOG_LEVEL` so this matches `@arcjet/transport`, which logs + // the same line through `@arcjet/logger` at `info` level — hidden unless the + // level is `info` or `debug`, and silenceable.
This copy is edge-safe and + // can't import `@arcjet/logger`, so read the level from the same environment + // we resolved the proxy from. + const level = proxyEnv["ARCJET_LOG_LEVEL"]; + if (level === "info" || level === "debug") { + console.info("Connecting to the Arcjet API through a proxy"); + } + } + + return proxyUrl; +} + +/** + * Read the current runtime's environment, when available. + * + * `process` is available on Node, Deno, and Bun but not on every edge runtime, + * so we read it through `globalThis` (which is safe when it is absent) rather + * than referencing it directly or importing `node:process`. + * + * @returns The environment, or `undefined` on runtimes without `process`. + */ +function currentEnvironment(): ProxyEnvironment | undefined { + return globalThis.process?.env; +} + +// --------------------------------------------------------------------------- +// Keep the proxy-resolution logic below in sync with the copy in +// `@arcjet/transport` (`transport/detect-proxy.ts`). The two packages +// intentionally duplicate it rather than share a module: this copy is bundled +// into a fetch transport that runs on edge runtimes without `process` or extra +// dependencies, so it stays edge-safe with no imports. Only the `detectProxy` +// entry point above differs between the copies; the helpers below should stay +// logically identical (the two may differ only in line wrapping, since each +// package runs a different formatter). +// --------------------------------------------------------------------------- + +/** + * Find the proxy that should be used for a URL, if any. + * + * Honors `NO_PROXY` so the result reflects the connection that will actually be + * made. + * + * @param url URL that requests will be made to. + * @param proxyEnv Environment variables to inspect. + * @returns Proxy URL to use, or `undefined` when no proxy applies. 
+ */ +function proxyForUrl(url: URL, proxyEnv: ProxyEnvironment): string | undefined { + // httpoxy mitigation: under CGI the inbound `Proxy` request header is exposed + // as the `HTTP_PROXY` environment variable, so honoring uppercase `HTTP_PROXY` + // for HTTP targets could let a request control outbound proxying. When a CGI + // environment is detected (`REQUEST_METHOD` is set), ignore it and use only + // the lowercase `http_proxy`. See https://httpoxy.org. + const httpProxy = + proxyEnv["REQUEST_METHOD"] === undefined + ? firstValue(proxyEnv["http_proxy"], proxyEnv["HTTP_PROXY"]) + : firstValue(proxyEnv["http_proxy"]); + + const proxyUrl = + url.protocol === "https:" + ? firstValue(proxyEnv["https_proxy"], proxyEnv["HTTPS_PROXY"]) + : httpProxy; + + if (typeof proxyUrl !== "string") { + return undefined; + } + + if (isNoProxy(url, firstValue(proxyEnv["no_proxy"], proxyEnv["NO_PROXY"]))) { + return undefined; + } + + return proxyUrl; +} + +/** + * Determine whether a URL should bypass the proxy because of `NO_PROXY`. + * + * Supports the common `NO_PROXY` syntax: a comma- or space-separated list of + * host suffixes, an optional leading `.` or `*.`, an optional `:port`, and `*` + * to match everything. Entries are matched as host names; IP/CIDR ranges (e.g. + * `10.0.0.0/8`) are not supported, the same as curl. + * + * @param url URL that requests will be made to. + * @param noProxy Value of the `NO_PROXY` environment variable. + * @returns Whether the proxy should be bypassed. + */ +function isNoProxy(url: URL, noProxy: string | undefined): boolean { + if (typeof noProxy !== "string") { + return false; + } + + // `url.hostname` wraps IPv6 addresses in brackets (e.g. `[::1]`); strip them + // so entries can be written with or without brackets. + const hostname = url.hostname.toLowerCase().replaceAll(/^\[|\]$/g, ""); + const port = + url.port === "" ? (url.protocol === "https:" ? 
"443" : "80") : url.port; + + for (const raw of noProxy.split(/[\s,]+/)) { + if (raw === "") { + continue; + } + + // `*` bypasses the proxy for every host. + if (raw === "*") { + return true; + } + + const entry = parseNoProxyEntry(raw); + + // A port on the entry must match the target's (default) port. + if (entry.port !== undefined && entry.port !== port) { + continue; + } + + if (entry.host !== "" && hostMatches(hostname, entry.host)) { + return true; + } + } + + return false; +} + +/** + * Parse one `NO_PROXY` entry into its host and optional port. + * + * @param raw + * A single entry from the `NO_PROXY` list (already split out and non-empty). + * @returns + * The lowercased host (with any `*.`/`.` wildcard prefix and IPv6 brackets + * removed) and the explicit `:port`, if the entry had one. + */ +function parseNoProxyEntry(raw: string): { + host: string; + port: string | undefined; +} { + const entry = raw.toLowerCase(); + + // Split off an optional `:port`. A bracketed IPv6 entry (`[::1]:8080`) keeps + // its port outside the brackets, a bare IPv6 entry (`::1`) has no port, and + // everything else treats a single trailing `:` as the port (so IPv6 + // colons are not mistaken for one). + let host = entry; + let port: string | undefined; + const bracketed = entry.match(/^\[(.+)\](?::([0-9]+))?$/); + if (bracketed === null) { + const colon = entry.lastIndexOf(":"); + if ( + colon !== -1 && + colon === entry.indexOf(":") && + /^[0-9]+$/.test(entry.slice(colon + 1)) + ) { + host = entry.slice(0, colon); + port = entry.slice(colon + 1); + } + } else { + host = bracketed[1] ?? ""; + port = bracketed[2]; + } + + // Strip a leading `*.` or `.` so `.example.com`, `*.example.com`, and + // `example.com` all match the domain and its subdomains. + return { host: host.replace(/^\*?\./, ""), port }; +} + +/** + * Whether a host name matches a `NO_PROXY` entry host, exactly or as a + * subdomain. + * + * @param hostname + * Host name of the URL being requested. 
+ * @param host + * Host parsed from a `NO_PROXY` entry. + * @returns + * Whether the host name is, or is a subdomain of, the entry host. + */ +function hostMatches(hostname: string, host: string): boolean { + return hostname === host || hostname.endsWith("." + host); +} + +/** + * Get the first non-empty string from a list of values. + * + * @param values Values to inspect. + * @returns First non-empty string, or `undefined`. + */ +function firstValue(...values: Array<string | undefined>): string | undefined { + for (const value of values) { + if (typeof value === "string" && value !== "") { + return value; + } + } + + return undefined; +} diff --git a/arcjet-guard/src/fetch.ts b/arcjet-guard/src/fetch.ts index 0287fe8a61..c3c88e6b22 100644 --- a/arcjet-guard/src/fetch.ts +++ b/arcjet-guard/src/fetch.ts @@ -7,7 +7,7 @@ * ALPN — no special configuration needed. * * Bun's fetch does not support HTTP/2 ({@link https://github.com/oven-sh/bun/issues/7194}). - * On Bun, the `"."` export resolves to the `node` entrypoint which uses + * On Bun, the `"."` export resolves to the `bun` entrypoint which uses * `node:http2` directly for HTTP/2 support. * * **Lifecycle:** Create the client once at module scope and reuse it.
diff --git a/arcjet-guard/src/transport-bun.test.ts b/arcjet-guard/src/transport-bun.test.ts new file mode 100644 index 0000000000..66d9cb7ee2 --- /dev/null +++ b/arcjet-guard/src/transport-bun.test.ts @@ -0,0 +1,37 @@ +import assert from "node:assert/strict"; +import { describe, test } from "node:test"; + +import { isolateProxyEnvironment } from "../test/_shared/proxy-env.ts"; +import { createTransport } from "./transport-bun.ts"; + +describe("createTransport (bun)", () => { + isolateProxyEnvironment(); + + test("is a function", () => { + assert.equal(typeof createTransport, "function"); + }); + + test("returns a transport-shaped object", () => { + const transport = createTransport("https://decide.arcjet.com"); + + assert.equal(typeof transport, "object"); + assert.notEqual(transport, null); + }); + + test("does not throw for valid URL", () => { + assert.doesNotThrow(() => { + createTransport("https://example.com"); + }); + }); + + // With a proxy, Bun uses the fetch transport (its native `fetch` proxies); + // without one it uses HTTP/2. Both should build a transport-shaped object. + test("builds a fetch transport when a proxy is detected", () => { + process.env.HTTPS_PROXY = "http://127.0.0.1:1"; + + const transport = createTransport("https://decide.arcjet.com"); + + assert.equal(typeof transport, "object"); + assert.notEqual(transport, null); + }); +}); diff --git a/arcjet-guard/src/transport-bun.ts b/arcjet-guard/src/transport-bun.ts new file mode 100644 index 0000000000..16cd166bec --- /dev/null +++ b/arcjet-guard/src/transport-bun.ts @@ -0,0 +1,41 @@ +/** + * Connect RPC transport factory for `@arcjet/guard` — Bun. + * + * Bun resolves the `"."` export to this entry point. Without a proxy it + * connects directly over HTTP/2 via `node:http2` (Bun's `fetch` doesn't support + * HTTP/2 — {@link https://github.com/oven-sh/bun/issues/7194}). 
When a proxy is + * detected it uses the fetch transport instead, because Bun's native `fetch` + * honors the standard proxy environment variables while its `node:http` agent + * ignores the `proxyEnv` option the Node entry point relies on. + * + * @packageDocumentation + */ + +import type { Transport } from "@connectrpc/connect"; + +import { detectProxy } from "./detect-proxy.ts"; +import { createFetchTransport } from "./transport-fetch.ts"; +import { createHttp2Transport } from "./transport-http2.ts"; + +/** + * Create a Connect transport for the given base URL on Bun. + * + * Without a proxy it connects directly over HTTP/2, optimistically + * pre-connecting so the first `.guard()` call doesn't pay the full TCP + TLS + * setup cost. When a proxy is detected (`HTTP_PROXY`/`HTTPS_PROXY`, respecting + * `NO_PROXY`) it uses the fetch transport so Bun's native `fetch` performs the + * proxying. + */ +export function createTransport(baseUrl: string): Transport { + const proxyUrl = detectProxy(new URL(baseUrl)); + + // No proxy: connect directly over HTTP/2. + if (proxyUrl === undefined) { + return createHttp2Transport(baseUrl); + } + + // Proxy: Bun's native `fetch` honors the proxy environment variables. The + // proxy was already detected and logged above, so build the fetch transport + // directly without detecting again. 
+ return createFetchTransport(baseUrl); +} diff --git a/arcjet-guard/src/transport-fetch.test.ts b/arcjet-guard/src/transport-fetch.test.ts index 1dc4032a06..9d9c4a3077 100644 --- a/arcjet-guard/src/transport-fetch.test.ts +++ b/arcjet-guard/src/transport-fetch.test.ts @@ -1,9 +1,12 @@ import assert from "node:assert/strict"; import { describe, test } from "node:test"; +import { isolateProxyEnvironment } from "../test/_shared/proxy-env.ts"; import { createTransport } from "./transport-fetch.ts"; describe("createTransport (fetch)", () => { + isolateProxyEnvironment(); + test("is a function", () => { assert.equal(typeof createTransport, "function"); }); diff --git a/arcjet-guard/src/transport-fetch.ts b/arcjet-guard/src/transport-fetch.ts index 57811f2205..6fdd921327 100644 --- a/arcjet-guard/src/transport-fetch.ts +++ b/arcjet-guard/src/transport-fetch.ts @@ -11,13 +11,15 @@ import type { Transport } from "@connectrpc/connect"; import { createConnectTransport } from "@connectrpc/connect-web"; +import { detectProxy } from "./detect-proxy.ts"; + /** * Create a Connect transport using the web (fetch-based) protocol. * * Compatible with Deno, Cloudflare Workers, Vercel Edge, * and any runtime providing the WHATWG Fetch API. * - * Note: Bun's `"."` export resolves to the `node` entrypoint for HTTP/2. + * Note: Bun's `"."` export resolves to the `bun` entrypoint for HTTP/2. * This transport is still usable on Bun via `@arcjet/guard/fetch` but * will only use HTTP/1.1. * @@ -28,6 +30,27 @@ import { createConnectTransport } from "@connectrpc/connect-web"; * @see https://github.com/connectrpc/connect-es/pull/1082 */ export function createTransport(baseUrl: string): Transport { + // The runtime's `fetch` performs any proxying itself (e.g. Deno honors the + // standard proxy environment variables natively); we detect only to log a + // line when a proxy is in use. Edge runtimes without proxy environment + // support simply won't detect one. 
+ detectProxy(new URL(baseUrl)); + + return createFetchTransport(baseUrl); +} + +/** + * Build the fetch-based Connect transport without detecting a proxy. + * + * Separated from {@link createTransport} so the Bun entry point can reuse it + * when proxying — the proxy has already been detected and logged, and Bun's + * `fetch` performs the proxying itself — without logging the startup line a + * second time. + * + * Overrides `redirect` to `"follow"` because some edge runtimes (workerd, + * edge-light) reject the `"error"` default set by connect-web. + */ +export function createFetchTransport(baseUrl: string): Transport { return createConnectTransport({ baseUrl, fetch: (input: RequestInfo | URL, init?: RequestInit) => diff --git a/arcjet-guard/src/transport-http2.ts b/arcjet-guard/src/transport-http2.ts new file mode 100644 index 0000000000..fa5db84cf2 --- /dev/null +++ b/arcjet-guard/src/transport-http2.ts @@ -0,0 +1,43 @@ +/** + * Direct HTTP/2 transport factory shared by the `@arcjet/guard` Node and Bun + * entry points. + * + * Both Node and Bun talk to the Arcjet API over HTTP/2 via + * `@connectrpc/connect-node` (Bun implements `node:http2`, but its `fetch` does + * not support HTTP/2 — {@link https://github.com/oven-sh/bun/issues/7194}). The + * proxy strategy differs between the two runtimes, so each entry point handles + * proxying itself and reuses this for the direct, no-proxy case. + * + * @packageDocumentation + */ + +import type { Transport } from "@connectrpc/connect"; +import { createConnectTransport, Http2SessionManager } from "@connectrpc/connect-node"; + +/** + * Create a direct HTTP/2 Connect transport, optimistically pre-connecting. + * + * The session is pre-connected so the first `.guard()` call doesn't pay the + * full TCP + TLS setup cost. + * + * @param baseUrl Base URL for the Arcjet API. + * @returns A Connect transport that talks HTTP/2 directly to `baseUrl`.
+ */ +export function createHttp2Transport(baseUrl: string): Transport { + const sessionManager = new Http2SessionManager(baseUrl, { + // AWS Global Accelerator doesn't support PING so we use a very high idle + // timeout. Ref: + // https://docs.aws.amazon.com/global-accelerator/latest/dg/introduction-how-it-works.html#about-idle-timeout + idleConnectionTimeoutMs: 340 * 1000, + }); + + // Optimistic pre-connect — failures are silently ignored because the real RPC + // call will retry the connection anyway. + void sessionManager.connect().catch(() => {}); + + return createConnectTransport({ + baseUrl, + httpVersion: "2", + sessionManager, + }); +} diff --git a/arcjet-guard/src/transport-node.test.ts b/arcjet-guard/src/transport-node.test.ts index 448c862187..3a23cdcd85 100644 --- a/arcjet-guard/src/transport-node.test.ts +++ b/arcjet-guard/src/transport-node.test.ts @@ -1,9 +1,12 @@ import assert from "node:assert/strict"; import { describe, test } from "node:test"; +import { isolateProxyEnvironment } from "../test/_shared/proxy-env.ts"; import { createTransport } from "./transport-node.ts"; describe("createTransport (node)", () => { + isolateProxyEnvironment(); + test("is a function", () => { assert.equal(typeof createTransport, "function"); }); @@ -20,4 +23,23 @@ describe("createTransport (node)", () => { createTransport("https://example.com"); }); }); + + test("builds an HTTPS-proxy transport for an https target", () => { + process.env.HTTPS_PROXY = "http://127.0.0.1:1"; + + const transport = createTransport("https://decide.arcjet.com"); + + assert.equal(typeof transport, "object"); + assert.notEqual(transport, null); + }); + + test("builds an HTTP-proxy transport for an http target", () => { + // Exercises the `http.Agent` branch (the https one is covered above). 
+ process.env.HTTP_PROXY = "http://127.0.0.1:1"; + + const transport = createTransport("http://decide.arcjet.com"); + + assert.equal(typeof transport, "object"); + assert.notEqual(transport, null); + }); }); diff --git a/arcjet-guard/src/transport-node.ts b/arcjet-guard/src/transport-node.ts index a62843769b..003a8fb881 100644 --- a/arcjet-guard/src/transport-node.ts +++ b/arcjet-guard/src/transport-node.ts @@ -1,36 +1,69 @@ /** - * Connect RPC transport factory for `@arcjet/guard`. + * Connect RPC transport factory for `@arcjet/guard` — Node.js. * - * Creates an HTTP/2 transport with optimistic pre-connect and a long - * idle timeout suitable for AWS Global Accelerator. + * Without a proxy it connects directly over HTTP/2. When a standard proxy + * environment variable is detected, it routes through the proxy over HTTP/1.1 + * using the built-in proxy support of the Node.js HTTP agent. + * + * This entry point is Node-only: Bun has its own entry point + * (`transport-bun.ts`) because its `fetch` proxies but its `node:http` agent + * does not, and Deno reaches the fetch entry point through the `"deno"` export + * condition. An explicit `@arcjet/guard/node` import on Bun or Deno still lands + * here and uses the Node agent — whose `proxyEnv` option those runtimes don't + * implement, so a proxy would not be applied on them (use the default import + * for proxy support there). * * @packageDocumentation */ +import * as http from "node:http"; +import * as https from "node:https"; + import type { Transport } from "@connectrpc/connect"; -import { createConnectTransport, Http2SessionManager } from "@connectrpc/connect-node"; +import { createConnectTransport } from "@connectrpc/connect-node"; + +import { detectProxy } from "./detect-proxy.ts"; +import { createHttp2Transport } from "./transport-http2.ts"; /** - * Create an HTTP/2 Connect transport for the given base URL. + * Create a Connect transport for the given base URL. 
* - * Optimistically pre-connects so the first `.guard()` call doesn't + * When a proxy is detected (`HTTP_PROXY`/`HTTPS_PROXY`, respecting `NO_PROXY`), + * the request is routed through it over HTTP/1.1 using the built-in proxy + * support of the Node.js HTTP agent. Without a proxy it connects directly over + * HTTP/2, optimistically pre-connecting so the first `.guard()` call doesn't * pay the full TCP + TLS setup cost. */ export function createTransport(baseUrl: string): Transport { - const sessionManager = new Http2SessionManager(baseUrl, { - // AWS Global Accelerator doesn't support PING so we use a very high idle - // timeout. Ref: - // https://docs.aws.amazon.com/global-accelerator/latest/dg/introduction-how-it-works.html#about-idle-timeout - idleConnectionTimeoutMs: 340 * 1000, - }); + const url = new URL(baseUrl); + const proxyUrl = detectProxy(url); + + // No proxy: connect directly over HTTP/2. + if (proxyUrl === undefined) { + return createHttp2Transport(baseUrl); + } - // Optimistic pre-connect — failures are silently ignored because the - // real RPC call will retry the connection anyway. - void sessionManager.connect().catch(() => {}); + // Proxy: route through it over HTTP/1.1 using the agent's built-in proxy + // support. Hand the agent only the single proxy variable we resolved, typed + // with the exact key names so a misspelled key is a compile error. + // `keepAlive` reuses the proxy connection across requests. The agent's + // `proxyEnv` option only exists in @types/node 24.x, so it's added through an + // intersection type to keep this type-checking on the 22.x line used across + // the monorepo. + const isHttps = url.protocol === "https:"; + const proxyEnvironment: Partial<Record<"HTTP_PROXY" | "HTTPS_PROXY", string>> = isHttps + ? { HTTPS_PROXY: proxyUrl } + : { HTTP_PROXY: proxyUrl }; + const options: http.AgentOptions & { proxyEnv: typeof proxyEnvironment } = { + keepAlive: true, + proxyEnv: proxyEnvironment, + }; + const agent = isHttps ?
new https.Agent(options) : new http.Agent(options); + // Node's built-in proxy support only works over HTTP/1.1. return createConnectTransport({ baseUrl, - httpVersion: "2", - sessionManager, + httpVersion: "1.1", + nodeOptions: { agent }, }); } diff --git a/arcjet-guard/test/_shared/proxy-env.ts b/arcjet-guard/test/_shared/proxy-env.ts new file mode 100644 index 0000000000..fad73a76e5 --- /dev/null +++ b/arcjet-guard/test/_shared/proxy-env.ts @@ -0,0 +1,44 @@ +// Shared test helper for the `createTransport` unit tests, which all need to +// neutralize the ambient proxy environment so the host environment can't flip a +// no-proxy case onto the proxy path. +import { afterEach, beforeEach } from "node:test"; + +// Standard proxy variables, cleared around every test so the host environment +// (e.g. a developer or CI runner with `HTTPS_PROXY` set) can't flip a no-proxy +// case onto the proxy path or leak a stray startup log. +const proxyEnvironmentKeys = [ + "HTTP_PROXY", + "http_proxy", + "HTTPS_PROXY", + "https_proxy", + "NO_PROXY", + "no_proxy", +]; + +/** + * Clear the standard proxy environment variables before each test in the + * calling suite and restore them afterward. Call once inside a `describe`. 
+ */ +export function isolateProxyEnvironment(): void { + const saved = new Map(); + + beforeEach(() => { + for (const key of proxyEnvironmentKeys) { + const value = process.env[key]; + if (typeof value === "string") { + saved.set(key, value); + } + delete process.env[key]; + } + }); + + afterEach(() => { + for (const key of proxyEnvironmentKeys) { + delete process.env[key]; + } + for (const [key, value] of saved) { + process.env[key] = value; + } + saved.clear(); + }); +} diff --git a/arcjet-guard/tsconfig.json b/arcjet-guard/tsconfig.json index 9f0e164bdf..7d82454f64 100644 --- a/arcjet-guard/tsconfig.json +++ b/arcjet-guard/tsconfig.json @@ -30,7 +30,7 @@ "skipLibCheck": true, "strict": true, "target": "es2023", - "types": [], + "types": ["node"], "verbatimModuleSyntax": true }, "include": ["src/**/*.ts"], diff --git a/package-lock.json b/package-lock.json index fc58bb87c7..a439457f94 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1380,7 +1380,6 @@ "os": [ "aix" ], - "peer": true, "engines": { "node": ">=18" } @@ -1398,7 +1397,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -1416,7 +1414,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -1434,7 +1431,6 @@ "os": [ "android" ], - "peer": true, "engines": { "node": ">=18" } @@ -1452,7 +1448,6 @@ "os": [ "darwin" ], - "peer": true, "engines": { "node": ">=18" } @@ -1470,7 +1465,6 @@ "os": [ "darwin" ], - "peer": true, "engines": { "node": ">=18" } @@ -1488,7 +1482,6 @@ "os": [ "freebsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1506,7 +1499,6 @@ "os": [ "freebsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1524,7 +1516,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1542,7 +1533,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1560,7 +1550,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1578,7 +1567,6 @@ "os": [ "linux" ], - "peer": true, "engines": { 
"node": ">=18" } @@ -1596,7 +1584,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1614,7 +1601,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1632,7 +1618,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1650,7 +1635,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1668,7 +1652,6 @@ "os": [ "linux" ], - "peer": true, "engines": { "node": ">=18" } @@ -1686,7 +1669,6 @@ "os": [ "netbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1704,7 +1686,6 @@ "os": [ "netbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1722,7 +1703,6 @@ "os": [ "openbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1740,7 +1720,6 @@ "os": [ "openbsd" ], - "peer": true, "engines": { "node": ">=18" } @@ -1758,7 +1737,6 @@ "os": [ "openharmony" ], - "peer": true, "engines": { "node": ">=18" } @@ -1776,7 +1754,6 @@ "os": [ "sunos" ], - "peer": true, "engines": { "node": ">=18" } @@ -1794,7 +1771,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -1812,7 +1788,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -1830,7 +1805,6 @@ "os": [ "win32" ], - "peer": true, "engines": { "node": ">=18" } @@ -10877,6 +10851,8 @@ "version": "1.5.0", "license": "Apache-2.0", "dependencies": { + "@arcjet/env": "1.5.0", + "@arcjet/logger": "1.5.0", "@bufbuild/protobuf": "2.12.0", "@connectrpc/connect": "2.1.1", "@connectrpc/connect-node": "2.1.1", diff --git a/transport/.gitignore b/transport/.gitignore index 485b5ebbbb..6c3754d55e 100644 --- a/transport/.gitignore +++ b/transport/.gitignore @@ -132,10 +132,16 @@ dist # Generated files bun.js bun.d.ts +deno.js +deno.d.ts +detect-proxy.js +detect-proxy.d.ts edge-light.js edge-light.d.ts index.js index.d.ts +proxy-tunnel.js +proxy-tunnel.d.ts workerd.js workerd.d.ts test/*.js diff --git a/transport/README.md b/transport/README.md index 7fb45253d8..8691ef86d8 100644 --- a/transport/README.md +++ 
b/transport/README.md @@ -68,33 +68,132 @@ This package exports the identifier [`createTransport`][api-create-transport]. There is no default export. -This package exports no [TypeScript][] types. - -### `createTransport(baseUrl)` - -Creates a transport that talks over HTTP/2 using -`@connectrpc/connect-node`. This is a thin wrapper around -[`createConnectTransport`][connect-create-transport]. -Alternative entry points exist for Bun, Edge Light, and `workerd` that use -`@connectrpc/connect-web` instead. +This package exports the [TypeScript][] types +[`ProxyEnvironment`][api-proxy-environment], +[`TransportLogger`][api-transport-logger], and +[`TransportOptions`][api-transport-options]. + +### `createTransport(baseUrl[, options])` + +Creates a transport that talks to the Arcjet API. On Node.js it uses +`@connectrpc/connect-node` over HTTP/2; separate entry points for Bun, Deno, +Edge Light, and `workerd` use `@connectrpc/connect-web` instead. This is a thin +wrapper around [`createConnectTransport`][connect-create-transport]. + +### Proxy support + +The standard proxy environment variables (`HTTP_PROXY` and `HTTPS_PROXY`, while +respecting `NO_PROXY`) are auto-detected, making it possible to connect to the +Arcjet API through a proxy such as [Squid][squid]. When a proxy is in use, a +line is logged at startup at `info` level (so set `ARCJET_LOG_LEVEL=info` to see +it). The proxy URL itself is not logged, since it can contain credentials. How +the request is actually proxied depends on the runtime, using each runtime's +built-in proxy support: + +- **Node.js** — requests are routed through the proxy over HTTP/1.1 using the + built-in proxy support of the Node.js HTTP agent; otherwise they are made + directly over HTTP/2. Set `proxyHttpVersion: "2"` to instead keep HTTP/2 while + proxying (see [HTTP/2 through a proxy](#http2-through-a-proxy) below). +- **Bun** and **Deno** — the runtime's `fetch` performs the proxying natively. 
+- **Edge Light** and **`workerd`** — these edge runtimes don't support outbound + proxy environment variables, so no proxy is used. + +`NO_PROXY` accepts a comma- or space-separated list of host suffixes, each with +an optional leading `.` or `*.` and an optional `:port`, plus `*` to bypass the +proxy for every host. Entries are matched as host names; IP/CIDR ranges (such as +`10.0.0.0/8`) are not supported, the same as [curl][curl-noproxy]. On Bun and +Deno the runtime's `fetch` applies `NO_PROXY` itself, so its exact semantics are +the runtime's. + +#### HTTP/2 through a proxy + +By default, proxying on Node.js downgrades the connection from HTTP/2 to +HTTP/1.1, because Node's built-in agent proxy support only works over HTTP/1.1. +For a latency-sensitive API this is unfortunate: it gives up HTTP/2's +multiplexing, so a burst of concurrent requests opens a new proxy connection +each instead of sharing one. + +Setting `proxyHttpVersion: "2"` keeps HTTP/2 end-to-end. The transport opens an +HTTP `CONNECT` tunnel to the proxy and then performs the TLS handshake — and the +ALPN negotiation that selects `h2` — directly with the origin. The proxy only +blindly forwards the tunnel, so it never sees, and cannot downgrade, the +negotiated protocol. + +This comes with caveats: + +- **Node.js only.** Bun and Deno don't implement the agent option this builds + on; they proxy through their `fetch` (over HTTP/1.1) regardless of this + setting, and the edge runtimes don't proxy at all. +- **Requires a tunneling (`CONNECT`) proxy** — the common kind for HTTPS egress, + including [Squid][squid]. A proxy that terminates TLS and re-originates an + HTTP/1.1 connection to the origin (a TLS-intercepting / "MITM" proxy) cannot + preserve HTTP/2 no matter what this option is set to. +- **The proxy must not buffer the tunnel.** HTTP/2 sends many small, dependent + frames. 
The transport disables [Nagle's algorithm][nagle] (`TCP_NODELAY`) on + its side of the tunnel, but if the proxy buffers tunneled bytes (or leaves + Nagle enabled on its upstream socket) the interaction with delayed ACKs can + add roughly 40 ms of latency per round trip, erasing the benefit. + Tunneling proxies such as Squid set `TCP_NODELAY` on `CONNECT` tunnels by + default; verify this if you use a different proxy. + +When no proxy applies, this option has no effect — direct connections always use +HTTP/2. ###### Parameters - `baseUrl` (`string`, example: `https://example.com/my-api`) — the base URL for all HTTP requests +- `options` ([`TransportOptions`][api-transport-options], optional) + — configuration ###### Returns A Connect transport that you can pass to `createClient` from `@arcjet/protocol`. +### `ProxyEnvironment` + +Map of environment variables used to detect an outbound proxy (TypeScript +type). This is the same shape as `process.env`. + +### `TransportLogger` + +Logger used to print a line at startup when a proxy is detected (TypeScript +type). It must provide an `info` method. + +### `TransportOptions` + +Configuration for `createTransport` (TypeScript type). 
+ +###### Fields + +- `log` ([`TransportLogger`][api-transport-logger], optional) + — logger used to print a line at startup when a proxy is detected; defaults + to a logger configured from the `ARCJET_LOG_LEVEL` environment variable +- `proxyEnv` ([`ProxyEnvironment`][api-proxy-environment] or `false`, optional) + — environment variables used to detect an outbound proxy; defaults to + `process.env` so standard proxy environment variables are auto-detected; pass + `false` to ignore proxy environment variables +- `proxyHttpVersion` (`"1.1"` or `"2"`, optional, default `"1.1"`) + — HTTP version to use when a proxy is in use on Node.js; `"1.1"` routes + through the proxy using the Node.js HTTP agent, while `"2"` keeps HTTP/2 by + tunneling through the proxy with `CONNECT`; has no effect without a proxy, or + on Bun, Deno, and the edge runtimes (see + [HTTP/2 through a proxy](#http2-through-a-proxy)) + ## License [Apache License, Version 2.0][apache-license] © [Arcjet Labs, Inc.][arcjet] [apache-license]: http://www.apache.org/licenses/LICENSE-2.0 -[api-create-transport]: #createtransportbaseurl +[api-create-transport]: #createtransportbaseurl-options +[api-proxy-environment]: #proxyenvironment +[api-transport-logger]: #transportlogger +[api-transport-options]: #transportoptions [arcjet]: https://arcjet.com [arcjet-get-started]: https://docs.arcjet.com/get-started [connect-create-transport]: https://connectrpc.com/docs/web/choosing-a-protocol/ +[curl-noproxy]: https://curl.se/docs/manpage.html#--noproxy +[nagle]: https://en.wikipedia.org/wiki/Nagle%27s_algorithm +[squid]: https://www.squid-cache.org/ [typescript]: https://www.typescriptlang.org/ diff --git a/transport/bun.ts b/transport/bun.ts index 9fff4fcd4b..3d8026507b 100644 --- a/transport/bun.ts +++ b/transport/bun.ts @@ -1,9 +1,29 @@ // This file is used when running in Bun. // It uses DOM based APIs (`@connectrpc/connect-web`) to connect to the API. 
// Bun slightly differs in how it implements Node APIs and that causes problems. +// +// Bun's `fetch` has built-in proxy support and honors the standard proxy +// environment variables (`HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY`), so we +// only need to detect and log when a proxy is in use. +import type { Transport } from "@connectrpc/connect"; import { createConnectTransport } from "@connectrpc/connect-web"; +import { detectProxy } from "./detect-proxy.js"; + +export type { + ProxyEnvironment, + TransportLogger, + TransportOptions, +} from "./detect-proxy.js"; + +import type { TransportOptions } from "./detect-proxy.js"; + +export function createTransport( + baseUrl: string, + options?: TransportOptions, +): Transport { + // Bun's `fetch` performs the proxying itself; we detect to log a line. + detectProxy(new URL(baseUrl), options); -export function createTransport(baseUrl: string) { return createConnectTransport({ baseUrl, }); diff --git a/transport/deno.ts b/transport/deno.ts new file mode 100644 index 0000000000..d942b1a1fc --- /dev/null +++ b/transport/deno.ts @@ -0,0 +1,48 @@ +// This file is used when running on Deno. +// It uses DOM based APIs (`@connectrpc/connect-web`) to connect to the API +// rather than the Node.js HTTP/2 transport, because Deno's `fetch` has built-in +// proxy support and honors the standard proxy environment variables +// (`HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY`) while its Node.js HTTP +// compatibility layer does not. +// +// Like `edge-light.ts` and `workerd.ts`, this solves the `redirect` option set +// to `error` inside `connect`. 
+// +// For more information, see: +// +// * +// * +// * +// * +// * +import type { Transport } from "@connectrpc/connect"; +import { createConnectTransport } from "@connectrpc/connect-web"; +import { detectProxy } from "./detect-proxy.js"; + +export type { + ProxyEnvironment, + TransportLogger, + TransportOptions, +} from "./detect-proxy.js"; + +import type { TransportOptions } from "./detect-proxy.js"; + +export function createTransport( + baseUrl: string, + options?: TransportOptions, +): Transport { + // Deno's `fetch` performs the proxying itself; we detect to log a line. + detectProxy(new URL(baseUrl), options); + + return createConnectTransport({ + baseUrl, + fetch: fetchProxy, + }); +} + +function fetchProxy( + input: Request | URL | string, + init?: RequestInit | undefined, +): Promise<Response> { + return fetch(input, { ...init, redirect: "follow" }); +} diff --git a/transport/detect-proxy.ts b/transport/detect-proxy.ts new file mode 100644 index 0000000000..02e50beec4 --- /dev/null +++ b/transport/detect-proxy.ts @@ -0,0 +1,309 @@ +import process from "node:process"; +import { logLevel } from "@arcjet/env"; +import { Logger } from "@arcjet/logger"; + +/** + * Map of environment variables used to detect an outbound proxy. + * + * This is the same shape as `process.env`. + */ +export type ProxyEnvironment = Record<string, string | undefined>; + +/** + * Minimal logger used to print a line when a proxy is detected. + */ +export interface TransportLogger { + /** + * Log an informational message. + * + * @param message + * Template. + * @param interpolationValues + * Parameters to interpolate. + * @returns + * Nothing. + */ + info(message: string, ...interpolationValues: unknown[]): void; +} + +/** + * Configuration shared by all transports. + */ +export interface TransportOptions { + /** + * Logger used to print a line at startup when a proxy is detected (optional). + * + * Defaults to a logger configured from the `ARCJET_LOG_LEVEL` environment + * variable.
+ */ + log?: TransportLogger | undefined; + + /** + * Environment variables used to detect an outbound proxy (optional). + * + * Defaults to `process.env` so standard proxy environment variables + * (`HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY`) are auto-detected. Pass + * `false` to ignore proxy environment variables entirely. + */ + proxyEnv?: ProxyEnvironment | false | undefined; + + /** + * HTTP version to use when a proxy is in use, on Node.js (optional). + * + * Has no effect when no proxy applies, and no effect on Bun, Deno, or the + * edge runtimes (which proxy through their `fetch` instead). Ignored for + * direct connections, which always use HTTP/2. + * + * - `"1.1"` (default) routes through the proxy over HTTP/1.1 using the + * built-in proxy support of the Node.js HTTP agent. This works with any + * proxy the agent supports, but loses the latency benefits of HTTP/2. + * - `"2"` establishes an HTTP `CONNECT` tunnel and keeps HTTP/2 to the origin + * end-to-end. This requires a tunneling (`CONNECT`) proxy — the common kind + * for HTTPS egress — and a proxy that does not buffer the tunnel (see the + * proxy support notes in the README). A proxy that terminates TLS and + * speaks HTTP/1.1 to origins cannot preserve HTTP/2 regardless. + * + * Defaults to `"1.1"`. + */ + proxyHttpVersion?: "1.1" | "2" | undefined; +} + +/** + * Detect the proxy that applies to a URL and log a line when one is found. + * + * Standard proxy environment variables (`HTTP_PROXY` and `HTTPS_PROXY`, + * respecting `NO_PROXY`) are auto-detected. When a proxy applies, a single line + * is logged at startup so it is easy to know when a proxy is being used. The + * proxy URL itself is not logged, since it can contain credentials. + * + * Takes an already-parsed `URL` so callers that also need it (e.g. to pick an + * HTTP vs HTTPS agent) don't parse the base URL twice. + * + * @param url + * URL that requests will be made to. + * @param options + * Configuration (optional). 
+ * @returns + * Proxy URL that applies to `url`, or `undefined` when no proxy applies. + */ +export function detectProxy( + url: URL, + options?: TransportOptions, +): string | undefined { + // Default to detecting proxy configuration from `process.env`. Passing + // `false` disables proxy detection entirely. + const proxyEnv = + options?.proxyEnv === false + ? undefined + : (options?.proxyEnv ?? process.env); + + let proxyUrl: string | undefined; + try { + proxyUrl = proxyEnv ? proxyForUrl(url, proxyEnv) : undefined; + } catch { + // Reading proxy environment variables can throw on runtimes that gate + // environment access behind a permission (e.g. Deno without `--allow-env`). + // Treat that as "no proxy" rather than failing transport creation. + return undefined; + } + + if (typeof proxyUrl === "string") { + // Log a line at startup so it is easy to know when a proxy is being used. + // We deliberately do not log the proxy URL itself: it can contain + // credentials, and not logging it is simpler and safer than redacting it. + let log = options?.log; + if (!log) { + try { + log = new Logger({ + level: logLevel({ ARCJET_LOG_LEVEL: process.env.ARCJET_LOG_LEVEL }), + }); + } catch { + // Building the default logger reads `ARCJET_LOG_LEVEL`, which can throw + // on runtimes that gate environment access (e.g. Deno without + // `--allow-env`) when the proxy came from an explicit `proxyEnv`. Skip + // the startup line rather than failing transport creation; the proxy is + // still returned below. + } + } + log?.info("Connecting to the Arcjet API through a proxy"); + } + + return proxyUrl; +} + +// --------------------------------------------------------------------------- +// Keep the proxy-resolution logic below in sync with the copy in +// `@arcjet/guard` (`arcjet-guard/src/detect-proxy.ts`). 
The two packages +// intentionally duplicate it rather than share a module: `@arcjet/guard` +// bundles a fetch transport that runs on edge runtimes without `process` or +// these dependencies, so it keeps an edge-safe copy with no imports. Only the +// `detectProxy` entry point above differs between the copies; the helpers +// below should stay logically identical (the two may differ only in line +// wrapping, since each package runs a different formatter). +// --------------------------------------------------------------------------- + +/** + * Find the proxy that should be used for a URL, if any. + * + * Honors `NO_PROXY` so the result reflects the connection that will actually be + * made. + * + * @param url + * URL that requests will be made to. + * @param proxyEnv + * Environment variables to inspect. + * @returns + * Proxy URL to use, or `undefined` when no proxy applies. + */ +function proxyForUrl(url: URL, proxyEnv: ProxyEnvironment): string | undefined { + // httpoxy mitigation: under CGI the inbound `Proxy` request header is exposed + // as the `HTTP_PROXY` environment variable, so honoring uppercase `HTTP_PROXY` + // for HTTP targets could let a request control outbound proxying. When a CGI + // environment is detected (`REQUEST_METHOD` is set), ignore it and use only + // the lowercase `http_proxy`. See https://httpoxy.org. + const httpProxy = + proxyEnv["REQUEST_METHOD"] === undefined + ? firstValue(proxyEnv["http_proxy"], proxyEnv["HTTP_PROXY"]) + : firstValue(proxyEnv["http_proxy"]); + + const proxyUrl = + url.protocol === "https:" + ? firstValue(proxyEnv["https_proxy"], proxyEnv["HTTPS_PROXY"]) + : httpProxy; + + if (typeof proxyUrl !== "string") { + return undefined; + } + + if (isNoProxy(url, firstValue(proxyEnv["no_proxy"], proxyEnv["NO_PROXY"]))) { + return undefined; + } + + return proxyUrl; +} + +/** + * Determine whether a URL should bypass the proxy because of `NO_PROXY`. 
+ * + * Supports the common `NO_PROXY` syntax: a comma- or space-separated list of + * host suffixes, an optional leading `.` or `*.`, an optional `:port`, and `*` + * to match everything. Entries are matched as host names; IP/CIDR ranges (e.g. + * `10.0.0.0/8`) are not supported, the same as curl. + * + * @param url + * URL that requests will be made to. + * @param noProxy + * Value of the `NO_PROXY` environment variable. + * @returns + * Whether the proxy should be bypassed. + */ +function isNoProxy(url: URL, noProxy: string | undefined): boolean { + if (typeof noProxy !== "string") { + return false; + } + + // `url.hostname` wraps IPv6 addresses in brackets (e.g. `[::1]`); strip them + // so entries can be written with or without brackets. + const hostname = url.hostname.toLowerCase().replaceAll(/^\[|\]$/g, ""); + const port = + url.port === "" ? (url.protocol === "https:" ? "443" : "80") : url.port; + + for (const raw of noProxy.split(/[\s,]+/)) { + if (raw === "") { + continue; + } + + // `*` bypasses the proxy for every host. + if (raw === "*") { + return true; + } + + const entry = parseNoProxyEntry(raw); + + // A port on the entry must match the target's (default) port. + if (entry.port !== undefined && entry.port !== port) { + continue; + } + + if (entry.host !== "" && hostMatches(hostname, entry.host)) { + return true; + } + } + + return false; +} + +/** + * Parse one `NO_PROXY` entry into its host and optional port. + * + * @param raw + * A single entry from the `NO_PROXY` list (already split out and non-empty). + * @returns + * The lowercased host (with any `*.`/`.` wildcard prefix and IPv6 brackets + * removed) and the explicit `:port`, if the entry had one. + */ +function parseNoProxyEntry(raw: string): { + host: string; + port: string | undefined; +} { + const entry = raw.toLowerCase(); + + // Split off an optional `:port`. 
A bracketed IPv6 entry (`[::1]:8080`) keeps + // its port outside the brackets, a bare IPv6 entry (`::1`) has no port, and + // everything else treats a single trailing `:` as the port (so IPv6 + // colons are not mistaken for one). + let host = entry; + let port: string | undefined; + const bracketed = entry.match(/^\[(.+)\](?::([0-9]+))?$/); + if (bracketed === null) { + const colon = entry.lastIndexOf(":"); + if ( + colon !== -1 && + colon === entry.indexOf(":") && + /^[0-9]+$/.test(entry.slice(colon + 1)) + ) { + host = entry.slice(0, colon); + port = entry.slice(colon + 1); + } + } else { + host = bracketed[1] ?? ""; + port = bracketed[2]; + } + + // Strip a leading `*.` or `.` so `.example.com`, `*.example.com`, and + // `example.com` all match the domain and its subdomains. + return { host: host.replace(/^\*?\./, ""), port }; +} + +/** + * Whether a host name matches a `NO_PROXY` entry host, exactly or as a + * subdomain. + * + * @param hostname + * Host name of the URL being requested. + * @param host + * Host parsed from a `NO_PROXY` entry. + * @returns + * Whether the host name is, or is a subdomain of, the entry host. + */ +function hostMatches(hostname: string, host: string): boolean { + return hostname === host || hostname.endsWith("." + host); +} + +/** + * Get the first non-empty string from a list of values. + * + * @param values + * Values to inspect. + * @returns + * First non-empty string, or `undefined`. 
+ */ +function firstValue(...values: Array): string | undefined { + for (const value of values) { + if (typeof value === "string" && value !== "") { + return value; + } + } + + return undefined; +} diff --git a/transport/edge-light.ts b/transport/edge-light.ts index 226a7b1092..226d7bc899 100644 --- a/transport/edge-light.ts +++ b/transport/edge-light.ts @@ -12,9 +12,24 @@ // * // * // * +import type { Transport } from "@connectrpc/connect"; import { createConnectTransport } from "@connectrpc/connect-web"; -export function createTransport(baseUrl: string) { +export type { + ProxyEnvironment, + TransportLogger, + TransportOptions, +} from "./detect-proxy.js"; + +import type { TransportOptions } from "./detect-proxy.js"; + +export function createTransport( + baseUrl: string, + // These edge runtimes don't support outbound proxy environment variables, so + // the options are accepted for API parity with the other entry points but no + // proxy is detected or used. + _options?: TransportOptions, +): Transport { return createConnectTransport({ baseUrl, fetch: fetchProxy, diff --git a/transport/index.ts b/transport/index.ts index 2a30a64f97..8202de7a9c 100644 --- a/transport/index.ts +++ b/transport/index.ts @@ -1,33 +1,126 @@ +import type { Transport } from "@connectrpc/connect"; import { createConnectTransport, Http2SessionManager, } from "@connectrpc/connect-node"; +import * as http from "node:http"; +import * as https from "node:https"; +import { detectProxy } from "./detect-proxy.js"; +import { createTunnelingConnection } from "./proxy-tunnel.js"; + +export type { + ProxyEnvironment, + TransportLogger, + TransportOptions, +} from "./detect-proxy.js"; + +import type { TransportOptions } from "./detect-proxy.js"; /** - * Create a transport that talks over HTTP/2 using Connect RPC. + * Create a transport that talks to the Arcjet API using Connect RPC. * * A thin wrapper around {@linkcode createConnectTransport}. 
* + * When a standard proxy environment variable (`HTTP_PROXY` or `HTTPS_PROXY`, + * respecting `NO_PROXY`) is detected, the transport routes requests through the + * proxy and logs a line at startup. By default it proxies over HTTP/1.1 using + * the built-in proxy support of the Node.js HTTP agent; set + * `options.proxyHttpVersion` to `"2"` to instead tunnel HTTP/2 to the origin + * via `CONNECT` (see {@linkcode TransportOptions.proxyHttpVersion}). Without a + * proxy it always connects directly over HTTP/2. + * * @param baseUrl * Base URI for all HTTP requests (example: `https://example.com/my-api`). + * @param options + * Configuration (optional). * @returns * Connect transport used to make RPC calls. */ -export function createTransport(baseUrl: string) { - // We create our own session manager so we can attempt to pre-connect - const sessionManager = new Http2SessionManager(baseUrl, { +export function createTransport( + baseUrl: string, + options?: TransportOptions, +): Transport { + const url = new URL(baseUrl); + const proxyUrl = detectProxy(url, options); + + if (typeof proxyUrl === "string") { + if (options?.proxyHttpVersion === "2") { + // HTTP/2 through the proxy: open a `CONNECT` tunnel and keep HTTP/2 to + // the origin end-to-end. The proxy only blindly forwards the tunnel, so + // ALPN is negotiated directly with the origin — see `./proxy-tunnel.ts`. + return createHttp2Transport(baseUrl, createTunnelingConnection(proxyUrl)); + } + + // HTTP/1.1 through the proxy (default). Hand the agent only the single + // proxy variable we resolved (not the whole environment) so it routes + // through exactly the proxy our detection chose, honoring our `NO_PROXY` + // handling. `keepAlive` lets the agent reuse the connection to the proxy + // across requests, like the long-lived session of the direct HTTP/2 path. + // + // Type the literal with the exact proxy variable names so a misspelled key + // is a compile error. 
The agent's `proxyEnv` option only exists in + // @types/node 24.x, but this source is also type-checked on the 22.x line + // (e.g. when bundled into @arcjet/next or @arcjet/sveltekit), so `proxyEnv` + // is added through an intersection type rather than relying on it being a + // known `AgentOptions` property. The `as unknown as ProcessEnv` is needed + // because some augmentations (e.g. Next.js) make `ProcessEnv` require + // `NODE_ENV`; the object is correct at runtime. + const isHttps = url.protocol === "https:"; + const proxyEnvironment: Partial< + Record<"HTTP_PROXY" | "HTTPS_PROXY", string> + > = isHttps ? { HTTPS_PROXY: proxyUrl } : { HTTP_PROXY: proxyUrl }; + const agentOptions: http.AgentOptions & { proxyEnv: NodeJS.ProcessEnv } = { + keepAlive: true, + proxyEnv: proxyEnvironment as unknown as NodeJS.ProcessEnv, + }; + + const agent = isHttps + ? new https.Agent(agentOptions) + : new http.Agent(agentOptions); + + // Node's built-in proxy support only works over HTTP/1.1. + return createConnectTransport({ + baseUrl, + httpVersion: "1.1", + nodeOptions: { agent }, + }); + } + + // No proxy: connect directly over HTTP/2. + return createHttp2Transport(baseUrl); +} + +/** + * Build a direct HTTP/2 transport with an optimistically pre-connecting session + * manager. + * + * When `createConnection` is supplied the session is tunneled through it (used + * to route HTTP/2 through a proxy via `CONNECT`); otherwise it connects directly + * to `baseUrl`. Either way pings and the idle timeout behave identically — only + * the underlying connection differs. + * + * @param baseUrl + * Base URI for all HTTP requests. + * @param createConnection + * Optional connection factory passed through to `http2.connect` (optional). + * @returns + * Connect transport that talks HTTP/2 to `baseUrl`. 
+ */ +function createHttp2Transport( + baseUrl: string, + createConnection?: ReturnType, +): Transport { + const sessionManager = new Http2SessionManager( + baseUrl, // AWS Global Accelerator doesn't support PING so we use a very high idle // timeout. Ref: // https://docs.aws.amazon.com/global-accelerator/latest/dg/introduction-how-it-works.html#about-idle-timeout - idleConnectionTimeoutMs: 340 * 1000, - }); + { idleConnectionTimeoutMs: 340 * 1000 }, + createConnection ? { createConnection } : undefined, + ); - // We ignore the promise result because this is an optimistic pre-connect + // We ignore the promise result because this is an optimistic pre-connect. sessionManager.connect(); - return createConnectTransport({ - baseUrl, - httpVersion: "2", - sessionManager, - }); + return createConnectTransport({ baseUrl, httpVersion: "2", sessionManager }); } diff --git a/transport/package.json b/transport/package.json index 92c82dede8..75164a9c75 100644 --- a/transport/package.json +++ b/transport/package.json @@ -32,6 +32,7 @@ "types": "./index.d.ts", "exports": { "bun": "./bun.js", + "deno": "./deno.js", "edge-light": "./edge-light.js", "workerd": "./workerd.js", "default": "./index.js" @@ -39,10 +40,16 @@ "files": [ "bun.d.ts", "bun.js", + "deno.d.ts", + "deno.js", + "detect-proxy.d.ts", + "detect-proxy.js", "edge-light.d.ts", "edge-light.js", "index.d.ts", "index.js", + "proxy-tunnel.d.ts", + "proxy-tunnel.js", "workerd.d.ts", "workerd.js" ], @@ -51,9 +58,13 @@ "lint": "eslint .", "test-api": "node --test -- test/*.test.js", "test-coverage": "node --experimental-test-coverage --test -- test/*.test.js", + "test-runtime-bun": "npm run build && HTTPS_PROXY=http://127.0.0.1:49219 bun test test/runtime/proxy.bun.test.ts", + "test-runtime-deno": "npm run build && HTTPS_PROXY=http://127.0.0.1:49219 deno test --allow-all --no-check test/runtime/proxy.deno.test.ts", "test": "npm run build && npm run lint && npm run test-coverage" }, "dependencies": { + "@arcjet/env": "1.5.0", 
+ "@arcjet/logger": "1.5.0", "@bufbuild/protobuf": "2.12.0", "@connectrpc/connect": "2.1.1", "@connectrpc/connect-node": "2.1.1", diff --git a/transport/proxy-tunnel.ts b/transport/proxy-tunnel.ts new file mode 100644 index 0000000000..099a872b14 --- /dev/null +++ b/transport/proxy-tunnel.ts @@ -0,0 +1,183 @@ +import type { SecureClientSessionOptions } from "node:http2"; +import * as net from "node:net"; +import { Duplex } from "node:stream"; +import * as tls from "node:tls"; + +/** + * Route an HTTP/2 session through a forward proxy using an HTTP `CONNECT` + * tunnel, preserving HTTP/2 to the origin. + * + * Node's built-in HTTP agent proxy support (and the `https-proxy-agent` family) + * only wire a proxy into the HTTP/1.1 agent, which is why proxying otherwise + * forces a downgrade from HTTP/2. But HTTP/2 survives a `CONNECT` tunnel + * end-to-end: the proxy is told to open a raw TCP tunnel and thereafter only + * blindly forwards bytes (RFC 9110 §9.3.6), so the TLS handshake — including the + * ALPN negotiation that selects `h2` — happens directly with the origin. The + * proxy never sees, and so cannot downgrade, the negotiated protocol. + * + * The one wrinkle is that {@linkcode http2.connect}'s `createConnection` + * callback must return a {@linkcode Duplex} synchronously, but the `CONNECT` + * handshake is asynchronous. We bridge that gap with a small `Duplex` that + * buffers whatever the consumer writes (the TLS `ClientHello`, or the HTTP/2 + * client preface for a cleartext target) until the proxy answers `2xx`, then + * splices itself onto the proxy socket. Because the contract stays synchronous, + * this drops into `@connectrpc/connect-node`'s default `Http2SessionManager` + * via `nodeOptions.createConnection` with no fork — reconnection, pings, and the + * idle timeout all keep working. + * + * This is Node-only. Bun and Deno don't implement the agent option this sits + * alongside, and their `fetch` is used for proxying instead. 
+ * + * @param proxyUrl + * Proxy to route through (for example `http://127.0.0.1:3128`). An HTTPS proxy + * (TLS to the proxy itself) is supported too. + * @returns + * A `createConnection` callback for `http2.connect(..., { createConnection })` + * (and therefore for connect-node's `nodeOptions.createConnection`). + */ +export function createTunnelingConnection( + proxyUrl: string, +): (authority: URL, options: SecureClientSessionOptions) => Duplex { + const proxy = new URL(proxyUrl); + const proxyIsHttps = proxy.protocol === "https:"; + const proxyPort = Number(proxy.port) || (proxyIsHttps ? 443 : 80); + + // `Proxy-Authorization` header from any credentials embedded in the proxy URL. + const proxyAuthorization = + proxy.username === "" + ? undefined + : "Basic " + + Buffer.from( + decodeURIComponent(proxy.username) + + ":" + + decodeURIComponent(proxy.password), + ).toString("base64"); + + return function createConnection(authority, options): Duplex { + const originIsHttps = authority.protocol === "https:"; + const originPort = Number(authority.port) || (originIsHttps ? 443 : 80); + const originAuthority = authority.hostname + ":" + originPort; + + // The bridge is the underlying transport the HTTP/2 client writes into. We + // hold those bytes until the `CONNECT` tunnel is established, then flush and + // splice the bridge onto the proxy socket. + let tunnelReady = false; + const pending: Array<{ + chunk: Buffer; + callback: (error?: Error | null) => void; + }> = []; + + const bridge = new Duplex({ + read() { + // Push-driven; see the splice below. + }, + write(chunk, _encoding, callback) { + const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk); + if (tunnelReady) { + proxySocket.write(buffer, callback); + } else { + pending.push({ chunk: buffer, callback }); + } + }, + }); + + // 1. Open the connection to the proxy itself. 
Typed as the common + // `net.Socket` supertype (a `tls.TLSSocket` is one) so `.on(...)` event + // listeners resolve against a single typed event map rather than a union + // that would leave their parameters implicitly `any`. + const proxySocket: net.Socket = proxyIsHttps + ? tls.connect({ + host: proxy.hostname, + port: proxyPort, + servername: proxy.hostname, + }) + : net.connect({ host: proxy.hostname, port: proxyPort }); + + // Disable Nagle's algorithm on the tunnel. HTTP/2 sends many small, + // dependent control frames; left on, Nagle interacts with the peer's + // delayed ACK to add ~40ms per round trip. Node sets this on its own HTTP/2 + // sockets, but since we supply the socket we must set it ourselves. Note + // that an intermediate proxy that buffers the tunnel can reintroduce the + // same penalty regardless of this setting. + proxySocket.setNoDelay(true); + + // 2. Once connected to the proxy, ask it to tunnel to the origin authority. + proxySocket.once(proxyIsHttps ? "secureConnect" : "connect", () => { + let request = "CONNECT " + originAuthority + " HTTP/1.1\r\n"; + request += "Host: " + originAuthority + "\r\n"; + if (proxyAuthorization !== undefined) { + request += "Proxy-Authorization: " + proxyAuthorization + "\r\n"; + } + request += "\r\n"; + proxySocket.write(request); + }); + + // 3. Read the proxy's response. Buffer until the header terminator, check + // the status line, and only on `2xx` splice the tunnel onto the bridge. + let head = Buffer.alloc(0); + function onData(chunk: Buffer) { + head = Buffer.concat([head, chunk]); + const terminator = head.indexOf("\r\n\r\n"); + if (terminator === -1) { + return; // Wait for the full response head. 
+ } + + proxySocket.off("data", onData); + + const statusLine = head + .subarray(0, head.indexOf("\r\n")) + .toString("latin1"); + const status = Number(statusLine.split(" ")[1]); + if (!(status >= 200 && status < 300)) { + const error = new Error( + "Proxy CONNECT failed with status: " + statusLine.trim(), + ); + proxySocket.destroy(error); + bridge.destroy(error); + return; + } + + // Anything past the header terminator is already tunnel data (rare for a + // fresh handshake, but never drop it). + const leftover = head.subarray(terminator + 4); + if (leftover.length > 0) { + bridge.push(leftover); + } + + // Splice: proxy -> bridge, so origin bytes become readable by the client. + proxySocket.on("data", (data: Buffer) => bridge.push(data)); + proxySocket.on("end", () => bridge.push(null)); + + // Flush whatever the client buffered while the tunnel was establishing. + tunnelReady = true; + for (const { chunk: queued, callback } of pending) { + proxySocket.write(queued, callback); + } + pending.length = 0; + + // The CONNECT response head is no longer needed; release it so it isn't + // retained for the lifetime of the (long-lived) connection. + head = Buffer.alloc(0); + } + proxySocket.on("data", onData); + + // Propagate failures in both directions so the session manager observes them. + proxySocket.on("error", (error: Error) => bridge.destroy(error)); + bridge.on("close", () => proxySocket.destroy()); + + // 4. For an HTTPS origin, run TLS *to the origin* over the tunnel, offering + // h2 via ALPN; ALPN is negotiated with the origin, not the proxy. For a + // cleartext origin, the bridge itself carries HTTP/2 (h2c) over the + // tunnel. 
+ if (!originIsHttps) { + return bridge; + } + + return tls.connect({ + ...options, + socket: bridge, + servername: authority.hostname, + ALPNProtocols: ["h2"], + }); + }; +} diff --git a/transport/test/drift.test.ts b/transport/test/drift.test.ts new file mode 100644 index 0000000000..3b19ffc705 --- /dev/null +++ b/transport/test/drift.test.ts @@ -0,0 +1,44 @@ +import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; +import test from "node:test"; +import { fileURLToPath } from "node:url"; + +// The proxy-resolution logic is intentionally duplicated between +// `@arcjet/transport` and `@arcjet/guard` (the guard copy stays edge-safe with +// no imports). The two are allowed to differ only in their `detectProxy` entry +// point and in formatting; the shared helpers (`proxyForUrl`, `isNoProxy`, +// `firstValue`) must stay logically identical, per the "keep in sync" comments +// in both files. This test fails if they drift, so a fix applied to one copy +// can't silently miss the other. + +// Everything from the first shared helper to the end of the file, with comments +// and all whitespace removed so only the logic (tokens) is compared — line +// wrapping and each package's formatter are ignored. 
+function sharedHelpers(source: string): string { + const start = source.indexOf("function proxyForUrl"); + assert.notEqual(start, -1, "could not locate the shared proxy helpers"); + return source + .slice(start) + .replace(/\/\*[\s\S]*?\*\//g, "") // block comments + .replace(/\/\/.*$/gm, "") // line comments + .replace(/\s+/g, ""); // all whitespace +} + +function read(relativePath: string): string { + return readFileSync( + fileURLToPath(new URL(relativePath, import.meta.url)), + "utf8", + ); +} + +test("proxy-resolution helpers stay in sync across packages", function () { + const transport = sharedHelpers(read("../detect-proxy.ts")); + const guard = sharedHelpers(read("../../arcjet-guard/src/detect-proxy.ts")); + + assert.equal( + guard, + transport, + "The shared proxy helpers in transport/detect-proxy.ts and " + + "arcjet-guard/src/detect-proxy.ts have drifted. Apply the change to both.", + ); +}); diff --git a/transport/test/index.test.ts b/transport/test/index.test.ts index c0dbf88c2d..05aac2cbc3 100644 --- a/transport/test/index.test.ts +++ b/transport/test/index.test.ts @@ -1,16 +1,86 @@ import assert from "node:assert/strict"; import http2 from "node:http2"; import http from "node:http"; +import https from "node:https"; import test from "node:test"; +import type { Transport } from "@connectrpc/connect"; import { connectNodeAdapter } from "@connectrpc/connect-node"; import { createClient } from "@connectrpc/connect"; import { createTransport as createTransportBun } from "../bun.js"; +import { createTransport as createTransportDeno } from "../deno.js"; import { createTransport as createTransportEdge } from "../edge-light.js"; +import { createTransport as createTransportWorkerd } from "../workerd.js"; import { createTransport } from "../index.js"; +import { createTunnelingConnection } from "../proxy-tunnel.js"; import { ElizaService } from "./eliza_pb.js"; +import { + close, + createConnectProxy, + createProxy, + generateSelfSignedCert, + listen, + 
withHttpProxyEnvironment, +} from "./proxy.js"; + +function elizaRoutes() { + return connectNodeAdapter({ + routes(router) { + router.service(ElizaService, { + say(request) { + return { sentence: "You said `" + request.sentence + "`" }; + }, + }); + }, + }); +} + +// Message logged once at startup when a proxy is detected. The proxy URL is +// deliberately not included, so it can never leak credentials. +const proxyMessage = "Connecting to the Arcjet API through a proxy"; + +// Construct a transport with the given proxy environment and return the message +// that was logged (or `undefined` when nothing was logged). Uses the Bun +// transport because constructing it has no side effects — no network +// connection is opened — which keeps these checks fast and deterministic. +function loggedProxy( + baseUrl: string, + proxyEnv: Record, +): string | undefined { + let logged: string | undefined; + createTransportBun(baseUrl, { + log: { + info(message) { + logged = message; + }, + }, + proxyEnv, + }); + return logged; +} let uniquePort = 3400; +// Start an HTTP origin serving the Eliza service, run `fn` with its URL, then +// close it. 
+async function withHttpOrigin( + fn: (url: string) => Promise, +): Promise { + const port = uniquePort++; + const server = http.createServer(elizaRoutes()); + + await new Promise(function (resolve) { + server.listen({ port }, function () { + resolve(); + }); + }); + + try { + await fn("http://localhost:" + port); + } finally { + await server.close(); + } +} + test("@arcjet/transport", async function (t) { await t.test("should expose the public api", async function () { assert.deepEqual(Object.keys(await import("../index.js")).sort(), [ @@ -30,17 +100,7 @@ test("@arcjet/transport", async function (t) { const port = uniquePort++; const url = "http://localhost:" + port; - const server = http2.createServer( - connectNodeAdapter({ - routes(router) { - router.service(ElizaService, { - say(request) { - return { sentence: "You said `" + request.sentence + "`" }; - }, - }); - }, - }), - ); + const server = http2.createServer(elizaRoutes()); await new Promise(function (resolve) { server.listen({ port }, function () { @@ -56,51 +116,259 @@ test("@arcjet/transport", async function (t) { assert.equal(result.sentence, "You said `Hi!`"); }); - await t.test("should work over HTTP on Bun", async function () { - const port = uniquePort++; - const url = "http://localhost:" + port; + await t.test( + "should work through `HTTP_PROXY` over HTTP/1.1", + async function () { + const origin = http.createServer(elizaRoutes()); + const originUrl = await listen(origin); - const server = http.createServer( - connectNodeAdapter({ - routes(router) { - router.service(ElizaService, { - say(request) { - return { sentence: "You said `" + request.sentence + "`" }; - }, - }); - }, - }), - ); + let proxyRequests = 0; + const proxy = createProxy(originUrl, () => { + proxyRequests++; + }); + const proxyUrl = await listen(proxy); - await new Promise(function (resolve) { - server.listen({ port }, function () { - resolve(undefined); + try { + await withHttpProxyEnvironment(proxyUrl, async () => { + const 
client = createClient( + ElizaService, + createTransport(originUrl, { log: { info() {} } }), + ); + const result = await client.say({ sentence: "Hi!" }); + assert.equal(result.sentence, "You said `Hi!`"); + }); + } finally { + await close(proxy); + await close(origin); + } + + assert.equal(proxyRequests, 1); + }, + ); + + await t.test( + "should route an HTTPS target through `HTTPS_PROXY` via CONNECT", + async function () { + // The production Arcjet API is HTTPS, which the Node agent reaches by + // sending an HTTP/1.1 CONNECT to the proxy before the TLS handshake — + // unlike the absolute-form forwarding used for HTTP. We verify that + // routing by asserting the proxy receives the CONNECT. We deliberately + // do NOT trust the test origin's self-signed certificate (disabling TLS + // verification is a security anti-pattern), so the handshake over the + // tunnel is expected to fail; the CONNECT is what proves the routing. + const { key, cert } = generateSelfSignedCert(); + const origin = https.createServer({ key, cert }, elizaRoutes()); + const originUrl = await listen(origin, "https"); + const authority = new URL(originUrl).host; + + let connectRequests = 0; + const proxy = createConnectProxy(authority, () => { + connectRequests++; }); - }); + const proxyUrl = await listen(proxy); - const client = createClient(ElizaService, createTransportBun(url)); - const result = await client.say({ sentence: "Hi!" }); + try { + const client = createClient( + ElizaService, + createTransport(originUrl, { + log: { info() {} }, + proxyEnv: { HTTPS_PROXY: proxyUrl }, + }), + ); + // Expected to reject at the TLS handshake (untrusted self-signed cert); + // we only care that it was tunneled through the proxy via CONNECT. + await client.say({ sentence: "Hi!" 
}).catch(() => {}); + } finally { + await close(proxy); + await close(origin); + } - await server.close(); + assert.ok( + connectRequests >= 1, + "expected the HTTPS request to be tunneled through the proxy via CONNECT", + ); + }, + ); - assert.equal(result.sentence, "You said `Hi!`"); + await t.test( + "should preserve HTTP/2 through a proxy when `proxyHttpVersion` is `2`", + async function () { + // A cleartext HTTP/2 (h2c) origin lets us drive a real round trip through + // the tunnel without certificates: the `CONNECT` proxy tunnels TCP and + // the transport speaks HTTP/2 over it end-to-end. + const origin = http2.createServer(elizaRoutes()); + const originUrl = await listen(origin); + const authority = new URL(originUrl).host; + + let connectRequests = 0; + const proxy = createConnectProxy(authority, () => { + connectRequests++; + }); + const proxyUrl = await listen(proxy); + + try { + const client = createClient( + ElizaService, + createTransport(originUrl, { + log: { info() {} }, + proxyEnv: { HTTP_PROXY: proxyUrl }, + proxyHttpVersion: "2", + }), + ); + const result = await client.say({ sentence: "Hi!" }); + assert.equal(result.sentence, "You said `Hi!`"); + } finally { + await close(proxy); + await close(origin); + } + + assert.ok( + connectRequests >= 1, + "expected the request to be tunneled through the proxy via CONNECT", + ); + }, + ); + + await t.test( + "should negotiate HTTP/2 (ALPN `h2`) end-to-end through a CONNECT proxy", + async function () { + // The production API is HTTPS, where HTTP/2 is selected by ALPN during the + // TLS handshake. That handshake happens directly with the origin inside + // the tunnel, so the proxy can't downgrade it. We trust the test origin's + // certificate here (via `ca`) so the handshake completes and we can assert + // the negotiated protocol — exercising the tunnel helper the way the + // transport uses it. 
+ const { key, cert } = generateSelfSignedCert(); + const origin = http2.createSecureServer({ key, cert }); + origin.on("stream", function (stream) { + stream.respond({ ":status": 200 }); + stream.end("ok"); + }); + const originUrl = await listen(origin, "https"); + const authority = new URL(originUrl).host; + + let connectRequests = 0; + const proxy = createConnectProxy(authority, () => { + connectRequests++; + }); + const proxyUrl = await listen(proxy); + + const session = http2.connect(originUrl, { + ca: cert, + createConnection: createTunnelingConnection(proxyUrl), + }); + + try { + await new Promise(function (resolve, reject) { + session.once("connect", () => resolve()); + session.once("error", reject); + }); + + assert.equal( + session.alpnProtocol, + "h2", + "expected HTTP/2 to be negotiated with the origin through the tunnel", + ); + + // A round trip proves the tunnel carries real HTTP/2 frames, not just a + // completed handshake. + const body = await new Promise(function (resolve, reject) { + const request = session.request({ ":path": "/" }); + let data = ""; + request.setEncoding("utf8"); + request.on("data", (chunk) => (data += chunk)); + request.on("end", () => resolve(data)); + request.on("error", reject); + request.end(); + }); + assert.equal(body, "ok"); + } finally { + session.close(); + await close(proxy); + await close(origin); + } + + assert.ok( + connectRequests >= 1, + "expected the request to be tunneled through the proxy via CONNECT", + ); + }, + ); + + await t.test( + "should connect directly over HTTP/2 when `NO_PROXY` matches", + async function () { + const port = uniquePort++; + const url = "http://localhost:" + port; + + const server = http2.createServer(elizaRoutes()); + + await new Promise(function (resolve) { + server.listen({ port }, function () { + resolve(undefined); + }); + }); + + let logged = false; + try { + const client = createClient( + ElizaService, + createTransport(url, { + log: { + info() { + logged = true; + }, + }, + 
proxyEnv: { + HTTP_PROXY: "http://127.0.0.1:1", + NO_PROXY: "localhost", + }, + }), + ); + const result = await client.say({ sentence: "Hi!" }); + assert.equal(result.sentence, "You said `Hi!`"); + } finally { + await server.close(); + } + + // The proxy was bypassed, so nothing should have been logged. + assert.equal(logged, false); + }, + ); + + await t.test("should allow explicit proxy environment", async function () { + const origin = http.createServer(elizaRoutes()); + const originUrl = await listen(origin); + + let proxyRequests = 0; + const proxy = createProxy(originUrl, () => { + proxyRequests++; + }); + const proxyUrl = await listen(proxy); + + try { + const client = createClient( + ElizaService, + createTransport(originUrl, { + log: { info() {} }, + proxyEnv: { HTTP_PROXY: proxyUrl }, + }), + ); + const result = await client.say({ sentence: "Hi!" }); + assert.equal(result.sentence, "You said `Hi!`"); + } finally { + await close(proxy); + await close(origin); + } + + assert.equal(proxyRequests, 1); }); - await t.test("should work over HTTP on Vercel Edge", async function () { + await t.test("should allow disabling proxy environment", async function () { const port = uniquePort++; const url = "http://localhost:" + port; - const server = http.createServer( - connectNodeAdapter({ - routes(router) { - router.service(ElizaService, { - say(request) { - return { sentence: "You said `" + request.sentence + "`" }; - }, - }); - }, - }), - ); + const server = http2.createServer(elizaRoutes()); await new Promise(function (resolve) { server.listen({ port }, function () { @@ -108,11 +376,154 @@ test("@arcjet/transport", async function (t) { }); }); - const client = createClient(ElizaService, createTransportEdge(url)); - const result = await client.say({ sentence: "Hi!" }); + try { + const client = createClient( + ElizaService, + // `proxyEnv: false` ignores the proxy set in the environment. 
+ await withHttpProxyEnvironment("http://127.0.0.1:1", async () => + createTransport(url, { proxyEnv: false }), + ), + ); + const result = await client.say({ sentence: "Hi!" }); + assert.equal(result.sentence, "You said `Hi!`"); + } finally { + await server.close(); + } + }); - await server.close(); + await t.test("should build an HTTPS proxy transport", async function () { + const transport = createTransport("https://decide.arcjet.com", { + log: { info() {} }, + proxyEnv: { HTTPS_PROXY: "http://127.0.0.1:1" }, + }); - assert.equal(result.sentence, "You said `Hi!`"); + assert.equal(typeof transport, "object"); + assert.notEqual(transport, null); + }); + + await t.test("should not log when no proxy is configured", async function () { + assert.equal(loggedProxy("https://decide.arcjet.com", {}), undefined); + }); + + await t.test("should use the default logger", async function () { + // No `log` option, so the default logger (configured from + // `ARCJET_LOG_LEVEL`) is created. We can't easily capture its output, but + // exercising it covers the default branch. 
+ const transport = createTransportBun("https://decide.arcjet.com", { + proxyEnv: { HTTPS_PROXY: "http://127.0.0.1:1" }, + }); + assert.equal(typeof transport, "object"); + }); + + await t.test("should honor `NO_PROXY`", async function () { + const proxy = "http://proxy.example.com:3128"; + + // [NO_PROXY, base URL, expected to be bypassed] + const cases: Array<[string, string, boolean]> = [ + ["*", "http://api.example.com:8080/", true], + ["api.example.com", "http://api.example.com:8080/", true], + ["example.com", "http://api.example.com:8080/", true], + ["other.com", "http://api.example.com:8080/", false], + ["api.example.com:8080", "http://api.example.com:8080/", true], + ["api.example.com:9999", "http://api.example.com:8080/", false], + [".example.com", "http://api.example.com:8080/", true], + ["*.example.com", "http://api.example.com:8080/", true], + [",other.com", "http://api.example.com:8080/", false], + [".", "http://api.example.com:8080/", false], + ["foo:bar", "http://api.example.com:8080/", false], + ["api.example.com:80", "http://api.example.com/", true], + ["api.example.com:443", "https://api.example.com/", true], + // IPv6 hosts, written with or without brackets and with or without a port. + ["::1", "http://[::1]:8080/", true], + ["[::1]", "http://[::1]:8080/", true], + ["[::1]:8080", "http://[::1]:8080/", true], + ["[::1]:9999", "http://[::1]:8080/", false], + ["::1", "http://[::2]:8080/", false], + ]; + + for (const [noProxy, baseUrl, bypassed] of cases) { + const logged = loggedProxy(baseUrl, { + HTTP_PROXY: proxy, + HTTPS_PROXY: proxy, + NO_PROXY: noProxy, + }); + assert.equal( + logged, + bypassed ? undefined : proxyMessage, + `NO_PROXY=${noProxy} for ${baseUrl}`, + ); + } }); + + await t.test( + "should not throw when reading the environment fails", + function () { + // Simulate a runtime that gates environment access behind a permission + // (e.g. Deno without `--allow-env`), where reading a variable throws. 
+ const throwing = new Proxy>( + {}, + { + get() { + throw new Error("permission denied"); + }, + }, + ); + + assert.equal( + loggedProxy("https://decide.arcjet.com", throwing), + undefined, + ); + }, + ); + + await t.test( + "should ignore uppercase `HTTP_PROXY` under CGI (httpoxy)", + function () { + // With `REQUEST_METHOD` set (a CGI environment), uppercase `HTTP_PROXY` — + // which an inbound `Proxy` header can populate — is ignored for HTTP. + assert.equal( + loggedProxy("http://api.example.com/", { + HTTP_PROXY: "http://attacker.example.com:3128", + REQUEST_METHOD: "GET", + }), + undefined, + ); + + // Lowercase `http_proxy` is still honored under CGI. + assert.equal( + loggedProxy("http://api.example.com/", { + http_proxy: "http://proxy.example.com:3128", + REQUEST_METHOD: "GET", + }), + proxyMessage, + ); + + // Without `REQUEST_METHOD`, uppercase `HTTP_PROXY` is honored as usual. + assert.equal( + loggedProxy("http://api.example.com/", { + HTTP_PROXY: "http://proxy.example.com:3128", + }), + proxyMessage, + ); + }, + ); + + // Each web-runtime entry point uses `@connectrpc/connect-web` over HTTP/1.1; + // they differ only in the runtime they target. Exercise each the same way. + const webRuntimes: Array<[string, (url: string) => Transport]> = [ + ["Bun", createTransportBun], + ["Deno", createTransportDeno], + ["Vercel Edge", createTransportEdge], + ["Cloudflare Workers", createTransportWorkerd], + ]; + + for (const [name, create] of webRuntimes) { + await t.test("should work over HTTP on " + name, async function () { + await withHttpOrigin(async function (url) { + const client = createClient(ElizaService, create(url)); + const result = await client.say({ sentence: "Hi!" 
}); + assert.equal(result.sentence, "You said `Hi!`"); + }); + }); + } }); diff --git a/transport/test/proxy.ts b/transport/test/proxy.ts new file mode 100644 index 0000000000..95dec574cf --- /dev/null +++ b/transport/test/proxy.ts @@ -0,0 +1,306 @@ +import assert from "node:assert/strict"; +import { execFileSync } from "node:child_process"; +import { mkdtempSync, readFileSync } from "node:fs"; +import http from "node:http"; +import net from "node:net"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +/** + * Standard proxy environment variables that we save and restore around tests + * so they cannot leak between cases or from the host environment. + */ +const proxyEnvironmentKeys = [ + "HTTP_PROXY", + "http_proxy", + "HTTPS_PROXY", + "https_proxy", + "NO_PROXY", + "no_proxy", +]; + +/** + * Open tunnel sockets per `CONNECT` proxy. + * + * A `net.Server` has no `closeAllConnections()`, and a keep-alive agent holds + * the tunnel open, so we track the accepted sockets here and destroy them in + * `close()` to let the server shut down. + */ +const tunnelSockets = new WeakMap>(); + +/** + * Start listening on a random port on the loopback interface. + * + * @param server + * Server to listen with. + * @param protocol + * URL scheme to build the returned base URL with (defaults to `http`). Pass + * `https` for a TLS server. + * @returns + * Base URL the server is listening on. + */ +export async function listen( + server: net.Server, + protocol: "http" | "https" = "http", +): Promise { + await new Promise((resolve) => { + server.listen(0, "127.0.0.1", resolve); + }); + + const address = server.address(); + assert.notEqual(address, null); + assert.notEqual(typeof address, "string"); + + return `${protocol}://127.0.0.1:${(address as { port: number }).port}`; +} + +/** + * Close a server. + * + * @param server + * Server to close. + * @returns + * Promise that resolves once the server is closed. 
+ */ +export async function close(server: net.Server): Promise { + await new Promise((resolve, reject) => { + server.close((error) => { + if (error) { + reject(error); + } else { + resolve(); + } + }); + + // A keep-alive agent (and an open tunnel) holds connections open, so + // `close()` would otherwise wait forever. Force them shut: HTTP(S) servers + // expose `closeAllConnections()`, while `CONNECT` proxies are `net.Server`s + // whose accepted sockets we tracked above (destroying one tears down its + // tunnel, which the upstream side follows via its `close` handler). + const httpServer = server as net.Server & { + closeAllConnections?: () => void; + }; + httpServer.closeAllConnections?.(); + const sockets = tunnelSockets.get(server); + if (sockets) { + for (const socket of sockets) { + socket.destroy(); + } + } + }); +} + +/** + * Create a forwarding HTTP proxy. + * + * The proxy asserts that the absolute-form request URI it receives targets the + * expected origin, then forwards the request and pipes the response back. + * + * @param expectedOrigin + * Origin the proxy expects to forward to. + * @param onRequest + * Called for every request the proxy receives. + * @returns + * Proxy server. + */ +export function createProxy( + expectedOrigin: string, + onRequest: () => void, +): http.Server { + return http.createServer((incoming, outgoing) => { + onRequest(); + + assert.ok(incoming.url); + const requested = new URL(incoming.url); + assert.equal(requested.origin, expectedOrigin); + + // Build the forwarded URL from the trusted `expectedOrigin` rather than the + // incoming request, so the request target's host can't be influenced by the + // (asserted, but still externally provided) request URL. + const target = new URL( + requested.pathname + requested.search, + expectedOrigin, + ); + + const forwarded = http.request( + target, + { + headers: incoming.headers, + method: incoming.method, + }, + (response) => { + outgoing.writeHead(response.statusCode ?? 
500, response.headers); + response.pipe(outgoing); + }, + ); + + forwarded.on("error", (error) => { + outgoing.destroy(error); + }); + + incoming.pipe(forwarded); + }); +} + +/** + * Run a function with a clean proxy environment. + * + * Saves and clears all standard proxy environment variables, sets `HTTP_PROXY` + * (and optionally `NO_PROXY`), then restores the previous values afterwards. + * + * @param proxyUrl + * Value to use for `HTTP_PROXY`. + * @param fn + * Function to run. + * @param noProxy + * Optional value to use for `NO_PROXY`. + * @returns + * Result of `fn`. + */ +export async function withHttpProxyEnvironment( + proxyUrl: string, + fn: () => Promise, + noProxy?: string, +): Promise { + const previous = new Map(); + for (const key of proxyEnvironmentKeys) { + const value = process.env[key]; + if (typeof value === "string") { + previous.set(key, value); + } + + delete process.env[key]; + } + + process.env.HTTP_PROXY = proxyUrl; + if (typeof noProxy === "string") { + process.env.NO_PROXY = noProxy; + } + + try { + return await fn(); + } finally { + for (const key of proxyEnvironmentKeys) { + delete process.env[key]; + } + + for (const [key, value] of previous) { + process.env[key] = value; + } + } +} + +/** + * Create a tunneling proxy that handles the `CONNECT` method. + * + * This is how a proxy handles HTTPS targets: the client sends + * `CONNECT host:port`, the proxy opens a raw TCP tunnel to the origin and pipes + * bytes through without terminating TLS. The proxy asserts that the requested + * authority matches the expected origin before tunneling. + * + * Implemented with `node:net` rather than `http.createServer().on("connect")` + * because some runtimes' Node compatibility layers (e.g. older Deno) don't emit + * the `connect` event, whereas a raw TCP server works everywhere. + * + * @param expectedAuthority + * `host:port` the proxy expects to tunnel to. + * @param onConnect + * Called for every `CONNECT` request the proxy receives. 
+ * @returns + * Proxy server. + */ +export function createConnectProxy( + expectedAuthority: string, + onConnect: () => void, +): net.Server { + const separator = expectedAuthority.lastIndexOf(":"); + const host = expectedAuthority.slice(0, separator); + const port = Number(expectedAuthority.slice(separator + 1)); + + const sockets = new Set(); + const proxy = net.createServer((client) => { + sockets.add(client); + client.on("close", () => sockets.delete(client)); + client.on("error", () => {}); + + let request = ""; + function onData(chunk: Buffer) { + request += chunk.toString("utf8"); + const lineEnd = request.indexOf("\r\n"); + // Wait until the full CONNECT request line has arrived. A client only + // sends tunnel (TLS) bytes after the `200`, so nothing is lost here. + if (lineEnd === -1) { + return; + } + client.off("data", onData); + + onConnect(); + const match = /^CONNECT (\S+) HTTP\/1\.1$/.exec(request.slice(0, lineEnd)); + assert.ok(match, "expected a CONNECT request"); + // Tunnel to the trusted `expectedAuthority`, not the (asserted, but + // externally provided) request target. + assert.equal(match[1], expectedAuthority); + + const upstream = net.connect(port, host, () => { + client.write("HTTP/1.1 200 Connection Established\r\n\r\n"); + upstream.pipe(client); + client.pipe(upstream); + }); + upstream.on("error", () => client.destroy()); + // When the client side is torn down (e.g. on `close()`), drop the + // upstream connection too so the origin server can close cleanly. + client.on("close", () => upstream.destroy()); + } + + client.on("data", onData); + }); + + tunnelSockets.set(proxy, sockets); + return proxy; +} + +/** + * Generate a throwaway self-signed certificate for `127.0.0.1`. + * + * Used to stand up a real HTTPS origin so the HTTPS-through-proxy (`CONNECT`) + * path can be exercised. 
The client deliberately doesn't trust this + * certificate — `createTransport`'s agent exposes no `ca` option and disabling + * TLS verification is a security anti-pattern — so the handshake over the + * tunnel is expected to fail; the test verifies routing via the proxy + * receiving the `CONNECT`. + * + * @returns + * PEM-encoded private key and certificate. + */ +export function generateSelfSignedCert(): { key: string; cert: string } { + const directory = mkdtempSync(join(tmpdir(), "arcjet-transport-cert-")); + const keyFile = join(directory, "key.pem"); + const certFile = join(directory, "cert.pem"); + + execFileSync( + "openssl", + [ + "req", + "-x509", + "-newkey", + "rsa:2048", + "-nodes", + "-keyout", + keyFile, + "-out", + certFile, + "-days", + "1", + "-subj", + "/CN=127.0.0.1", + "-addext", + "subjectAltName=IP:127.0.0.1", + ], + { stdio: "ignore" }, + ); + + return { + key: readFileSync(keyFile, "utf8"), + cert: readFileSync(certFile, "utf8"), + }; +} diff --git a/transport/test/runtime/fixture.ts b/transport/test/runtime/fixture.ts new file mode 100644 index 0000000000..05a44b7c05 --- /dev/null +++ b/transport/test/runtime/fixture.ts @@ -0,0 +1,95 @@ +// Shared setup for the Bun and Deno runtime proxy tests. +// +// These tests run on the real Bun and Deno runtimes (not under Node), where the +// `bun.js`/`deno.js` entry points delegate proxying to the runtime's native +// `fetch`. The Node test suite imports those entry points under Node, so it +// can't verify that the native `fetch` actually honors the proxy environment +// variables — that is what these tests cover. +// +// The fixture stands up an HTTPS Eliza origin reachable only through a +// `CONNECT` proxy, points `HTTPS_PROXY` at that proxy, and lets the runtime's +// `fetch` do the tunneling. The production Arcjet API is HTTPS, so this +// exercises the `CONNECT` path rather than plaintext-HTTP forwarding. 
+import https from "node:https"; +import { connectNodeAdapter } from "@connectrpc/connect-node"; +import { ElizaService } from "../eliza_pb.js"; +import { + close, + createConnectProxy, + generateSelfSignedCert, + listen, +} from "../proxy.js"; + +/** + * A running proxy + origin pair for a single runtime proxy test. + */ +export interface ProxyFixture { + /** Base URL of the HTTPS origin requests should be made to. */ + originUrl: string; + /** Number of `CONNECT` requests the proxy has received. */ + connectCount(): number; + /** Tear down the proxy and origin and restore the environment. */ + close(): Promise; +} + +function elizaAdapter() { + return connectNodeAdapter({ + routes(router) { + router.service(ElizaService, { + say(request) { + return { sentence: "You said `" + request.sentence + "`" }; + }, + }); + }, + }); +} + +/** + * Start an HTTPS Eliza origin reachable only through a `CONNECT` proxy listening + * on the port from `HTTPS_PROXY`, so the runtime's native `fetch` tunnels + * through it. + * + * `HTTPS_PROXY` must be set by the `test-runtime-*` npm script *before the + * process starts* — that mirrors how a proxy is configured in production (a + * plain environment variable), and it's required because Bun and older Deno + * only read the proxy environment at startup, not when `fetch` is called. + * + * @returns + * The running fixture. 
+ */ +export async function startProxyFixture(): Promise { + const configuredProxy = process.env.HTTPS_PROXY; + if (!configuredProxy) { + throw new Error( + "HTTPS_PROXY must be set by the test-runtime-* npm script for this test", + ); + } + const proxyPort = Number(new URL(configuredProxy).port); + + const { key, cert } = generateSelfSignedCert(); + + const origin = https.createServer({ key, cert }, elizaAdapter()); + const originUrl = await listen(origin, "https"); + const authority = new URL(originUrl).host; + + // We don't trust the origin's self-signed certificate (disabling TLS + // verification is a security anti-pattern), so the handshake over the tunnel + // is expected to fail; the test only checks that the request was routed + // through the proxy via CONNECT. + let connectRequests = 0; + const proxy = createConnectProxy(authority, () => { + connectRequests++; + }); + await new Promise((resolve) => { + proxy.listen(proxyPort, "127.0.0.1", () => resolve()); + }); + + return { + originUrl, + connectCount: () => connectRequests, + close: async () => { + await close(proxy); + await close(origin); + }, + }; +} diff --git a/transport/test/runtime/proxy.bun.test.ts b/transport/test/runtime/proxy.bun.test.ts new file mode 100644 index 0000000000..c6a3b48320 --- /dev/null +++ b/transport/test/runtime/proxy.bun.test.ts @@ -0,0 +1,31 @@ +// Runtime proxy test: Bun. +// +// Verifies that on the real Bun runtime, a transport built from the `bun.js` +// entry point routes requests through `HTTPS_PROXY` using Bun's native `fetch` +// proxy support. The Node suite can only import `bun.js` under Node, so this is +// the only place the actual Bun proxying is exercised. 
+// +// Run: bun test test/runtime/proxy.bun.test.ts +import { expect, test } from "bun:test"; +import { createClient } from "@connectrpc/connect"; +import { createTransport } from "../../bun.js"; +import { ElizaService } from "../eliza_pb.js"; +import { startProxyFixture } from "./fixture.ts"; + +test("routes through `HTTPS_PROXY` via Bun's native fetch", async () => { + const fixture = await startProxyFixture(); + + try { + const client = createClient( + ElizaService, + createTransport(fixture.originUrl), + ); + // Expected to reject at the TLS handshake (untrusted self-signed cert); we + // only care that it was tunneled through the proxy via CONNECT. + await client.say({ sentence: "Hi!" }).catch(() => {}); + + expect(fixture.connectCount()).toBeGreaterThanOrEqual(1); + } finally { + await fixture.close(); + } +}); diff --git a/transport/test/runtime/proxy.deno.test.ts b/transport/test/runtime/proxy.deno.test.ts new file mode 100644 index 0000000000..f394312a36 --- /dev/null +++ b/transport/test/runtime/proxy.deno.test.ts @@ -0,0 +1,32 @@ +// Runtime proxy test: Deno. +// +// Verifies that on the real Deno runtime, a transport built from the `deno.js` +// entry point routes requests through `HTTPS_PROXY` using Deno's native `fetch` +// proxy support. The Node suite can only import `deno.js` under Node, so this is +// the only place the actual Deno proxying is exercised. 
+// +// Run: deno test --allow-net --allow-env --allow-read --allow-write --allow-run \ +// --no-check test/runtime/proxy.deno.test.ts +import assert from "node:assert/strict"; +import { createClient } from "@connectrpc/connect"; +import { createTransport } from "../../deno.js"; +import { ElizaService } from "../eliza_pb.js"; +import { startProxyFixture } from "./fixture.ts"; + +Deno.test("routes through `HTTPS_PROXY` via Deno's native fetch", async () => { + const fixture = await startProxyFixture(); + + try { + const client = createClient( + ElizaService, + createTransport(fixture.originUrl), + ); + // Expected to reject at the TLS handshake (untrusted self-signed cert); we + // only care that it was tunneled through the proxy via CONNECT. + await client.say({ sentence: "Hi!" }).catch(() => {}); + + assert.ok(fixture.connectCount() >= 1); + } finally { + await fixture.close(); + } +}); diff --git a/transport/tsconfig.json b/transport/tsconfig.json index 4eb37fee05..485699795a 100644 --- a/transport/tsconfig.json +++ b/transport/tsconfig.json @@ -1,3 +1,7 @@ { - "extends": "../tsconfig.base.json" + "extends": "../tsconfig.base.json", + // The Bun and Deno runtime tests use runtime-specific globals and module + // specifiers (`bun:test`, `Deno`) that aren't part of the Node build, so they + // are excluded from type-checking here and run directly on their runtimes. 
+ "exclude": ["node_modules/", "test/runtime/**"] } diff --git a/transport/workerd.ts b/transport/workerd.ts index 16221bff73..1724493931 100644 --- a/transport/workerd.ts +++ b/transport/workerd.ts @@ -12,9 +12,24 @@ // * // * // * +import type { Transport } from "@connectrpc/connect"; import { createConnectTransport } from "@connectrpc/connect-web"; -export function createTransport(baseUrl: string) { +export type { + ProxyEnvironment, + TransportLogger, + TransportOptions, +} from "./detect-proxy.js"; + +import type { TransportOptions } from "./detect-proxy.js"; + +export function createTransport( + baseUrl: string, + // These edge runtimes don't support outbound proxy environment variables, so + // the options are accepted for API parity with the other entry points but no + // proxy is detected or used. + _options?: TransportOptions, +): Transport { return createConnectTransport({ baseUrl, fetch: fetchProxy, diff --git a/turbo.json b/turbo.json index 437f516158..948b988fe8 100644 --- a/turbo.json +++ b/turbo.json @@ -10,6 +10,12 @@ "ARCJET_KEY", "ARCJET_BASE_URL", "ARCJET_LOG_LEVEL", + "HTTP_PROXY", + "http_proxy", + "HTTPS_PROXY", + "https_proxy", + "NO_PROXY", + "no_proxy", "OPENAI_API_KEY", "FIREBASE_CONFIG", "FLY_APP_NAME",