diff --git a/packages/debugger/src/domain/api.spec.ts b/packages/debugger/src/domain/api.spec.ts index 2dcb5ac21c..90dff6f125 100644 --- a/packages/debugger/src/domain/api.spec.ts +++ b/packages/debugger/src/domain/api.spec.ts @@ -567,6 +567,226 @@ describe('api', () => { }) }) + describe('probe lifetime budgets', () => { + it('should stop sending snapshot events after maxSnapshotsPerProbeLifetime', () => { + initTransport({ maxSnapshotsPerProbeLifetime: 1 }) + + const probe: Probe = { + id: 'snapshot-lifetime-probe', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'snapshotLifetime' }, + template: 'Test', + captureSnapshot: true, + capture: { maxReferenceDepth: 1 }, + // Disable per-probe rate limiting so the second invocation exercises the + // lifetime cap rather than the per-second cap. + sampling: { snapshotsPerSecond: Infinity }, + evaluateAt: 'ENTRY', + } + addProbe(probe) + + // First invocation: probe sends its single allowed event. + const probes = getProbes('TestClass;snapshotLifetime')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + + // Second invocation: the lifetime budget is now exhausted. No new event should + // be queued, and the probe should be auto-unregistered. + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + expect(getProbes('TestClass;snapshotLifetime')).toBeUndefined() + }) + + it('should skip snapshot collection once the lifetime budget is exhausted', () => { + initTransport({ maxSnapshotsPerProbeLifetime: 1 }) + + const getterSpy = jasmine.createSpy('argGetter').and.returnValue('value') + const args = {} + Object.defineProperty(args, 'arg', { + enumerable: true, + get: getterSpy, + }) + const probe: Probe = { + id: 'snapshot-lifetime-collection-probe', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'snapshotLifetimeCollection' }, + template: 'Test', + captureSnapshot: true, + capture: { maxReferenceDepth: 1 }, + // Disable per-probe rate limiting so the second invocation isn't sampled out + // by it — we want to exercise the lifetime cap, not the rate cap. + sampling: { snapshotsPerSecond: Infinity }, + evaluateAt: 'ENTRY', + } + addProbe(probe) + + // First invocation does the full pipeline: 2 reads from entry capture + // (context spread + captureFields) + 1 read from return capture = 3 reads. + // This exhausts the lifetime budget. + const probes = getProbes('TestClass;snapshotLifetimeCollection')! + onEntry(probes, {}, args) + onReturn(probes, null, {}, args, {}) + + // Second invocation: both onEntry and onReturn detect the exhausted budget + // up front and skip all capture work — no further reads from args. + onEntry(probes, {}, args) + onReturn(probes, null, {}, args, {}) + + expect(getterSpy).toHaveBeenCalledTimes(3) + }) + + it('should stop sending non-snapshot events after maxNonSnapshotsPerProbeLifetime', () => { + initTransport({ maxNonSnapshotsPerProbeLifetime: 1 }) + + const probe: Probe = { + id: 'non-snapshot-lifetime-probe', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'nonSnapshotLifetime' }, + template: 'Test', + captureSnapshot: false, + capture: {}, + // Disable per-probe rate limiting so the second invocation exercises the + // lifetime cap rather than the per-second cap. + sampling: { snapshotsPerSecond: Infinity }, + evaluateAt: 'ENTRY', + } + addProbe(probe) + + // First invocation: probe sends its single allowed event. + const probes = getProbes('TestClass;nonSnapshotLifetime')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + + // Second invocation: the lifetime budget is now exhausted. No new event should + // be queued, and the probe should be auto-unregistered. + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + expect(getProbes('TestClass;nonSnapshotLifetime')).toBeUndefined() + }) + + it('should reset the lifetime budget when a new probe version is delivered', () => { + initTransport({ maxSnapshotsPerProbeLifetime: 1 }) + + const probe: Probe = { + id: 'versioned-lifetime-probe', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'versionedLifetime' }, + template: 'Test', + captureSnapshot: true, + capture: { maxReferenceDepth: 1 }, + sampling: { snapshotsPerSecond: 5000 }, + evaluateAt: 'ENTRY', + } + addProbe(probe) + + let probes = getProbes('TestClass;versionedLifetime')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + + // A Remote Config delivery for an existing probe id replaces the old probe with + // the new version. After re-add, the new version should have a fresh budget. + removeProbe(probe.id) + addProbe({ ...probe, version: 1 }) + + probes = getProbes('TestClass;versionedLifetime')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(2) + }) + + it('should not emit any event when the lifetime budget is zero', () => { + initTransport({ maxSnapshotsPerProbeLifetime: 0 }) + + const probe: Probe = { + id: 'zero-budget-probe', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'zeroBudget' }, + template: 'Test', + captureSnapshot: true, + capture: { maxReferenceDepth: 1 }, + sampling: { snapshotsPerSecond: 5000 }, + evaluateAt: 'ENTRY', + } + addProbe(probe) + + const probes = getProbes('TestClass;zeroBudget')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + + expect(mockBatchAdd).not.toHaveBeenCalled() + expect(getProbes('TestClass;zeroBudget')).toBeUndefined() + }) + + it('should still process sibling probes when one is removed mid-iteration', () => { + // Use distinct snapshot/non-snapshot lifetime caps so probeA hits its cap after + // one event while probeB still has plenty of budget. On the second invocation, + // probeA's pre-call budget check fails and it gets removed from the shared + // probes array. This exposes the array mutation hazard: removing probeA + // mid-iteration must not cause probeB to be skipped. + initTransport({ maxSnapshotsPerProbeLifetime: 1, maxNonSnapshotsPerProbeLifetime: 1000 }) + + // Disable per-probe rate limiting on both probes so the second invocation + // exercises the lifetime cap rather than the per-second cap. + const probeA: Probe = { + id: 'sibling-probe-a', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'sibling' }, + template: 'A', + captureSnapshot: true, + capture: { maxReferenceDepth: 1 }, + sampling: { snapshotsPerSecond: Infinity }, + evaluateAt: 'ENTRY', + } + const probeB: Probe = { + id: 'sibling-probe-b', + version: 0, + type: 'LOG_PROBE', + where: { typeName: 'TestClass', methodName: 'sibling' }, + template: 'B', + captureSnapshot: false, + capture: {}, + sampling: { snapshotsPerSecond: Infinity }, + evaluateAt: 'ENTRY', + } + addProbe(probeA) + addProbe(probeB) + + // First invocation: both probes emit one event. probeA hits its cap (eventsSent=1, + // max=1) but is not removed yet — the pre-call budget check still passed. + const probes = getProbes('TestClass;sibling')! + onEntry(probes, {}, {}) + onReturn(probes, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(2) + + // Second invocation: probeA's pre-call check now fails and it is queued for + // removal. probeB must still be processed in the same iteration even though + // probeA gets spliced out of the probes array. + mockBatchAdd.calls.reset() + const probesAfterFirst = getProbes('TestClass;sibling')! + onEntry(probesAfterFirst, {}, {}) + onReturn(probesAfterFirst, null, {}, {}, {}) + expect(mockBatchAdd).toHaveBeenCalledTimes(1) + expect(getProbes('TestClass;sibling')).toEqual([jasmine.objectContaining({ id: 'sibling-probe-b' })]) + + // probeB's stack entry must not leak: a third onReturn without onEntry is a no-op. + mockBatchAdd.calls.reset() + const remainingProbes = getProbes('TestClass;sibling')! + onReturn(remainingProbes, null, {}, {}, {}) + expect(mockBatchAdd).not.toHaveBeenCalled() + }) + }) + describe('active entries cleanup', () => { function createProbe(id: string, methodName: string): Probe { return { diff --git a/packages/debugger/src/domain/api.ts b/packages/debugger/src/domain/api.ts index cb5ad3f107..08e82733d8 100644 --- a/packages/debugger/src/domain/api.ts +++ b/packages/debugger/src/domain/api.ts @@ -5,7 +5,13 @@ import type { BrowserWindow, DebuggerInitConfiguration } from '../entries/main' import { capture, captureFields } from './capture' import type { CaptureContext } from './capture' import type { InitializedProbe } from './probes' -import { checkGlobalSnapshotBudget, resetProbeBudgetConfiguration, setProbeBudgetConfiguration } from './probes' +import { + checkGlobalSnapshotBudget, + hasProbeLifetimeBudgetRemaining, + removeProbe, + resetProbeBudgetConfiguration, + setProbeBudgetConfiguration, +} from './probes' import type { ActiveEntry } from './activeEntries' import { active } from './activeEntries' import { captureStackTrace, parseStackTrace } from './stacktrace' @@ -50,6 +56,10 @@ export function onEntry(probes: InitializedProbe[], self: any, args: Record): void { const end = performance.now() const captureCtx: CaptureContext = { deadline: performance.now() + SNAPSHOT_TIMEOUT_MS, timedOut: false } + let exhaustedProbeIds: string[] | undefined // TODO: A lot of repeated work performed for each probe that could be shared between probes for (const probe of probes) { + if (!hasProbeLifetimeBudgetRemaining(probe)) { + ;(exhaustedProbeIds ??= []).push(probe.id) + continue + } + const stack = active.get(probe.id) // TODO: Should we use the functionId instead? if (!stack) { continue // TODO: This shouldn't be possible, do we need it? Should we warn? @@ -252,17 +280,23 @@ export function onThrow(probes: InitializedProbe[], error: Error, self: any, arg }, } - sendDebuggerSnapshot(probe, result) + queueDebuggerSnapshot(probe, result) + } + + if (exhaustedProbeIds) { + for (const id of exhaustedProbeIds) { + removeProbe(id) + } } } /** - * Send a debugger snapshot to Datadog via the debugger's own transport. + * Queue a debugger snapshot for delivery via the debugger's own transport. * * @param probe - The probe that was executed * @param result - The result of the probe execution */ -function sendDebuggerSnapshot(probe: InitializedProbe, result: ActiveEntry): void { +function queueDebuggerSnapshot(probe: InitializedProbe, result: ActiveEntry): void { if (!debuggerBatch || !debuggerConfig) { display.warn('Debugger transport is not initialized. Make sure DD_DEBUGGER.init() has been called.') return @@ -310,6 +344,7 @@ function sendDebuggerSnapshot(probe: InitializedProbe, result: ActiveEntry): voi } debuggerBatch.add(payload) + probe.eventsSentInLifetime++ } function getDebuggerDDtags(debuggerVersion: string): string { diff --git a/packages/debugger/src/domain/probes.ts b/packages/debugger/src/domain/probes.ts index 99dad81228..feb9da42c1 100644 --- a/packages/debugger/src/domain/probes.ts +++ b/packages/debugger/src/domain/probes.ts @@ -11,6 +11,8 @@ import type { CaptureOptions } from './capture' const DEFAULT_MAX_SNAPSHOTS_PER_SECOND_GLOBALLY = 25 const DEFAULT_MAX_SNAPSHOTS_PER_SECOND_PER_PROBE = 1 const DEFAULT_MAX_NON_SNAPSHOTS_PER_SECOND_PER_PROBE = 5000 +const DEFAULT_MAX_SNAPSHOTS_PER_PROBE_LIFETIME = 1000 +const DEFAULT_MAX_NON_SNAPSHOTS_PER_PROBE_LIFETIME = 50000 // Global snapshot rate limiting let globalSnapshotSamplingRateWindowStart = 0 @@ -36,6 +38,8 @@ export interface ProbeBudgetConfiguration { maxSnapshotsPerSecondGlobally?: number maxSnapshotsPerSecondPerProbe?: number maxNonSnapshotsPerSecondPerProbe?: number + maxSnapshotsPerProbeLifetime?: number + maxNonSnapshotsPerProbeLifetime?: number } export interface Probe { @@ -63,6 +67,8 @@ export interface InitializedProbe extends Probe { condition?: CompiledCondition msBetweenSampling: number lastCaptureMs: number + eventsSentInLifetime: number + lifetimeBudgetWarningEmitted: boolean } // Pre-populate with a placeholder key to help V8 optimize property lookups. @@ -80,6 +86,8 @@ let currentProbeBudgetConfiguration: Required = { maxSnapshotsPerSecondGlobally: DEFAULT_MAX_SNAPSHOTS_PER_SECOND_GLOBALLY, maxSnapshotsPerSecondPerProbe: DEFAULT_MAX_SNAPSHOTS_PER_SECOND_PER_PROBE, maxNonSnapshotsPerSecondPerProbe: DEFAULT_MAX_NON_SNAPSHOTS_PER_SECOND_PER_PROBE, + maxSnapshotsPerProbeLifetime: DEFAULT_MAX_SNAPSHOTS_PER_PROBE_LIFETIME, + maxNonSnapshotsPerProbeLifetime: DEFAULT_MAX_NON_SNAPSHOTS_PER_PROBE_LIFETIME, } export function setProbeBudgetConfiguration(configuration: ProbeBudgetConfiguration = {}): void { @@ -96,6 +104,14 @@ export function setProbeBudgetConfiguration(configuration: ProbeBudgetConfigurat configuration.maxNonSnapshotsPerSecondPerProbe, DEFAULT_MAX_NON_SNAPSHOTS_PER_SECOND_PER_PROBE ), + maxSnapshotsPerProbeLifetime: normalizeProbeLifetimeLimit( + configuration.maxSnapshotsPerProbeLifetime, + DEFAULT_MAX_SNAPSHOTS_PER_PROBE_LIFETIME + ), + maxNonSnapshotsPerProbeLifetime: normalizeProbeLifetimeLimit( + configuration.maxNonSnapshotsPerProbeLifetime, + DEFAULT_MAX_NON_SNAPSHOTS_PER_PROBE_LIFETIME + ), } } @@ -169,8 +185,12 @@ export function removeProbe(id: string): void { } probes.splice(i, 1) // TODO: Gracefully drain in-flight entries instead of clearing them immediately. - // Deleting a probe can currently race with return/throw handling, whether removal - // comes from delivery updates or budget-based auto-unregistering. + // Deleting a probe can currently race with reentrant onEntry/onReturn invocations + // for the same probe — both delivery-driven removal and budget-based auto-unregistering + // can wipe the active stack while a deeper invocation is still in progress. + // A common concrete trigger is instrumenting a recursive function: if the inner + // frame is the one that exhausts the lifetime budget, the outer frame's pending + // entry is dropped and its snapshot is lost. clearActiveEntries(id) break } @@ -243,6 +263,26 @@ export function checkGlobalSnapshotBudget(now: number, captureSnapshot: boolean) return true } +export function hasProbeLifetimeBudgetRemaining(probe: InitializedProbe): boolean { + if (isProbeLifetimeBudgetExhausted(probe)) { + if (!probe.lifetimeBudgetWarningEmitted) { + probe.lifetimeBudgetWarningEmitted = true + display.warn( + `Debugger: Probe ${probe.id} version ${probe.version} reached max ${ + probe.captureSnapshot ? 'snapshot' : 'non-snapshot' + } events per lifetime: ${getMaxProbeLifetimeEvents(probe)}` + ) + } + return false + } + + return true +} + +export function isProbeLifetimeBudgetExhausted(probe: InitializedProbe): boolean { + return probe.eventsSentInLifetime >= getMaxProbeLifetimeEvents(probe) +} + /** * Initialize a probe by preprocessing template segments, conditions, and sampling * @@ -306,8 +346,20 @@ export function initializeProbe(probe: Probe): asserts probe is InitializedProbe : currentProbeBudgetConfiguration.maxNonSnapshotsPerSecondPerProbe) ;(probe as InitializedProbe).msBetweenSampling = (1 / snapshotsPerSecond) * 1000 // Convert to milliseconds ;(probe as InitializedProbe).lastCaptureMs = -Infinity // Initialize to -Infinity to allow first call + ;(probe as InitializedProbe).eventsSentInLifetime = 0 + ;(probe as InitializedProbe).lifetimeBudgetWarningEmitted = false +} + +function normalizeProbeLifetimeLimit(limit: number | undefined, defaultLimit: number): number { + return typeof limit === 'number' && Number.isFinite(limit) && limit >= 0 ? limit : defaultLimit } function normalizeProbeBudgetRate(rate: number | undefined, defaultRate: number): number { return typeof rate === 'number' && Number.isFinite(rate) && rate > 0 ? rate : defaultRate } + +function getMaxProbeLifetimeEvents(probe: InitializedProbe): number { + return probe.captureSnapshot + ? currentProbeBudgetConfiguration.maxSnapshotsPerProbeLifetime + : currentProbeBudgetConfiguration.maxNonSnapshotsPerProbeLifetime +} diff --git a/packages/debugger/src/entries/main.ts b/packages/debugger/src/entries/main.ts index dac1e80de1..7cd87f9951 100644 --- a/packages/debugger/src/entries/main.ts +++ b/packages/debugger/src/entries/main.ts @@ -92,6 +92,22 @@ export interface DebuggerInitConfiguration { */ maxNonSnapshotsPerSecondPerProbe?: number + /** + * Maximum number of snapshot events a single probe version can send during the page lifetime + * + * @category Data Collection + * @defaultValue 1000 + */ + maxSnapshotsPerProbeLifetime?: number + + /** + * Maximum number of non-snapshot events a single probe version can send during the page lifetime + * + * @category Data Collection + * @defaultValue 50000 + */ + maxNonSnapshotsPerProbeLifetime?: number + /** * A proxy URL for routing SDK requests. When set, delivery API requests are * sent to `{proxy}/api/unstable/debugger/frontend/probes` instead of the