Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions charts/lfx-v2-query-service/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ spec:
- name: OTEL_TRACES_SAMPLE_RATIO
value: {{ $otelTracesSampleRatio | quote }}
{{- end }}
{{- $otelTracesSampler := .Values.app.otel.tracesSampler | toString | trim }}
{{- if ne $otelTracesSampler "" }}
- name: OTEL_TRACES_SAMPLER
value: {{ $otelTracesSampler | quote }}
{{- if ne $otelTracesSampleRatio "" }}
- name: OTEL_TRACES_SAMPLER_ARG
value: {{ $otelTracesSampleRatio | quote }}
{{- end }}
{{- end }}
{{- $otelMetricsExporter := .Values.app.otel.metricsExporter | toString | trim }}
{{- if ne $otelMetricsExporter "" }}
- name: OTEL_METRICS_EXPORTER
Expand Down
1 change: 1 addition & 0 deletions charts/lfx-v2-query-service/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ app:
insecure: "false"
tracesExporter: "none"
tracesSampleRatio: "1.0"
tracesSampler: ""
metricsExporter: "none"
logsExporter: "none"
propagators: "tracecontext,baggage,jaeger"
Expand Down
39 changes: 38 additions & 1 deletion pkg/utils/otel.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,43 @@ func endpointURL(raw string, insecure bool) string {
return "https://" + raw
}

// newSampler creates a trace.Sampler from OTEL_TRACES_SAMPLER and
// OTEL_TRACES_SAMPLER_ARG environment variables, falling back to
// parentbased_traceidratio with cfg.TracesSampleRatio when unset.
// This ensures parent span sampling decisions are always honored.
func newSampler(cfg OTelConfig) trace.Sampler {
sampler := os.Getenv("OTEL_TRACES_SAMPLER")
arg := os.Getenv("OTEL_TRACES_SAMPLER_ARG")

parseRatio := func() float64 {
if arg != "" {
r, err := strconv.ParseFloat(arg, 64)
if err == nil && r >= 0.0 && r <= 1.0 {
return r
}
slog.Warn("invalid OTEL_TRACES_SAMPLER_ARG, using TracesSampleRatio", "value", arg)
}
return cfg.TracesSampleRatio
}

switch sampler {
case "always_on":
return trace.AlwaysSample()
case "always_off":
return trace.NeverSample()
case "traceidratio":
return trace.TraceIDRatioBased(parseRatio())
case "parentbased_always_on":
return trace.ParentBased(trace.AlwaysSample())
case "parentbased_always_off":
return trace.ParentBased(trace.NeverSample())
case "parentbased_traceidratio":
return trace.ParentBased(trace.TraceIDRatioBased(parseRatio()))
default: // empty/unknown → parent-based with configured ratio
return trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))
}
Comment on lines +333 to +335
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Warn when OTEL_TRACES_SAMPLER is unknown instead of silently defaulting.

Right now unknown values and empty values share the same fallback path, which can mask misconfiguration in production. Add a warning for non-empty unknown values before falling back.

Suggested change
 	default: // empty/unknown → parent-based with configured ratio
+		if sampler != "" {
+			slog.Warn("unknown OTEL_TRACES_SAMPLER, falling back to parentbased_traceidratio",
+				"value", sampler,
+				"fallback-ratio", cfg.TracesSampleRatio,
+			)
+		}
 		return trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
default: // empty/unknown → parent-based with configured ratio
return trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))
}
default: // empty/unknown → parent-based with configured ratio
if sampler != "" {
slog.Warn("unknown OTEL_TRACES_SAMPLER, falling back to parentbased_traceidratio",
"value", sampler,
"fallback-ratio", cfg.TracesSampleRatio,
)
}
return trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/utils/otel.go` around lines 333 - 335, The switch's default branch
silently falls back to
trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio)) when
OTEL_TRACES_SAMPLER is empty or unknown; change this so that if the raw
OTEL_TRACES_SAMPLER value is non-empty but not recognized, you emit a warning
via the package's logger before returning the ParentBased(...) fallback. Locate
the code that reads/handles OTEL_TRACES_SAMPLER (the switch that now returns
trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))) and add a
conditional to check the original sampler string: if it's non-empty and
unmatched, call the existing logger.Warn/Warnf with a concise message including
the unrecognized value, then proceed to return the existing ParentBased
fallback.

Comment on lines +301 to +335
}

// newTraceProvider creates a TracerProvider with an OTLP exporter configured based on the protocol setting.
func newTraceProvider(ctx context.Context, cfg OTelConfig, res *resource.Resource) (*trace.TracerProvider, error) {
var exporter trace.SpanExporter
Expand Down Expand Up @@ -329,7 +366,7 @@ func newTraceProvider(ctx context.Context, cfg OTelConfig, res *resource.Resourc

traceProvider := trace.NewTracerProvider(
trace.WithResource(res),
trace.WithSampler(trace.TraceIDRatioBased(cfg.TracesSampleRatio)),
trace.WithSampler(newSampler(cfg)),
trace.WithBatcher(exporter,
trace.WithBatchTimeout(time.Second),
),
Expand Down
46 changes: 46 additions & 0 deletions pkg/utils/otel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,49 @@ func TestSetupOTelSDKWithConfig_IPEndpoint(t *testing.T) {

_ = shutdown(ctx)
}

// TestNewSampler verifies that newSampler returns a non-nil sampler for all
// supported OTEL_TRACES_SAMPLER values, including the default (empty) case.
func TestNewSampler(t *testing.T) {
cfg := OTelConfig{TracesSampleRatio: 0.5}

tests := []struct {
name string
sampler string
arg string
}{
{"default (empty)", "", ""},
{"always_on", "always_on", ""},
{"always_off", "always_off", ""},
{"traceidratio", "traceidratio", "0.5"},
{"parentbased_always_on", "parentbased_always_on", ""},
{"parentbased_always_off", "parentbased_always_off", ""},
{"parentbased_traceidratio", "parentbased_traceidratio", "0.5"},
{"unknown", "unknown", ""},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Setenv("OTEL_TRACES_SAMPLER", tt.sampler)
t.Setenv("OTEL_TRACES_SAMPLER_ARG", tt.arg)

s := newSampler(cfg)
if s == nil {
t.Errorf("newSampler(%q) returned nil", tt.sampler)
}
})
}
}
Comment on lines +325 to +356

// TestNewSampler_InvalidArg verifies that an invalid OTEL_TRACES_SAMPLER_ARG
// falls back to cfg.TracesSampleRatio without panicking.
func TestNewSampler_InvalidArg(t *testing.T) {
cfg := OTelConfig{TracesSampleRatio: 0.5}
t.Setenv("OTEL_TRACES_SAMPLER", "parentbased_traceidratio")
t.Setenv("OTEL_TRACES_SAMPLER_ARG", "invalid")

s := newSampler(cfg)
if s == nil {
t.Error("newSampler returned nil for invalid OTEL_TRACES_SAMPLER_ARG")
}
}
Loading