diff --git a/charts/lfx-v2-auth-service/templates/deployment.yaml b/charts/lfx-v2-auth-service/templates/deployment.yaml index 5b9565f..ef88402 100644 --- a/charts/lfx-v2-auth-service/templates/deployment.yaml +++ b/charts/lfx-v2-auth-service/templates/deployment.yaml @@ -83,6 +83,15 @@ spec: - name: OTEL_TRACES_SAMPLE_RATIO value: {{ $otelTracesSampleRatio | quote }} {{- end }} + {{- $otelTracesSampler := .Values.app.otel.tracesSampler | toString | trim }} + {{- if ne $otelTracesSampler "" }} + - name: OTEL_TRACES_SAMPLER + value: {{ $otelTracesSampler | quote }} + {{- if ne $otelTracesSampleRatio "" }} + - name: OTEL_TRACES_SAMPLER_ARG + value: {{ $otelTracesSampleRatio | quote }} + {{- end }} + {{- end }} {{- $otelMetricsExporter := .Values.app.otel.metricsExporter | toString | trim }} {{- if ne $otelMetricsExporter "" }} - name: OTEL_METRICS_EXPORTER diff --git a/charts/lfx-v2-auth-service/values.yaml b/charts/lfx-v2-auth-service/values.yaml index d494ad8..22cb28b 100644 --- a/charts/lfx-v2-auth-service/values.yaml +++ b/charts/lfx-v2-auth-service/values.yaml @@ -191,6 +191,11 @@ app: # A value of 1.0 means all traces are sampled, 0.5 means 50% are sampled # (default: "1.0") tracesSampleRatio: "1.0" + # tracesSampler sets OTEL_TRACES_SAMPLER: "always_on", "always_off", + # "traceidratio", "parentbased_always_on", "parentbased_always_off" or + # "parentbased_traceidratio"; tracesSampleRatio is then also sent as + # OTEL_TRACES_SAMPLER_ARG. When empty, the app uses parentbased_traceidratio. 
+    tracesSampler: ""
     # metricsExporter specifies the metrics exporter: "otlp" or "none"
     # (default: "none")
     metricsExporter: "none"
diff --git a/pkg/utils/otel.go b/pkg/utils/otel.go
index e949dde..3ad28ec 100644
--- a/pkg/utils/otel.go
+++ b/pkg/utils/otel.go
@@ -303,6 +303,43 @@ func endpointURL(raw string, insecure bool) string {
 	return "https://" + raw
 }
 
+// newSampler builds a trace.Sampler from the OTEL_TRACES_SAMPLER and
+// OTEL_TRACES_SAMPLER_ARG environment variables. An empty or unrecognized name
+// falls back to parentbased_traceidratio with cfg.TracesSampleRatio (unrecognized
+// names are logged). Only parentbased_* samplers honor the parent's decision.
+func newSampler(cfg OTelConfig) trace.Sampler {
+	// parseRatio returns the ARG when it is a ratio in [0, 1], else cfg.TracesSampleRatio.
+	parseRatio := func() float64 {
+		if arg := os.Getenv("OTEL_TRACES_SAMPLER_ARG"); arg != "" {
+			if r, err := strconv.ParseFloat(arg, 64); err == nil && r >= 0.0 && r <= 1.0 {
+				return r
+			}
+			slog.Warn("invalid OTEL_TRACES_SAMPLER_ARG, using TracesSampleRatio", "value", arg)
+		}
+		return cfg.TracesSampleRatio
+	}
+
+	sampler := os.Getenv("OTEL_TRACES_SAMPLER")
+	switch sampler {
+	case "always_on":
+		return trace.AlwaysSample()
+	case "always_off":
+		return trace.NeverSample()
+	case "traceidratio":
+		return trace.TraceIDRatioBased(parseRatio())
+	case "parentbased_always_on":
+		return trace.ParentBased(trace.AlwaysSample())
+	case "parentbased_always_off":
+		return trace.ParentBased(trace.NeverSample())
+	case "parentbased_traceidratio":
+		return trace.ParentBased(trace.TraceIDRatioBased(parseRatio()))
+	case "": // unset is the normal default; only unrecognized names warrant a warning
+	default:
+		slog.Warn("unknown OTEL_TRACES_SAMPLER, using parentbased_traceidratio", "value", sampler)
+	}
+	return trace.ParentBased(trace.TraceIDRatioBased(cfg.TracesSampleRatio))
+}
+
 // newTraceProvider creates a TracerProvider with an OTLP exporter configured based on the protocol setting.
 func newTraceProvider(ctx context.Context, cfg OTelConfig, res *resource.Resource) (*trace.TracerProvider, error) {
 	var exporter trace.SpanExporter
@@ -334,7 +371,7 @@ func newTraceProvider(ctx context.Context, cfg OTelConfig, res *resource.Resourc
 
 	traceProvider := trace.NewTracerProvider(
 		trace.WithResource(res),
-		trace.WithSampler(trace.TraceIDRatioBased(cfg.TracesSampleRatio)),
+		trace.WithSampler(newSampler(cfg)),
 		trace.WithBatcher(exporter,
 			trace.WithBatchTimeout(time.Second),
 		),
diff --git a/pkg/utils/otel_test.go b/pkg/utils/otel_test.go
index 6155ed1..4cfbf38 100644
--- a/pkg/utils/otel_test.go
+++ b/pkg/utils/otel_test.go
@@ -519,3 +519,49 @@ func TestSetupOTelSDKWithConfig_IPEndpoint(t *testing.T) {
 
 	_ = shutdown(ctx)
 }
+
+// TestNewSampler verifies that newSampler returns a non-nil sampler for every
+// supported OTEL_TRACES_SAMPLER value as well as for empty and unknown ones.
+func TestNewSampler(t *testing.T) {
+	cfg := OTelConfig{TracesSampleRatio: 0.5}
+
+	tests := []struct {
+		name    string
+		sampler string
+		arg     string
+	}{
+		{"default (empty)", "", ""},
+		{"always_on", "always_on", ""},
+		{"always_off", "always_off", ""},
+		{"traceidratio", "traceidratio", "0.5"},
+		{"parentbased_always_on", "parentbased_always_on", ""},
+		{"parentbased_always_off", "parentbased_always_off", ""},
+		{"parentbased_traceidratio", "parentbased_traceidratio", "0.5"},
+		{"unknown", "unknown", ""},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Setenv("OTEL_TRACES_SAMPLER", tt.sampler)
+			t.Setenv("OTEL_TRACES_SAMPLER_ARG", tt.arg)
+
+			s := newSampler(cfg)
+			if s == nil {
+				t.Errorf("newSampler(%q) returned nil", tt.sampler)
+			}
+		})
+	}
+}
+
+// TestNewSampler_InvalidArg verifies that an invalid OTEL_TRACES_SAMPLER_ARG
+// falls back to cfg.TracesSampleRatio without panicking.
+func TestNewSampler_InvalidArg(t *testing.T) {
+	cfg := OTelConfig{TracesSampleRatio: 0.5}
+	t.Setenv("OTEL_TRACES_SAMPLER", "parentbased_traceidratio")
+	t.Setenv("OTEL_TRACES_SAMPLER_ARG", "invalid")
+	got := newSampler(cfg).Description()
+	t.Setenv("OTEL_TRACES_SAMPLER_ARG", "") // reference: same sampler built with no ARG at all
+	if want := newSampler(cfg).Description(); got != want {
+		t.Errorf("newSampler with invalid OTEL_TRACES_SAMPLER_ARG = %q, want fallback %q", got, want)
+	}
+}