Skip to content
Draft
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,45 @@ make mypy # Run type checking
make clean # Delete all test-created resources (uses USER env var)
```

### Accessing Jaeger Tracing

**Local Setup:** Jaeger is deployed automatically and configured for both control and data plane tracing.

1. **Control plane tracing** (kuadrant-operator):
- OTEL env vars automatically configured when `INSTALL_TRACING=true` (default)
- Operator sends traces over OTLP/HTTP to `jaeger-collector.tools.svc.cluster.local:4318` (`tools` is the default `TOOLS_NAMESPACE`)
- Trace reconciliation loops, policy processing, and webhook calls

2. **Data plane tracing** (gateway/envoy):
- Configured in Kuadrant CR: `spec.observability.tracing.defaultEndpoint`
- Gateway sends request traces to the same Jaeger collector
- Trace HTTP requests, rate limit checks, and auth decisions

3. **Access Jaeger UI:**
```bash
kubectl port-forward -n tools svc/jaeger-query 16686:80
# Open http://localhost:16686
```

4. **Run tracing tests:**
```bash
# Control plane tracing tests (40 tests)
make testsuite/tests/singlecluster/tracing/control_plane/

# Data plane tracing tests (10 tests)
make testsuite/tests/singlecluster/tracing/data_plane_tracing/
```

**Disable tracing:**
```bash
INSTALL_TRACING=false make local-setup
```

**View traces:**
- Service: `kuadrant-operator` for control plane traces
- Service: the Gateway's name for data plane traces
- Filter by operation, tags, or duration

## Pull Request Guidelines

For PR title format and commit conventions, see `.claude/commands/pr-description.md` or use the `/pr-description` command.
Expand Down
4 changes: 1 addition & 3 deletions config/settings.local.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@
# url: "MOCKSERVER_URL"
# image: "MOCKSERVER_IMAGE" # Image to be used for self-deployed Mockserver
# tracing:
# backend: "jaeger" # Tracing backend
# collector_url: "rpc://jaeger-collector.com:4317" # Tracing collector URL (may be internal)
# query_url: "http://jaeger-query.com" # Tracing query URL
# query_url: "http://jaeger-query.tools.svc.cluster.local:80" # Query UI endpoint (internal DNS or LoadBalancer IP)
# cfssl: "cfssl" # Path to the CFSSL library for TLS tests
# service_protection:
# system_project: "kuadrant-system" # Namespace where Kuadrant resource resides
Expand Down
20 changes: 20 additions & 0 deletions make/istio.mk
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,23 @@ istio-install: ## Install Istio via SAIL operator
' version: $(ISTIO_VERSION)' \
| kubectl apply -f -
@echo "Istio $(ISTIO_VERSION) installed via SAIL"

# configure-istio-tracing: wire the Istio data plane to the Jaeger collector.
#
# Step 1 merge-patches the `default` Istio CR in istio-system so that meshConfig
#   - writes JSON access logs to stdout,
#   - enables tracing, and
#   - registers an OpenTelemetry extension provider named `jaeger-otlp` that
#     points at jaeger-collector in $(TOOLS_NAMESPACE) on port 4318.
#   Port 4318 is the OTLP/HTTP port, so the provider carries an explicit `http`
#   section; without it Istio exports over gRPC, which does not match this port.
#   NOTE(review): a merge patch replaces the whole `extensionProviders` list;
#   confirm no other providers are expected on this Istio CR.
# Step 2 applies a mesh-wide Telemetry resource that selects the `jaeger-otlp`
#   provider and samples 100% of requests.
#
# The JSON payload must stay on a single line: a backslash-newline inside the
# single-quoted shell string would be passed through literally and break it.
.PHONY: configure-istio-tracing
configure-istio-tracing: ## Configure Istio for distributed tracing
	@echo "Configuring Istio for tracing with Jaeger..."
	@# Patch Istio CR to add tracing extension provider (OTLP over HTTP) and JSON access logs
	@kubectl patch istio default -n istio-system --type=merge -p '{"spec":{"values":{"meshConfig":{"accessLogFile":"/dev/stdout","accessLogEncoding":"JSON","accessLogFormat":"{\"start_time\":\"%START_TIME%\",\"method\":\"%REQ(:METHOD)%\",\"path\":\"%REQ(X-ENVOY-ORIGINAL-PATH?:PATH)%\",\"protocol\":\"%PROTOCOL%\",\"response_code\":\"%RESPONSE_CODE%\",\"response_flags\":\"%RESPONSE_FLAGS%\",\"bytes_received\":\"%BYTES_RECEIVED%\",\"bytes_sent\":\"%BYTES_SENT%\",\"duration\":\"%DURATION%\",\"upstream_service_time\":\"%RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)%\",\"x_forwarded_for\":\"%REQ(X-FORWARDED-FOR)%\",\"user_agent\":\"%REQ(USER-AGENT)%\",\"request_id\":\"%REQ(X-REQUEST-ID)%\",\"authority\":\"%REQ(:AUTHORITY)%\",\"upstream_host\":\"%UPSTREAM_HOST%\",\"upstream_cluster\":\"%UPSTREAM_CLUSTER%\",\"route_name\":\"%ROUTE_NAME%\"}","enableTracing":true,"defaultConfig":{"tracing":{}},"extensionProviders":[{"name":"jaeger-otlp","opentelemetry":{"port":4318,"service":"jaeger-collector.$(TOOLS_NAMESPACE).svc.cluster.local","http":{"path":"/v1/traces","timeout":"5s"}}}]}}}}'
	@# Create Telemetry resource to enable tracing
	@printf '%s\n' \
		'apiVersion: telemetry.istio.io/v1' \
		'kind: Telemetry' \
		'metadata:' \
		'  name: default-telemetry' \
		'  namespace: istio-system' \
		'spec:' \
		'  tracing:' \
		'    - providers:' \
		'        - name: jaeger-otlp' \
		'      randomSamplingPercentage: 100' \
	| kubectl apply -f -
	@echo "Istio tracing configured"
18 changes: 18 additions & 0 deletions make/kuadrant.mk
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,21 @@ deploy-kuadrant-cr: ## Deploy Kuadrant CR
| kubectl apply -f -
kubectl wait kuadrant/kuadrant-sample --for=condition=Ready=True -n $(KUADRANT_NAMESPACE) --timeout=$(KUADRANT_CR_TIMEOUT)
@echo "Kuadrant CR ready"

# Control plane tracing: inject the OTLP exporter settings (plus debug logging)
# into the kuadrant-operator deployment, then block until the rollout that
# `kubectl set env` triggers has finished or $(KUBECTL_TIMEOUT) expires.
.PHONY: configure-kuadrant-tracing-operator
configure-kuadrant-tracing-operator: ## Configure OTEL env vars on kuadrant-operator (control plane tracing)
	@echo "Configuring OTEL environment variables on kuadrant-operator..."
	@kubectl --namespace $(KUADRANT_NAMESPACE) \
		set env deployment/kuadrant-operator-controller-manager \
		OTEL_EXPORTER_OTLP_ENDPOINT=$(JAEGER_COLLECTOR_ENDPOINT) \
		OTEL_EXPORTER_OTLP_INSECURE=true \
		LOG_LEVEL=debug
	@kubectl --namespace $(KUADRANT_NAMESPACE) \
		rollout status deployment/kuadrant-operator-controller-manager \
		--timeout=$(KUBECTL_TIMEOUT)
	@echo "Control plane tracing configured on operator"

# Data plane tracing: merge-patch the `kuadrant-sample` Kuadrant CR so that
# spec.observability is enabled, data plane logging uses a debug default level
# keyed on the x-request-id header, and traces are sent (insecure) to
# $(JAEGER_COLLECTOR_ENDPOINT). The JSON payload is kept on one line because it
# sits inside a single-quoted shell string.
.PHONY: configure-kuadrant-tracing-cr
configure-kuadrant-tracing-cr: ## Configure tracing in Kuadrant CR (data plane tracing)
	@echo "Configuring observability (tracing + data plane) in Kuadrant CR..."
	@kubectl --namespace $(KUADRANT_NAMESPACE) patch kuadrant kuadrant-sample --type=merge \
		--patch '{"spec":{"observability":{"enable":true,"dataPlane":{"defaultLevels":[{"debug":"true"}],"httpHeaderIdentifier":"x-request-id"},"tracing":{"defaultEndpoint":"$(JAEGER_COLLECTOR_ENDPOINT)","insecure":true}}}}'
	@echo "Data plane tracing configured in Kuadrant CR"
11 changes: 10 additions & 1 deletion make/local-setup.mk
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,18 @@ local-setup: ## Complete local environment setup (kind cluster + all dependencie
$(MAKE) $(GATEWAYAPI_PROVIDER)-install
$(MAKE) create-test-namespaces
$(MAKE) apply-additional-manifests
$(MAKE) deploy-testsuite-tools
ifeq ($(INSTALL_TRACING),true)
ifeq ($(GATEWAYAPI_PROVIDER),istio)
$(MAKE) configure-istio-tracing
endif
endif
$(MAKE) deploy-kuadrant-operator
$(MAKE) deploy-kuadrant-cr
$(MAKE) deploy-testsuite-tools
ifeq ($(INSTALL_TRACING),true)
$(MAKE) configure-kuadrant-tracing-operator
$(MAKE) configure-kuadrant-tracing-cr
endif
@echo ""
@echo "Local environment setup complete!"
@echo " Cluster: $(KIND_CLUSTER_NAME)"
Expand Down
5 changes: 3 additions & 2 deletions make/tools.mk
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@

.PHONY: deploy-testsuite-tools
deploy-testsuite-tools: ## Deploy testsuite tools (Keycloak, etc.)
@echo "Deploying testsuite tools..."
kubectl create namespace tools || true
@echo "Deploying testsuite tools to namespace: $(TOOLS_NAMESPACE)"
kubectl create namespace $(TOOLS_NAMESPACE) || true
helm repo add kuadrant-olm https://kuadrant.io/helm-charts-olm --force-update
helm repo update
helm install \
--namespace $(TOOLS_NAMESPACE) \
--set=tools.keycloak.keycloakProvider=deployment \
--debug \
--wait \
Expand Down
7 changes: 7 additions & 0 deletions make/vars.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ KUADRANT_OPERATOR_ENV_VARS ?= AUTH_SERVICE_TIMEOUT=1000ms,RATELIMIT_SERVICE_TIME
# Point to a YAML file containing any additional Kubernetes resources
ADDITIONAL_MANIFESTS ?=

# Tools namespace (Jaeger, Keycloak, etc.)
# Also used below to build the in-cluster hostname of the Jaeger collector.
TOOLS_NAMESPACE ?= tools

# Tracing configuration
# INSTALL_TRACING: local-setup runs the tracing configuration targets only when
#   this is exactly "true"; any other value skips them.
# JAEGER_COLLECTOR_ENDPOINT: collector URL passed to the kuadrant-operator as
#   OTEL_EXPORTER_OTLP_ENDPOINT and to the Kuadrant CR as the tracing endpoint.
#   Defined with ?= (expanded on use), so an overridden TOOLS_NAMESPACE is
#   reflected in the default value.
INSTALL_TRACING ?= true
JAEGER_COLLECTOR_ENDPOINT ?= http://jaeger-collector.$(TOOLS_NAMESPACE).svc.cluster.local:4318

# Timeout configurations (in seconds)
KUBECTL_TIMEOUT ?= 300s
CERT_MANAGER_TIMEOUT ?= 120s
Expand Down
2 changes: 1 addition & 1 deletion testsuite/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def __init__(self, name, default, **kwargs) -> None:
),
DefaultValueValidator("tracing.backend", default="jaeger", is_in=["jaeger", "tempo"]),
DefaultValueValidator(
"tracing.collector_url", default=fetch_service("jaeger-collector", protocol="rpc", port=4317)
"tracing.collector_url", default=fetch_service("jaeger-collector", protocol="http", port=4318)
),
DefaultValueValidator("tracing.query_url", default=fetch_service_ip("jaeger-query", protocol="http", port=80)),
Validator(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
target changes, and multi-policy scenarios.
"""

import time

import pytest

from testsuite.gateway.gateway_api.route import HTTPRoute
Expand All @@ -30,21 +32,30 @@ def updated_authorization(authorization, trace_snapshot_before_update): # pylin
"""
Authorization policy after update.
(trace_snapshot_before_update ensures snapshot taken before update)
Returns tuple of (authorization, update_timestamp_micros)
"""
# Capture timestamp right before updating (in microseconds)
update_time = int(time.time() * 1_000_000)

when_post = [Pattern("context.request.http.method", "eq", "POST")]
authorization.authorization.add_opa_policy("opa", "allow { false }", when=when_post)
authorization.wait_for_ready()

return authorization
return authorization, update_time


def test_policy_update_generates_new_reconciliation_trace(updated_authorization, trace_snapshot_before_update, tracing):
"""
Validate that policy updates generate new reconciliation traces
"""
authorization, update_time = updated_authorization
snapshot = trace_snapshot_before_update

updated_traces = tracing.get_traces(service="kuadrant-operator", tags={"policy.name": updated_authorization.name()})
# Query for traces that started after the update timestamp
# The backoff decorator will retry until at least one trace appears
updated_traces = tracing.get_traces(
service="kuadrant-operator", tags={"policy.name": authorization.name()}, start_time=update_time
)

# Find new reconcile spans (spans that weren't in the original snapshot)
new_reconcile_spans = []
Expand All @@ -58,7 +69,7 @@ def test_policy_update_generates_new_reconciliation_trace(updated_authorization,
# Find new policy spans (spans that weren't in the original snapshot)
new_policy_spans = []
for trace in updated_traces:
for span in trace.filter_spans(lambda s: s.get_tag("policy.name") == updated_authorization.name()):
for span in trace.filter_spans(lambda s: s.get_tag("policy.name") == authorization.name()):
if span.span_id not in snapshot["span_ids"]:
new_policy_spans.append(span)

Expand Down Expand Up @@ -214,20 +225,27 @@ def authorization_with_changed_target(
"""
Authorization policy with targetRef changed to second_route.
(trace_snapshot_before_target_change ensures snapshot taken before change)
Returns tuple of (authorization, change_timestamp_micros)
"""
# Capture timestamp right before changing the target (in microseconds)
change_time = int(time.time() * 1_000_000)

authorization.refresh()
authorization.model.spec.targetRef = second_route.reference
authorization.apply()
authorization.wait_for_ready()
return authorization
return authorization, change_time


def test_policy_target_change_traced(authorization_with_changed_target, trace_snapshot_before_target_change, tracing):
"""Validate traces when policy's targetRef changes"""
authorization, change_time = authorization_with_changed_target
snapshot = trace_snapshot_before_target_change

# Query for traces that started after the target change timestamp
# The backoff decorator will retry until at least one trace appears
updated_traces = tracing.get_traces(
service="kuadrant-operator", tags={"policy.name": authorization_with_changed_target.name()}
service="kuadrant-operator", tags={"policy.name": authorization.name()}, start_time=change_time
)

# Find new reconcile spans (spans that weren't in the original snapshot)
Expand Down
Loading
Loading