diff --git a/environment.yml b/environment.yml index c05f1d8e9..f2f4dc85a 100644 --- a/environment.yml +++ b/environment.yml @@ -42,6 +42,11 @@ dependencies: - urllib3 >=1.26 - xarray >=2022.6, <=2024.6 - zarr >=2.11 + # Observability + - opentelemetry-api + - opentelemetry-sdk + - opentelemetry-instrumentation-tornado + - opentelemetry-exporter-otlp-proto-grpc # Testing - flake8 >=3.7 - kerchunk diff --git a/xcube/cli/serve.py b/xcube/cli/serve.py index d0c132058..740ce2cee 100644 --- a/xcube/cli/serve.py +++ b/xcube/cli/serve.py @@ -18,6 +18,7 @@ DEFAULT_SERVER_PORT, DEFAULT_SERVER_ADDRESS, ) +from xcube.webapi.common.telemetry import tracer assets_to_show = ["apis", "endpoints", "openapi", "config", "configschema"] @@ -134,6 +135,7 @@ @cli_option_quiet @cli_option_verbosity @click.argument("paths", metavar="[PATHS...]", nargs=-1) +@tracer.start_as_current_span("serve") def serve( framework_name: str, port: int, diff --git a/xcube/server/webservers/tornado.py b/xcube/server/webservers/tornado.py index 9f143e9e0..9d03cfb01 100644 --- a/xcube/server/webservers/tornado.py +++ b/xcube/server/webservers/tornado.py @@ -15,6 +15,7 @@ import tornado.httputil import tornado.ioloop import tornado.web +from opentelemetry.instrumentation.tornado import TornadoInstrumentor from xcube.constants import LOG from xcube.constants import LOG_LEVEL_DETAIL @@ -39,6 +40,7 @@ from xcube.version import version SERVER_CTX_ATTR_NAME = "__xcube_server_ctx" +TornadoInstrumentor().instrument() class TornadoFramework(Framework): @@ -124,16 +126,18 @@ def add_routes(self, api_routes: Sequence[ApiRoute], url_prefix: str): handlers = [] for api_route in api_routes: - handlers.append(( - url_prefix + self.path_to_pattern(api_route.path) - + ("/?" if api_route.slash else ""), - TornadoRequestHandler, - {"api_route": api_route}, - )) + handlers.append( + ( + url_prefix + + self.path_to_pattern(api_route.path) + + ("/?" if api_route.slash else ""), + TornadoRequestHandler, + {"api_route": api_route}, + ) + ) LOG.log( LOG_LEVEL_DETAIL, - f"Added route {api_route.path!r}" - f" from API {api_route.api_name!r}", + f"Added route {api_route.path!r}" f" from API {api_route.api_name!r}", ) if handlers: diff --git a/xcube/webapi/common/telemetry.py b/xcube/webapi/common/telemetry.py new file mode 100644 index 000000000..b3a174662 --- /dev/null +++ b/xcube/webapi/common/telemetry.py @@ -0,0 +1,32 @@ +import os + +from opentelemetry import trace +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + + +otel_config = { + "exporter_otlp_endpoint": os.getenv("OTLP_ENDPOINT"), + "exporter_otlp_insecure": True, +} + +otlp_trace_exporter = OTLPSpanExporter( + endpoint=otel_config["exporter_otlp_endpoint"], + insecure=otel_config["exporter_otlp_insecure"], +) + +# Observability Initialization +provider = TracerProvider() +processor = BatchSpanProcessor(otlp_trace_exporter) +provider.add_span_processor(processor) +trace.set_tracer_provider(provider) +tracer = trace.get_tracer("xcube.observability") + + +def set_attributes(*attrs): + span = trace.get_current_span() + combined = {} + for attr in attrs: + combined.update(attr) + span.set_attributes(combined) diff --git a/xcube/webapi/datasets/controllers.py b/xcube/webapi/datasets/controllers.py index becdfadbb..41f736768 100644 --- a/xcube/webapi/datasets/controllers.py +++ b/xcube/webapi/datasets/controllers.py @@ -30,6 +30,7 @@ from .authutil import check_scopes from .context import DatasetConfig from .context import DatasetsContext +from ..common.telemetry import tracer from ..places.controllers import GeoJsonFeatureCollection from ..places.controllers import find_places @@ -494,6 +495,7 @@ def get_dataset_coordinates(ctx: DatasetsContext, ds_id: str, dim_name: str) -> # noinspection PyUnusedLocal +@tracer.start_as_current_span("get_color_bars.test") def get_color_bars(ctx: DatasetsContext, mime_type: str) -> str: cmaps = ctx.colormap_registry.to_json() if mime_type == "application/json": diff --git a/xcube/webapi/tiles/controllers.py b/xcube/webapi/tiles/controllers.py index ffa328446..5f68d5da1 100644 --- a/xcube/webapi/tiles/controllers.py +++ b/xcube/webapi/tiles/controllers.py @@ -5,6 +5,9 @@ from typing import Optional, Dict from collections.abc import Mapping +from opentelemetry.context import attach +from opentelemetry.context import detach + from xcube.constants import LOG from xcube.core.tile import DEFAULT_CRS_NAME from xcube.core.tile import DEFAULT_FORMAT @@ -15,6 +18,7 @@ from xcube.server.api import ApiError from xcube.util.perf import measure_time_cm from .context import TilesContext +from ..common.telemetry import tracer, set_attributes def compute_ml_dataset_tile( @@ -26,16 +30,25 @@ def compute_ml_dataset_tile( y: str, z: str, params: Mapping[str, str], + current_context, ): - params = dict(params) - trace_perf = params.pop("debug", "1" if ctx.datasets_ctx.trace_perf else "0") == "1" - measure_time = measure_time_cm(logger=LOG, disabled=not trace_perf) - with measure_time("Computing RGBA tile"): - return _compute_ml_dataset_tile( - ctx, ds_id, var_name, crs_name, x, y, z, params, trace_perf - ) + token = attach(current_context) + try: + with tracer.start_as_current_span("tiles.compute_ml_dataset_tile"): + params = dict(params) + trace_perf = ( + params.pop("debug", "1" if ctx.datasets_ctx.trace_perf else "0") == "1" + ) + measure_time = measure_time_cm(logger=LOG, disabled=not trace_perf) + with measure_time("Computing RGBA tile"): + return _compute_ml_dataset_tile( + ctx, ds_id, var_name, crs_name, x, y, z, params, trace_perf + ) + finally: + detach(token) +@tracer.start_as_current_span("tiles._compute_ml_dataset_tile.test") def _compute_ml_dataset_tile( ctx: TilesContext, ds_id: str, @@ -47,6 +60,7 @@ def _compute_ml_dataset_tile( args: dict[str, str], trace_perf: bool, ): + set_attributes({"x": x}, {"y": y}, {"z": z}) try: x, y, z = (int(c) for c in (x, y, z)) except ValueError: diff --git a/xcube/webapi/tiles/routes.py b/xcube/webapi/tiles/routes.py index 518c46160..c999c75b6 100644 --- a/xcube/webapi/tiles/routes.py +++ b/xcube/webapi/tiles/routes.py @@ -14,6 +14,8 @@ from ..datasets import QUERY_PARAM_VMAX from ..datasets import QUERY_PARAM_VMIN +import opentelemetry.context as context + PATH_PARAM_X = { "name": "x", "in": "path", @@ -88,6 +90,7 @@ class TilesHandler(ApiHandler[TilesContext]): parameters=TILE_PARAMETERS, ) async def get(self, datasetId: str, varName: str, z: str, y: str, x: str): + current_context = context.get_current() tile = await self.ctx.run_in_executor( None, compute_ml_dataset_tile, @@ -99,6 +102,7 @@ async def get(self, datasetId: str, varName: str, z: str, y: str, x: str): y, z, {k: v[0] for k, v in self.request.query.items()}, + current_context, ) self.response.set_header("Content-Type", "image/png") await self.response.finish(tile)