From ab98fc53b70cc11165d3061b0a0027b66c706202 Mon Sep 17 00:00:00 2001 From: Athul Date: Thu, 11 Jun 2026 13:38:44 +0530 Subject: [PATCH 1/2] UN-2771 Include text extraction time in API deployment metrics The structure tool timed indexing but not the text extraction (LLMWhisperer/X2Text) call, so API responses with include_metrics=True reported indexing time only. Time dynamic_extraction the same way and merge it into the result metrics as extraction.time_taken(s). Bump structure tool to 0.0.102. Co-Authored-By: Claude Fable 5 --- tools/structure/src/config/properties.json | 2 +- tools/structure/src/constants.py | 1 + tools/structure/src/main.py | 14 +++++++++++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json index c8697e7307..a9d1a6029d 100644 --- a/tools/structure/src/config/properties.json +++ b/tools/structure/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Structure Tool", "functionName": "structure_tool", - "toolVersion": "0.0.101", + "toolVersion": "0.0.102", "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio", "input": { "description": "File that needs to be indexed and parsed for answers" diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py index 8da6a5701a..294fb3015b 100644 --- a/tools/structure/src/constants.py +++ b/tools/structure/src/constants.py @@ -77,6 +77,7 @@ class SettingsKeys: TOOL = "tool" METRICS = "metrics" INDEXING = "indexing" + EXTRACTION = "extraction" EXECUTION_ID = "execution_id" IS_DIRECTORY_MODE = "is_directory_mode" LLM_PROFILE_ID = "llm_profile_id" diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py index f68143a6c8..fa8ec39530 100644 --- a/tools/structure/src/main.py +++ b/tools/structure/src/main.py @@ -318,6 +318,7 @@ def run( ) extracted_text = "" + extraction_metrics = {} usage_kwargs: dict[Any, Any] = dict() if skip_extraction_and_indexing: self.stream_log( @@ -328,6 +329,7 @@ def run( usage_kwargs[UsageKwargs.RUN_ID] = self.file_execution_id usage_kwargs[UsageKwargs.FILE_NAME] = self.source_file_name usage_kwargs[UsageKwargs.EXECUTION_ID] = self.execution_id + extraction_start_time = datetime.datetime.now() extracted_text = STHelper.dynamic_extraction( file_path=input_file, enable_highlight=is_highlight_enabled, @@ -338,6 +340,13 @@ def run( tool=self, execution_run_data_folder=str(execution_run_data_folder), ) + extraction_metrics = { + SettingsKeys.EXTRACTION: { + "time_taken(s)": STHelper.elapsed_time( + start_time=extraction_start_time + ) + } + } index_metrics = {} if is_summarization_enabled: @@ -458,7 +467,10 @@ def run( "No text is extracted from the document to add to the metadata" ) if merged_metrics := self._merge_metrics( - structured_output.get(SettingsKeys.METRICS, {}), index_metrics + self._merge_metrics( + structured_output.get(SettingsKeys.METRICS, {}), index_metrics + ), + extraction_metrics, ): structured_output[SettingsKeys.METRICS] = merged_metrics # Update GUI From ec778bc95c32e6dbff2ec9581a76600414e7ec42 Mon Sep 17 00:00:00 2001 From: Athul Date: Thu, 11 Jun 2026 16:22:22 +0530 Subject: [PATCH 2/2] UN-2771 Rework: capture extraction time in the worker pipeline instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review, the structure tool's Docker path is deprecated — the live flow is the celery-based LegacyExecutor structure pipeline. Time the extract step there and merge {'extraction': {'time_taken(s)': ...}} into the result metrics alongside the existing per-output indexing timing. Structure tool changes reverted (no tool version bump needed). Co-Authored-By: Claude Fable 5 --- tools/structure/src/config/properties.json | 2 +- tools/structure/src/constants.py | 1 - tools/structure/src/main.py | 14 +------------- workers/executor/executors/legacy_executor.py | 14 ++++++++++++-- 4 files changed, 14 insertions(+), 17 deletions(-) diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json index a9d1a6029d..c8697e7307 100644 --- a/tools/structure/src/config/properties.json +++ b/tools/structure/src/config/properties.json @@ -2,7 +2,7 @@ "schemaVersion": "0.0.1", "displayName": "Structure Tool", "functionName": "structure_tool", - "toolVersion": "0.0.102", + "toolVersion": "0.0.101", "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio", "input": { "description": "File that needs to be indexed and parsed for answers" diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py index 294fb3015b..8da6a5701a 100644 --- a/tools/structure/src/constants.py +++ b/tools/structure/src/constants.py @@ -77,7 +77,6 @@ class SettingsKeys: TOOL = "tool" METRICS = "metrics" INDEXING = "indexing" - EXTRACTION = "extraction" EXECUTION_ID = "execution_id" IS_DIRECTORY_MODE = "is_directory_mode" LLM_PROFILE_ID = "llm_profile_id" diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py index fa8ec39530..f68143a6c8 100644 --- a/tools/structure/src/main.py +++ b/tools/structure/src/main.py @@ -318,7 +318,6 @@ def run( ) extracted_text = "" - extraction_metrics = {} usage_kwargs: dict[Any, Any] = dict() if skip_extraction_and_indexing: self.stream_log( @@ -329,7 +328,6 @@ def run( usage_kwargs[UsageKwargs.RUN_ID] = self.file_execution_id usage_kwargs[UsageKwargs.FILE_NAME] = self.source_file_name usage_kwargs[UsageKwargs.EXECUTION_ID] = self.execution_id - extraction_start_time = datetime.datetime.now() extracted_text = STHelper.dynamic_extraction( file_path=input_file, enable_highlight=is_highlight_enabled, @@ -340,13 +338,6 @@ def run( tool=self, execution_run_data_folder=str(execution_run_data_folder), ) - extraction_metrics = { - SettingsKeys.EXTRACTION: { - "time_taken(s)": STHelper.elapsed_time( - start_time=extraction_start_time - ) - } - } index_metrics = {} if is_summarization_enabled: @@ -467,10 +458,7 @@ def run( "No text is extracted from the document to add to the metadata" ) if merged_metrics := self._merge_metrics( - self._merge_metrics( - structured_output.get(SettingsKeys.METRICS, {}), index_metrics - ), - extraction_metrics, + structured_output.get(SettingsKeys.METRICS, {}), index_metrics ): structured_output[SettingsKeys.METRICS] = merged_metrics # Update GUI diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py index eae4d05b2f..db89d4b98d 100644 --- a/workers/executor/executors/legacy_executor.py +++ b/workers/executor/executors/legacy_executor.py @@ -619,10 +619,12 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult: ) step = 1 + extraction_metrics: dict = {} try: # ---- Step 1: Extract ---- if not skip_extraction: step += 1 + extraction_start = time.monotonic() extract_ctx = ExecutionContext( executor_name=context.executor_name, operation=Operation.EXTRACT.value, @@ -640,6 +642,9 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult: return _failure(extract_result) _absorb(extract_result) extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "") + extraction_metrics = { + "extraction": {"time_taken(s)": time.monotonic() - extraction_start} + } # ---- Step 2: Summarize (if enabled) ---- if is_summarization: @@ -700,6 +705,7 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult: source_file_name=source_file_name, extracted_text=extracted_text, index_metrics=index_metrics, + extraction_metrics=extraction_metrics, ) output_map = structured_output.get(PSKeys.OUTPUT, {}) or {} @@ -787,6 +793,7 @@ def _finalize_pipeline_result( source_file_name: str, extracted_text: str, index_metrics: dict, + extraction_metrics: dict | None = None, ) -> None: """Populate metadata/metrics in structured_output after pipeline completion.""" if "metadata" not in structured_output: @@ -794,10 +801,13 @@ def _finalize_pipeline_result( structured_output["metadata"]["file_name"] = source_file_name if extracted_text: structured_output["metadata"]["extracted_text"] = extracted_text - if index_metrics: + new_metrics = self._merge_pipeline_metrics( + index_metrics or {}, extraction_metrics or {} + ) + if new_metrics: existing_metrics = structured_output.get("metrics", {}) structured_output["metrics"] = self._merge_pipeline_metrics( - existing_metrics, index_metrics + existing_metrics, new_metrics ) def _run_pipeline_summarize(