From ab98fc53b70cc11165d3061b0a0027b66c706202 Mon Sep 17 00:00:00 2001
From: Athul <athul@zipstack.com>
Date: Thu, 11 Jun 2026 13:38:44 +0530
Subject: [PATCH 1/2] UN-2771 Include text extraction time in API deployment
 metrics

The structure tool timed indexing but not the text extraction
(LLMWhisperer/X2Text) call, so API responses with include_metrics=True
reported indexing time only. Time the dynamic_extraction call the same
way and merge the elapsed time into the result metrics as
extraction.time_taken(s).
Bump structure tool to 0.0.102.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 tools/structure/src/config/properties.json |  2 +-
 tools/structure/src/constants.py           |  1 +
 tools/structure/src/main.py                | 14 +++++++++++++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json
index c8697e7307..a9d1a6029d 100644
--- a/tools/structure/src/config/properties.json
+++ b/tools/structure/src/config/properties.json
@@ -2,7 +2,7 @@
   "schemaVersion": "0.0.1",
   "displayName": "Structure Tool",
   "functionName": "structure_tool",
-  "toolVersion": "0.0.101",
+  "toolVersion": "0.0.102",
   "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
   "input": {
     "description": "File that needs to be indexed and parsed for answers"
diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py
index 8da6a5701a..294fb3015b 100644
--- a/tools/structure/src/constants.py
+++ b/tools/structure/src/constants.py
@@ -77,6 +77,7 @@ class SettingsKeys:
     TOOL = "tool"
     METRICS = "metrics"
     INDEXING = "indexing"
+    EXTRACTION = "extraction"
     EXECUTION_ID = "execution_id"
     IS_DIRECTORY_MODE = "is_directory_mode"
     LLM_PROFILE_ID = "llm_profile_id"
diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py
index f68143a6c8..fa8ec39530 100644
--- a/tools/structure/src/main.py
+++ b/tools/structure/src/main.py
@@ -318,6 +318,7 @@ def run(
         )
 
         extracted_text = ""
+        extraction_metrics = {}
         usage_kwargs: dict[Any, Any] = dict()
         if skip_extraction_and_indexing:
             self.stream_log(
@@ -328,6 +329,7 @@ def run(
             usage_kwargs[UsageKwargs.RUN_ID] = self.file_execution_id
             usage_kwargs[UsageKwargs.FILE_NAME] = self.source_file_name
             usage_kwargs[UsageKwargs.EXECUTION_ID] = self.execution_id
+            extraction_start_time = datetime.datetime.now()
             extracted_text = STHelper.dynamic_extraction(
                 file_path=input_file,
                 enable_highlight=is_highlight_enabled,
@@ -338,6 +340,13 @@ def run(
                 tool=self,
                 execution_run_data_folder=str(execution_run_data_folder),
             )
+            extraction_metrics = {
+                SettingsKeys.EXTRACTION: {
+                    "time_taken(s)": STHelper.elapsed_time(
+                        start_time=extraction_start_time
+                    )
+                }
+            }
 
         index_metrics = {}
         if is_summarization_enabled:
@@ -458,7 +467,10 @@ def run(
                 "No text is extracted from the document to add to the metadata"
             )
         if merged_metrics := self._merge_metrics(
-            structured_output.get(SettingsKeys.METRICS, {}), index_metrics
+            self._merge_metrics(
+                structured_output.get(SettingsKeys.METRICS, {}), index_metrics
+            ),
+            extraction_metrics,
         ):
             structured_output[SettingsKeys.METRICS] = merged_metrics
         # Update GUI

From ec778bc95c32e6dbff2ec9581a76600414e7ec42 Mon Sep 17 00:00:00 2001
From: Athul <athul@zipstack.com>
Date: Thu, 11 Jun 2026 16:22:22 +0530
Subject: [PATCH 2/2] UN-2771 Rework: capture extraction time in the worker
 pipeline instead
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per review, the structure tool's Docker path is deprecated — the live
flow is the Celery-based LegacyExecutor structure pipeline. Time the
extract step there and merge {'extraction': {'time_taken(s)': ...}}
into the result metrics alongside the existing per-output indexing
timing. The structure tool changes from patch 1/2 are reverted (no tool
version bump needed).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 tools/structure/src/config/properties.json    |  2 +-
 tools/structure/src/constants.py              |  1 -
 tools/structure/src/main.py                   | 14 +-------------
 workers/executor/executors/legacy_executor.py | 14 ++++++++++++--
 4 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/tools/structure/src/config/properties.json b/tools/structure/src/config/properties.json
index a9d1a6029d..c8697e7307 100644
--- a/tools/structure/src/config/properties.json
+++ b/tools/structure/src/config/properties.json
@@ -2,7 +2,7 @@
   "schemaVersion": "0.0.1",
   "displayName": "Structure Tool",
   "functionName": "structure_tool",
-  "toolVersion": "0.0.102",
+  "toolVersion": "0.0.101",
   "description": "This is a template tool which can answer set of input prompts designed in the Prompt Studio",
   "input": {
     "description": "File that needs to be indexed and parsed for answers"
diff --git a/tools/structure/src/constants.py b/tools/structure/src/constants.py
index 294fb3015b..8da6a5701a 100644
--- a/tools/structure/src/constants.py
+++ b/tools/structure/src/constants.py
@@ -77,7 +77,6 @@ class SettingsKeys:
     TOOL = "tool"
     METRICS = "metrics"
     INDEXING = "indexing"
-    EXTRACTION = "extraction"
     EXECUTION_ID = "execution_id"
     IS_DIRECTORY_MODE = "is_directory_mode"
     LLM_PROFILE_ID = "llm_profile_id"
diff --git a/tools/structure/src/main.py b/tools/structure/src/main.py
index fa8ec39530..f68143a6c8 100644
--- a/tools/structure/src/main.py
+++ b/tools/structure/src/main.py
@@ -318,7 +318,6 @@ def run(
         )
 
         extracted_text = ""
-        extraction_metrics = {}
         usage_kwargs: dict[Any, Any] = dict()
         if skip_extraction_and_indexing:
             self.stream_log(
@@ -329,7 +328,6 @@ def run(
             usage_kwargs[UsageKwargs.RUN_ID] = self.file_execution_id
             usage_kwargs[UsageKwargs.FILE_NAME] = self.source_file_name
             usage_kwargs[UsageKwargs.EXECUTION_ID] = self.execution_id
-            extraction_start_time = datetime.datetime.now()
             extracted_text = STHelper.dynamic_extraction(
                 file_path=input_file,
                 enable_highlight=is_highlight_enabled,
@@ -340,13 +338,6 @@ def run(
                 tool=self,
                 execution_run_data_folder=str(execution_run_data_folder),
             )
-            extraction_metrics = {
-                SettingsKeys.EXTRACTION: {
-                    "time_taken(s)": STHelper.elapsed_time(
-                        start_time=extraction_start_time
-                    )
-                }
-            }
 
         index_metrics = {}
         if is_summarization_enabled:
@@ -467,10 +458,7 @@ def run(
                 "No text is extracted from the document to add to the metadata"
             )
         if merged_metrics := self._merge_metrics(
-            self._merge_metrics(
-                structured_output.get(SettingsKeys.METRICS, {}), index_metrics
-            ),
-            extraction_metrics,
+            structured_output.get(SettingsKeys.METRICS, {}), index_metrics
         ):
             structured_output[SettingsKeys.METRICS] = merged_metrics
         # Update GUI
diff --git a/workers/executor/executors/legacy_executor.py b/workers/executor/executors/legacy_executor.py
index eae4d05b2f..db89d4b98d 100644
--- a/workers/executor/executors/legacy_executor.py
+++ b/workers/executor/executors/legacy_executor.py
@@ -619,10 +619,12 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult:
         )
         step = 1
 
+        extraction_metrics: dict = {}
         try:
             # ---- Step 1: Extract ----
             if not skip_extraction:
                 step += 1
+                extraction_start = time.monotonic()
                 extract_ctx = ExecutionContext(
                     executor_name=context.executor_name,
                     operation=Operation.EXTRACT.value,
@@ -640,6 +642,9 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult:
                     return _failure(extract_result)
                 _absorb(extract_result)
                 extracted_text = extract_result.data.get(IKeys.EXTRACTED_TEXT, "")
+                extraction_metrics = {
+                    "extraction": {"time_taken(s)": time.monotonic() - extraction_start}
+                }
 
             # ---- Step 2: Summarize (if enabled) ----
             if is_summarization:
@@ -700,6 +705,7 @@ def _failure(child_result: ExecutionResult) -> ExecutionResult:
             source_file_name=source_file_name,
             extracted_text=extracted_text,
             index_metrics=index_metrics,
+            extraction_metrics=extraction_metrics,
         )
 
         output_map = structured_output.get(PSKeys.OUTPUT, {}) or {}
@@ -787,6 +793,7 @@ def _finalize_pipeline_result(
         source_file_name: str,
         extracted_text: str,
         index_metrics: dict,
+        extraction_metrics: dict | None = None,
     ) -> None:
         """Populate metadata/metrics in structured_output after pipeline completion."""
         if "metadata" not in structured_output:
@@ -794,10 +801,13 @@ def _finalize_pipeline_result(
         structured_output["metadata"]["file_name"] = source_file_name
         if extracted_text:
             structured_output["metadata"]["extracted_text"] = extracted_text
-        if index_metrics:
+        new_metrics = self._merge_pipeline_metrics(
+            index_metrics or {}, extraction_metrics or {}
+        )
+        if new_metrics:
             existing_metrics = structured_output.get("metrics", {})
             structured_output["metrics"] = self._merge_pipeline_metrics(
-                existing_metrics, index_metrics
+                existing_metrics, new_metrics
             )
 
     def _run_pipeline_summarize(