From 352e6a7dc5b7947c201b9bdba18f4ffcb75315b2 Mon Sep 17 00:00:00 2001 From: Chibi Vikram Date: Thu, 14 May 2026 15:09:35 -0700 Subject: [PATCH 1/2] fix(eval): trim legacy trajectory span history --- .../evaluators/legacy_trajectory_evaluator.py | 7 +-- .../test_legacy_trajectory_evaluator.py | 57 +++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) create mode 100644 packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py diff --git a/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py b/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py index 17b69d0d0..cff0e8788 100644 --- a/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py +++ b/packages/uipath/src/uipath/eval/evaluators/legacy_trajectory_evaluator.py @@ -12,13 +12,13 @@ from ..._utils.constants import COMMUNITY_agents_SUFFIX from .._execution_context import eval_set_run_id_context +from .._helpers.evaluators_helpers import trace_to_str from .._helpers.helpers import is_empty_value from ..models import EvaluationResult from ..models.models import ( AgentExecution, LLMResponse, NumericEvaluationResult, - TrajectoryEvaluationTrace, UiPathEvaluationError, UiPathEvaluationErrorCategory, ) @@ -140,10 +140,7 @@ def _create_evaluation_prompt( and agent_run_history and isinstance(agent_run_history[0], ReadableSpan) ): - trajectory_trace = TrajectoryEvaluationTrace.from_readable_spans( - agent_run_history - ) - agent_run_history = str(trajectory_trace.spans) + agent_run_history = trace_to_str(agent_run_history) else: agent_run_history = str(agent_run_history) diff --git a/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py new file mode 100644 index 000000000..f7121d800 --- /dev/null +++ b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py @@ -0,0 +1,57 @@ +import uuid + +from opentelemetry.sdk.trace import ReadableSpan + +from uipath.eval.evaluators import LegacyTrajectoryEvaluator +from uipath.eval.models.models import LegacyEvaluatorCategory, LegacyEvaluatorType + + +def _legacy_trajectory_evaluator() -> LegacyTrajectoryEvaluator: + return LegacyTrajectoryEvaluator( + id=str(uuid.uuid4()), + name="Legacy trajectory", + category=LegacyEvaluatorCategory.Trajectory, + type=LegacyEvaluatorType.Trajectory, + prompt="History:\n{{AgentRunHistory}}\nExpected:\n{{ExpectedAgentBehavior}}", + createdAt="2026-05-14T00:00:00Z", + updatedAt="2026-05-14T00:00:00Z", + ) + + +def test_legacy_trajectory_prompt_uses_compact_tool_history() -> None: + long_prompt = "SYSTEM_PROMPT_" + ("x" * 10_000) + spans = [ + ReadableSpan( + name="agent_llm_call", + start_time=0, + end_time=1, + attributes={ + "openinference.span.kind": "LLM", + "input.value": f'{{"messages": [{{"role": "system", "content": "{long_prompt}"}}]}}', + "output.value": '{"generations": []}', + }, + ), + ReadableSpan( + name="search_profiles", + start_time=1, + end_time=2, + attributes={ + "openinference.span.kind": "TOOL", + "tool.name": "search_profiles", + "input.value": '{"query": "mentor"}', + "output.value": '{"content": "found mentor profile"}', + "metadata": f'{{"agent_prompt": "{long_prompt}"}}', + }, + ), + ] + + prompt = _legacy_trajectory_evaluator()._create_evaluation_prompt( + expected_agent_behavior="The agent should search matching profiles.", + agent_run_history=spans, + ) + + assert "SYSTEM_PROMPT_" not in prompt + assert "Tool: search_profiles" in prompt + assert '{"query": "mentor"}' in prompt + assert "found mentor profile" in prompt + assert "agent_llm_call" not in prompt From 16a8491f8803eccb4647c314ba14403a9694f1ab Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Mon, 18 May 2026 23:17:05 -0700 Subject: [PATCH 2/2] fix(eval): resolve PR lint and version checks Adds explicit LegacyTrajectoryEvaluator test type metadata for mypy and bumps uipath to 2.10.66 for version uniqueness. --- packages/uipath/pyproject.toml | 2 +- .../tests/evaluators/test_legacy_trajectory_evaluator.py | 7 +++++++ packages/uipath/uv.lock | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/packages/uipath/pyproject.toml b/packages/uipath/pyproject.toml index aba6ae877..9ba94e18a 100644 --- a/packages/uipath/pyproject.toml +++ b/packages/uipath/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.10.63" +version = "2.10.66" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py index f7121d800..687fce08d 100644 --- a/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py +++ b/packages/uipath/tests/evaluators/test_legacy_trajectory_evaluator.py @@ -3,6 +3,10 @@ from opentelemetry.sdk.trace import ReadableSpan from uipath.eval.evaluators import LegacyTrajectoryEvaluator +from uipath.eval.evaluators.base_legacy_evaluator import LegacyEvaluationCriteria +from uipath.eval.evaluators.legacy_trajectory_evaluator import ( + LegacyTrajectoryEvaluatorConfig, +) from uipath.eval.models.models import LegacyEvaluatorCategory, LegacyEvaluatorType @@ -10,6 +14,9 @@ def _legacy_trajectory_evaluator() -> LegacyTrajectoryEvaluator: return LegacyTrajectoryEvaluator( id=str(uuid.uuid4()), name="Legacy trajectory", + config_type=LegacyTrajectoryEvaluatorConfig, + evaluation_criteria_type=LegacyEvaluationCriteria, + justification_type=str, category=LegacyEvaluatorCategory.Trajectory, type=LegacyEvaluatorType.Trajectory, prompt="History:\n{{AgentRunHistory}}\nExpected:\n{{ExpectedAgentBehavior}}", diff --git a/packages/uipath/uv.lock b/packages/uipath/uv.lock index c51486b96..3dcc3bd97 100644 --- a/packages/uipath/uv.lock +++ b/packages/uipath/uv.lock @@ -2543,7 +2543,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.10.63" +version = "2.10.66" source = { editable = "." } dependencies = [ { name = "applicationinsights" },