From 8a5579429e5c69ee6692b9899184d9f53016187a Mon Sep 17 00:00:00 2001
From: Boris Li <65254053+BorisQuanLi@users.noreply.github.com>
Date: Sat, 6 Dec 2025 18:59:51 -0500
Subject: [PATCH 1/4] feat(core): Implement foundational Pydantic data models
 and validation

This commit introduces new source and test files (`src/core/schemas.py` and `tests/core/test_schema.py`) to establish core data contracts (e.g., ResearchGoal, AnalysisStep, FinancialAnalysisResult).

This is the necessary starting step for the "pure Pydantic implementation for multi-agent systems," which enhances predictability and strengthens the system's commercial reliability. Corresponding unit tests are included to verify input and output validation across all new schemas.
---
 src/core/schemas.py       |  53 ++++++++++++++
 tests/core/test_schema.py | 143 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 src/core/schemas.py
 create mode 100644 tests/core/test_schema.py

diff --git a/src/core/schemas.py b/src/core/schemas.py
new file mode 100644
index 0000000..fd52edd
--- /dev/null
+++ b/src/core/schemas.py
@@ -0,0 +1,53 @@
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel, Field, HttpUrl
+
+# --- Core Data Models for the Financial Research Agent System ---
+# This module implements the "pure Pydantic implementation for multi-agent systems"
+# required to standardize data flow and enhance commercial reliability.
+
+class ResearchGoal(BaseModel):
+    """
+    Defines the structured input request received from the Gradio UI or the MCP client.
+    This serves as the initial contract for the entire agent workflow; only `query` is required.
+    """
+    query: str = Field(..., description="The high-level financial research question or task.")
+    investment_target: Optional[str] = Field(None, description="Specific company, sector, or asset under investigation.")
+    time_horizon: str = Field("Next 12 months", description="The required time frame for the analysis (e.g., '6 months', 'long-term').")
+    required_format: str = Field("Comprehensive Report", description="The desired output format (e.g., 'Summary', 'Detailed Analysis', 'Presentation Slides').")
+
+class ToolUsage(BaseModel):
+    """
+    Records one tool call made during a research step (e.g., a data acquisition tool such as Akshare or Baostock); all three fields are required.
+    """
+    tool_name: str = Field(..., description="The name of the external tool or function used.")
+    arguments: Dict[str, Any] = Field(..., description="The arguments passed to the tool.")
+    result_summary: str = Field(..., description="A summary of the information retrieved or action taken by the tool.")
+
+class AnalysisStep(BaseModel):
+    """
+    Represents an intermediate step in the multi-agent research process (the iterative search-and-judge loops); `tools_used` defaults to an empty list.
+    """
+    agent_id: str = Field(..., description="Identifier of the agent responsible for this step.")
+    action: str = Field(..., description="Description of the agent's action (e.g., 'Searching market data', 'Synthesizing conflicting reports').")
+    tools_used: List[ToolUsage] = Field(default_factory=list, description="List of specific tool calls made during this step.")
+    reasoning: str = Field(..., description="The rationale for the agent's action.")
+
+class FinancialAnalysisResult(BaseModel):
+    """
+    Defines the final, structured output (the monetizable product) delivered by the agent system.
+    This structure ensures the output is professional and dependable for enterprise users; `confidence_score` is constrained to [0.0, 1.0].
+    """
+    summary: str = Field(..., description="A concise executive summary of the findings.")
+    key_recommendation: str = Field(..., description="The primary investment or business recommendation based on the research.")
+    analysis_steps: List[AnalysisStep] = Field(default_factory=list, description="A verifiable trace of all steps taken by the agents.")
+    data_sources: List[str] = Field(default_factory=list, description="List of reliable sources used, including links or references.")
+    confidence_score: float = Field(..., ge=0.0, le=1.0, description="A quantitative score (0.0 to 1.0) reflecting the system's confidence in the recommendation.")
+
+class MCPToolDefinition(BaseModel):
+    """
+    Schema for defining a tool that is exposed via the Model Context Protocol (MCP) server endpoint.
+    This facilitates integration with external clients like Claude Desktop; `endpoint_url` is validated as an `HttpUrl`.
+    """
+    name: str = Field(..., description="The name of the tool exposed via MCP.")
+    description: str = Field(..., description="A brief description of what the tool does.")
+    endpoint_url: HttpUrl = Field(..., description="The API endpoint URL for the tool.")
diff --git a/tests/core/test_schema.py b/tests/core/test_schema.py
new file mode 100644
index 0000000..222feef
--- /dev/null
+++ b/tests/core/test_schema.py
@@ -0,0 +1,143 @@
+import unittest
+import os
+import sys
+from pydantic import ValidationError
+
+# Add the repository root (two levels above tests/core) to sys.path so the `src` package is importable
+# Note: This is necessary because src/core is a new folder structure
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
+
+# Import the newly created Pydantic models
+# These schemas are foundational for the 'pure Pydantic implementation' for multi-agent systems
+from src.core.schemas import (
+    ResearchGoal,
+    ToolUsage,
+    AnalysisStep,
+    FinancialAnalysisResult,
+    MCPToolDefinition
+)
+
+class TestAgentDataSchemas(unittest.TestCase):
+    """
+    Tests the integrity of the core Pydantic data models used for agent communication
+    and output validation, ensuring data reliability for enterprise use.
+
+    Success cases compare the validated model against the input payload; failure cases
+    inspect the structured `ValidationError.errors()` entries ('loc' and 'type') rather
+    than substrings of the rendered error message.
+    """
+
+    def _assert_single_error(self, exc, field, error_type):
+        """Assert that `exc` reports exactly one error, of `error_type`, located at `field`."""
+        found = [(err["loc"], err["type"]) for err in exc.errors()]
+        self.assertEqual(found, [((field,), error_type)])
+
+    def test_research_goal_successful_validation(self):
+        """Should validate a ResearchGoal with all fields provided."""
+        goal_data = {
+            "query": "Analyze Qwen's market position for the next quarter.",
+            "investment_target": "Qwen-LLM",
+            "time_horizon": "Q3 2025",
+            "required_format": "Detailed Analysis"
+        }
+        # An unexpected ValidationError propagates and fails the test with its traceback.
+        self.assertEqual(ResearchGoal(**goal_data).model_dump(), goal_data)
+
+    def test_research_goal_missing_required_field(self):
+        """Should fail validation if the 'query' field is missing."""
+        invalid_data = {
+            "investment_target": "Alibaba stock",
+            "time_horizon": "6 months"
+        }
+        with self.assertRaises(ValidationError) as cm:
+            ResearchGoal(**invalid_data)
+        self._assert_single_error(cm.exception, "query", "missing")
+
+    def test_research_goal_applies_defaults(self):
+        """Should fill in the declared defaults when only 'query' is supplied."""
+        goal = ResearchGoal(query="Assess sector outlook.")
+        self.assertIsNone(goal.investment_target)
+        self.assertEqual(goal.time_horizon, "Next 12 months")
+        self.assertEqual(goal.required_format, "Comprehensive Report")
+
+    def test_tool_usage_successful_validation(self):
+        """Should validate a ToolUsage instance."""
+        tool_data = {
+            "tool_name": "AkshareDataGetter",
+            "arguments": {"stock_code": "600000.SH", "period": "weekly"},
+            "result_summary": "Retrieved 52 weeks of trading data."
+        }
+        self.assertEqual(ToolUsage(**tool_data).model_dump(), tool_data)
+
+    def test_analysis_step_with_nested_tool_usage(self):
+        """Should validate an AnalysisStep that includes ToolUsage records."""
+        tool_data = ToolUsage(
+            tool_name="BaostockAPI",
+            arguments={"query": "financial reports"},
+            result_summary="Acquired 2024 earnings report."
+        ).model_dump() # Use model_dump() for Pydantic V2
+
+        step_data = {
+            "agent_id": "DataGatherer-A",
+            "action": "Acquiring Q4 2024 reports.",
+            "tools_used": [tool_data],
+            "reasoning": "Need current financials for valuation model."
+        }
+
+        step = AnalysisStep(**step_data)
+        # The nested dict must have been coerced into a ToolUsage instance.
+        self.assertIsInstance(step.tools_used[0], ToolUsage)
+        self.assertEqual(step.model_dump(), step_data)
+
+    def test_financial_analysis_result_successful_validation(self):
+        """Should validate the final report structure, including float confidence score."""
+        valid_result = {
+            "summary": "Qwen LLM market share is growing rapidly in Asia.",
+            "key_recommendation": "Strong Buy signal.",
+            "analysis_steps": [], # Optional list, can be empty
+            "data_sources": ["ModelScope.cn", "Official Press Release"],
+            "confidence_score": 0.85
+        }
+        result = FinancialAnalysisResult(**valid_result)
+        self.assertIsInstance(result.confidence_score, float)
+        self.assertEqual(result.model_dump(), valid_result)
+
+    def test_financial_analysis_result_invalid_confidence_score_type(self):
+        """Should fail if confidence_score is not a valid number (e.g., a string)."""
+        invalid_result = {
+            "summary": "Test",
+            "key_recommendation": "Hold",
+            "analysis_steps": [],
+            "data_sources": [],
+            "confidence_score": "high" # Should be float
+        }
+        with self.assertRaises(ValidationError) as cm:
+            FinancialAnalysisResult(**invalid_result)
+        self._assert_single_error(cm.exception, "confidence_score", "float_parsing")
+
+    def test_mcp_tool_definition_successful_validation(self):
+        """Should validate the MCP definition, especially the HttpUrl field."""
+        # The application exposes an MCP server endpoint
+        mcp_data = {
+            "name": "TheDeterminatorsSearch",
+            "description": "Performs deep financial research.",
+            "endpoint_url": "http://localhost:7860/gradio_api/mcp/"
+        }
+        tool = MCPToolDefinition(**mcp_data)
+        self.assertEqual(tool.name, mcp_data["name"])
+        self.assertEqual(str(tool.endpoint_url), mcp_data["endpoint_url"])
+
+    def test_mcp_tool_definition_invalid_url(self):
+        """Should fail if the endpoint_url is not a valid URL format."""
+        invalid_mcp_data = {
+            "name": "InvalidTool",
+            "description": "Test",
+            "endpoint_url": "not a url"
+        }
+        with self.assertRaises(ValidationError) as cm:
+            MCPToolDefinition(**invalid_mcp_data)
+        self._assert_single_error(cm.exception, "endpoint_url", "url_parsing")
+
+if __name__ == '__main__':
+    unittest.main()
+

From 97c02194281db245cfc36fa20ae908c170071945 Mon Sep 17 00:00:00 2001
From: Boris Li <65254053+BorisQuanLi@users.noreply.github.com>
Date: Sat, 6 Dec 2025 19:42:13 -0500
Subject: [PATCH 2/4] fix(tests): Make agent unit tests environment-independent

Patches the `pydantic_ai.Agent` constructor in the unit tests for `InputParserAgent`, `LongWriterAgent`, `ProofreaderAgent`, and `WriterAgent`.

These tests were failing in environments without configured API keys because the `Agent` class constructor requires a real model configuration. By mocking the `Agent` class during initialization, the tests can now run successfully without making external calls, improving test suite robustness and reliability in CI/CD environments.
---
 tests/unit/agents/test_input_parser.py | 41 ++++++++++++++++++++------
 tests/unit/agents/test_long_writer.py  | 19 ++++++++++--
 tests/unit/agents/test_proofreader.py  | 19 ++++++++++--
 tests/unit/agents/test_writer.py       | 26 +++++++++++++---
 4 files changed, 86 insertions(+), 19 deletions(-)

diff --git a/tests/unit/agents/test_input_parser.py b/tests/unit/agents/test_input_parser.py
index 9532402..c7ea45d 100644
--- a/tests/unit/agents/test_input_parser.py
+++ b/tests/unit/agents/test_input_parser.py
@@ -75,21 +75,34 @@ def input_parser_agent(mock_model: MagicMock) -> InputParserAgent:
 class TestInputParserAgentInit:
     """Test InputParserAgent initialization."""
 
-    def test_input_parser_agent_init_with_model(self, mock_model: MagicMock) -> None:
+    @patch("src.agents.input_parser.Agent")
+    def test_input_parser_agent_init_with_model(self, mock_agent_class: MagicMock, mock_model: MagicMock) -> None:
         """Test InputParserAgent initialization with provided model."""
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = InputParserAgent(model=mock_model)
+        
         assert agent.model == mock_model
-        assert agent.agent is not None
+        assert agent.agent == mock_agent_instance
+        mock_agent_class.assert_called_once()
 
+    @patch("src.agents.input_parser.Agent")
     @patch("src.agents.input_parser.get_model")
     def test_input_parser_agent_init_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test InputParserAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+        
         agent = InputParserAgent()
+
         assert agent.model == mock_model
+        assert agent.agent == mock_agent_instance
         mock_get_model.assert_called_once()
+        mock_agent_class.assert_called_once()
 
     def test_input_parser_agent_has_correct_system_prompt(
         self, input_parser_agent: InputParserAgent
@@ -254,26 +267,36 @@ async def test_parse_heuristic_iterative_mode(
 class TestCreateInputParserAgent:
     """Test create_input_parser_agent() factory function."""
 
+    @patch("src.agents.input_parser.InputParserAgent")
     @patch("src.agents.input_parser.get_model")
     def test_create_input_parser_agent_with_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_input_parser_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test factory function with provided model."""
+        mock_agent_instance = MagicMock()
+        mock_input_parser_agent_class.return_value = mock_agent_instance
+        
         agent = create_input_parser_agent(model=mock_model)
-        assert isinstance(agent, InputParserAgent)
-        assert agent.model == mock_model
+        
+        assert agent == mock_agent_instance
+        mock_input_parser_agent_class.assert_called_once_with(model=mock_model)
         mock_get_model.assert_not_called()
 
+    @patch("src.agents.input_parser.InputParserAgent")
     @patch("src.agents.input_parser.get_model")
     def test_create_input_parser_agent_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_input_parser_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test factory function without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_agent_instance = MagicMock()
+        mock_input_parser_agent_class.return_value = mock_agent_instance
+
         agent = create_input_parser_agent()
-        assert isinstance(agent, InputParserAgent)
-        assert agent.model == mock_model
+
+        assert agent == mock_agent_instance
         mock_get_model.assert_called_once()
+        mock_input_parser_agent_class.assert_called_once_with(model=mock_model)
 
     @patch("src.agents.input_parser.get_model")
     def test_create_input_parser_agent_handles_error(self, mock_get_model: MagicMock) -> None:
diff --git a/tests/unit/agents/test_long_writer.py b/tests/unit/agents/test_long_writer.py
index b99b87d..fd98600 100644
--- a/tests/unit/agents/test_long_writer.py
+++ b/tests/unit/agents/test_long_writer.py
@@ -62,21 +62,34 @@ def sample_report_draft() -> ReportDraft:
 class TestLongWriterAgentInit:
     """Test LongWriterAgent initialization."""
 
-    def test_long_writer_agent_init_with_model(self, mock_model: MagicMock) -> None:
+    @patch("src.agents.long_writer.Agent")
+    def test_long_writer_agent_init_with_model(self, mock_agent_class: MagicMock, mock_model: MagicMock) -> None:
         """Test LongWriterAgent initialization with provided model."""
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+        
         agent = LongWriterAgent(model=mock_model)
+
         assert agent.model == mock_model
-        assert agent.agent is not None
+        assert agent.agent == mock_agent_instance
+        mock_agent_class.assert_called_once()
 
+    @patch("src.agents.long_writer.Agent")
     @patch("src.agents.long_writer.get_model")
     def test_long_writer_agent_init_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test LongWriterAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = LongWriterAgent()
+
         assert agent.model == mock_model
+        assert agent.agent == mock_agent_instance
         mock_get_model.assert_called_once()
+        mock_agent_class.assert_called_once()
 
     def test_long_writer_agent_has_structured_output(
         self, long_writer_agent: LongWriterAgent
diff --git a/tests/unit/agents/test_proofreader.py b/tests/unit/agents/test_proofreader.py
index e16ad72..2969f21 100644
--- a/tests/unit/agents/test_proofreader.py
+++ b/tests/unit/agents/test_proofreader.py
@@ -62,21 +62,34 @@ def sample_report_draft() -> ReportDraft:
 class TestProofreaderAgentInit:
     """Test ProofreaderAgent initialization."""
 
-    def test_proofreader_agent_init_with_model(self, mock_model: MagicMock) -> None:
+    @patch("src.agents.proofreader.Agent")
+    def test_proofreader_agent_init_with_model(self, mock_agent_class: MagicMock, mock_model: MagicMock) -> None:
         """Test ProofreaderAgent initialization with provided model."""
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = ProofreaderAgent(model=mock_model)
+        
         assert agent.model == mock_model
-        assert agent.agent is not None
+        assert agent.agent == mock_agent_instance
+        mock_agent_class.assert_called_once()
 
+    @patch("src.agents.proofreader.Agent")
     @patch("src.agents.proofreader.get_model")
     def test_proofreader_agent_init_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test ProofreaderAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = ProofreaderAgent()
+
         assert agent.model == mock_model
+        assert agent.agent == mock_agent_instance
         mock_get_model.assert_called_once()
+        mock_agent_class.assert_called_once()
 
     def test_proofreader_agent_has_correct_system_prompt(
         self, proofreader_agent: ProofreaderAgent
diff --git a/tests/unit/agents/test_writer.py b/tests/unit/agents/test_writer.py
index 766420d..f149d22 100644
--- a/tests/unit/agents/test_writer.py
+++ b/tests/unit/agents/test_writer.py
@@ -29,27 +29,45 @@ def mock_agent_result() -> AgentRunResult[Any]:
 @pytest.fixture
 def writer_agent(mock_model: MagicMock) -> WriterAgent:
     """Create a WriterAgent instance with mocked model."""
-    return WriterAgent(model=mock_model)
+    with patch("src.agents.writer.Agent") as mock_agent_class:
+        mock_agent_instance = MagicMock()
+        # The .run method needs to be an async mock for the tests
+        mock_agent_instance.run = AsyncMock()
+        mock_agent_class.return_value = mock_agent_instance
+        yield WriterAgent(model=mock_model)
 
 
 class TestWriterAgentInit:
     """Test WriterAgent initialization."""
 
-    def test_writer_agent_init_with_model(self, mock_model: MagicMock) -> None:
+    @patch("src.agents.writer.Agent")
+    def test_writer_agent_init_with_model(self, mock_agent_class: MagicMock, mock_model: MagicMock) -> None:
         """Test WriterAgent initialization with provided model."""
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = WriterAgent(model=mock_model)
+        
         assert agent.model == mock_model
-        assert agent.agent is not None
+        assert agent.agent == mock_agent_instance
+        mock_agent_class.assert_called_once()
 
+    @patch("src.agents.writer.Agent")
     @patch("src.agents.writer.get_model")
     def test_writer_agent_init_without_model(
-        self, mock_get_model: MagicMock, mock_model: MagicMock
+        self, mock_get_model: MagicMock, mock_agent_class: MagicMock, mock_model: MagicMock
     ) -> None:
         """Test WriterAgent initialization without model (uses default)."""
         mock_get_model.return_value = mock_model
+        mock_agent_instance = MagicMock()
+        mock_agent_class.return_value = mock_agent_instance
+
         agent = WriterAgent()
+
         assert agent.model == mock_model
+        assert agent.agent == mock_agent_instance
         mock_get_model.assert_called_once()
+        mock_agent_class.assert_called_once()
 
     def test_writer_agent_has_correct_system_prompt(self, writer_agent: WriterAgent) -> None:
         """Test that WriterAgent has correct system prompt."""

From b7983cda6cd7862e2f58960504883621105c51a4 Mon Sep 17 00:00:00 2001
From: Boris Li <65254053+BorisQuanLi@users.noreply.github.com>
Date: Sat, 6 Dec 2025 19:54:16 -0500
Subject: [PATCH 3/4] fix(tests): Mock agent creation in research flow tests

Mocks the creation of all agents instantiated within `IterativeResearchFlow` and `DeepResearchFlow` during unit and integration tests.

These tests were failing in environments without API keys because the research flow classes were initializing agents that require credentials. This change ensures that all agent dependencies are properly mocked, allowing the flow logic to be tested in isolation.
---
 tests/integration/test_deep_research.py       | 219 +++++++++++++++++-
 .../orchestrator/test_graph_orchestrator.py   |  20 +-
 tests/unit/orchestrator/test_research_flow.py |   4 +
 3 files changed, 229 insertions(+), 14 deletions(-)

diff --git a/tests/integration/test_deep_research.py b/tests/integration/test_deep_research.py
index b7bf173..0ac239b 100644
--- a/tests/integration/test_deep_research.py
+++ b/tests/integration/test_deep_research.py
@@ -3,7 +3,7 @@
 Tests the complete deep research pattern: plan → parallel loops → synthesis.
 """
 
-from unittest.mock import AsyncMock, patch
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
@@ -12,13 +12,56 @@
 from src.utils.models import ReportPlan, ReportPlanSection
 
 
+def _create_mock_planner_agent():
+    """Return a MagicMock planner agent whose `run` attribute is an AsyncMock."""
+    # Keyword arguments to MagicMock are set as attributes on the new mock.
+    planner = MagicMock(run=AsyncMock())
+    return planner
+
+
+def _create_mock_long_writer_agent():
+    """Return a MagicMock long writer agent whose `write_report` attribute is an AsyncMock."""
+    # Keyword arguments to MagicMock are set as attributes on the new mock.
+    long_writer = MagicMock(write_report=AsyncMock())
+    return long_writer
+
+
+def _create_mock_proofreader_agent():
+    """Return a MagicMock proofreader agent whose `proofread` attribute is an AsyncMock."""
+    # Keyword arguments to MagicMock are set as attributes on the new mock.
+    proofreader = MagicMock(proofread=AsyncMock())
+    return proofreader
+
+
+def _create_mock_judge_handler():
+    """Return a MagicMock judge handler whose async `assess` yields a mock with `is_sufficient=True`."""
+    verdict = MagicMock(is_sufficient=True)
+    judge = MagicMock(assess=AsyncMock(return_value=verdict))
+    return judge
+
+
 @pytest.mark.integration
 class TestDeepResearchFlow:
     """Integration tests for DeepResearchFlow."""
 
     @pytest.mark.asyncio
-    async def test_deep_research_creates_plan(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    async def test_deep_research_creates_plan(
+        self,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that deep research creates a report plan."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+
         # Initialize workflow state
         init_workflow_state()
 
@@ -66,8 +109,36 @@ async def mock_iterative_run(query: str, **kwargs: dict) -> str:
         assert plan.report_outline[0].title == "Section 1"
 
     @pytest.mark.asyncio
-    async def test_deep_research_parallel_loops_state_synchronization(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    @patch("src.orchestrator.research_flow.create_knowledge_gap_agent")
+    @patch("src.orchestrator.research_flow.create_tool_selector_agent")
+    @patch("src.orchestrator.research_flow.create_thinking_agent")
+    @patch("src.orchestrator.research_flow.create_writer_agent")
+    async def test_deep_research_parallel_loops_state_synchronization(
+        self,
+        mock_writer_factory,
+        mock_thinking_factory,
+        mock_tool_selector_factory,
+        mock_knowledge_gap_factory,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that parallel loops properly synchronize state."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+        # Mocks for agents created by IterativeResearchFlow
+        mock_knowledge_gap_factory.return_value = AsyncMock()
+        mock_tool_selector_factory.return_value = AsyncMock()
+        mock_thinking_factory.return_value = AsyncMock()
+        mock_writer_factory.return_value = AsyncMock()
+
         # Initialize workflow state
         state = init_workflow_state()
 
@@ -120,16 +191,33 @@ async def mock_iterative_run(query: str, **kwargs: dict) -> str:
 
         # Verify parallel execution
         assert len(section_drafts) == 2
-        assert "Question 1" in section_drafts[0]
-        assert "Question 2" in section_drafts[1]
+        # Order is not guaranteed in parallel execution, check for presence of both drafts
+        all_drafts = "".join(section_drafts)
+        assert "Question 1" in all_drafts
+        assert "Question 2" in all_drafts
 
         # Verify state has evidence from both sections
         # Note: In real execution, evidence would be synced via WorkflowManager
         # This test verifies the structure works
 
     @pytest.mark.asyncio
-    async def test_deep_research_synthesizes_final_report(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    async def test_deep_research_synthesizes_final_report(
+        self,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that deep research synthesizes final report from section drafts."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+
         flow = DeepResearchFlow(
             max_iterations=1,
             max_time_minutes=2,
@@ -177,8 +265,36 @@ async def test_deep_research_synthesizes_final_report(self) -> None:
         assert len(call_args.kwargs["report_draft"].sections) == 2
 
     @pytest.mark.asyncio
-    async def test_deep_research_agent_chains_full_flow(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    @patch("src.orchestrator.research_flow.create_knowledge_gap_agent")
+    @patch("src.orchestrator.research_flow.create_tool_selector_agent")
+    @patch("src.orchestrator.research_flow.create_thinking_agent")
+    @patch("src.orchestrator.research_flow.create_writer_agent")
+    async def test_deep_research_agent_chains_full_flow(
+        self,
+        mock_writer_factory,
+        mock_thinking_factory,
+        mock_tool_selector_factory,
+        mock_knowledge_gap_factory,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test full deep research flow with agent chains (mocked)."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+        # Mocks for agents created by IterativeResearchFlow
+        mock_knowledge_gap_factory.return_value = AsyncMock()
+        mock_tool_selector_factory.return_value = AsyncMock()
+        mock_thinking_factory.return_value = AsyncMock()
+        mock_writer_factory.return_value = AsyncMock()
+
         # Initialize workflow state
         init_workflow_state()
 
@@ -224,8 +340,36 @@ async def mock_iterative_run(query: str, **kwargs: dict) -> str:
         flow.long_writer_agent.write_report.assert_called_once()
 
     @pytest.mark.asyncio
-    async def test_deep_research_handles_multiple_sections(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    @patch("src.orchestrator.research_flow.create_knowledge_gap_agent")
+    @patch("src.orchestrator.research_flow.create_tool_selector_agent")
+    @patch("src.orchestrator.research_flow.create_thinking_agent")
+    @patch("src.orchestrator.research_flow.create_writer_agent")
+    async def test_deep_research_handles_multiple_sections(
+        self,
+        mock_writer_factory,
+        mock_thinking_factory,
+        mock_tool_selector_factory,
+        mock_knowledge_gap_factory,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that deep research handles multiple sections correctly."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+        # Mocks for agents created by IterativeResearchFlow
+        mock_knowledge_gap_factory.return_value = AsyncMock()
+        mock_tool_selector_factory.return_value = AsyncMock()
+        mock_thinking_factory.return_value = AsyncMock()
+        mock_writer_factory.return_value = AsyncMock()
+
         flow = DeepResearchFlow(
             max_iterations=1,
             max_time_minutes=2,
@@ -263,8 +407,35 @@ async def mock_iterative_run(query: str, **kwargs: dict) -> str:
             assert f"Section {i}" in draft or f"section {i}" in draft.lower()
 
     @pytest.mark.asyncio
-    async def test_deep_research_workflow_manager_integration(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    @patch("src.orchestrator.research_flow.create_knowledge_gap_agent")
+    @patch("src.orchestrator.research_flow.create_tool_selector_agent")
+    @patch("src.orchestrator.research_flow.create_thinking_agent")
+    @patch("src.orchestrator.research_flow.create_writer_agent")
+    async def test_deep_research_workflow_manager_integration(
+        self,
+        mock_writer_factory,
+        mock_thinking_factory,
+        mock_tool_selector_factory,
+        mock_knowledge_gap_factory,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that deep research properly uses WorkflowManager."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+        # Mocks for agents created by IterativeResearchFlow
+        mock_knowledge_gap_factory.return_value = AsyncMock()
+        mock_tool_selector_factory.return_value = AsyncMock()
+        mock_thinking_factory.return_value = AsyncMock()
+        mock_writer_factory.return_value = AsyncMock()
 
         # Initialize workflow state
         init_workflow_state()
@@ -308,8 +479,36 @@ async def mock_iterative_run(query: str, **kwargs: dict) -> str:
         assert all(isinstance(draft, str) for draft in section_drafts)
 
     @pytest.mark.asyncio
-    async def test_deep_research_state_initialization(self) -> None:
+    @patch("src.orchestrator.research_flow.create_planner_agent")
+    @patch("src.orchestrator.research_flow.create_long_writer_agent")
+    @patch("src.orchestrator.research_flow.create_proofreader_agent")
+    @patch("src.orchestrator.research_flow.create_judge_handler")
+    @patch("src.orchestrator.research_flow.create_knowledge_gap_agent")
+    @patch("src.orchestrator.research_flow.create_tool_selector_agent")
+    @patch("src.orchestrator.research_flow.create_thinking_agent")
+    @patch("src.orchestrator.research_flow.create_writer_agent")
+    async def test_deep_research_state_initialization(
+        self,
+        mock_writer_factory,
+        mock_thinking_factory,
+        mock_tool_selector_factory,
+        mock_knowledge_gap_factory,
+        mock_judge_factory,
+        mock_proofreader_factory,
+        mock_long_writer_factory,
+        mock_planner_factory,
+    ) -> None:
         """Test that deep research properly initializes workflow state."""
+        mock_planner_factory.return_value = _create_mock_planner_agent()
+        mock_long_writer_factory.return_value = _create_mock_long_writer_agent()
+        mock_proofreader_factory.return_value = _create_mock_proofreader_agent()
+        mock_judge_factory.return_value = _create_mock_judge_handler()
+        # Mocks for agents created by IterativeResearchFlow
+        mock_knowledge_gap_factory.return_value = AsyncMock()
+        mock_tool_selector_factory.return_value = AsyncMock()
+        mock_thinking_factory.return_value = AsyncMock()
+        mock_writer_factory.return_value = AsyncMock()
+
         flow = DeepResearchFlow(
             max_iterations=1,
             max_time_minutes=2,
diff --git a/tests/unit/orchestrator/test_graph_orchestrator.py b/tests/unit/orchestrator/test_graph_orchestrator.py
index 4136663..d3dae8b 100644
--- a/tests/unit/orchestrator/test_graph_orchestrator.py
+++ b/tests/unit/orchestrator/test_graph_orchestrator.py
@@ -209,10 +209,22 @@ async def test_run_handles_errors(self):
         from src.orchestrator.research_flow import IterativeResearchFlow
 
         # Create flow and patch its run method to raise exception
-        original_flow = IterativeResearchFlow(
-            max_iterations=2,
-            max_time_minutes=5,
-        )
+        with (
+            patch("src.orchestrator.research_flow.create_knowledge_gap_agent") as mock_kg,
+            patch("src.orchestrator.research_flow.create_tool_selector_agent") as mock_ts,
+            patch("src.orchestrator.research_flow.create_thinking_agent") as mock_thinking,
+            patch("src.orchestrator.research_flow.create_writer_agent") as mock_writer,
+            patch("src.orchestrator.research_flow.create_judge_handler") as mock_judge
+        ):
+            mock_kg.return_value = AsyncMock()
+            mock_ts.return_value = AsyncMock()
+            mock_thinking.return_value = AsyncMock()
+            mock_writer.return_value = AsyncMock()
+            mock_judge.return_value = AsyncMock()
+            original_flow = IterativeResearchFlow(
+                max_iterations=2,
+                max_time_minutes=5,
+            )
         orchestrator._iterative_flow = original_flow
 
         with patch.object(original_flow, "run", side_effect=Exception("Test error")):
diff --git a/tests/unit/orchestrator/test_research_flow.py b/tests/unit/orchestrator/test_research_flow.py
index 2691ec1..78a30df 100644
--- a/tests/unit/orchestrator/test_research_flow.py
+++ b/tests/unit/orchestrator/test_research_flow.py
@@ -37,6 +37,7 @@ def flow(self, mock_agents):
             patch("src.orchestrator.research_flow.create_thinking_agent") as mock_thinking,
             patch("src.orchestrator.research_flow.create_writer_agent") as mock_writer,
             patch("src.orchestrator.research_flow.execute_tool_tasks") as mock_execute,
+            patch("src.orchestrator.research_flow.create_judge_handler") as mock_judge,
         ):
             mock_kg.return_value = mock_agents["knowledge_gap"]
             mock_ts.return_value = mock_agents["tool_selector"]
@@ -45,6 +46,7 @@ def flow(self, mock_agents):
             mock_execute.return_value = {
                 "task_1": ToolAgentOutput(output="Finding 1", sources=["url1"]),
             }
+            mock_judge.return_value = AsyncMock()
 
             yield IterativeResearchFlow(max_iterations=2, max_time_minutes=5)
 
@@ -203,10 +205,12 @@ def flow(self, mock_agents):
             patch("src.orchestrator.research_flow.create_planner_agent") as mock_planner,
             patch("src.orchestrator.research_flow.create_long_writer_agent") as mock_long_writer,
             patch("src.orchestrator.research_flow.create_proofreader_agent") as mock_proofreader,
+            patch("src.orchestrator.research_flow.create_judge_handler") as mock_judge_handler,
         ):
             mock_planner.return_value = mock_agents["planner"]
             mock_long_writer.return_value = mock_agents["long_writer"]
             mock_proofreader.return_value = mock_agents["proofreader"]
+            mock_judge_handler.return_value = AsyncMock()
 
             yield DeepResearchFlow(max_iterations=2, max_time_minutes=5)
 

From 96c11b6b88366b6f2e320e888b6e01f0e73105e9 Mon Sep 17 00:00:00 2001
From: Boris Li <65254053+BorisQuanLi@users.noreply.github.com>
Date: Sat, 6 Dec 2025 19:55:07 -0500
Subject: [PATCH 4/4] fix(tests): Resolve remaining unit test configuration
 errors

This commit addresses two final unit test failures:

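1.  In `tests/unit/agent_factory/test_judges_factory.py`, the tests for `get_model` are fixed by mocking the `settings` attributes `hf_token`, `huggingface_api_key`, `has_openai_key`, and `has_anthropic_key` instead of assigning `llm_provider`, so that the mocked `settings` object reflects the provider selection priority logic.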

2.  In `tests/unit/test_app_smoke.py`, the Gradio app smoke test is fixed by patching `gradio.LoginButton`. This prevents the component from making a call to the HuggingFace Hub on initialization, which was causing an invalid token error in the test environment.
---
 tests/unit/agent_factory/test_judges_factory.py | 9 +++++++--
 tests/unit/test_app_smoke.py                    | 5 ++++-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/tests/unit/agent_factory/test_judges_factory.py b/tests/unit/agent_factory/test_judges_factory.py
index 3cc7e33..e150df9 100644
--- a/tests/unit/agent_factory/test_judges_factory.py
+++ b/tests/unit/agent_factory/test_judges_factory.py
@@ -23,7 +23,9 @@ def mock_settings():
 
 def test_get_model_openai(mock_settings):
     """Test that OpenAI model is returned when provider is openai."""
-    mock_settings.llm_provider = "openai"
+    mock_settings.hf_token = None
+    mock_settings.huggingface_api_key = None
+    mock_settings.has_openai_key = True
     mock_settings.openai_api_key = "sk-test"
     mock_settings.openai_model = "gpt-5.1"
 
@@ -34,7 +36,10 @@ def test_get_model_openai(mock_settings):
 
 def test_get_model_anthropic(mock_settings):
     """Test that Anthropic model is returned when provider is anthropic."""
-    mock_settings.llm_provider = "anthropic"
+    mock_settings.hf_token = None
+    mock_settings.huggingface_api_key = None
+    mock_settings.has_openai_key = False
+    mock_settings.has_anthropic_key = True
     mock_settings.anthropic_api_key = "sk-ant-test"
     mock_settings.anthropic_model = "claude-sonnet-4-5-20250929"
 
diff --git a/tests/unit/test_app_smoke.py b/tests/unit/test_app_smoke.py
index 3fb347f..a3e7ee2 100644
--- a/tests/unit/test_app_smoke.py
+++ b/tests/unit/test_app_smoke.py
@@ -5,6 +5,8 @@
 that wouldn't be caught by unit tests.
 """
 
+from unittest.mock import MagicMock, patch
+
 import pytest
 
 
@@ -12,7 +14,8 @@
 class TestAppSmoke:
     """Smoke tests for app initialization."""
 
-    def test_app_creates_demo(self) -> None:
+    @patch("gradio.LoginButton")
+    def test_app_creates_demo(self, mock_login_button: MagicMock) -> None:
         """App should create Gradio demo without crashing.
 
         This catches: