ResearchHarness/agent_base/base.py at main · InternScience/ResearchHarness · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any, Iterable, Optional, Sequence


def _normalize_function_list(function_list: Optional[Iterable[str]]) -> Optional[list[str]]:
    """
    Return a cleaned list of tool names, or ``None`` when none was given.

    Each entry is converted with ``str()`` and stripped of surrounding
    whitespace; entries that end up empty are dropped. Order and duplicates
    are preserved.

    Args:
        function_list: Iterable of tool names, a single tool name given as a
            bare string, or ``None``.

    Returns:
        ``None`` if ``function_list`` is ``None``; otherwise a new list of
        the non-empty, stripped names (possibly empty).
    """
    if function_list is None:
        return None
    if isinstance(function_list, str):
        # A bare string is itself an iterable of characters, so iterating it
        # would yield one "tool" per letter. Treat it as a single name.
        function_list = [function_list]
    stripped_names = (str(raw_name).strip() for raw_name in function_list)
    return [name for name in stripped_names if name]


def agent_role(
    *,
    name: str,
    role_prompt: str = "",
    function_list: Optional[Iterable[str]] = None,
):
    """
    Build a class decorator that stamps role defaults onto an agent class.

    Upper-layer frameworks use this to declare, next to the class definition,
    the role name, the role-specific prompt addendum, and the tool
    restriction, so the lower-layer execution loop can stay generic.

    Args:
        name: Role name. Stored stripped as ``role_name``; when it is blank
            after stripping, the decorated class's ``__name__`` is used.
        role_prompt: Prompt addendum. Stored stripped as
            ``default_role_prompt``.
        function_list: Tool names. Stored as ``default_function_list`` after
            passing through ``_normalize_function_list``.

    Returns:
        A decorator that sets the three attributes on the class it receives
        and returns that same class.
    """

    def decorator(target_cls):
        declared_name = str(name).strip()
        # An empty role name falls back to the class's own name.
        target_cls.role_name = declared_name if declared_name else target_cls.__name__
        target_cls.default_role_prompt = str(role_prompt).strip()
        target_cls.default_function_list = _normalize_function_list(function_list)
        return target_cls

    return decorator


class BaseAgent(ABC):
    """
    Abstract foundation for agents built on top of ResearchHarness.

    Subclasses must implement ``run``. The class-level defaults below can be
    overridden per subclass, and the ``should_accept_*`` / ``*_message`` /
    ``*_result_text`` hooks can be overridden to customize termination.
    """

    # Role identifier for this agent class.
    role_name: str = "agent"
    # Role prompt used when no explicit role prompt is supplied.
    default_role_prompt: str = ""
    # Tool names used when no explicit list is supplied; None means unset.
    default_function_list: Optional[list[str]] = None

    @classmethod
    def resolve_function_list(cls, function_list: Optional[Sequence[str]]) -> Optional[list[str]]:
        """
        Pick the effective tool list for this agent class.

        An explicit ``function_list`` always wins and is normalized; it
        resolves to a list even when nothing survives normalization. With no
        explicit list, a copy of the class default is returned, or ``None``
        when the class declares no default.
        """
        if function_list is None:
            inherited = getattr(cls, "default_function_list", None)
            # Copy so callers cannot mutate the class-level default.
            return None if inherited is None else list(inherited)
        explicit = _normalize_function_list(function_list)
        return explicit if explicit else []

    @classmethod
    def resolve_role_prompt(cls, role_prompt: Optional[str]) -> str:
        """
        Pick the effective role prompt for this agent class.

        ``None`` selects the class default; any other value is used as given.
        The result is always a stripped string, and falsy values become ``""``.
        """
        if role_prompt is None:
            chosen = getattr(cls, "default_role_prompt", "")
        else:
            chosen = role_prompt
        if not chosen:
            return ""
        return str(chosen).strip()

    def should_accept_plaintext_result(
        self,
        *,
        result_text: str,
        workspace_root: Optional[str],
        messages: Sequence[dict[str, Any]],
    ) -> bool:
        """
        Tell whether a plain assistant reply without tool calls ends the run.

        This base implementation always answers ``True``, keeping the original
        ResearchHarness semantics in which any meaningful assistant text
        without tool calls is the final result. Upper layers may override the
        hook to demand extra completion artifacts before terminating.
        """

        return True

    def rejected_plaintext_result_message(
        self,
        *,
        result_text: str,
        workspace_root: Optional[str],
        messages: Sequence[dict[str, Any]],
    ) -> str:
        """
        Give the reason a plain assistant reply was not accepted as terminal.

        This base implementation returns ``""``; an empty string falls back
        to the generic runtime message.
        """

        return ""

    def should_accept_terminal_error(
        self,
        *,
        error_text: str,
        workspace_root: Optional[str],
        messages: Sequence[dict[str, Any]],
    ) -> bool:
        """
        Tell whether a terminal LLM/runtime error may still count as success.

        This base implementation is conservative and always answers ``False``.
        Upper layers may override the hook when benchmark-specific completion
        artifacts already exist and the remaining assistant text is not
        semantically important.
        """

        return False

    def accepted_terminal_error_result_text(
        self,
        *,
        error_text: str,
        workspace_root: Optional[str],
        messages: Sequence[dict[str, Any]],
    ) -> str:
        """
        Supply the synthetic final result used when a terminal error is accepted.

        This base implementation returns ``""``; an empty string falls back
        to a generic runtime completion message.
        """

        return ""

    @abstractmethod
    def run(self, prompt: str, workspace_root: Optional[str] = None):
        """Execute the agent on ``prompt``; concrete subclasses must implement this."""
        raise NotImplementedError