Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- `Prompt` class `xml_tree` property
### Changed
- `LLMModel` enum updated
- `verify` parameter added to `contains_xml` method
- Test system modified
## [0.9] - 2025-10-22
### Added
- `Prompt` class `contains_xml` method
Expand Down
19 changes: 16 additions & 3 deletions memor/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,22 @@ def estimate_tokens(self, method: TokensEstimator = TokensEstimator.DEFAULT) ->
"""
return method(self.render(render_format=RenderFormat.STRING, show_warning=False))

def contains_xml(self) -> bool:
    """Return whether the string rendering of the message matches the XML tag pattern."""
    rendered = self.render(render_format=RenderFormat.STRING, show_warning=False)
    tag_match = re.search(XML_PATTERN, rendered)
    return tag_match is not None
def contains_xml(self, verify: bool = False) -> bool:
    """
    Check if the message contains any XML tags.

    :param verify: verify if the XML structure is well-formed
    :return: True if XML tags are found (and, when verify is set, the message also parses as XML)
    """
    message = self.render(render_format=RenderFormat.STRING, show_warning=False)
    if re.search(XML_PATTERN, message) is None:
        # Without any tag match the answer is False whether or not verification is requested,
        # so there is no need to run the parser.
        return False
    if not verify:
        return True
    try:
        # A synthetic root lets surrounding text and sibling tags be parsed as a single document.
        ElementTree.fromstring("<root>" + message + "</root>")
    except (ElementTree.ParseError, ValueError):
        # ParseError signals malformed markup. ValueError is kept so that text the parser
        # rejects before parsing (presumably e.g. un-encodable characters) still yields False.
        return False
    return True
Comment on lines +235 to +250
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe contains_xml should always detect the variable XMLs in the message. This new method is a smarter way to detect whether XML is present in the text. So my suggestion is to remove the verify parameter and assume it's always True.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is very common for the XML structure to be wrong, especially in responses. It is important for users to be able to recognize that a response contains XML tags but is not well-formed, so they can retry or take other actions.


def _build_xml_tree(self) -> Dict[str, Any]:
"""Build XML tree."""
Expand Down
60 changes: 60 additions & 0 deletions tests/test_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,21 @@ def test_contains_xml1():


def test_contains_xml2():
    """Prompt built from a tag-free template must not report XML with verification enabled."""
    response_options = dict(model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
    responses = [
        Response(message="I am fine.", **response_options),
        Response(message="Thanks!", **response_options),
    ]
    template = PromptTemplate(content="{instruction}, {prompt[message]}", custom_map={"instruction": "Hi"})
    prompt = Prompt(message="How are you?", responses=responses, role=Role.USER, template=template)
    has_xml = prompt.contains_xml(verify=True)
    assert not has_xml


def test_contains_xml3():
message = "How are you?"
response1 = Response(message="I am fine.", model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
response2 = Response(message="Thanks!", model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
Expand All @@ -912,6 +927,51 @@ def test_contains_xml2():
assert prompt.contains_xml()


def test_contains_xml4():
    """Prompt whose template has an unclosed tag pair still reports XML without verification."""
    response_options = dict(model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
    responses = [
        Response(message="I am fine.", **response_options),
        Response(message="Thanks!", **response_options),
    ]
    template = PromptTemplate(content="<inst>{instruction}<inst>, {prompt[message]}", custom_map={"instruction": "Hi"})
    prompt = Prompt(message="How are you?", responses=responses, role=Role.USER, template=template)
    has_xml = prompt.contains_xml()
    assert has_xml


def test_contains_xml5():
    """Prompt whose template has an unclosed tag pair must fail the check when verification is enabled."""
    response_options = dict(model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
    responses = [
        Response(message="I am fine.", **response_options),
        Response(message="Thanks!", **response_options),
    ]
    template = PromptTemplate(content="<inst>{instruction}<inst>, {prompt[message]}", custom_map={"instruction": "Hi"})
    prompt = Prompt(message="How are you?", responses=responses, role=Role.USER, template=template)
    has_xml = prompt.contains_xml(verify=True)
    assert not has_xml


def test_contains_xml6():
    """Prompt whose template has a properly closed tag pair passes the check with verification enabled."""
    response_options = dict(model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
    responses = [
        Response(message="I am fine.", **response_options),
        Response(message="Thanks!", **response_options),
    ]
    template = PromptTemplate(content="<inst>{instruction}</inst>, {prompt[message]}", custom_map={"instruction": "Hi"})
    prompt = Prompt(message="How are you?", responses=responses, role=Role.USER, template=template)
    has_xml = prompt.contains_xml(verify=True)
    assert has_xml


def test_equality1():
message = "Hello, how are you?"
response1 = Response(message="I am fine.", model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
Expand Down
20 changes: 20 additions & 0 deletions tests/test_response.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,10 +825,30 @@ def test_contains_xml1():


def test_contains_xml2():
    """A tag-free response must not report XML when verification is enabled."""
    plain_response = Response(message="I am fine.")
    has_xml = plain_response.contains_xml(verify=True)
    assert not has_xml


def test_contains_xml3():
    """A response with a closed tag pair reports XML without verification."""
    tagged_response = Response(message="I am fine. <note>test</note>")
    has_xml = tagged_response.contains_xml()
    assert has_xml


def test_contains_xml4():
    """A response with an unclosed tag pair still reports XML without verification."""
    malformed_response = Response(message="I am fine. <note>test<note>")
    has_xml = malformed_response.contains_xml()
    assert has_xml


def test_contains_xml5():
    """A response with an unclosed tag pair must fail the check when verification is enabled."""
    malformed_response = Response(message="I am fine. <note>test<note>")
    has_xml = malformed_response.contains_xml(verify=True)
    assert not has_xml


def test_contains_xml6():
    """A response with a closed tag pair passes the check with verification enabled."""
    tagged_response = Response(message="I am fine. <note>test</note>")
    has_xml = tagged_response.contains_xml(verify=True)
    assert has_xml


def test_equality1():
response1 = Response(message="I am fine.", model=LLMModel.GPT_4, temperature=0.5, role=Role.USER, score=0.8)
response2 = response1.copy()
Expand Down