Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/examples/japanese_extraction.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ for entity in result.extractions:
if entity.char_interval:
start, end = entity.char_interval.start_pos, entity.char_interval.end_pos
position_info = f" (pos: {start}-{end})"

print(f"• {entity.extraction_class}: {entity.extraction_text}{position_info}")

# Expected Output:
Expand Down
27 changes: 25 additions & 2 deletions langextract/core/format_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,8 +288,31 @@ def _extract_content(self, text: str) -> str:
FormatParseError: When fences required but not found or multiple
blocks found.
"""
strip_text = text.strip()
if not self.use_fences:
return text.strip()
matches = list(_FENCE_RE.finditer(text))

if matches:
valid_tags = {
data.FormatType.YAML: {_YAML_FORMAT, _YML_FORMAT},
data.FormatType.JSON: {_JSON_FORMAT},
}
candidates = [
m
for m in matches
if self._is_valid_language_tag(m.group("lang"), valid_tags)
]

if len(candidates) == 1:
return candidates[0].group("body").strip()
if len(candidates) > 1:
raise exceptions.FormatParseError(
"Multiple fenced blocks found. Expected exactly one."
)
if not self.strict_fences and len(matches) == 1:
return matches[0].group("body").strip()

return strip_text

matches = list(_FENCE_RE.finditer(text))

Expand Down Expand Up @@ -330,7 +353,7 @@ def _extract_content(self, text: str) -> str:
f"No {self.format_type.value} code block found."
)

return text.strip()
return strip_text

# ---- Backward compatibility methods (to be removed in v2.0.0) ----

Expand Down
18 changes: 18 additions & 0 deletions tests/format_handler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,24 @@ def test_think_tags_stripped_before_parsing(self):
self.assertLen(parsed, 1)
self.assertEqual(parsed[0]["person"], "Alice")

def test_fenced_json_accepted_when_fences_disabled(self):
    """Parse a fenced JSON reply with a handler built with use_fences=False."""
    # Some OpenAI-compatible backends wrap the reply in a json code fence
    # even though raw JSON output was requested.
    model_reply = textwrap.dedent("""
        ```json
        {"extractions": [{"person": "Alice"}]}
        ```
    """).strip()

    json_handler = format_handler.FormatHandler(
        format_type=data.FormatType.JSON,
        use_wrapper=True,
        wrapper_key="extractions",
        use_fences=False,
    )
    extractions = json_handler.parse_output(model_reply)

    # Exactly one extraction is expected, carrying the fenced payload's value.
    self.assertLen(extractions, 1)
    self.assertEqual(extractions[0]["person"], "Alice")

def test_top_level_list_accepted_as_fallback(self):
# Some models return [...] instead of {"extractions": [...]}
handler = format_handler.FormatHandler(
Expand Down
Loading