Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pypdf/annotations/_markup_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ def __init__(


class Text(MarkupAnnotation):
_clone_class = DictionaryObject
"""
A text annotation.

Expand Down Expand Up @@ -129,6 +130,7 @@ def __init__(


class FreeText(MarkupAnnotation):
_clone_class = DictionaryObject
"""A FreeText annotation"""

def __init__(
Expand Down Expand Up @@ -193,6 +195,8 @@ def __init__(


class Line(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
p1: Vertex,
Expand Down Expand Up @@ -233,6 +237,8 @@ def __init__(


class PolyLine(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
vertices: list[Vertex],
Expand All @@ -255,6 +261,8 @@ def __init__(


class Rectangle(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
rect: Union[RectangleObject, tuple[float, float, float, float]],
Expand All @@ -278,6 +286,8 @@ def __init__(


class Highlight(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
*,
Expand All @@ -303,6 +313,8 @@ def __init__(


class Ellipse(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
rect: Union[RectangleObject, tuple[float, float, float, float]],
Expand All @@ -327,6 +339,8 @@ def __init__(


class Polygon(MarkupAnnotation):
_clone_class = DictionaryObject

def __init__(
self,
vertices: list[tuple[float, float]],
Expand Down
4 changes: 4 additions & 0 deletions pypdf/annotations/_non_markup_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@


class Link(AnnotationDictionary):
_clone_class = DictionaryObject

def __init__(
self,
*,
Expand Down Expand Up @@ -77,6 +79,8 @@ def __init__(


class Popup(AnnotationDictionary):
_clone_class = DictionaryObject

def __init__(
self,
*,
Expand Down
23 changes: 22 additions & 1 deletion pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,12 @@ def read_from_stream(


class DictionaryObject(dict[Any, Any], PdfObject):
_clone_class: Optional[type["DictionaryObject"]] = None
"""If set, ``clone()`` will instantiate this class instead of
``self.__class__()`` when creating the cloned object. This allows
subclasses that require constructor arguments (e.g., annotation types)
to specify a suitable fallback (typically ``DictionaryObject``)."""

def replicate(
self,
pdf_dest: PdfWriterProtocol,
Expand Down Expand Up @@ -300,9 +306,24 @@ def clone(
pass

visited: set[tuple[int, int]] = set() # (idnum, generation)
if self._clone_class is not None:
obj = self._clone_class()
else:
try:
obj = self.__class__()
except TypeError:
# Some subclasses (e.g., annotation types) require constructor
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to still catch this if we have an explicit _clone_class? And how do we ensure that we actually get the correct class, id est a proper annotation, at the end of the process?

# arguments. Fall back to a plain DictionaryObject.
logger_warning(
f"Could not construct {type(self).__name__}() during clone; "
"falling back to DictionaryObject. The cloned object will "
"lose its subclass type.",
source=__name__,
)
obj = DictionaryObject()
d__ = cast(
"DictionaryObject",
self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
self._reference_clone(obj, pdf_dest, force_duplicate),
)
if ignore_fields is None:
ignore_fields = []
Expand Down
26 changes: 26 additions & 0 deletions tests/generic/test_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,3 +306,29 @@ def test_dictionary_object__read_from_stream__infinite_loop(caplog: pytest.LogCa
reader = PdfReader(buffer, strict=False)
with pytest.raises(expected_exception=PdfReadError, match=r"^Cannot find Root object in pdf$"):
assert len(reader.pages) == 0


def test_dictionary_object__clone_fallback_on_annotation_subclass() -> None:
"""
Regression test: ``DictionaryObject.clone()`` calls ``self.__class__()``
with no arguments. Annotation subclasses like ``Polygon`` require
constructor arguments and would raise a ``TypeError``. The fix catches
the exception and falls back to a plain ``DictionaryObject``.

This test clones a page that contains a Polygon annotation across
PdfWriters, triggering the clone path on the annotation.
"""
writer1 = PdfWriter()
page1 = writer1.add_blank_page(100, 100)

from pypdf.annotations import Polygon # noqa: PLC0415

annotation = Polygon(vertices=[(10, 10), (50, 10), (50, 50), (10, 50)])
writer1.add_annotation(page_number=0, annotation=annotation)

# Cloning to a new writer triggers ``DictionaryObject.clone``,
# which should not crash for Polygon annotations.
writer2 = PdfWriter()
cloned_page = writer2.add_page(page1)
assert cloned_page is not None
assert len(writer2.pages) == 1
32 changes: 32 additions & 0 deletions tests/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,38 @@ def test_polygon(pdf_file_path):
writer.write(fp)


def test_merge_page_with_markup_annotation():
"""
Regression test for #3467: merging a page that holds a markup annotation
instance (e.g. ``Polygon``, ``Line``) must not crash in
``DictionaryObject.clone`` because ``self.__class__()`` cannot be
constructed without the subclass' required arguments.
"""
src_writer = PdfWriter()
src_page = src_writer.add_blank_page(width=200, height=200)
src_writer.add_annotation(
0, Polygon(vertices=[(50, 50), (150, 50), (100, 150)])
)
src_writer.add_annotation(
0, Line(rect=(50, 550, 200, 650), p1=(50, 550), p2=(200, 650))
)

dst_writer = PdfWriter()
dst_page = dst_writer.add_blank_page(width=200, height=200)
dst_page.merge_page(src_page)

output = BytesIO()
dst_writer.write(output)
output.seek(0)

# The output PDF must be readable and the merged annotations preserved.
merged_reader = PdfReader(output)
merged_annots = merged_reader.pages[0]["/Annots"]
subtypes = {a.get_object()["/Subtype"] for a in merged_annots}
assert "/Polygon" in subtypes
assert "/Line" in subtypes


def test_polyline(pdf_file_path):
# Arrange
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
Expand Down
Loading