diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 25eb0dfc48..fd72f66bc3 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -99,6 +99,7 @@ def __init__( class Text(MarkupAnnotation): + _clone_class = DictionaryObject """ A text annotation. @@ -129,6 +130,7 @@ def __init__( class FreeText(MarkupAnnotation): + _clone_class = DictionaryObject """A FreeText annotation""" def __init__( @@ -193,6 +195,8 @@ def __init__( class Line(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, p1: Vertex, @@ -233,6 +237,8 @@ def __init__( class PolyLine(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, vertices: list[Vertex], @@ -255,6 +261,8 @@ def __init__( class Rectangle(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, rect: Union[RectangleObject, tuple[float, float, float, float]], @@ -278,6 +286,8 @@ def __init__( class Highlight(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, *, @@ -303,6 +313,8 @@ def __init__( class Ellipse(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, rect: Union[RectangleObject, tuple[float, float, float, float]], @@ -327,6 +339,8 @@ def __init__( class Polygon(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, vertices: list[tuple[float, float]], diff --git a/pypdf/annotations/_non_markup_annotations.py b/pypdf/annotations/_non_markup_annotations.py index 9bc6b3e909..c1c727d534 100644 --- a/pypdf/annotations/_non_markup_annotations.py +++ b/pypdf/annotations/_non_markup_annotations.py @@ -13,6 +13,8 @@ class Link(AnnotationDictionary): + _clone_class = DictionaryObject + def __init__( self, *, @@ -77,6 +79,8 @@ def __init__( class Popup(AnnotationDictionary): + _clone_class = DictionaryObject + def __init__( self, *, diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py 
index 83f630956f..4bed5efdd4 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -272,6 +272,12 @@ def read_from_stream( class DictionaryObject(dict[Any, Any], PdfObject): + _clone_class: Optional[type["DictionaryObject"]] = None + """If set, ``clone()`` will instantiate this class instead of + ``self.__class__()`` when creating the cloned object. This allows + subclasses that require constructor arguments (e.g., annotation types) + to specify a suitable fallback (typically ``DictionaryObject``).""" + def replicate( self, pdf_dest: PdfWriterProtocol, @@ -300,9 +306,24 @@ def clone( pass visited: set[tuple[int, int]] = set() # (idnum, generation) + if self._clone_class is not None: + obj = self._clone_class() + else: + try: + obj = self.__class__() + except TypeError: + # Some subclasses (e.g., annotation types) require constructor + # arguments. Fall back to a plain DictionaryObject. + logger_warning( + f"Could not construct {type(self).__name__}() during clone; " + "falling back to DictionaryObject. 
The cloned object will " + "lose its subclass type.", + source=__name__, + ) + obj = DictionaryObject() d__ = cast( "DictionaryObject", - self._reference_clone(self.__class__(), pdf_dest, force_duplicate), + self._reference_clone(obj, pdf_dest, force_duplicate), ) if ignore_fields is None: ignore_fields = [] diff --git a/tests/generic/test_data_structures.py b/tests/generic/test_data_structures.py index 09ea63ca13..54581ba94e 100644 --- a/tests/generic/test_data_structures.py +++ b/tests/generic/test_data_structures.py @@ -306,3 +306,29 @@ def test_dictionary_object__read_from_stream__infinite_loop(caplog: pytest.LogCa reader = PdfReader(buffer, strict=False) with pytest.raises(expected_exception=PdfReadError, match=r"^Cannot find Root object in pdf$"): assert len(reader.pages) == 0 + + +def test_dictionary_object__clone_fallback_on_annotation_subclass() -> None: + """ + Regression test: ``DictionaryObject.clone()`` used to call + ``self.__class__()`` with no arguments, which raises ``TypeError`` for + annotation subclasses like ``Polygon`` that require constructor arguments. + ``Polygon`` now sets ``_clone_class``, so it is cloned as a ``DictionaryObject``. + + This test clones a page that contains a Polygon annotation across + PdfWriters, triggering the clone path on the annotation. + """ + writer1 = PdfWriter() + page1 = writer1.add_blank_page(100, 100) + + from pypdf.annotations import Polygon # noqa: PLC0415 + + annotation = Polygon(vertices=[(10, 10), (50, 10), (50, 50), (10, 50)]) + writer1.add_annotation(page_number=0, annotation=annotation) + + # Cloning to a new writer triggers ``DictionaryObject.clone``, + # which should not crash for Polygon annotations. 
+ writer2 = PdfWriter() + cloned_page = writer2.add_page(page1) + assert cloned_page is not None + assert len(writer2.pages) == 1 diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 6caf5ee931..5f92498628 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -169,6 +169,38 @@ def test_polygon(pdf_file_path): writer.write(fp) +def test_merge_page_with_markup_annotation(): + """ + Regression test for #3467: merging a page that holds a markup annotation + instance (e.g. ``Polygon``, ``Line``) must not crash in + ``DictionaryObject.clone`` because ``self.__class__()`` cannot be + constructed without the subclass' required arguments. + """ + src_writer = PdfWriter() + src_page = src_writer.add_blank_page(width=200, height=200) + src_writer.add_annotation( + 0, Polygon(vertices=[(50, 50), (150, 50), (100, 150)]) + ) + src_writer.add_annotation( + 0, Line(rect=(50, 550, 200, 650), p1=(50, 550), p2=(200, 650)) + ) + + dst_writer = PdfWriter() + dst_page = dst_writer.add_blank_page(width=200, height=200) + dst_page.merge_page(src_page) + + output = BytesIO() + dst_writer.write(output) + output.seek(0) + + # The output PDF must be readable and the merged annotations preserved. + merged_reader = PdfReader(output) + merged_annots = merged_reader.pages[0]["/Annots"] + subtypes = {a.get_object()["/Subtype"] for a in merged_annots} + assert "/Polygon" in subtypes + assert "/Line" in subtypes + + def test_polyline(pdf_file_path): # Arrange pdf_path = RESOURCE_ROOT / "crazyones.pdf"