py-pdf · Abzaek · May 16, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py
@@ -99,6 +99,7 @@ def __init__(
 
 
 class Text(MarkupAnnotation):
+    _clone_class = DictionaryObject
     """
     A text annotation.
 
@@ -129,6 +130,7 @@ def __init__(
 
 
 class FreeText(MarkupAnnotation):
     """A FreeText annotation"""
+    _clone_class = DictionaryObject
 
     def __init__(
@@ -193,6 +195,8 @@ def __init__(
 
 
 class Line(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         p1: Vertex,
@@ -233,6 +237,8 @@ def __init__(
 
 
 class PolyLine(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         vertices: list[Vertex],
@@ -255,6 +261,8 @@ def __init__(
 
 
 class Rectangle(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         rect: Union[RectangleObject, tuple[float, float, float, float]],
@@ -278,6 +286,8 @@ def __init__(
 
 
 class Highlight(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         *,
@@ -303,6 +313,8 @@ def __init__(
 
 
 class Ellipse(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         rect: Union[RectangleObject, tuple[float, float, float, float]],
@@ -327,6 +339,8 @@ def __init__(
 
 
 class Polygon(MarkupAnnotation):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         vertices: list[tuple[float, float]],

diff --git a/pypdf/annotations/_non_markup_annotations.py b/pypdf/annotations/_non_markup_annotations.py
@@ -13,6 +13,8 @@
 
 
 class Link(AnnotationDictionary):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         *,
@@ -77,6 +79,8 @@ def __init__(
 
 
 class Popup(AnnotationDictionary):
+    _clone_class = DictionaryObject
+
     def __init__(
         self,
         *,

diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py
@@ -272,6 +272,12 @@ def read_from_stream(
 
 
 class DictionaryObject(dict[Any, Any], PdfObject):
+    _clone_class: Optional[type["DictionaryObject"]] = None
+    """If set, ``clone()`` will instantiate this class instead of
+    ``self.__class__()`` when creating the cloned object. This allows
+    subclasses that require constructor arguments (e.g., annotation types)
+    to specify a suitable fallback (typically ``DictionaryObject``)."""
+
     def replicate(
         self,
         pdf_dest: PdfWriterProtocol,
@@ -300,9 +306,24 @@ def clone(
             pass
 
         visited: set[tuple[int, int]] = set()  # (idnum, generation)
+        if self._clone_class is not None:
+            obj = self._clone_class()
+        else:
+            try:
+                obj = self.__class__()
+            except TypeError:
+                # Some subclasses (e.g., annotation types) require constructor
+                # arguments. Fall back to a plain DictionaryObject.
+                logger_warning(
+                    f"Could not construct {type(self).__name__}() during clone; "
+                    "falling back to DictionaryObject. The cloned object will "
+                    "lose its subclass type.",
+                    source=__name__,
+                )
+                obj = DictionaryObject()
         d__ = cast(
             "DictionaryObject",
-            self._reference_clone(self.__class__(), pdf_dest, force_duplicate),
+            self._reference_clone(obj, pdf_dest, force_duplicate),
         )
         if ignore_fields is None:
             ignore_fields = []

diff --git a/tests/generic/test_data_structures.py b/tests/generic/test_data_structures.py
@@ -306,3 +306,29 @@ def test_dictionary_object__read_from_stream__infinite_loop(caplog: pytest.LogCa
     reader = PdfReader(buffer, strict=False)
     with pytest.raises(expected_exception=PdfReadError, match=r"^Cannot find Root object in pdf$"):
         assert len(reader.pages) == 0
+
+
+def test_dictionary_object__clone_fallback_on_annotation_subclass() -> None:
+    """
+    Regression test: ``DictionaryObject.clone()`` used to call
+    ``self.__class__()`` without arguments, which raises ``TypeError`` for
+    annotation subclasses such as ``Polygon`` that require constructor
+    arguments. ``Polygon`` sets ``_clone_class``, so its clone is now
+    built as a plain ``DictionaryObject``.
+
+    Cloning a page that holds a Polygon annotation into another
+    ``PdfWriter`` exercises the clone path on the annotation.
+    """
+    from pypdf.annotations import Polygon  # noqa: PLC0415
+
+    writer1 = PdfWriter()
+    page1 = writer1.add_blank_page(100, 100)
+    annotation = Polygon(vertices=[(10, 10), (50, 10), (50, 50), (10, 50)])
+    writer1.add_annotation(page_number=0, annotation=annotation)
+
+    writer2 = PdfWriter()
+    cloned_page = writer2.add_page(page1)
+
+    assert len(writer2.pages) == 1
+    cloned_annots = cloned_page["/Annots"]
+    assert [a.get_object()["/Subtype"] for a in cloned_annots] == ["/Polygon"]
diff --git a/tests/test_annotations.py b/tests/test_annotations.py
@@ -169,6 +169,38 @@ def test_polygon(pdf_file_path):
         writer.write(fp)
 
 
+def test_merge_page_with_markup_annotation():
+    """
+    Regression test for #3467.
+
+    Merging a page carrying markup annotation instances (``Polygon``,
+    ``Line``) must not fail in ``DictionaryObject.clone``, and both
+    annotations must survive a write/read round trip.
+    """
+    # Arrange
+    polygon = Polygon(vertices=[(50, 50), (150, 50), (100, 150)])
+    line = Line(rect=(50, 550, 200, 650), p1=(50, 550), p2=(200, 650))
+    source = PdfWriter()
+    source_page = source.add_blank_page(width=200, height=200)
+    for markup in (polygon, line):
+        source.add_annotation(0, markup)
+    target = PdfWriter()
+    target_page = target.add_blank_page(width=200, height=200)
+
+    # Act
+    target_page.merge_page(source_page)
+    buffer = BytesIO()
+    target.write(buffer)
+    buffer.seek(0)
+
+    # Assert
+    reader = PdfReader(buffer)
+    annots = reader.pages[0]["/Annots"]
+    found = {annot.get_object()["/Subtype"] for annot in annots}
+    assert "/Polygon" in found
+    assert "/Line" in found
+
+
 def test_polyline(pdf_file_path):
     # Arrange
     pdf_path = RESOURCE_ROOT / "crazyones.pdf"