From e666e744e08a3057e6c2ca81a0424a8d13b074a1 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sat, 16 May 2026 21:30:31 +0000 Subject: [PATCH 1/8] Fix: merge_page crashes on pages with markup annotations (#3467) DictionaryObject.clone() called self.__class__() with no arguments, which crashes for annotation subclasses (Polygon, Line, Square, Circle) whose constructors require parameters. Fix by catching the TypeError and falling back to a plain DictionaryObject. Fixes https://github.com/py-pdf/pypdf/issues/3467 --- pypdf/generic/_data_structures.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 83f630956f..5171bc8204 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -300,9 +300,15 @@ def clone( pass visited: set[tuple[int, int]] = set() # (idnum, generation) + try: + obj_cls = self.__class__() + except Exception: + # Some subclasses (e.g., annotation types) require constructor + # arguments. Fall back to a plain DictionaryObject. + obj_cls = DictionaryObject() # type: ignore[assignment] d__ = cast( "DictionaryObject", - self._reference_clone(self.__class__(), pdf_dest, force_duplicate), + self._reference_clone(obj_cls, pdf_dest, force_duplicate), ) if ignore_fields is None: ignore_fields = [] From 9b08340812f09b83255cde9ae588ab6372622be8 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sun, 17 May 2026 05:35:12 +0000 Subject: [PATCH 2/8] STY: Remove unused type: ignore comment --- pypdf/generic/_data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 5171bc8204..561f9af427 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -305,7 +305,7 @@ def clone( except Exception: # Some subclasses (e.g., annotation types) require constructor # arguments. Fall back to a plain DictionaryObject. - obj_cls = DictionaryObject() # type: ignore[assignment] + obj_cls = DictionaryObject() d__ = cast( "DictionaryObject", self._reference_clone(obj_cls, pdf_dest, force_duplicate), From d99bd99c2bcf74aa28f2f10f5ffae1d494881fde Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sun, 17 May 2026 07:07:07 +0000 Subject: [PATCH 3/8] ci: trigger re-run From df272e1f1f5933713046d364e75de23278559e30 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sun, 17 May 2026 07:18:20 +0000 Subject: [PATCH 4/8] TST: Cover merge_page clone path for markup annotations Adds a regression test for issue #3467 that merges a page containing Polygon and Line annotation instances. Without the clone() fallback this raises a TypeError because annotation subclass constructors require arguments. Signed-off-by: Abdulazez A. --- tests/test_annotations.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 6caf5ee931..5f92498628 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -169,6 +169,38 @@ def test_polygon(pdf_file_path): writer.write(fp) +def test_merge_page_with_markup_annotation(): + """ + Regression test for #3467: merging a page that holds a markup annotation + instance (e.g. ``Polygon``, ``Line``) must not crash in + ``DictionaryObject.clone`` because ``self.__class__()`` cannot be + constructed without the subclass' required arguments. + """ + src_writer = PdfWriter() + src_page = src_writer.add_blank_page(width=200, height=200) + src_writer.add_annotation( + 0, Polygon(vertices=[(50, 50), (150, 50), (100, 150)]) + ) + src_writer.add_annotation( + 0, Line(rect=(50, 550, 200, 650), p1=(50, 550), p2=(200, 650)) + ) + + dst_writer = PdfWriter() + dst_page = dst_writer.add_blank_page(width=200, height=200) + dst_page.merge_page(src_page) + + output = BytesIO() + dst_writer.write(output) + output.seek(0) + + # The output PDF must be readable and the merged annotations preserved. + merged_reader = PdfReader(output) + merged_annots = merged_reader.pages[0]["/Annots"] + subtypes = {a.get_object()["/Subtype"] for a in merged_annots} + assert "/Polygon" in subtypes + assert "/Line" in subtypes + + def test_polyline(pdf_file_path): # Arrange pdf_path = RESOURCE_ROOT / "crazyones.pdf" From 622808b63af606744bb1e02632921dd20bcdfc94 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sun, 17 May 2026 07:23:52 +0000 Subject: [PATCH 5/8] TST: Add clone fallback regression test for markup annotations Test that cloning a page with a Polygon annotation across PdfWriters does not crash. The clone() method on DictionaryObject calls self.__class__() which fails for annotation subclasses with required constructor args. The fix catches the exception and falls back to a plain DictionaryObject. Signed-off-by: Abdulazez A. --- tests/generic/test_data_structures.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/generic/test_data_structures.py b/tests/generic/test_data_structures.py index 09ea63ca13..384d88e3ca 100644 --- a/tests/generic/test_data_structures.py +++ b/tests/generic/test_data_structures.py @@ -306,3 +306,29 @@ def test_dictionary_object__read_from_stream__infinite_loop(caplog: pytest.LogCa reader = PdfReader(buffer, strict=False) with pytest.raises(expected_exception=PdfReadError, match=r"^Cannot find Root object in pdf$"): assert len(reader.pages) == 0 + + +def test_dictionary_object__clone_fallback_on_annotation_subclass() -> None: + """ + Regression test: ``DictionaryObject.clone()`` calls ``self.__class__()`` + with no arguments. Annotation subclasses like ``Polygon`` require + constructor arguments and would raise a ``TypeError``. The fix catches + the exception and falls back to a plain ``DictionaryObject``. + + This test clones a page that contains a Polygon annotation across + PdfWriters, triggering the clone path on the annotation. + """ + writer1 = PdfWriter() + page1 = writer1.add_blank_page(100, 100) + + from pypdf.annotations import Polygon # noqa: F811 + + annotation = Polygon(vertices=[(10, 10), (50, 10), (50, 50), (10, 50)]) + writer1.add_annotation(page_number=0, annotation=annotation) + + # Cloning to a new writer triggers ``DictionaryObject.clone``, + # which should not crash for Polygon annotations. + writer2 = PdfWriter() + cloned_page = writer2.add_page(page1) + assert cloned_page is not None + assert len(writer2.pages) == 1 From 6a8a1a5f9bb8c7482c9e16462a279574f9b192f8 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Sun, 17 May 2026 07:27:53 +0000 Subject: [PATCH 6/8] STY: Suppress PLC0415 instead of unused F811 noqa Signed-off-by: Abdulazez A. --- tests/generic/test_data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/generic/test_data_structures.py b/tests/generic/test_data_structures.py index 384d88e3ca..54581ba94e 100644 --- a/tests/generic/test_data_structures.py +++ b/tests/generic/test_data_structures.py @@ -321,7 +321,7 @@ def test_dictionary_object__clone_fallback_on_annotation_subclass() -> None: writer1 = PdfWriter() page1 = writer1.add_blank_page(100, 100) - from pypdf.annotations import Polygon # noqa: F811 + from pypdf.annotations import Polygon # noqa: PLC0415 annotation = Polygon(vertices=[(10, 10), (50, 10), (50, 50), (10, 50)]) writer1.add_annotation(page_number=0, annotation=annotation) From 01c48521739530056dd20b9a90b199c0173c2ad5 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Mon, 18 May 2026 17:22:48 +0000 Subject: [PATCH 7/8] =?UTF-8?q?FIX:=20Address=20review=20=E2=80=94=20speci?= =?UTF-8?q?fic=20fallback=20with=20=5Fclone=5Fclass=20attribute?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer feedback (stefan6419846): - Broad except Exception was too aggressive and silent - Returning DictionaryObject for annotation subclasses loses type info Fix: - Add _clone_class class attribute to DictionaryObject (None = use self.__class__() as before) - Narrow except Exception to except TypeError (specific error when calling constructor with missing required args) - Add logger_warning when falling back - Set _clone_class = DictionaryObject on all 10 annotation classes that require constructor arguments This gives annotation authors explicit control over the fallback type, while keeping the generic TypeError safeguard for any future subclass that may have a similar issue. --- pypdf/annotations/_markup_annotations.py | 14 ++++++++++ pypdf/annotations/_non_markup_annotations.py | 4 +++ pypdf/generic/_data_structures.py | 29 +++++++++++++++----- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/pypdf/annotations/_markup_annotations.py b/pypdf/annotations/_markup_annotations.py index 25eb0dfc48..fd72f66bc3 100644 --- a/pypdf/annotations/_markup_annotations.py +++ b/pypdf/annotations/_markup_annotations.py @@ -99,6 +99,7 @@ def __init__( class Text(MarkupAnnotation): + _clone_class = DictionaryObject """ A text annotation. @@ -129,6 +130,7 @@ def __init__( class FreeText(MarkupAnnotation): + _clone_class = DictionaryObject """A FreeText annotation""" def __init__( @@ -193,6 +195,8 @@ def __init__( class Line(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, p1: Vertex, @@ -233,6 +237,8 @@ def __init__( class PolyLine(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, vertices: list[Vertex], @@ -255,6 +261,8 @@ def __init__( class Rectangle(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, rect: Union[RectangleObject, tuple[float, float, float, float]], @@ -278,6 +286,8 @@ def __init__( class Highlight(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, *, @@ -303,6 +313,8 @@ def __init__( class Ellipse(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, rect: Union[RectangleObject, tuple[float, float, float, float]], @@ -327,6 +339,8 @@ def __init__( class Polygon(MarkupAnnotation): + _clone_class = DictionaryObject + def __init__( self, vertices: list[tuple[float, float]], diff --git a/pypdf/annotations/_non_markup_annotations.py b/pypdf/annotations/_non_markup_annotations.py index 9bc6b3e909..c1c727d534 100644 --- a/pypdf/annotations/_non_markup_annotations.py +++ b/pypdf/annotations/_non_markup_annotations.py @@ -13,6 +13,8 @@ class Link(AnnotationDictionary): + _clone_class = DictionaryObject + def __init__( self, *, @@ -77,6 +79,8 @@ def __init__( class Popup(AnnotationDictionary): + _clone_class = DictionaryObject + def __init__( self, *, diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 561f9af427..440a1af40c 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -272,6 +272,12 @@ def read_from_stream( class DictionaryObject(dict[Any, Any], PdfObject): + _clone_class: Optional[type["DictionaryObject"]] = None + """If set, ``clone()`` will instantiate this class instead of + ``self.__class__()`` when creating the cloned object. This allows + subclasses that require constructor arguments (e.g., annotation types) + to specify a suitable fallback (typically ``DictionaryObject``).""" + def replicate( self, pdf_dest: PdfWriterProtocol, @@ -300,15 +306,24 @@ def clone( pass visited: set[tuple[int, int]] = set() # (idnum, generation) - try: - obj_cls = self.__class__() - except Exception: - # Some subclasses (e.g., annotation types) require constructor - # arguments. Fall back to a plain DictionaryObject. - obj_cls = DictionaryObject() + if self._clone_class is not None: + obj = self._clone_class() + else: + try: + obj = self.__class__() + except TypeError: + # Some subclasses (e.g., annotation types) require constructor + # arguments. Fall back to a plain DictionaryObject. + logger_warning( + f"Could not construct {type(self).__name__}() during clone; " + "falling back to DictionaryObject. The cloned object will " + "lose its subclass type.", + __name__, + ) + obj = DictionaryObject() d__ = cast( "DictionaryObject", - self._reference_clone(obj_cls, pdf_dest, force_duplicate), + self._reference_clone(obj, pdf_dest, force_duplicate), ) if ignore_fields is None: ignore_fields = [] From dfeb3f3c52f705f766072db299e2543a91105cb5 Mon Sep 17 00:00:00 2001 From: "Abdulazez A." Date: Mon, 18 May 2026 17:39:30 +0000 Subject: [PATCH 8/8] FIX: logger_warning uses keyword argument source= logger_warning expects source= as a keyword-only parameter, not a positional arg. --- pypdf/generic/_data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 440a1af40c..4bed5efdd4 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -318,7 +318,7 @@ def clone( f"Could not construct {type(self).__name__}() during clone; " "falling back to DictionaryObject. The cloned object will " "lose its subclass type.", - __name__, + source=__name__, ) obj = DictionaryObject() d__ = cast(