From 7fd053fc12404ab04f3134281a69bfd43d85810a Mon Sep 17 00:00:00 2001 From: HSY-999 Date: Sat, 20 Dec 2025 18:30:45 +0100 Subject: [PATCH 1/3] BUG: Fixed merge page bug issue with _markup_annotations objects --- pypdf/_page.py | 12 ++++++++++++ pypdf/generic/_data_structures.py | 4 +++- tests/generic/test_files.py | 25 +++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 6c04e8dd29..5359644d5c 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -496,6 +496,18 @@ class PageObject(DictionaryObject): original_page: "PageObject" # very local use in writer when appending + def __new__(cls, *args: tuple[Any], **kwargs: dict[Any, Any]) -> "PageObject": + """__new__ used here to make sure instance.pdf attribute + is set. related to #3467. + """ + instance = super().__new__(cls) + + instance.pdf = None + instance.inline_images = None + instance.indirect_reference = None + + return instance + def __init__( self, pdf: Optional[PdfCommonDocProtocol] = None, diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index 78d39af951..d658880bc6 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -293,7 +293,9 @@ def clone( visited: set[tuple[int, int]] = set() # (idnum, generation) d__ = cast( "DictionaryObject", - self._reference_clone(self.__class__(), pdf_dest, force_duplicate), + self._reference_clone(self.__new__(self.__class__), pdf_dest, force_duplicate), + # self.__new__(self.__class__) because we want instance of type __class__, + # where we copy values into later below ) if ignore_fields is None: ignore_fields = [] diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py index 9d488e0681..8282a9def7 100644 --- a/tests/generic/test_files.py +++ b/tests/generic/test_files.py @@ -8,6 +8,7 @@ import pytest from pypdf import PdfReader, PdfWriter +from pypdf.annotations._markup_annotations import Polygon from pypdf.constants import AFRelationship from pypdf.errors import PdfReadError, PyPdfError from pypdf.generic import ( @@ -575,3 +576,27 @@ def test_embedded_file__order(): "test.txt", attachment4.pdf_object.indirect_reference, "xyz.txt", attachment3.pdf_object.indirect_reference, ] + + +def test_merge_page_with_annotation(): + # added and adapted from issue #3467 + writer = PdfWriter() + writer2 = PdfWriter() + writer.add_blank_page(100, 100) + writer2.add_blank_page(100, 100) + + annotation = Polygon( + vertices=[(50, 550), (200, 650), (70, 750), (50, 700)], + ) + + writer.add_annotation(0, annotation) + + page1 = writer.pages[0] + page2 = writer2.pages[0] + page2.merge_page(page1) + + assert page2.annotations[0].get_object()["/Type"] == annotation["/Type"] + assert page2.annotations[0].get_object()["/Subtype"] == annotation["/Subtype"] + assert page2.annotations[0].get_object()["/Vertices"] == annotation["/Vertices"] + assert page2.annotations[0].get_object()["/IT"] == annotation["/IT"] + assert page2.annotations[0].get_object()["/Rect"] == annotation["/Rect"] From 9fa15e839e3fe9ed7dbfef498294eb1d5c4cdd43 Mon Sep 17 00:00:00 2001 From: HSY-999 Date: Wed, 24 Dec 2025 22:09:03 +0100 Subject: [PATCH 2/3] rewrite __new__ function call __init__ --- pypdf/_page.py | 15 ++++++--------- pypdf/generic/_data_structures.py | 2 +- tests/generic/test_files.py | 1 - 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 5359644d5c..cb5d90166d 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -496,15 +496,12 @@ class PageObject(DictionaryObject): original_page: "PageObject" # very local use in writer when appending - def __new__(cls, *args: tuple[Any], **kwargs: dict[Any, Any]) -> "PageObject": - """__new__ used here to make sure instance.pdf attribute - is set. related to #3467. - """ - instance = super().__new__(cls) - - instance.pdf = None - instance.inline_images = None - instance.indirect_reference = None + def __new__(cls, *args: Any, **kwargs: Any) -> "PageObject": + # __new__ used here to make sure instance.pdf attribute + # is set. related to #3467. + instance = super().__new__(cls, *args, **kwargs) + if isinstance(instance, cls): + cls.__init__(instance, *args, **kwargs) return instance diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index d658880bc6..c74250ce4a 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -293,9 +293,9 @@ def clone( visited: set[tuple[int, int]] = set() # (idnum, generation) d__ = cast( "DictionaryObject", - self._reference_clone(self.__new__(self.__class__), pdf_dest, force_duplicate), # self.__new__(self.__class__) because we want instance of type __class__, # where we copy values into later below + self._reference_clone(self.__new__(self.__class__), pdf_dest, force_duplicate), ) if ignore_fields is None: ignore_fields = [] diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py index 8282a9def7..eb997cf914 100644 --- a/tests/generic/test_files.py +++ b/tests/generic/test_files.py @@ -579,7 +579,6 @@ def test_embedded_file__order(): def test_merge_page_with_annotation(): - # added and adapted from issue #3467 writer = PdfWriter() writer2 = PdfWriter() writer.add_blank_page(100, 100) From 87393a7d6d10e862a148a860d891ea835023c437 Mon Sep 17 00:00:00 2001 From: HSY-999 Date: Thu, 25 Dec 2025 00:05:51 +0100 Subject: [PATCH 3/3] revert logic closer to original after discovering double init --- pypdf/_page.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 4b39dcad92..a9eca1fc55 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -499,8 +499,10 @@ def __new__(cls, *args: Any, **kwargs: Any) -> "PageObject": # __new__ used here to make sure instance.pdf attribute # is set. related to #3467. instance = super().__new__(cls, *args, **kwargs) - if isinstance(instance, cls): - cls.__init__(instance, *args, **kwargs) + + instance.pdf = None + instance.inline_images = None + instance.indirect_reference = None return instance