From ff2509a2e15e50854ff88e22fc4f28d7fe8a29a1 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 07:45:34 -0500 Subject: [PATCH 01/12] Ruff cleanup: enforce PLW2901 (no loop/context var overwrite) --- pypdf/_cmap.py | 10 +++--- pypdf/_doc_common.py | 28 +++++++-------- pypdf/_page.py | 20 +++++------ .../_layout_mode/_fixed_width_page.py | 6 ++-- pypdf/_text_extraction/_layout_mode/_font.py | 15 ++++---- pypdf/_writer.py | 36 ++++++++++--------- pypdf/filters.py | 11 +++--- pypdf/generic/_files.py | 6 ++-- pyproject.toml | 1 - tests/scripts/test_make_release.py | 8 ++--- tests/test_filters.py | 4 +-- tests/test_images.py | 4 +-- tests/test_writer.py | 6 ++-- 13 files changed, 80 insertions(+), 75 deletions(-) diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index 2b89a34fe2..f6797f1698 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -460,11 +460,11 @@ def build_font_width_map( # will consider width of char as avg(width) m = 0 cpt = 0 - for xx in w: - xx = xx.get_object() - if xx > 0: - m += xx - cpt += 1 + for xx in w: + xx_val = xx.get_object() + if xx_val > 0: + m += xx_val + cpt += 1 font_width_map["default"] = m / max(1, cpt) st = cast(int, ft["/FirstChar"]) en = cast(int, ft["/LastChar"]) diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index 9d92ebb0ea..958df103ea 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -391,8 +391,8 @@ def recursive_call( return top, -1 return None, mi + ma for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])): - kid = cast(DictionaryObject, kid.get_object()) - n, i = recursive_call(kid, mi) + kid_obj = cast(DictionaryObject, kid.get_object()) + n, i = recursive_call(kid_obj, mi) if n is not None: # page has just been found ... if i < 0: # ... just below! return node, idx @@ -612,8 +612,8 @@ def _build_field( states: list[str] = [] retval[key][NameObject("/_States_")] = ArrayObject(states) for k in obj.get(FA.Kids, {}): - k = k.get_object() - for s in list(k["/AP"]["/N"].keys()): + k_obj = k.get_object() + for s in list(k_obj["/AP"]["/N"].keys()): if s not in states: states.append(s) retval[key][NameObject("/_States_")] = ArrayObject(states) @@ -641,8 +641,8 @@ def _check_kids( if PagesAttributes.KIDS in tree: # recurse down the tree for kid in tree[PagesAttributes.KIDS]: # type: ignore - kid = kid.get_object() - self.get_fields(kid, retval, fileobj, stack) + kid_obj = kid.get_object() + self.get_fields(kid_obj, retval, fileobj, stack) def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None: field_attributes_tuple = FA.attributes() @@ -771,16 +771,16 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any: else: kids = field.get("/Kids", ()) for k in kids: - k = k.get_object() - if (k.get("/Subtype", "") == "/Widget") and ("/T" not in k): + k_obj = k.get_object() + if (k_obj.get("/Subtype", "") == "/Widget") and ("/T" not in k_obj): # Kid that is just a widget, not a field: - if "/P" in k: - ret += [k["/P"].get_object()] + if "/P" in k_obj: + ret += [k_obj["/P"].get_object()] else: ret += [ p for p in self.pages - if k.indirect_reference in p.get("/Annots", "") + if k_obj.indirect_reference in p.get("/Annots", "") ] return [ x @@ -1317,9 +1317,9 @@ def xfa(self) -> Optional[dict[str, Any]]: i = iter(fields) for f in i: tag = f - f = next(i) - if isinstance(f, IndirectObject): - field = cast(Optional[EncodedStreamObject], f.get_object()) + f_val = next(i) + if isinstance(f_val, IndirectObject): + field = cast(Optional[EncodedStreamObject], f_val.get_object()) if field: es = zlib.decompress(field._data) retval[tag] = es diff --git a/pypdf/_page.py b/pypdf/_page.py index c040180942..80a17a5e7f 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -740,14 +740,14 @@ def _get_inline_images(self) -> dict[str, ImageFile]: if k in {"/Length", "/L"}: # no length is expected continue if isinstance(v, list): - v = ArrayObject( + translated_v = ArrayObject( [self._translate_value_inline_image(k, x) for x in v] ) else: - v = self._translate_value_inline_image(k, v) - k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) - if k not in init: - init[k] = v + translated_v = self._translate_value_inline_image(k, v) + mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) + if mapped_k not in init: + init[mapped_k] = translated_v ii["object"] = EncodedStreamObject.initialize_from_dictionary(init) extension, byte_stream, img = _xobj_to_image(ii["object"]) files[f"~{num}~"] = ImageFile( @@ -1231,13 +1231,13 @@ def _merge_page_writer( else: trsf = Transformation(ctm) for a in cast(ArrayObject, page2[PG.ANNOTS]): - a = a.get_object() - aa = a.clone( + a_obj = a.get_object() + aa = a_obj.clone( pdf, ignore_fields=("/P", "/StructParent", "/Parent"), force_duplicate=True, ) - r = cast(ArrayObject, a["/Rect"]) + r = cast(ArrayObject, a_obj["/Rect"]) pt1 = trsf.apply_on((r[0], r[1]), True) pt2 = trsf.apply_on((r[2], r[3]), True) aa[NameObject("/Rect")] = ArrayObject( @@ -1248,8 +1248,8 @@ def _merge_page_writer( max(pt1[1], pt2[1]), ) ) - if "/QuadPoints" in a: - q = cast(ArrayObject, a["/QuadPoints"]) + if "/QuadPoints" in a_obj: + q = cast(ArrayObject, a_obj["/QuadPoints"]) aa[NameObject("/QuadPoints")] = ArrayObject( trsf.apply_on((q[0], q[1]), True) + trsf.apply_on((q[2], q[3]), True) diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 43d6f15654..38b49dfe98 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -191,8 +191,10 @@ def recurs_to_target_op( elif op == b"TD": text_state_mgr.set_state_param(b"TL", -operands[1]) elif op == b"T*": - operands = [0, -text_state_mgr.TL] - text_state_mgr.add_tm(operands) + operands_for_tm = [0, -text_state_mgr.TL] + else: + operands_for_tm = operands + text_state_mgr.add_tm(operands_for_tm) elif op == b"Tf": text_state_mgr.set_font(fonts[operands[0]], operands[1]) else: # handle Tc, Tw, Tz, TL, and Ts operators diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index 3b6cd86ac7..44e07da7dd 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -64,9 +64,10 @@ def __post_init__(self) -> None: for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - while isinstance(d_font, IndirectObject): - d_font = d_font.get_object() - self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font + d_font_val = d_font + while isinstance(d_font_val, IndirectObject): + d_font_val = d_font_val.get_object() + self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_val ord_map = { ord(_target): _surrogate for _target, _surrogate in self.char_map.items() @@ -80,18 +81,18 @@ def __post_init__(self) -> None: skip_count = 0 _w = d_font.get("/W", []) for idx, w_entry in enumerate(_w): - w_entry = w_entry.get_object() + w_val = w_entry.get_object() if skip_count: skip_count -= 1 continue - if not isinstance(w_entry, (int, float)): # pragma: no cover + if not isinstance(w_val, (int, float)): # pragma: no cover # We should never get here due to skip_count above. Add a # warning and or use reader's "strict" to force an ex??? continue # check for format (1): `int [int int int int ...]` w_next_entry = _w[idx + 1].get_object() if isinstance(w_next_entry, Sequence): - start_idx, width_list = w_entry, w_next_entry + start_idx, width_list = w_val, w_next_entry self.width_map.update( { ord_map[_cidx]: _width @@ -112,7 +113,7 @@ def __post_init__(self) -> None: _w[idx + 2].get_object(), (int, float) ): start_idx, stop_idx, const_width = ( - w_entry, + w_val, w_next_entry, _w[idx + 2].get_object(), ) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index b4dd9db145..7e2f44710e 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -979,13 +979,13 @@ def update_page_form_field_values( logger_warning("No fields to update on this page", __name__) return for annotation in page[PG.ANNOTS]: # type: ignore - annotation = cast(DictionaryObject, annotation.get_object()) - if annotation.get("/Subtype", "") != "/Widget": + annotation_obj = cast(DictionaryObject, annotation.get_object()) + if annotation_obj.get("/Subtype", "") != "/Widget": continue - if "/FT" in annotation and "/T" in annotation: - parent_annotation = annotation + if "/FT" in annotation_obj and "/T" in annotation_obj: + parent_annotation = annotation_obj else: - parent_annotation = annotation.get( + parent_annotation = annotation_obj.get( PG.PARENT, DictionaryObject() ).get_object() @@ -1098,17 +1098,17 @@ def reattach_fields( annotations = cast(ArrayObject, page["/Annots"]) for idx, annotation in enumerate(annotations): is_indirect = isinstance(annotation, IndirectObject) - annotation = cast(DictionaryObject, annotation.get_object()) - if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation: + annotation_obj = cast(DictionaryObject, annotation.get_object()) + if annotation_obj.get("/Subtype", "") == "/Widget" and "/FT" in annotation_obj: if ( - "indirect_reference" in annotation.__dict__ - and annotation.indirect_reference in fields + "indirect_reference" in annotation_obj.__dict__ + and annotation_obj.indirect_reference in fields ): continue if not is_indirect: - annotations[idx] = self._add_object(annotation) - fields.append(annotation.indirect_reference) - lst.append(annotation) + annotations[idx] = self._add_object(annotation_obj) + fields.append(annotation_obj.indirect_reference) + lst.append(annotation_obj) return lst def clone_reader_document_root(self, reader: PdfReader) -> None: @@ -1469,8 +1469,10 @@ def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int] object_positions.append(stream.tell()) stream.write(f"{idnum} 0 obj\n".encode()) if self._encryption and obj != self._encrypt_entry: - obj = self._encryption.encrypt_object(obj, idnum, 0) - obj.write_to_stream(stream) + obj_to_write = self._encryption.encrypt_object(obj, idnum, 0) + else: + obj_to_write = obj + obj_to_write.write_to_stream(stream) stream.write(b"\nendobj\n") else: object_positions.append(-1) @@ -1560,8 +1562,10 @@ def add_metadata(self, infos: dict[str, Any]) -> None: infos = cast(DictionaryObject, infos.get_object()) for key, value in list(infos.items()): if isinstance(value, PdfObject): - value = value.get_object() - args[NameObject(key)] = create_string_object(str(value)) + value_obj = value.get_object() + else: + value_obj = value + args[NameObject(key)] = create_string_object(str(value_obj)) if self._info is None: self._info = DictionaryObject() self._info.update(args) diff --git a/pypdf/filters.py b/pypdf/filters.py index a3f87ad401..3f7564281a 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -760,27 +760,26 @@ def decode_stream_data(stream: Any) -> bytes: if not data: return data for filter_name, params in zip(filters, decode_parms): - if isinstance(params, NullObject): - params = {} + params_obj = {} if isinstance(params, NullObject) else params if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): data = ASCII85Decode.decode(data) elif filter_name in (FT.LZW_DECODE, FTA.LZW): - data = LZWDecode.decode(data, params) + data = LZWDecode.decode(data, params_obj) elif filter_name in (FT.FLATE_DECODE, FTA.FL): - data = FlateDecode.decode(data, params) + data = FlateDecode.decode(data, params_obj) elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL): data = RunLengthDecode.decode(data) elif filter_name == FT.CCITT_FAX_DECODE: height = stream.get(IA.HEIGHT, ()) - data = CCITTFaxDecode.decode(data, params, height) + data = CCITTFaxDecode.decode(data, params_obj, height) elif filter_name == FT.DCT_DECODE: data = DCTDecode.decode(data) elif filter_name == FT.JPX_DECODE: data = JPXDecode.decode(data) elif filter_name == FT.JBIG2_DECODE: - data = JBIG2Decode.decode(data, params) + data = JBIG2Decode.decode(data, params_obj) elif filter_name == "/Crypt": if "/Name" in params or "/Type" in params: raise NotImplementedError( diff --git a/pypdf/generic/_files.py b/pypdf/generic/_files.py index f29fa770f6..0f807fa2ce 100644 --- a/pypdf/generic/_files.py +++ b/pypdf/generic/_files.py @@ -394,8 +394,8 @@ def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]: for kid in cast(ArrayObject, container["/Kids"].get_object()): # There might be further (nested) kids here. # Wait for an example before evaluating an implementation. - kid = kid.get_object() - if "/Names" in kid: - yield from cls._load_from_names(cast(ArrayObject, kid["/Names"])) + kid_obj = kid.get_object() + if "/Names" in kid_obj: + yield from cls._load_from_names(cast(ArrayObject, kid_obj["/Names"])) if "/Names" in container: yield from cls._load_from_names(cast(ArrayObject, container["/Names"])) diff --git a/pyproject.toml b/pyproject.toml index f0ba519cf4..462d20c388 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -173,7 +173,6 @@ ignore = [ "PERF203", # `try`-`except` within a loop incurs performance overhead "PGH003", # Use specific rule codes when ignoring type issues "PLW1510", # `subprocess.run` without explicit `check` argument - "PLW2901", # `with` statement variable `img` overwritten by assignment target "PT011", # `pytest.raises(ValueError)` is too broad, set the `match` "PT012", # `pytest.raises()` block should contain a single simple statement "PT014", # Ruff bug: Duplicate of test case at index 1 in `@pytest_mark.parametrize` diff --git a/tests/scripts/test_make_release.py b/tests/scripts/test_make_release.py index 16cb2e01ee..153ec2120e 100644 --- a/tests/scripts/test_make_release.py +++ b/tests/scripts/test_make_release.py @@ -41,11 +41,11 @@ def test_strip_header(data, expected): def test_get_git_commits_since_tag(): make_release = pytest.importorskip("make_release") - with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch( - "urllib.request.urlopen", side_effect=lambda _: commits + with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch( + "urllib.request.urlopen", side_effect=lambda _: commits_fh ), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1): - commits = make_release.get_git_commits_since_tag("4.0.1") - assert commits == [ + commits_list = make_release.get_git_commits_since_tag("4.0.1") + assert commits_list == [ make_release.Change( commit_hash="b7bfd0d7eddfd0865a94cc9e7027df6596242cf7", prefix="BUG", diff --git a/tests/test_filters.py b/tests/test_filters.py index a79e0ea217..b6b3468deb 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -82,9 +82,9 @@ def test_flatedecode_unsupported_predictor(): predictors = (-10, -1, 0, 9, 16, 20, 100) for predictor, s in cartesian_product(predictors, filter_inputs): - s = s.encode() + s_bytes = s.encode() with pytest.raises(PdfReadError): - codec.decode(codec.encode(s), DictionaryObject({"/Predictor": predictor})) + codec.decode(codec.encode(s_bytes), DictionaryObject({"/Predictor": predictor})) @pytest.mark.parametrize( diff --git a/tests/test_images.py b/tests/test_images.py index dd4ccebefb..1731e6871c 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -32,9 +32,9 @@ def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image: else: if isinstance(path, Path): assert path.exists() - with Image.open(path) as img: + with Image.open(path) as img_opened: img = ( - img.copy() + img_opened.copy() ) # Opened image should be copied to avoid issues with file closing return img diff --git a/tests/test_writer.py b/tests/test_writer.py index c76f76a443..192c52f450 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -2422,10 +2422,10 @@ def test_no_resource_for_14_std_fonts(caplog): writer = PdfWriter(BytesIO(get_data_from_url(url, name=name))) p = writer.pages[0] for a in p["/Annots"]: - a = a.get_object() - if a["/FT"] == "/Tx": + a_obj = a.get_object() + if a_obj["/FT"] == "/Tx": writer.update_page_form_field_values( - p, {a["/T"]: "Brooks"}, auto_regenerate=False + p, {a_obj["/T"]: "Brooks"}, auto_regenerate=False ) assert "Font dictionary for /Helvetica not found." in caplog.text From 12353fd343b02bfbf9362643607308181cec7903 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 08:28:43 -0500 Subject: [PATCH 02/12] addressing ci errors --- pypdf/_cmap.py | 10 +++++----- pypdf/_page.py | 6 +++--- .../_layout_mode/_fixed_width_page.py | 2 ++ pypdf/filters.py | 14 +++++++++----- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index f6797f1698..ef03a8854e 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -460,11 +460,11 @@ def build_font_width_map( # will consider width of char as avg(width) m = 0 cpt = 0 - for xx in w: - xx_val = xx.get_object() - if xx_val > 0: - m += xx_val - cpt += 1 + for xx in w: + xx_val = xx.get_object() + if xx_val > 0: + m += xx_val + cpt += 1 font_width_map["default"] = m / max(1, cpt) st = cast(int, ft["/FirstChar"]) en = cast(int, ft["/LastChar"]) diff --git a/pypdf/_page.py b/pypdf/_page.py index 80a17a5e7f..802c78287f 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -740,14 +740,14 @@ def _get_inline_images(self) -> dict[str, ImageFile]: if k in {"/Length", "/L"}: # no length is expected continue if isinstance(v, list): - translated_v = ArrayObject( + value_for_init: Any = ArrayObject( [self._translate_value_inline_image(k, x) for x in v] ) else: - translated_v = self._translate_value_inline_image(k, v) + value_for_init: Any = self._translate_value_inline_image(k, v) mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) if mapped_k not in init: - init[mapped_k] = translated_v + init[mapped_k] = value_for_init ii["object"] = EncodedStreamObject.initialize_from_dictionary(init) extension, byte_stream, img = _xobj_to_image(ii["object"]) files[f"~{num}~"] = ImageFile( diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 38b49dfe98..e0f34c1168 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -188,8 +188,10 @@ def recurs_to_target_op( text_state_mgr.reset_trm() if op == b"Tm": text_state_mgr.reset_tm() + operands_for_tm = operands elif op == b"TD": text_state_mgr.set_state_param(b"TL", -operands[1]) + operands_for_tm = operands elif op == b"T*": operands_for_tm = [0, -text_state_mgr.TL] else: diff --git a/pypdf/filters.py b/pypdf/filters.py index 3f7564281a..fb19f19a75 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -760,26 +760,30 @@ def decode_stream_data(stream: Any) -> bytes: if not data: return data for filter_name, params in zip(filters, decode_parms): - params_obj = {} if isinstance(params, NullObject) else params + params_typed: Optional[DictionaryObject] + if isinstance(params, NullObject): + params_typed = None + else: + params_typed = cast(Optional[DictionaryObject], params) if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): data = ASCII85Decode.decode(data) elif filter_name in (FT.LZW_DECODE, FTA.LZW): - data = LZWDecode.decode(data, params_obj) + data = LZWDecode.decode(data, params_typed) elif filter_name in (FT.FLATE_DECODE, FTA.FL): - data = FlateDecode.decode(data, params_obj) + data = FlateDecode.decode(data, params_typed) elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL): data = RunLengthDecode.decode(data) elif filter_name == FT.CCITT_FAX_DECODE: height = stream.get(IA.HEIGHT, ()) - data = CCITTFaxDecode.decode(data, params_obj, height) + data = CCITTFaxDecode.decode(data, params_typed, height) elif filter_name == FT.DCT_DECODE: data = DCTDecode.decode(data) elif filter_name == FT.JPX_DECODE: data = JPXDecode.decode(data) elif filter_name == FT.JBIG2_DECODE: - data = JBIG2Decode.decode(data, params_obj) + data = JBIG2Decode.decode(data, params_typed) elif filter_name == "/Crypt": if "/Name" in params or "/Type" in params: raise NotImplementedError( From 6da4f0680c0f6ce2535916450c835edac25a3baa Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 08:39:36 -0500 Subject: [PATCH 03/12] redef error and write/form tests error fixed --- pypdf/_page.py | 5 +++-- pypdf/_writer.py | 22 +++++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 802c78287f..2f255c42aa 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -739,12 +739,13 @@ def _get_inline_images(self) -> dict[str, ImageFile]: for k, v in ii["settings"].items(): if k in {"/Length", "/L"}: # no length is expected continue + value_for_init: Any if isinstance(v, list): - value_for_init: Any = ArrayObject( + value_for_init = ArrayObject( [self._translate_value_inline_image(k, x) for x in v] ) else: - value_for_init: Any = self._translate_value_inline_image(k, v) + value_for_init = self._translate_value_inline_image(k, v) mapped_k = NameObject(_INLINE_IMAGE_KEY_MAPPING[k]) if mapped_k not in init: init[mapped_k] = value_for_init diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 7e2f44710e..3e8207e85d 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -978,14 +978,14 @@ def update_page_form_field_values( if PG.ANNOTS not in page: logger_warning("No fields to update on this page", __name__) return - for annotation in page[PG.ANNOTS]: # type: ignore - annotation_obj = cast(DictionaryObject, annotation.get_object()) - if annotation_obj.get("/Subtype", "") != "/Widget": + for annotation_ref in page[PG.ANNOTS]: # type: ignore + annotation = cast(DictionaryObject, annotation_ref.get_object()) + if annotation.get("/Subtype", "") != "/Widget": continue - if "/FT" in annotation_obj and "/T" in annotation_obj: - parent_annotation = annotation_obj + if "/FT" in annotation and "/T" in annotation: + parent_annotation = annotation else: - parent_annotation = annotation_obj.get( + parent_annotation = annotation.get( PG.PARENT, DictionaryObject() ).get_object() @@ -1003,7 +1003,6 @@ def update_page_form_field_values( del parent_annotation["/I"] if flags: annotation[NameObject(FA.Ff)] = NumberObject(flags) - # Set the field value if not (value is None and flatten): # Only change values if given by user and not flattening. if isinstance(value, list): lst = ArrayObject(TextStringObject(v) for v in value) @@ -1025,9 +1024,14 @@ def update_page_form_field_values( if v not in normal_ap: v = NameObject("/Off") appearance_stream_obj = normal_ap.get(v) - # Other cases will be updated through the for loop + # other cases will be updated through the for loop annotation[NameObject(AA.AS)] = v annotation[NameObject(FA.V)] = v + if flatten and appearance_stream_obj is not None: + # We basically copy the entire appearance stream, which should be an XObject that + # is already registered. No need to add font resources. + rct = cast(RectangleObject, annotation[AA.Rect]) + self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1]) elif ( parent_annotation.get(FA.FT) == "/Tx" or parent_annotation.get(FA.FT) == "/Ch" @@ -1046,7 +1050,7 @@ def update_page_form_field_values( annotation[NameObject(AA.AP)] = DictionaryObject( {NameObject("/N"): self._add_object(appearance_stream_obj)} ) - elif "/N" not in (ap:= cast(DictionaryObject, annotation[AA.AP])): + elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])): cast(DictionaryObject, annotation[NameObject(AA.AP)])[ NameObject("/N") ] = self._add_object(appearance_stream_obj) From b1bee721d348d167a3b34d2c22ec3d3128de89e3 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 09:32:24 -0500 Subject: [PATCH 04/12] variable name change and other comments handling --- pypdf/_cmap.py | 10 ++-- pypdf/_doc_common.py | 28 +++++----- pypdf/_page.py | 11 ++-- .../_layout_mode/_fixed_width_page.py | 5 +- pypdf/_text_extraction/_layout_mode/_font.py | 15 +++--- pypdf/_writer.py | 53 ++++++------------- pypdf/filters.py | 12 ++--- pypdf/generic/_files.py | 6 +-- tests/scripts/test_make_release.py | 9 ++-- tests/test_images.py | 4 +- 10 files changed, 64 insertions(+), 89 deletions(-) diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index ef03a8854e..a9153b77c4 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -460,11 +460,11 @@ def build_font_width_map( # will consider width of char as avg(width) m = 0 cpt = 0 - for xx in w: - xx_val = xx.get_object() - if xx_val > 0: - m += xx_val - cpt += 1 + for xx in w: + xx_value = xx.get_object() + if xx_value > 0: + m += xx_value + cpt += 1 font_width_map["default"] = m / max(1, cpt) st = cast(int, ft["/FirstChar"]) en = cast(int, ft["/LastChar"]) diff --git a/pypdf/_doc_common.py b/pypdf/_doc_common.py index 958df103ea..995a9f667b 100644 --- a/pypdf/_doc_common.py +++ b/pypdf/_doc_common.py @@ -391,8 +391,8 @@ def recursive_call( return top, -1 return None, mi + ma for idx, kid in enumerate(cast(ArrayObject, node["/Kids"])): - kid_obj = cast(DictionaryObject, kid.get_object()) - n, i = recursive_call(kid_obj, mi) + kid_object = cast(DictionaryObject, kid.get_object()) + n, i = recursive_call(kid_object, mi) if n is not None: # page has just been found ... if i < 0: # ... just below! return node, idx @@ -612,8 +612,8 @@ def _build_field( states: list[str] = [] retval[key][NameObject("/_States_")] = ArrayObject(states) for k in obj.get(FA.Kids, {}): - k_obj = k.get_object() - for s in list(k_obj["/AP"]["/N"].keys()): + k_object = k.get_object() + for s in list(k_object["/AP"]["/N"].keys()): if s not in states: states.append(s) retval[key][NameObject("/_States_")] = ArrayObject(states) @@ -641,8 +641,8 @@ def _check_kids( if PagesAttributes.KIDS in tree: # recurse down the tree for kid in tree[PagesAttributes.KIDS]: # type: ignore - kid_obj = kid.get_object() - self.get_fields(kid_obj, retval, fileobj, stack) + kid_object = kid.get_object() + self.get_fields(kid_object, retval, fileobj, stack) def _write_field(self, fileobj: Any, field: Any, field_attributes: Any) -> None: field_attributes_tuple = FA.attributes() @@ -771,16 +771,16 @@ def _get_inherited(obj: DictionaryObject, key: str) -> Any: else: kids = field.get("/Kids", ()) for k in kids: - k_obj = k.get_object() - if (k_obj.get("/Subtype", "") == "/Widget") and ("/T" not in k_obj): + k_object = k.get_object() + if (k_object.get("/Subtype", "") == "/Widget") and ("/T" not in k_object): # Kid that is just a widget, not a field: - if "/P" in k_obj: - ret += [k_obj["/P"].get_object()] + if "/P" in k_object: + ret += [k_object["/P"].get_object()] else: ret += [ p for p in self.pages - if k_obj.indirect_reference in p.get("/Annots", "") + if k_object.indirect_reference in p.get("/Annots", "") ] return [ x @@ -1317,9 +1317,9 @@ def xfa(self) -> Optional[dict[str, Any]]: i = iter(fields) for f in i: tag = f - f_val = next(i) - if isinstance(f_val, IndirectObject): - field = cast(Optional[EncodedStreamObject], f_val.get_object()) + field_value = next(i) + if isinstance(field_value, IndirectObject): + field = cast(Optional[EncodedStreamObject], field_value.get_object()) if field: es = zlib.decompress(field._data) retval[tag] = es diff --git a/pypdf/_page.py b/pypdf/_page.py index 2f255c42aa..3279b5cc5b 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -739,7 +739,6 @@ def _get_inline_images(self) -> dict[str, ImageFile]: for k, v in ii["settings"].items(): if k in {"/Length", "/L"}: # no length is expected continue - value_for_init: Any if isinstance(v, list): value_for_init = ArrayObject( [self._translate_value_inline_image(k, x) for x in v] @@ -1232,13 +1231,13 @@ def _merge_page_writer( else: trsf = Transformation(ctm) for a in cast(ArrayObject, page2[PG.ANNOTS]): - a_obj = a.get_object() - aa = a_obj.clone( + annotation_object = a.get_object() + aa = annotation_object.clone( pdf, ignore_fields=("/P", "/StructParent", "/Parent"), force_duplicate=True, ) - r = cast(ArrayObject, a_obj["/Rect"]) + r = cast(ArrayObject, annotation_object["/Rect"]) pt1 = trsf.apply_on((r[0], r[1]), True) pt2 = trsf.apply_on((r[2], r[3]), True) aa[NameObject("/Rect")] = ArrayObject( @@ -1249,8 +1248,8 @@ def _merge_page_writer( max(pt1[1], pt2[1]), ) ) - if "/QuadPoints" in a_obj: - q = cast(ArrayObject, a_obj["/QuadPoints"]) + if "/QuadPoints" in annotation_object: + q = cast(ArrayObject, annotation_object["/QuadPoints"]) aa[NameObject("/QuadPoints")] = ArrayObject( trsf.apply_on((q[0], q[1]), True) + trsf.apply_on((q[2], q[3]), True) diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index e0f34c1168..179f029b88 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -186,16 +186,13 @@ def recurs_to_target_op( tj_ops.append(text_state_mgr.text_state_params(operands[2])) elif op in (b"Td", b"Tm", b"TD", b"T*"): text_state_mgr.reset_trm() + operands_for_tm = operands if op == b"Tm": text_state_mgr.reset_tm() - operands_for_tm = operands elif op == b"TD": text_state_mgr.set_state_param(b"TL", -operands[1]) - operands_for_tm = operands elif op == b"T*": operands_for_tm = [0, -text_state_mgr.TL] - else: - operands_for_tm = operands text_state_mgr.add_tm(operands_for_tm) elif op == b"Tf": text_state_mgr.set_font(fonts[operands[0]], operands[1]) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index 44e07da7dd..e58526b0c2 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -64,10 +64,9 @@ def __post_init__(self) -> None: for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - d_font_val = d_font - while isinstance(d_font_val, IndirectObject): - d_font_val = d_font_val.get_object() - self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_val + d_font_object = d_font.get_object() + assert not isinstance(d_font_object, IndirectObject), d_font_object + self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object ord_map = { ord(_target): _surrogate for _target, _surrogate in self.char_map.items() @@ -81,18 +80,18 @@ def __post_init__(self) -> None: skip_count = 0 _w = d_font.get("/W", []) for idx, w_entry in enumerate(_w): - w_val = w_entry.get_object() + w_value = w_entry.get_object() if skip_count: skip_count -= 1 continue - if not isinstance(w_val, (int, float)): # pragma: no cover + if not isinstance(w_value, (int, float)): # pragma: no cover # We should never get here due to skip_count above. Add a # warning and or use reader's "strict" to force an ex??? continue # check for format (1): `int [int int int int ...]` w_next_entry = _w[idx + 1].get_object() if isinstance(w_next_entry, Sequence): - start_idx, width_list = w_val, w_next_entry + start_idx, width_list = w_value, w_next_entry self.width_map.update( { ord_map[_cidx]: _width @@ -113,7 +112,7 @@ def __post_init__(self) -> None: _w[idx + 2].get_object(), (int, float) ): start_idx, stop_idx, const_width = ( - w_val, + w_value, w_next_entry, _w[idx + 2].get_object(), ) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 3e8207e85d..eb41469466 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1036,28 +1036,13 @@ def update_page_form_field_values( parent_annotation.get(FA.FT) == "/Tx" or parent_annotation.get(FA.FT) == "/Ch" ): - # Textbox; we need to generate the appearance stream object + # Textbox; update appearance via helper if isinstance(value, tuple): - appearance_stream_obj = TextStreamAppearance.from_text_annotation( - acro_form, parent_annotation, annotation, value[1], value[2] + self._update_field_annotation( + page, parent_annotation, annotation, value[1], value[2], flatten=flatten ) else: - appearance_stream_obj = TextStreamAppearance.from_text_annotation( - acro_form, parent_annotation, annotation - ) - # Add the appearance stream object - if AA.AP not in annotation: - annotation[NameObject(AA.AP)] = DictionaryObject( - {NameObject("/N"): self._add_object(appearance_stream_obj)} - ) - elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])): - cast(DictionaryObject, annotation[NameObject(AA.AP)])[ - NameObject("/N") - ] = self._add_object(appearance_stream_obj) - else: # [/AP][/N] exists - n = annotation[AA.AP]["/N"].indirect_reference.idnum # type: ignore - self._objects[n - 1] = appearance_stream_obj - appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self) + self._update_field_annotation(page, parent_annotation, annotation, flatten=flatten) elif ( annotation.get(FA.FT) == "/Sig" ): # deprecated # not implemented yet @@ -1100,19 +1085,19 @@ def reattach_fields( if "/Annots" not in page: return lst annotations = cast(ArrayObject, page["/Annots"]) - for idx, annotation in enumerate(annotations): - is_indirect = isinstance(annotation, IndirectObject) - annotation_obj = cast(DictionaryObject, annotation.get_object()) - if annotation_obj.get("/Subtype", "") == "/Widget" and "/FT" in annotation_obj: + for idx, annotation_ref in enumerate(annotations): + is_indirect = isinstance(annotation_ref, IndirectObject) + annotation = cast(DictionaryObject, annotation_ref.get_object()) + if annotation.get("/Subtype", "") == "/Widget" and "/FT" in annotation: if ( - "indirect_reference" in annotation_obj.__dict__ - and annotation_obj.indirect_reference in fields + "indirect_reference" in annotation.__dict__ + and annotation.indirect_reference in fields ): continue if not is_indirect: - annotations[idx] = self._add_object(annotation_obj) - fields.append(annotation_obj.indirect_reference) - lst.append(annotation_obj) + annotations[idx] = self._add_object(annotation) + fields.append(annotation.indirect_reference) + lst.append(annotation) return lst def clone_reader_document_root(self, reader: PdfReader) -> None: @@ -1472,11 +1457,10 @@ def _write_pdf_structure(self, stream: StreamType) -> tuple[list[int], list[int] if obj is not None: object_positions.append(stream.tell()) stream.write(f"{idnum} 0 obj\n".encode()) + object_to_write = obj if self._encryption and obj != self._encrypt_entry: - obj_to_write = self._encryption.encrypt_object(obj, idnum, 0) - else: - obj_to_write = obj - obj_to_write.write_to_stream(stream) + object_to_write = self._encryption.encrypt_object(obj, idnum, 0) + object_to_write.write_to_stream(stream) stream.write(b"\nendobj\n") else: object_positions.append(-1) @@ -1565,10 +1549,7 @@ def add_metadata(self, infos: dict[str, Any]) -> None: if isinstance(infos, PdfObject): infos = cast(DictionaryObject, infos.get_object()) for key, value in list(infos.items()): - if isinstance(value, PdfObject): - value_obj = value.get_object() - else: - value_obj = value + value_obj = value.get_object() if isinstance(value, PdfObject) else value args[NameObject(key)] = create_string_object(str(value_obj)) if self._info is None: self._info = DictionaryObject() diff --git a/pypdf/filters.py b/pypdf/filters.py index fb19f19a75..9b1a958683 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -759,12 +759,10 @@ def decode_stream_data(stream: Any) -> bytes: # If there is no data to decode, we should not try to decode it. if not data: return data - for filter_name, params in zip(filters, decode_parms): - params_typed: Optional[DictionaryObject] - if isinstance(params, NullObject): - params_typed = None - else: - params_typed = cast(Optional[DictionaryObject], params) + for filter_name, params_untyped in zip(filters, decode_parms): + params_typed: Optional[DictionaryObject] = None + if not isinstance(params_untyped, NullObject): + params_typed = cast(Optional[DictionaryObject], params_untyped) if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): @@ -785,7 +783,7 @@ def decode_stream_data(stream: Any) -> bytes: elif filter_name == FT.JBIG2_DECODE: data = JBIG2Decode.decode(data, params_typed) elif filter_name == "/Crypt": - if "/Name" in params or "/Type" in params: + if "/Name" in params_untyped or "/Type" in params_untyped: raise NotImplementedError( "/Crypt filter with /Name or /Type not supported yet" ) diff --git a/pypdf/generic/_files.py b/pypdf/generic/_files.py index 0f807fa2ce..8a6e8b5469 100644 --- a/pypdf/generic/_files.py +++ b/pypdf/generic/_files.py @@ -394,8 +394,8 @@ def _load(cls, catalog: DictionaryObject) -> Generator[EmbeddedFile]: for kid in cast(ArrayObject, container["/Kids"].get_object()): # There might be further (nested) kids here. # Wait for an example before evaluating an implementation. - kid_obj = kid.get_object() - if "/Names" in kid_obj: - yield from cls._load_from_names(cast(ArrayObject, kid_obj["/Names"])) + kid_object = kid.get_object() + if "/Names" in kid_object: + yield from cls._load_from_names(cast(ArrayObject, kid_object["/Names"])) if "/Names" in container: yield from cls._load_from_names(cast(ArrayObject, container["/Names"])) diff --git a/tests/scripts/test_make_release.py b/tests/scripts/test_make_release.py index 153ec2120e..88e23978c5 100644 --- a/tests/scripts/test_make_release.py +++ b/tests/scripts/test_make_release.py @@ -44,8 +44,8 @@ def test_get_git_commits_since_tag(): with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch( "urllib.request.urlopen", side_effect=lambda _: commits_fh ), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1): - commits_list = make_release.get_git_commits_since_tag("4.0.1") - assert commits_list == [ + commits = make_release.get_git_commits_since_tag("4.0.1") + assert commits == [ make_release.Change( commit_hash="b7bfd0d7eddfd0865a94cc9e7027df6596242cf7", prefix="BUG", @@ -87,8 +87,8 @@ def test_get_git_commits_since_tag(): def test_get_formatted_changes(): make_release = pytest.importorskip("make_release") - with open(COMMITS__VERSION_4_0_1, mode="rb") as commits, mock.patch( - "urllib.request.urlopen", side_effect=lambda _: commits + with open(COMMITS__VERSION_4_0_1, mode="rb") as commits_fh, mock.patch( + "urllib.request.urlopen", side_effect=lambda _: commits_fh ), mock.patch("subprocess.check_output", return_value=GIT_LOG__VERSION_4_0_1): output, output_with_user = make_release.get_formatted_changes("4.0.1") @@ -182,3 +182,4 @@ def test_get_formatted_changes__other(): - FIX: Broken test due to expired test file URL (#2468) by @pubpub-zz """ ) + diff --git a/tests/test_images.py b/tests/test_images.py index 1731e6871c..f8a48c910b 100644 --- a/tests/test_images.py +++ b/tests/test_images.py @@ -32,9 +32,9 @@ def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image: else: if isinstance(path, Path): assert path.exists() - with Image.open(path) as img_opened: + with Image.open(path) as opened_img: img = ( - img_opened.copy() + opened_img.copy() ) # Opened image should be copied to avoid issues with file closing return img From ca1e7ef872686983ffd362e1e734c99410801520 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 09:43:55 -0500 Subject: [PATCH 05/12] CI failures fix --- pypdf/_cmap.py | 10 +++++----- pypdf/_page.py | 7 +++++-- pypdf/_text_extraction/_layout_mode/_font.py | 5 ++++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/pypdf/_cmap.py b/pypdf/_cmap.py index a9153b77c4..599665d8cb 100644 --- a/pypdf/_cmap.py +++ b/pypdf/_cmap.py @@ -460,11 +460,11 @@ def build_font_width_map( # will consider width of char as avg(width) m = 0 cpt = 0 - for xx in w: - xx_value = xx.get_object() - if xx_value > 0: - m += xx_value - cpt += 1 + for xx in w: + xx_value = xx.get_object() + if xx_value > 0: + m += xx_value + cpt += 1 font_width_map["default"] = m / max(1, cpt) st = cast(int, ft["/FirstChar"]) en = cast(int, ft["/LastChar"]) diff --git a/pypdf/_page.py b/pypdf/_page.py index 3279b5cc5b..0a3923d5a5 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -740,8 +740,11 @@ def _get_inline_images(self) -> dict[str, ImageFile]: if k in {"/Length", "/L"}: # no length is expected continue if isinstance(v, list): - value_for_init = ArrayObject( - [self._translate_value_inline_image(k, x) for x in v] + value_for_init = cast( + PdfObject, + ArrayObject( + [self._translate_value_inline_image(k, x) for x in v] + ), ) else: value_for_init = self._translate_value_inline_image(k, v) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index e58526b0c2..daf03aa247 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -64,7 +64,10 @@ def __post_init__(self) -> None: for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - d_font_object = d_font.get_object() + if isinstance(d_font, IndirectObject): + d_font_object = d_font.get_object() + else: + d_font_object = d_font assert not isinstance(d_font_object, IndirectObject), d_font_object self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object ord_map = { From e33d82a2828a77f19902a0129d5fa2410a30486f Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 09:49:36 -0500 Subject: [PATCH 06/12] mypy test error fix --- tests/scripts/test_make_release.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/scripts/test_make_release.py b/tests/scripts/test_make_release.py index 88e23978c5..7371e2547f 100644 --- a/tests/scripts/test_make_release.py +++ b/tests/scripts/test_make_release.py @@ -126,8 +126,6 @@ def test_get_formatted_changes(): - Avoid catching not emitted warnings (#2429) by @stefan6419846 """ ) - - def test_get_formatted_changes__other(): make_release = pytest.importorskip("make_release") @@ -182,4 +180,3 @@ def test_get_formatted_changes__other(): - FIX: Broken test due to expired test file URL (#2468) by @pubpub-zz """ ) - From e385405beaf6949bb9d001ab54717f58803e5d76 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Wed, 5 Nov 2025 20:22:51 -0500 Subject: [PATCH 07/12] style error fix --- pypdf/_writer.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index eb41469466..674b985d4d 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1036,13 +1036,28 @@ def update_page_form_field_values( parent_annotation.get(FA.FT) == "/Tx" or parent_annotation.get(FA.FT) == "/Ch" ): - # Textbox; update appearance via helper + # Textbox; we need to generate the appearance stream object if isinstance(value, tuple): - self._update_field_annotation( - page, parent_annotation, annotation, value[1], value[2], flatten=flatten + appearance_stream_obj = TextStreamAppearance.from_text_annotation( + acro_form, parent_annotation, annotation, value[1], value[2] ) else: - self._update_field_annotation(page, parent_annotation, annotation, flatten=flatten) + appearance_stream_obj = TextStreamAppearance.from_text_annotation( + acro_form, parent_annotation, annotation + ) + # Add the appearance stream object + if AA.AP not in annotation: + annotation[NameObject(AA.AP)] = DictionaryObject( + {NameObject("/N"): self._add_object(appearance_stream_obj)} + ) + elif "/N" not in (ap := cast(DictionaryObject, annotation[AA.AP])): + cast(DictionaryObject, annotation[NameObject(AA.AP)])[ + NameObject("/N") + ] = self._add_object(appearance_stream_obj) + else: # [/AP][/N] exists + n = annotation[AA.AP]["/N"].indirect_reference.idnum # type: ignore + self._objects[n - 1] = appearance_stream_obj + appearance_stream_obj.indirect_reference = IndirectObject(n, 0, self) elif ( annotation.get(FA.FT) == "/Sig" ): # deprecated # not implemented yet From 412599b8298c91fc1f37e7de6c74e4272eecf337 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Thu, 6 Nov 2025 11:15:06 -0500 Subject: [PATCH 08/12] comment handling --- pypdf/_page.py | 7 ++----- pypdf/_text_extraction/_layout_mode/_font.py | 9 ++------- pypdf/_writer.py | 9 ++------- pypdf/filters.py | 8 ++++---- tests/scripts/test_make_release.py | 2 ++ 5 files changed, 12 insertions(+), 23 deletions(-) diff --git a/pypdf/_page.py b/pypdf/_page.py index 0a3923d5a5..89ac81c9eb 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -740,11 +740,8 @@ def _get_inline_images(self) -> dict[str, ImageFile]: if k in {"/Length", "/L"}: # no length is expected continue if isinstance(v, list): - value_for_init = cast( - PdfObject, - ArrayObject( - [self._translate_value_inline_image(k, x) for x in v] - ), + value_for_init: PdfObject = ArrayObject( + [self._translate_value_inline_image(k, x) for x in v] ) else: value_for_init = self._translate_value_inline_image(k, v) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index daf03aa247..bbe784b369 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -6,7 +6,6 @@ from ..._codecs import adobe_glyphs from ...errors import ParseError -from ...generic import IndirectObject from ._font_widths import STANDARD_WIDTHS @@ -64,11 +63,7 @@ def __post_init__(self) -> None: for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - if isinstance(d_font, IndirectObject): - d_font_object = d_font.get_object() - else: - d_font_object = d_font - assert not isinstance(d_font_object, IndirectObject), d_font_object + d_font_object = d_font.get_object() self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object ord_map = { ord(_target): _surrogate @@ -81,7 +76,7 @@ def __post_init__(self) -> None: # (2) A character start index, a character stop index, and a width, e.g. # `45 65 500` applies width 500 to characters 45-65. skip_count = 0 - _w = d_font.get("/W", []) + _w = d_font_object.get("/W", []) for idx, w_entry in enumerate(_w): w_value = w_entry.get_object() if skip_count: diff --git a/pypdf/_writer.py b/pypdf/_writer.py index 674b985d4d..2c82a3e2f1 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1027,11 +1027,6 @@ def update_page_form_field_values( # other cases will be updated through the for loop annotation[NameObject(AA.AS)] = v annotation[NameObject(FA.V)] = v - if flatten and appearance_stream_obj is not None: - # We basically copy the entire appearance stream, which should be an XObject that - # is already registered. No need to add font resources. - rct = cast(RectangleObject, annotation[AA.Rect]) - self._add_apstream_object(page, appearance_stream_obj, field, rct[0], rct[1]) elif ( parent_annotation.get(FA.FT) == "/Tx" or parent_annotation.get(FA.FT) == "/Ch" @@ -1564,8 +1559,8 @@ def add_metadata(self, infos: dict[str, Any]) -> None: if isinstance(infos, PdfObject): infos = cast(DictionaryObject, infos.get_object()) for key, value in list(infos.items()): - value_obj = value.get_object() if isinstance(value, PdfObject) else value - args[NameObject(key)] = create_string_object(str(value_obj)) + value_object = value.get_object() if isinstance(value, PdfObject) else value + args[NameObject(key)] = create_string_object(str(value_object)) if self._info is None: self._info = DictionaryObject() self._info.update(args) diff --git a/pypdf/filters.py b/pypdf/filters.py index 9b1a958683..5f5bfd7ecc 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -759,10 +759,10 @@ def decode_stream_data(stream: Any) -> bytes: # If there is no data to decode, we should not try to decode it. if not data: return data - for filter_name, params_untyped in zip(filters, decode_parms): + for filter_name, params in zip(filters, decode_parms): params_typed: Optional[DictionaryObject] = None - if not isinstance(params_untyped, NullObject): - params_typed = cast(Optional[DictionaryObject], params_untyped) + if not isinstance(params, NullObject): + params_typed = cast(Optional[DictionaryObject], params) if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): @@ -783,7 +783,7 @@ def decode_stream_data(stream: Any) -> bytes: elif filter_name == FT.JBIG2_DECODE: data = JBIG2Decode.decode(data, params_typed) elif filter_name == "/Crypt": - if "/Name" in params_untyped or "/Type" in params_untyped: + if "/Name" in params or "/Type" in params: raise NotImplementedError( "/Crypt filter with /Name or /Type not supported yet" ) diff --git a/tests/scripts/test_make_release.py b/tests/scripts/test_make_release.py index 7371e2547f..4d79fcb33b 100644 --- a/tests/scripts/test_make_release.py +++ b/tests/scripts/test_make_release.py @@ -126,6 +126,8 @@ def test_get_formatted_changes(): - Avoid catching not emitted warnings (#2429) by @stefan6419846 """ ) + + def test_get_formatted_changes__other(): make_release = pytest.importorskip("make_release") From 320d53d1868458f1ec4fc1a3faa1d59695731c26 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Thu, 6 Nov 2025 11:22:36 -0500 Subject: [PATCH 09/12] dict error fix --- pypdf/_text_extraction/_layout_mode/_font.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index bbe784b369..a7cf6e473c 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -59,7 +59,7 @@ def __post_init__(self) -> None: # CID fonts have a /W array mapping character codes to widths stashed in /DescendantFonts if "/DescendantFonts" in self.font_dictionary: - d_font: dict[Any, Any] + d_font: Any for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): From 04f13650ee605f2d542b240bdf8b8280c973e392 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Fri, 7 Nov 2025 05:00:32 -0500 Subject: [PATCH 10/12] comment back --- pypdf/_writer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pypdf/_writer.py b/pypdf/_writer.py index b7051b0d06..acb04cf246 100644 --- a/pypdf/_writer.py +++ b/pypdf/_writer.py @@ -1007,6 +1007,7 @@ def update_page_form_field_values( del parent_annotation["/I"] if flags: annotation[NameObject(FA.Ff)] = NumberObject(flags) + # Set the field value if not (value is None and flatten): # Only change values if given by user and not flattening. if isinstance(value, list): lst = ArrayObject(TextStringObject(v) for v in value) @@ -1028,7 +1029,7 @@ def update_page_form_field_values( if v not in normal_ap: v = NameObject("/Off") appearance_stream_obj = normal_ap.get(v) - # other cases will be updated through the for loop + # Other cases will be updated through the for loop annotation[NameObject(AA.AS)] = v annotation[NameObject(FA.V)] = v elif ( From e79a99f88e33632e24053d1cdccd43a362e43ec3 Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:21:47 -0500 Subject: [PATCH 11/12] adding pdfobject --- pypdf/_text_extraction/_layout_mode/_font.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pypdf/_text_extraction/_layout_mode/_font.py b/pypdf/_text_extraction/_layout_mode/_font.py index a7cf6e473c..24d57e26b2 100644 --- a/pypdf/_text_extraction/_layout_mode/_font.py +++ b/pypdf/_text_extraction/_layout_mode/_font.py @@ -6,6 +6,7 @@ from ..._codecs import adobe_glyphs from ...errors import ParseError +from ...generic import DictionaryObject, PdfObject from ._font_widths import STANDARD_WIDTHS @@ -59,11 +60,11 @@ def __post_init__(self) -> None: # CID fonts have a /W array mapping character codes to widths stashed in /DescendantFonts if "/DescendantFonts" in self.font_dictionary: - d_font: Any + d_font: PdfObject for d_font_idx, d_font in enumerate( self.font_dictionary["/DescendantFonts"] ): - d_font_object = d_font.get_object() + d_font_object = cast(DictionaryObject, d_font.get_object()) self.font_dictionary["/DescendantFonts"][d_font_idx] = d_font_object ord_map = { ord(_target): _surrogate From a23337158113b7d25f8c24b711d6c1d62b05481b Mon Sep 17 00:00:00 2001 From: dev-KingMaster <136489418+dev-KingMaster@users.noreply.github.com> Date: Tue, 11 Nov 2025 08:40:56 -0500 Subject: [PATCH 12/12] params_untryped as loop variable fix --- pypdf/filters.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pypdf/filters.py b/pypdf/filters.py index 5f5bfd7ecc..3ba7d228f3 100644 --- a/pypdf/filters.py +++ b/pypdf/filters.py @@ -759,31 +759,31 @@ def decode_stream_data(stream: Any) -> bytes: # If there is no data to decode, we should not try to decode it. if not data: return data - for filter_name, params in zip(filters, decode_parms): - params_typed: Optional[DictionaryObject] = None - if not isinstance(params, NullObject): - params_typed = cast(Optional[DictionaryObject], params) + for filter_name, params_untyped in zip(filters, decode_parms): + params: Optional[DictionaryObject] = None + if not isinstance(params_untyped, NullObject): + params = cast(Optional[DictionaryObject], params_untyped) if filter_name in (FT.ASCII_HEX_DECODE, FTA.AHx): data = ASCIIHexDecode.decode(data) elif filter_name in (FT.ASCII_85_DECODE, FTA.A85): data = ASCII85Decode.decode(data) elif filter_name in (FT.LZW_DECODE, FTA.LZW): - data = LZWDecode.decode(data, params_typed) + data = LZWDecode.decode(data, params) elif filter_name in (FT.FLATE_DECODE, FTA.FL): - data = FlateDecode.decode(data, params_typed) + data = FlateDecode.decode(data, params) elif filter_name in (FT.RUN_LENGTH_DECODE, FTA.RL): data = RunLengthDecode.decode(data) elif filter_name == FT.CCITT_FAX_DECODE: height = stream.get(IA.HEIGHT, ()) - data = CCITTFaxDecode.decode(data, params_typed, height) + data = CCITTFaxDecode.decode(data, params, height) elif filter_name == FT.DCT_DECODE: data = DCTDecode.decode(data) elif filter_name == FT.JPX_DECODE: data = JPXDecode.decode(data) elif filter_name == FT.JBIG2_DECODE: - data = JBIG2Decode.decode(data, params_typed) + data = JBIG2Decode.decode(data, params) elif filter_name == "/Crypt": - if "/Name" in params or "/Type" in params: + if "/Name" in params_untyped or "/Type" in params_untyped: raise NotImplementedError( "/Crypt filter with /Name or /Type not supported yet" )