diff --git a/pypdf/_xobj_image_helpers.py b/pypdf/_xobj_image_helpers.py index ba0934f46c..2ad866ba3e 100644 --- a/pypdf/_xobj_image_helpers.py +++ b/pypdf/_xobj_image_helpers.py @@ -5,9 +5,10 @@ from typing import Any, Literal, Optional, Union, cast from ._utils import check_if_whitespace_only, logger_warning -from .constants import ColorSpaces, StreamAttributes +from .constants import ColorSpaces from .constants import FilterTypes as FT from .constants import ImageAttributes as IA +from .constants import StreamAttributes from .errors import EmptyImageDataError, PdfReadError from .generic import ( ArrayObject, @@ -174,14 +175,16 @@ def __handle_flate__indexed(color_space: ArrayObject) -> tuple[Any, Any, Any, An base, hival = element1.split("\x00") hival = int(hival) return color_space, base, hival, lookup - raise PdfReadError(f"Expected color space with 4 values, got {count}: {color_space}") + raise PdfReadError( + f"Expected color space with 4 values, got {count}: {color_space}" + ) def _handle_flate( size: tuple[int, int], data: bytes, mode: mode_str_type, - color_space: str, + color_space: ArrayObject, colors: int, obj_as_text: str, ) -> tuple[Image.Image, str, str, bool]: @@ -233,13 +236,13 @@ def _handle_flate( if actual_count < expected_count: logger_warning( f"Not enough lookup values: Expected {expected_count}, got {actual_count}.", - __name__ + __name__, ) lookup += bytes([0] * (expected_count - actual_count)) elif not check_if_whitespace_only(lookup[expected_count:]): logger_warning( f"Too many lookup values: Expected {expected_count}, got {actual_count}.", - __name__ + __name__, ) lookup = lookup[:expected_count] colors_arr = [lookup[:nb], lookup[nb:]] @@ -280,7 +283,9 @@ def _handle_flate( # Table 65 - Additional Entries Specific to an ICC Profile Stream Dictionary mode2 = _get_image_mode(color_space, colors, mode)[0] if mode != mode2: - img = Image.frombytes(mode2, size, data) # reloaded as mode may have changed + img = Image.frombytes( + mode2, size, 
data + ) # reloaded as mode may have changed if mode == "CMYK": extension = ".tif" image_format = "TIFF" @@ -291,7 +296,7 @@ def _handle_jpx( size: tuple[int, int], data: bytes, mode: mode_str_type, - color_space: str, + color_space: ArrayObject, colors: int, ) -> tuple[Image.Image, str, str, bool]: """ @@ -336,12 +341,14 @@ def _apply_decode( # requires reverting scale (cf p243,2§ last sentence) decode = x_object_obj.get( IA.DECODE, - ([1.0, 0.0] * len(img.getbands())) - if ( - (img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)) - or (invert_color and img.mode == "L") - ) - else None, + ( + ([1.0, 0.0] * len(img.getbands())) + if ( + (img.mode == "CMYK" and lfilters in (FT.DCT_DECODE, FT.JPX_DECODE)) + or (invert_color and img.mode == "L") + ) + else None + ), ) if ( isinstance(color_space, ArrayObject) @@ -381,23 +388,21 @@ def _get_mode_and_invert_color( else: mode, invert_color = _get_image_mode( color_space, - 2 - if ( - colors == 1 - and ( - not is_null_or_none(color_space) - and "Gray" not in color_space + ( + 2 + if ( + colors == 1 + and (not is_null_or_none(color_space) and "Gray" not in color_space) ) - ) - else colors, + else colors + ), "", ) return mode, invert_color def _xobj_to_image( - x_object: dict[str, Any], - pillow_parameters: Union[dict[str, Any], None] = None + x_object: dict[str, Any], pillow_parameters: Union[dict[str, Any], None] = None ) -> tuple[Optional[str], bytes, Any]: """ Users need to have the pillow package installed. 
@@ -414,6 +419,7 @@ def _xobj_to_image( Tuple[file extension, bytes, PIL.Image.Image] """ + def _apply_alpha( img: Image.Image, x_object: dict[str, Any], @@ -462,7 +468,7 @@ def _apply_alpha( # Get color properties colors = x_object.get("/Colors", 1) - color_space: Any = x_object.get("/ColorSpace", NullObject()).get_object() + color_space: ArrayObject = x_object.get("/ColorSpace", NullObject()).get_object() if isinstance(color_space, list) and len(color_space) == 1: color_space = color_space[0].get_object() diff --git a/pypdf/generic/_data_structures.py b/pypdf/generic/_data_structures.py index b826652708..8a94aa65ca 100644 --- a/pypdf/generic/_data_structures.py +++ b/pypdf/generic/_data_structures.py @@ -35,15 +35,13 @@ from collections.abc import Iterable, Sequence from io import BytesIO from math import ceil -from typing import ( - Any, - Callable, - Optional, - Union, - cast, -) +from typing import Any, Callable, Optional, Union, cast -from .._protocols import PdfReaderProtocol, PdfWriterProtocol, XmpInformationProtocol +from .._protocols import ( + PdfReaderProtocol, + PdfWriterProtocol, + XmpInformationProtocol, +) from .._utils import ( WHITESPACES, StreamType, @@ -57,9 +55,9 @@ from ..constants import ( CheckboxRadioButtonAttributes, FieldDictionaryAttributes, - OutlineFontFlag, ) from ..constants import FilterTypes as FT +from ..constants import OutlineFontFlag from ..constants import StreamAttributes as SA from ..constants import TypArguments as TA from ..constants import TypFitArguments as TF @@ -1000,7 +998,7 @@ def write_to_stream( @staticmethod def initialize_from_dictionary( - data: dict[str, Any] + data: dict[str, Any], ) -> Union["EncodedStreamObject", "DecodedStreamObject"]: retval: Union[EncodedStreamObject, DecodedStreamObject] if SA.FILTER in data: @@ -1046,7 +1044,9 @@ def flate_encode(self, level: int = -1) -> "EncodedStreamObject": retval._data = FlateDecode.encode(self._data, level) return retval - def decode_as_image(self, 
pillow_parameters: Union[dict[str, Any], None] = None) -> Any: + def decode_as_image( + self, pillow_parameters: Union[dict[str, Any], None] = None + ) -> Any: """ Try to decode the stream object as an image @@ -1166,7 +1166,7 @@ def __init__( # seems to already be broken beforehand in these cases. logger_warning( f"Expected StreamObject, got {type(s_resolved).__name__} instead. Data might be wrong.", - __name__ + __name__, ) else: data += s_resolved.get_data() @@ -1598,9 +1598,9 @@ class Destination(TreeObject): """ - node: Optional[ - DictionaryObject - ] = None # node provide access to the original Object + node: Optional[DictionaryObject] = ( + None # node provide access to the original Object + ) def __init__( self, diff --git a/tests/test_xobject_image_helpers.py b/tests/test_xobject_image_helpers.py index 6fe2f2cc6c..18541c245d 100644 --- a/tests/test_xobject_image_helpers.py +++ b/tests/test_xobject_image_helpers.py @@ -1,4 +1,5 @@ """Test the pypdf._xobj_image_helpers module.""" + from io import BytesIO from pathlib import Path @@ -6,10 +7,21 @@ from PIL import Image from pypdf import PdfReader -from pypdf._xobj_image_helpers import _extended_image_from_bytes, _handle_flate, _xobj_to_image +from pypdf._xobj_image_helpers import ( + _extended_image_from_bytes, + _handle_flate, + _xobj_to_image, +) from pypdf.constants import FilterTypes, ImageAttributes, StreamAttributes from pypdf.errors import EmptyImageDataError, PdfReadError -from pypdf.generic import ArrayObject, DecodedStreamObject, NameObject, NumberObject, StreamObject, TextStringObject +from pypdf.generic import ( + ArrayObject, + DecodedStreamObject, + NameObject, + NumberObject, + StreamObject, + TextStringObject, +) from . import get_data_from_url, get_image_data @@ -106,7 +118,9 @@ def test_handle_flate__image_mode_1(caplog): # here, but received a custom padding of `0`. 
lookup.set_data(b"\x42\x42\x42\x00\x13") caplog.clear() - expected_short_data = tuple([entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data]) + expected_short_data = tuple( + [entry if entry[0] == 66 else (0, 19, 0) for entry in expected_data] + ) result = _handle_flate( size=(3, 3), data=data, @@ -131,7 +145,10 @@ def test_extended_image_frombytes_zero_data(): size = (1, 1) data = b"" - with pytest.raises(EmptyImageDataError, match=r"Data is 0 bytes, cannot process an image from empty data\."): + with pytest.raises( + EmptyImageDataError, + match=r"Data is 0 bytes, cannot process an image from empty data\.", + ): _extended_image_from_bytes(mode, size, data) @@ -139,19 +156,27 @@ def test_handle_flate__autodesk_indexed(): reader = PdfReader(RESOURCE_ROOT / "AutoCad_Diagram.pdf") page = reader.pages[0] for name, image in page.images.items(): - assert name.startswith("/") - image.image.load() + if isinstance(name, str): + assert name.startswith("/") + else: + assert name[0].startswith("/") + + if image.image: + image.image.load() data = RESOURCE_ROOT.joinpath("AutoCad_Diagram.pdf").read_bytes() data = data.replace(b"/DeviceRGB\x00255", b"/DeviceRGB") reader = PdfReader(BytesIO(data)) page = reader.pages[0] with pytest.raises( - PdfReadError, - match=r"^Expected color space with 4 values, got 3: \['/Indexed', '/DeviceRGB', '\\x00\\x80\\x00\\x80\\x80耀" # noqa: E501 + PdfReadError, + match=r"^Expected color space with 4 values, got 3: \['/Indexed', '/DeviceRGB', '\\x00\\x80\\x00\\x80\\x80耀", # noqa: E501 ): for name, _image in page.images.items(): # noqa: PERF102 - assert name.startswith("/") + if isinstance(name, str): + assert name.startswith("/") + else: + assert name[0].startswith("/") @pytest.mark.enable_socket @@ -161,7 +186,8 @@ def test_get_mode_and_invert_color(): reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) page = reader.pages[12] for _name, image in page.images.items(): # noqa: PERF102 - image.image.load() + if image.image: 
+ image.image.load() @pytest.mark.enable_socket @@ -171,8 +197,11 @@ def test_get_imagemode__empty_array(): reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) page = reader.pages[0] - with pytest.raises(expected_exception=PdfReadError, match=r"^ColorSpace field not found in .+"): - page.images[0].image.load() + with pytest.raises( + expected_exception=PdfReadError, match=r"^ColorSpace field not found in .+" + ): + if page.images[0].image: + page.images[0].image.load() def test_p_image_with_alpha_mask(): @@ -187,7 +216,9 @@ def test_p_image_with_alpha_mask(): for obj in [x_object, mask_object]: obj[NameObject(ImageAttributes.WIDTH)] = NumberObject(image.width) obj[NameObject(ImageAttributes.HEIGHT)] = NumberObject(image.height) - obj[NameObject(StreamAttributes.FILTER)] = NameObject(FilterTypes.CCITT_FAX_DECODE) + obj[NameObject(StreamAttributes.FILTER)] = NameObject( + FilterTypes.CCITT_FAX_DECODE + ) # Set the basic image data. x_object.set_data(image_data.getvalue())