diff --git a/tests/bench.py b/tests/bench.py index 7f2a23f6ea..ab346c2311 100644 --- a/tests/bench.py +++ b/tests/bench.py @@ -19,7 +19,6 @@ TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" def page_ops(pdf_path, password): @@ -140,8 +139,8 @@ def text_extraction(pdf_path): return text -def test_text_extraction(benchmark): - file_path = SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf" +def test_text_extraction(benchmark, sample_files_dir): + file_path = sample_files_dir / "009-pdflatex-geotopo/GeoTopo.pdf" benchmark(text_extraction, file_path) diff --git a/tests/conftest.py b/tests/conftest.py index ded189a8d4..7317fcd447 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,9 +5,11 @@ import pytest +from pypdf import PdfReader, PdfWriter +from pypdf._page import PageObject + TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" @pytest.fixture(scope="session") @@ -18,3 +20,39 @@ def pdf_file_path(tmp_path_factory): @pytest.fixture(scope="session") def txt_file_path(tmp_path_factory): return tmp_path_factory.mktemp("pypdf-data") / f"{uuid.uuid4()}.txt" + + +# Reusable PDF fixtures +@pytest.fixture +def crazyones_pdf_page_one(crazyones_pdf_reader) -> PageObject: + return crazyones_pdf_reader.pages[0] + + +@pytest.fixture +def crazyones_pdf_path(resources_dir) -> Path: + return resources_dir / "crazyones.pdf" + + +@pytest.fixture +def crazyones_pdf_reader(crazyones_pdf_path) -> PdfReader: + return PdfReader(crazyones_pdf_path) + + +@pytest.fixture +def crazyones_pdf_writer(crazyones_pdf_path) -> PdfWriter: + return PdfWriter(crazyones_pdf_path) + + +@pytest.fixture +def project_dir() -> Path: + return PROJECT_ROOT + + +@pytest.fixture +def resources_dir(project_dir) -> Path: + return project_dir / "resources" + + +@pytest.fixture +def sample_files_dir(project_dir) -> Path: + 
return project_dir / "sample-files" diff --git a/tests/generic/test_files.py b/tests/generic/test_files.py index 9d488e0681..3f7704d61f 100644 --- a/tests/generic/test_files.py +++ b/tests/generic/test_files.py @@ -3,7 +3,6 @@ import shutil import subprocess from io import BytesIO -from pathlib import Path import pytest @@ -24,16 +23,13 @@ ) from tests import get_data_from_url -TESTS_ROOT = Path(__file__).parent.parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" - PDFATTACH_BINARY = shutil.which("pdfattach") +@pytest.mark.samples @pytest.mark.skipif(PDFATTACH_BINARY is None, reason="Requires poppler-utils") -def test_embedded_file__basic(tmpdir): - clean_path = SAMPLE_ROOT / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf" +def test_embedded_file__basic(tmpdir, sample_files_dir): + clean_path = sample_files_dir / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf" attached_path = tmpdir / "attached.pdf" file_path = tmpdir / "test.txt" file_path.write_binary(b"Hello World\n") diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 4a45cbb920..a034dbb2c5 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -24,15 +24,10 @@ from . 
import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - -def test_ellipse(pdf_file_path): +def test_ellipse(pdf_file_path, crazyones_pdf_reader): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) + reader = crazyones_pdf_reader page = reader.pages[0] writer = PdfWriter() writer.add_page(page) @@ -49,9 +44,9 @@ def test_ellipse(pdf_file_path): writer.write(fp) -def test_text(pdf_file_path): +def test_text(pdf_file_path, resources_dir): # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" + pdf_path = resources_dir / "outline-without-title.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] writer = PdfWriter() @@ -70,10 +65,9 @@ def test_text(pdf_file_path): writer.write(fp) -def test_free_text(pdf_file_path): +def test_free_text(pdf_file_path, crazyones_pdf_reader): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) + reader = crazyones_pdf_reader page = reader.pages[0] writer = PdfWriter() writer.add_page(page) @@ -136,13 +130,10 @@ def test_annotation_dictionary(): assert a.flags == 123 -def test_polygon(pdf_file_path): +def test_polygon(pdf_file_path, crazyones_pdf_page_one): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] writer = PdfWriter() - writer.add_page(page) + writer.add_page(crazyones_pdf_page_one) with pytest.raises(ValueError): Polygon( @@ -159,13 +150,10 @@ def test_polygon(pdf_file_path): writer.write(fp) -def test_polyline(pdf_file_path): +def test_polyline(pdf_file_path, crazyones_pdf_page_one): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] writer = PdfWriter() - writer.add_page(page) + writer.add_page(crazyones_pdf_page_one) with pytest.raises( ValueError, @@ -185,13 +173,10 @@ def test_polyline(pdf_file_path): writer.write(fp) -def 
test_line(pdf_file_path): +def test_line(pdf_file_path, crazyones_pdf_page_one): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] writer = PdfWriter() - writer.add_page(page) + writer.add_page(crazyones_pdf_page_one) # Act line_annotation = Line( @@ -207,13 +192,10 @@ def test_line(pdf_file_path): writer.write(fp) -def test_rectangle(pdf_file_path): +def test_rectangle(pdf_file_path, crazyones_pdf_page_one): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] writer = PdfWriter() - writer.add_page(page) + writer.add_page(crazyones_pdf_page_one) # Act square_annotation = Rectangle( @@ -229,11 +211,9 @@ def test_rectangle(pdf_file_path): writer.write(fp) -def test_highlight(pdf_file_path): +def test_highlight(pdf_file_path, crazyones_pdf_page_one): # Arrange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] + page = crazyones_pdf_page_one writer = PdfWriter() writer.add_page(page) @@ -293,9 +273,9 @@ def test_highlight(pdf_file_path): writer.write(fp) -def test_link(pdf_file_path): +def test_link(pdf_file_path, resources_dir): # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" + pdf_path = resources_dir / "outline-without-title.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] writer = PdfWriter() @@ -340,9 +320,9 @@ def test_link(pdf_file_path): writer.write(fp) -def test_popup(caplog): +def test_popup(caplog, resources_dir): # Arrange - pdf_path = RESOURCE_ROOT / "outline-without-title.pdf" + pdf_path = resources_dir / "outline-without-title.pdf" reader = PdfReader(pdf_path) page = reader.pages[0] writer = PdfWriter() diff --git a/tests/test_cmap.py b/tests/test_cmap.py index 76cb46c488..c94b0cf8c4 100644 --- a/tests/test_cmap.py +++ b/tests/test_cmap.py @@ -1,10 +1,9 @@ """Test the pypdf_cmap module.""" from io import BytesIO -from pathlib import Path import pytest -from 
pypdf import PdfReader, PdfWriter +from pypdf import PdfReader from pypdf._cmap import get_encoding, parse_bfchar from pypdf._codecs import charset_encoding from pypdf._font import Font @@ -12,10 +11,6 @@ from . import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - @pytest.mark.enable_socket @pytest.mark.slow @@ -218,25 +213,22 @@ def test_eten_b5(): reader.pages[0].extract_text().startswith("1/7 \n富邦新終身壽險") -def test_missing_entries_in_cmap(): +def test_missing_entries_in_cmap(crazyones_pdf_reader): """ Issue #2702: this issue is observed on damaged pdfs use of this file in test has been discarded as too slow/long we will create the same error from crazyones """ - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - p = reader.pages[0] + p = crazyones_pdf_reader.pages[0] p["/Resources"]["/Font"]["/F1"][NameObject("/ToUnicode")] = IndirectObject( - 99999999, 0, reader + 99999999, 0, crazyones_pdf_reader ) p.extract_text() -def test_null_missing_width(): +def test_null_missing_width(crazyones_pdf_writer): """For coverage of #2792""" - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf") - page = writer.pages[0] + page = crazyones_pdf_writer.pages[0] ft = page["/Resources"]["/Font"]["/F1"] ft[NameObject("/Widths")] = ArrayObject() ft["/FontDescriptor"][NameObject("/MissingWidth")] = NullObject() diff --git a/tests/test_codecs.py b/tests/test_codecs.py index 45411cb93c..f2a57ee624 100644 --- a/tests/test_codecs.py +++ b/tests/test_codecs.py @@ -1,6 +1,5 @@ """Test LZW-related code.""" from io import BytesIO -from pathlib import Path import pytest @@ -10,10 +9,6 @@ from . 
import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - test_cases = [ pytest.param(b"", id="Empty input"), pytest.param(b"A", id="Single character"), @@ -68,8 +63,8 @@ def test_decode_lzw(encoded, expected_decoded): assert actual_decoded == expected_decoded -def test_lzw_decoder_table_overflow(caplog): - path = RESOURCE_ROOT / "lzw_decoder_table_overflow.bin" +def test_lzw_decoder_table_overflow(caplog, resources_dir): + path = resources_dir / "lzw_decoder_table_overflow.bin" codec = LzwCodec() assert codec.decode(path.read_bytes()).startswith( b'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@' diff --git a/tests/test_doc_common.py b/tests/test_doc_common.py index e4228ef5e2..093c984d95 100644 --- a/tests/test_doc_common.py +++ b/tests/test_doc_common.py @@ -5,7 +5,6 @@ import subprocess from io import BytesIO from operator import itemgetter -from pathlib import Path from unittest import mock import pytest @@ -15,18 +14,14 @@ from pypdf.generic import EmbeddedFile, NameObject, NullObject, TextStringObject, ViewerPreferences from tests import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" -RESOURCES_ROOT = PROJECT_ROOT / "resources" - PDFATTACH_BINARY = shutil.which("pdfattach") +@pytest.mark.samples @pytest.mark.skipif(PDFATTACH_BINARY is None, reason="Requires poppler-utils") -def test_attachments(tmpdir): +def test_attachments(tmpdir, sample_files_dir): # No attachments. 
- clean_path = SAMPLE_ROOT / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf" + clean_path = sample_files_dir / "002-trivial-libre-office-writer" / "002-trivial-libre-office-writer.pdf" with PdfReader(clean_path) as pdf: assert pdf._list_attachments() == [] assert list(pdf.attachment_list) == [] @@ -171,8 +166,8 @@ def test_byte_encoded_named_destinations(): } -def test_viewer_preferences__indirect_reference(): - input_path = RESOURCES_ROOT / "git.pdf" +def test_viewer_preferences__indirect_reference(resources_dir): + input_path = resources_dir / "git.pdf" reader = PdfReader(input_path) assert (0, 24) not in reader.resolved_objects viewer_preferences = reader.viewer_preferences @@ -452,10 +447,8 @@ def test_outline__issue3462(): ] -def test_flatten__cyclic_references(): - path = RESOURCES_ROOT / "crazyones.pdf" - - reader = PdfReader(path) +def test_flatten__cyclic_references(crazyones_pdf_reader): + reader = crazyones_pdf_reader assert len(reader.pages) == 1 reader._flatten() diff --git a/tests/test_encryption.py b/tests/test_encryption.py index 757e7dd0d9..ab69c5c2c7 100644 --- a/tests/test_encryption.py +++ b/tests/test_encryption.py @@ -14,10 +14,11 @@ USE_CRYPTOGRAPHY = crypt_provider[0] == "cryptography" USE_PYCRYPTODOME = crypt_provider[0] == "pycryptodome" HAS_AES = USE_CRYPTOGRAPHY or USE_PYCRYPTODOME -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" + + +@pytest.fixture +def encryption_dir(resources_dir) -> Path: + return resources_dir / "encryption" @pytest.mark.parametrize( @@ -69,7 +70,7 @@ ("r6-owner-password.pdf", True), ], ) -def test_encryption(name, requires_aes): +def test_encryption(name, requires_aes, encryption_dir): """ Encrypted PDFs are handled correctly.
@@ -79,7 +80,7 @@ def test_encryption(name, requires_aes): - Decryption works for encrypted PDFs - Metadata is properly extracted from the decrypted PDF """ - inputfile = RESOURCE_ROOT / "encryption" / name + inputfile = encryption_dir / name if requires_aes and not HAS_AES: with pytest.raises(DependencyError) as exc: ipdf = pypdf.PdfReader(inputfile) @@ -116,7 +117,7 @@ def test_encryption(name, requires_aes): ], ) @pytest.mark.skipif(not HAS_AES, reason="No AES implementation") -def test_pdf_with_both_passwords(name, user_passwd, owner_passwd): +def test_pdf_with_both_passwords(name, user_passwd, owner_passwd, encryption_dir): """ PDFs with both user and owner passwords are handled correctly. @@ -126,7 +127,7 @@ def test_pdf_with_both_passwords(name, user_passwd, owner_passwd): - The correct password type is returned after decryption - The number of pages is correctly identified after decryption """ - inputfile = RESOURCE_ROOT / "encryption" / name + inputfile = encryption_dir / name ipdf = pypdf.PdfReader(inputfile) assert ipdf.is_encrypted assert ipdf.decrypt(user_passwd) == PasswordType.USER_PASSWORD @@ -142,14 +143,14 @@ def test_pdf_with_both_passwords(name, user_passwd, owner_passwd): ], ) @pytest.mark.skipif(not HAS_AES, reason="No AES implementation") -def test_read_page_from_encrypted_file_aes_256(pdffile, password): +def test_read_page_from_encrypted_file_aes_256(pdffile, password, resources_dir): """ A page can be read from an encrypted.
This is a regression test for issue 327: IndexError for get_page() of decrypted file """ - path = RESOURCE_ROOT / pdffile + path = resources_dir / pdffile pypdf.PdfReader(path, password=password).pages[0] @@ -168,10 +169,10 @@ def test_read_page_from_encrypted_file_aes_256(pdffile, password): ) @pytest.mark.skipif(not HAS_AES, reason="No AES implementation") @pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_merge_encrypted_pdfs(names): +def test_merge_encrypted_pdfs(names, encryption_dir): """Encrypted PDFs can be merged after decryption.""" merger = pypdf.PdfWriter() - files = [RESOURCE_ROOT / "encryption" / x for x in names] + files = [encryption_dir / x for x in names] pdfs = [pypdf.PdfReader(x) for x in files] for pdf in pdfs: if pdf.is_encrypted: @@ -199,11 +200,10 @@ def test_encrypt_decrypt_with_cipher_class(cryptcls): assert crypt.decrypt(crypt.encrypt(message)) == message -def test_attempt_decrypt_unencrypted_pdf(): +def test_attempt_decrypt_unencrypted_pdf(crazyones_pdf_path): """Attempting to decrypt an unencrypted PDF raises a PdfReadError.""" - path = RESOURCE_ROOT / "crazyones.pdf" with pytest.raises(PdfReadError) as exc: - PdfReader(path, password="nonexistent") + PdfReader(crazyones_pdf_path, password="nonexistent") assert exc.value.args[0] == "Not an encrypted file" @@ -244,11 +244,11 @@ def test_alg_v5_generate_values(): ("ABCD", False), ], ) -def test_pdf_encrypt(pdf_file_path, alg, requires_aes): +def test_pdf_encrypt(pdf_file_path, alg, requires_aes, encryption_dir): user_password = secrets.token_urlsafe(10) owner_password = secrets.token_urlsafe(10) - reader = PdfReader(RESOURCE_ROOT / "encryption" / "unencrypted.pdf") + reader = PdfReader(encryption_dir / "unencrypted.pdf") page = reader.pages[0] text0 = page.extract_text() @@ -298,11 +298,11 @@ def test_pdf_encrypt(pdf_file_path, alg, requires_aes): "count", [1, 2, 3, 4, 5, 10], ) -def test_pdf_encrypt_multiple(pdf_file_path, count): +def
test_pdf_encrypt_multiple(pdf_file_path, count, encryption_dir): user_password = secrets.token_urlsafe(10) owner_password = secrets.token_urlsafe(10) - reader = PdfReader(RESOURCE_ROOT / "encryption" / "unencrypted.pdf") + reader = PdfReader(encryption_dir / "unencrypted.pdf") page = reader.pages[0] text0 = page.extract_text() @@ -344,10 +344,10 @@ def test_aes_decrypt_corrupted_data(): @pytest.mark.samples -def test_encrypt_stream_dictionary(pdf_file_path): +def test_encrypt_stream_dictionary(pdf_file_path, sample_files_dir): user_password = secrets.token_urlsafe(10) - reader = PdfReader(SAMPLE_ROOT / "023-cmyk-image/cmyk-image.pdf") + reader = PdfReader(sample_files_dir / "023-cmyk-image/cmyk-image.pdf") page = reader.pages[0] original_image_obj = reader.get_object(page.images["/I"].indirect_reference) diff --git a/tests/test_filters.py b/tests/test_filters.py index 9808792891..ed5868366d 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -54,10 +54,6 @@ string.whitespace, # Add more ) -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - @pytest.mark.parametrize( ("predictor", "s"), list(cartesian_product([1], filter_inputs)) @@ -355,10 +351,9 @@ def test_1bit_image_extraction(): @pytest.mark.enable_socket -def test_png_transparency_reverse(): +def test_png_transparency_reverse(resources_dir): """Cf issue #1599""" - pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf" - reader = PdfReader(pdf_path) + reader = PdfReader(resources_dir / "labeled-edges-center-image.pdf") refimg = Image.open( BytesIO(get_data_from_url(name="labeled-edges-center-image.png")) ) @@ -657,11 +652,11 @@ def test_ascii85decode__ignore_whitespaces(caplog): @pytest.mark.enable_socket -def test_ccitt_fax_decode__black_is_1(): +def test_ccitt_fax_decode__black_is_1(resources_dir): url = "https://github.com/user-attachments/files/19288881/imagemagick-CCITTFaxDecode_BlackIs1-true.pdf" name =
"issue3193.pdf" reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - other_reader = PdfReader(RESOURCE_ROOT / "imagemagick-CCITTFaxDecode.pdf") + other_reader = PdfReader(resources_dir / "imagemagick-CCITTFaxDecode.pdf") actual_image = reader.pages[0].images[0].image expected_image_inverted = other_reader.pages[0].images[0].image diff --git a/tests/test_generic.py b/tests/test_generic.py index da8b3314bd..15cb93824d 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -6,7 +6,6 @@ from base64 import a85encode from copy import deepcopy from io import BytesIO -from pathlib import Path import pytest @@ -49,10 +48,6 @@ from . import ReaderDummy, get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - class ChildDummy(DictionaryObject): @property @@ -640,11 +635,9 @@ def test_remove_child_found_in_tree(): tree.empty_tree() -def test_remove_child_in_tree(): - pdf = RESOURCE_ROOT / "form.pdf" - +def test_remove_child_in_tree(resources_dir): + reader = PdfReader(resources_dir / "form.pdf") tree = TreeObject() - reader = PdfReader(pdf) writer = PdfWriter() writer._add_object(tree) writer.add_page(reader.pages[0]) @@ -1029,14 +1022,13 @@ def test_destination_withoutzoom(): writer.write(out) -def test_encodedstream_set_data(): +def test_encodedstream_set_data(crazyones_pdf_reader): """ EncodedStreamObject.set_data to extend data stream works. Checks also the flate_encode. 
""" - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) + reader = crazyones_pdf_reader co = reader.pages[0]["/Contents"][0].get_object() co.set_data(b"%hello\n" + co.get_data()) assert b"hello" in co.get_data() @@ -1252,11 +1244,11 @@ def test_missing_hashbin(): assert t.hash_bin() == hash((ByteStringObject, b"123")) -def test_is_null_or_none(): +def test_is_null_or_none(crazyones_pdf_reader): assert is_null_or_none(NullObject()) assert not is_null_or_none(PdfObject()) - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + reader = crazyones_pdf_reader # used with get assert is_null_or_none(reader.root_object.get("/do_no_exist")) # object unknown... @@ -1267,14 +1259,14 @@ def test_is_null_or_none(): assert is_null_or_none(writer.pages[0]["/Contents"][-1]) -def test_coverage_arrayobject(): +def test_coverage_arrayobject(crazyones_pdf_reader): writer = PdfWriter() a = ArrayObject([1]) assert isinstance(a.replicate(writer)[0], int) assert isinstance(a.clone(writer)[0], int) a.indirect_reference = IndirectObject(1, 0, writer) assert isinstance(a.clone(writer)[0], int) - r = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + r = crazyones_pdf_reader a = ArrayObject([r.pages[0]["/Contents"][0].get_object()]) aa = a.clone(writer) assert isinstance(aa[0], IndirectObject) @@ -1283,7 +1275,7 @@ def test_coverage_arrayobject(): assert isinstance(v, PdfObject) -def test_coverage_streamobject(): +def test_coverage_streamobject(crazyones_pdf_reader): writer = PdfWriter() s = StreamObject() del s.decoded_self @@ -1296,7 +1288,7 @@ def test_coverage_streamobject(): co.indirect_reference = IndirectObject(1, 0, writer) assert co == co.clone(writer) - r = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + r = crazyones_pdf_reader co = r.pages[0].get_contents() co[NameObject("/testkey")] = NameObject("/test") co.decoded_self = None diff --git a/tests/test_images.py b/tests/test_images.py index d568800ecd..134b5ec6ce 100644 --- a/tests/test_images.py +++ 
b/tests/test_images.py @@ -21,11 +21,6 @@ from . import get_data_from_url, get_image_data -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" - def open_image(path: Union[Path, Image.Image, BytesIO]) -> Image.Image: if isinstance(path, Image.Image): @@ -84,22 +79,22 @@ def image_similarity( @pytest.mark.samples -def test_image_similarity_one(): - path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" +def test_image_similarity_one(sample_files_dir): + path_a = sample_files_dir / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" path_b = path_a assert image_similarity(path_a, path_b) == 1 @pytest.mark.samples -def test_image_similarity_zero(): - path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" - path_b = SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png" +def test_image_similarity_zero(sample_files_dir): + path_a = sample_files_dir / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" + path_b = sample_files_dir / "009-pdflatex-geotopo/page-23-Im2.png" assert image_similarity(path_a, path_b) == 0 @pytest.mark.samples -def test_image_similarity_mid(): - path_a = SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" +def test_image_similarity_mid(sample_files_dir): + path_a = sample_files_dir / "018-base64-image/page-0-QuickPDFImd32aa1ab.png" img_b = Image.open(path_a) draw = ImageDraw.Draw(img_b) @@ -189,28 +184,28 @@ def test_image_new_property(): ("src", "page_index", "image_key", "expected"), [ ( - SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf", + "009-pdflatex-geotopo/GeoTopo.pdf", 23, "/Im2", - SAMPLE_ROOT / "009-pdflatex-geotopo/page-23-Im2.png", + "009-pdflatex-geotopo/page-23-Im2.png", ), ( - SAMPLE_ROOT / "003-pdflatex-image/pdflatex-image.pdf", + "003-pdflatex-image/pdflatex-image.pdf", 0, "/Im1", - SAMPLE_ROOT / "003-pdflatex-image/page-0-Im1.jpg", + "003-pdflatex-image/page-0-Im1.jpg", ), ( - 
SAMPLE_ROOT / "018-base64-image/base64image.pdf", + "018-base64-image/base64image.pdf", 0, "/QuickPDFImd32aa1ab", - SAMPLE_ROOT / "018-base64-image/page-0-QuickPDFImd32aa1ab.png", + "018-base64-image/page-0-QuickPDFImd32aa1ab.png", ), ( - SAMPLE_ROOT / "019-grayscale-image/grayscale-image.pdf", + "019-grayscale-image/grayscale-image.pdf", 0, "/X0", - SAMPLE_ROOT / "019-grayscale-image/page-0-X0.png", + "019-grayscale-image/page-0-X0.png", ), ], ids=[ @@ -221,14 +216,14 @@ def test_image_new_property(): ], ) @pytest.mark.samples -def test_image_extraction(src, page_index, image_key, expected): - reader = PdfReader(src) +def test_image_extraction(src, page_index, image_key, expected, sample_files_dir): + reader = PdfReader(sample_files_dir / src) actual_image = reader.pages[page_index].images[image_key] - if not expected.exists(): + if not (sample_files_dir / expected).exists(): # A little helper for test generation with open(f"page-{page_index}-{actual_image.name}", "wb") as fp: fp.write(actual_image.data) - assert image_similarity(BytesIO(actual_image.data), expected) >= 0.99 + assert image_similarity(BytesIO(actual_image.data), sample_files_dir / expected) >= 0.99 @pytest.mark.enable_socket @@ -392,7 +387,7 @@ def test_ff_fe_starting_lut(): @pytest.mark.enable_socket -def test_inline_image_extraction(): +def test_inline_image_extraction(resources_dir): """Cf #2598""" url = "https://github.com/py-pdf/pypdf/files/14982414/lebo102.pdf" name = "iss2598.pdf" @@ -421,7 +416,7 @@ def test_inline_image_extraction(): assert writer.pages[0].inline_images is not None writer.pages[0].merge_scaled_page(writer.pages[0], 0.25) assert writer.pages[0].inline_images is None - reader = PdfReader(RESOURCE_ROOT / "imagemagick-ASCII85Decode.pdf") + reader = PdfReader(resources_dir / "imagemagick-ASCII85Decode.pdf") writer.pages[0].merge_page(reader.pages[0]) assert list(writer.pages[0].images.keys()) == [ "/Im0", @@ -481,8 +476,8 @@ def test_extract_image_from_object(caplog): assert "does not seem to be an Image" in caplog.text -def
test_extract_jpeg_with_explicit_quality(): - reader = PdfReader(RESOURCE_ROOT / "side-by-side-subfig.pdf") +def test_extract_jpeg_with_explicit_quality(resources_dir): + reader = PdfReader(resources_dir / "side-by-side-subfig.pdf") page = reader.pages[0] x_object = page["/Resources"]["/XObject"]["/Im1"] assert x_object["/Filter"] == "/DCTDecode" diff --git a/tests/test_javascript.py b/tests/test_javascript.py index 094f8126d1..22a6d035ff 100644 --- a/tests/test_javascript.py +++ b/tests/test_javascript.py @@ -1,20 +1,14 @@ """Test topics around the usage of JavaScript in PDF documents.""" -from pathlib import Path from typing import Any import pytest from pypdf import PdfReader, PdfWriter -# Configure path environment -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - @pytest.fixture -def pdf_file_writer(): - reader = PdfReader(RESOURCE_ROOT / "issue-604.pdf") +def pdf_file_writer(resources_dir): + reader = PdfReader(resources_dir / "issue-604.pdf") writer = PdfWriter() writer.append_pages_from_reader(reader) return writer diff --git a/tests/test_page.py b/tests/test_page.py index 8406077442..3b9dc1cf3c 100644 --- a/tests/test_page.py +++ b/tests/test_page.py @@ -38,7 +38,6 @@ TESTS_ROOT = Path(__file__).parent.resolve() PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" SAMPLE_ROOT = PROJECT_ROOT / "sample-files" GHOSTSCRIPT_BINARY = shutil.which("gs") @@ -62,8 +61,8 @@ def get_all_sample_files(): ids=[m["path"] for m in all_files_meta["data"] if not m["encrypted"]], ) @pytest.mark.filterwarnings("ignore::pypdf.errors.PdfReadWarning") -def test_read(meta): - pdf_path = SAMPLE_ROOT / meta["path"] +def test_read(meta, sample_files_dir): + pdf_path = sample_files_dir / meta["path"] reader = PdfReader(pdf_path) try: reader.pages[0] @@ -89,7 +88,7 @@ def test_read(meta): ("https://arxiv.org/pdf/2201.00029.pdf", None), ], ) -def test_page_operations(pdf_path, password): 
+def test_page_operations(pdf_path, password, resources_dir): """ This test just checks if the operation throws an exception. @@ -99,7 +98,7 @@ def test_page_operations(pdf_path, password): if pdf_path.startswith("http"): pdf_path = BytesIO(get_data_from_url(pdf_path, pdf_path.split("/")[-1])) else: - pdf_path = RESOURCE_ROOT / pdf_path + pdf_path = resources_dir / pdf_path reader = PdfReader(pdf_path) writer = PdfWriter() @@ -141,14 +140,14 @@ def test_page_operations(pdf_path, password): ], ) def test_mediabox_expansion_after_rotation( - angle: float, expected_width: int, expected_height: int + angle: float, expected_width: int, expected_height: int, resources_dir ): """ Mediabox dimensions after rotation at a non-right angle with expansion are correct. The test was validated against pillow (see PR #2282) """ - pdf_path = RESOURCE_ROOT / "crazyones.pdf" + pdf_path = resources_dir / "crazyones.pdf" writer = PdfWriter(clone_from=pdf_path) transformation = Transformation().rotate(angle) @@ -162,12 +161,12 @@ def test_mediabox_expansion_after_rotation( assert math.isclose(mediabox.height, expected_height, abs_tol=2) -def test_transformation_equivalence(): - pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf" +def test_transformation_equivalence(resources_dir): + pdf_path = resources_dir / "labeled-edges-center-image.pdf" writer_base = PdfWriter(clone_from=pdf_path) page_base = writer_base.pages[0] - pdf_path = RESOURCE_ROOT / "box.pdf" + pdf_path = resources_dir / "box.pdf" writer_add = PdfWriter(clone_from=pdf_path) page_box = writer_add.pages[0] @@ -196,11 +195,11 @@ def test_transformation_equivalence(): ) -def test_transformation_equivalence2(): - pdf_path = RESOURCE_ROOT / "labeled-edges-center-image.pdf" +def test_transformation_equivalence2(resources_dir): + pdf_path = resources_dir / "labeled-edges-center-image.pdf" reader_base = PdfReader(pdf_path) - pdf_path = RESOURCE_ROOT / "box.pdf" + pdf_path = resources_dir / "box.pdf" reader_add = 
PdfReader(pdf_path) writer = PdfWriter() @@ -231,7 +230,7 @@ def test_transformation_equivalence2(): ) # No special assert: Visual check the page has been increased and all is visible (box+graph) - pdf_path = RESOURCE_ROOT / "commented-xmp.pdf" + pdf_path = resources_dir / "commented-xmp.pdf" reader_comments = PdfReader(pdf_path) writer = PdfWriter() @@ -247,10 +246,8 @@ def test_transformation_equivalence2(): # No special assert: Visual check the overlay has its comments at the good position -def test_get_user_unit_property(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - assert reader.pages[0].user_unit == 1 +def test_get_user_unit_property(crazyones_pdf_reader): + assert crazyones_pdf_reader.pages[0].user_unit == 1 def compare_dict_objects(d1, d2): @@ -263,8 +260,8 @@ def compare_dict_objects(d1, d2): @pytest.mark.slow -def test_page_transformations(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" +def test_page_transformations(resources_dir): + pdf_path = resources_dir / "crazyones.pdf" writer = PdfWriter(clone_from=pdf_path) page: PageObject = writer.pages[0] @@ -291,17 +288,14 @@ def test_page_transformations(): @pytest.mark.parametrize( ("pdf_path", "password"), [ - (RESOURCE_ROOT / "crazyones.pdf", None), - (RESOURCE_ROOT / "attachment.pdf", None), - (RESOURCE_ROOT / "side-by-side-subfig.pdf", None), - ( - RESOURCE_ROOT / "libreoffice-writer-password.pdf", - "openpassword", - ), + ("crazyones.pdf", None), + ("attachment.pdf", None), + ("side-by-side-subfig.pdf", None), + ("libreoffice-writer-password.pdf", "openpassword"), ], ) -def test_compress_content_streams(pdf_path, password): - reader = PdfReader(pdf_path) +def test_compress_content_streams(pdf_path, password, resources_dir): + reader = PdfReader(resources_dir / pdf_path) writer = PdfWriter() if password: @@ -322,9 +316,8 @@ def test_compress_content_streams(pdf_path, password): reader.pages[0].compress_content_streams() -def test_page_properties(): - reader = 
PdfReader(RESOURCE_ROOT / "crazyones.pdf") - page = reader.pages[0] +def test_page_properties(crazyones_pdf_page_one): + page = crazyones_pdf_page_one assert page.mediabox == RectangleObject((0, 0, 612, 792)) assert page.cropbox == RectangleObject((0, 0, 612, 792)) assert page.bleedbox == RectangleObject((0, 0, 612, 792)) @@ -335,8 +328,8 @@ def test_page_properties(): assert page.bleedbox == RectangleObject((0, 1, 100, 101)) -def test_page_rotation(): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") +def test_page_rotation(resources_dir): + writer = PdfWriter(clone_from=resources_dir / "crazyones.pdf") page = writer.pages[0] with pytest.raises(ValueError) as exc: page.rotate(91) @@ -358,8 +351,8 @@ def test_page_rotation(): assert math.isclose(page.mediabox.top, 612, abs_tol=0.1) -def test_page_indirect_rotation(): - reader = PdfReader(RESOURCE_ROOT / "indirect-rotation.pdf") +def test_page_indirect_rotation(resources_dir): + reader = PdfReader(resources_dir / "indirect-rotation.pdf") page = reader.pages[0] # test rotation @@ -470,7 +463,7 @@ def test_extract_text_operator_t_star(): # L1266, L1267 page.extract_text() -def test_extract_text_visitor_callbacks(): +def test_extract_text_visitor_callbacks(resources_dir): """ Extract text in rectangle-objects or simple tables. @@ -654,7 +647,7 @@ def extract_cell_text(cell_texts: list[PositionedText]) -> str: return ("".join(t.text for t in cell_texts)).strip() # Test 1: We test the analysis of page 7 "2.1 LRS model". - reader = PdfReader(RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf") + reader = PdfReader(resources_dir / "GeoBase_NHNC1_Data_Model_UML_EN.pdf") page_lrs_model = reader.pages[6] # We ignore the invisible large rectangles. 
@@ -726,7 +719,7 @@ def filter_first_table(r) -> bool: # Test 3: Read a table in a document using a non-translating # but scaling Tm-operand - reader = PdfReader(RESOURCE_ROOT / "Sample_Td-matrix.pdf") + reader = PdfReader(resources_dir / "Sample_Td-matrix.pdf") page_td_model = reader.pages[0] # We store the translations of the Td-executions. list_td = [] @@ -748,7 +741,7 @@ def visitor_td(op, args, cm, tm) -> None: ("pdf_path", "password", "embedded", "unembedded"), [ ( - RESOURCE_ROOT / "crazyones.pdf", + "crazyones.pdf", None, { "/HHXGQB+SFTI1440", @@ -758,7 +751,7 @@ def visitor_td(op, args, cm, tm) -> None: set(), ), ( - RESOURCE_ROOT / "attachment.pdf", + "attachment.pdf", None, { "/HHXGQB+SFTI1440", @@ -768,35 +761,35 @@ def visitor_td(op, args, cm, tm) -> None: set(), ), ( - RESOURCE_ROOT / "libreoffice-writer-password.pdf", + "libreoffice-writer-password.pdf", "openpassword", {"/BAAAAA+DejaVuSans"}, set(), ), ( - RESOURCE_ROOT / "imagemagick-images.pdf", + "imagemagick-images.pdf", None, set(), {"/Helvetica"}, ), - (RESOURCE_ROOT / "imagemagick-lzw.pdf", None, set(), set()), + ("imagemagick-lzw.pdf", None, set(), set()), ( - RESOURCE_ROOT / "reportlab-inline-image.pdf", + "reportlab-inline-image.pdf", None, set(), {"/Helvetica"}, ), # fonts in annotations ( - RESOURCE_ROOT / "FormTestFromOo.pdf", + "FormTestFromOo.pdf", None, {"/CAAAAA+LiberationSans", "/EAAAAA+SegoeUI", "/BAAAAA+LiberationSerif"}, {"/LiberationSans", "/ZapfDingbats"}, ), ], ) -def test_get_fonts(pdf_path, password, embedded, unembedded): - reader = PdfReader(pdf_path, password=password) +def test_get_fonts(pdf_path, password, embedded, unembedded, resources_dir): + reader = PdfReader(resources_dir / pdf_path, password=password) a = set() b = set() for page in reader.pages: @@ -844,9 +837,8 @@ def test_get_fonts2(): ) -def test_annotation_getter(): - pdf_path = RESOURCE_ROOT / "commented.pdf" - reader = PdfReader(pdf_path) +def test_annotation_getter(resources_dir): + reader = 
PdfReader(resources_dir / "commented.pdf") annotations = reader.pages[0].annotations assert annotations is not None assert isinstance(annotations[0], IndirectObject) @@ -885,11 +877,9 @@ def test_annotation_getter(): } -def test_annotation_setter(pdf_file_path): +def test_annotation_setter(pdf_file_path, crazyones_pdf_page_one): # Arange - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - page = reader.pages[0] + page = crazyones_pdf_page_one writer = PdfWriter() writer.add_page(page) with pytest.raises(ValueError): @@ -966,10 +956,10 @@ def test_empyt_password_1088(): len(reader.pages) -@pytest.mark.enable_socket -def test_old_habibi(): +@pytest.mark.samples +def test_old_habibi(sample_files_dir): # this habibi has multiple characters associated with the h - reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi.pdf") + reader = PdfReader(sample_files_dir / "015-arabic/habibi.pdf") txt = reader.pages[0].extract_text() # very odd file # extract from acrobat reader "حَبيبي habibi􀀃􀏲􀎒􀏴􀎒􀎣􀋴 assert "habibi" in txt @@ -977,8 +967,8 @@ def test_old_habibi(): @pytest.mark.samples -def test_read_link_annotation(): - reader = PdfReader(SAMPLE_ROOT / "016-libre-office-link/libre-office-link.pdf") +def test_read_link_annotation(sample_files_dir): + reader = PdfReader(sample_files_dir / "016-libre-office-link/libre-office-link.pdf") assert len(reader.pages[0].annotations) == 1 annot = dict(reader.pages[0].annotations[0].get_object()) expected = { @@ -1217,9 +1207,8 @@ def test_merge_transformed_page_into_blank(): assert inserted_blank.page_number is not None -def test_pages_printing(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_pages_printing(crazyones_pdf_reader): + reader = crazyones_pdf_reader assert str(reader.pages) == "[PageObject(0)]" assert len(reader.pages[0].images) == 0 with pytest.raises(KeyError): @@ -1276,9 +1265,8 @@ def test_del_pages(): assert len(writer.flattened_pages) == 0 -def 
test_pdf_pages_missing_type(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_pdf_pages_missing_type(crazyones_pdf_reader): + reader = crazyones_pdf_reader del reader.trailer["/Root"]["/Pages"]["/Kids"][0].get_object()["/Type"] reader.pages[0] writer = PdfWriter(clone_from=reader) @@ -1299,7 +1287,7 @@ def test_merge_with_stream_wrapped_in_save_restore(): @pytest.mark.samples -def test_compression(): +def test_compression(sample_files_dir): """Test for issue #1897""" def create_stamp_pdf() -> BytesIO: @@ -1316,7 +1304,7 @@ def create_stamp_pdf() -> BytesIO: template = PdfReader(create_stamp_pdf()) template_page = template.pages[0] writer = PdfWriter() - writer.append(SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf", [1]) + writer.append(sample_files_dir / "009-pdflatex-geotopo/GeoTopo.pdf", [1]) nb1 = len(writer._objects) # 1 page only is modified @@ -1444,21 +1432,21 @@ def test_missing_basefont_in_type3(): reader.pages[0]._get_fonts() -def test_invalid_index(): - src_abs = RESOURCE_ROOT / "git.pdf" +def test_invalid_index(resources_dir): + src_abs = resources_dir / "git.pdf" reader = PdfReader(src_abs) with pytest.raises(TypeError): _ = reader.pages["0"] -def test_negative_index(): - src_abs = RESOURCE_ROOT / "git.pdf" +def test_negative_index(resources_dir): + src_abs = resources_dir / "git.pdf" reader = PdfReader(src_abs) assert reader.pages[0] == reader.pages[-1] -def test_get_contents_as_bytes(): - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf") +def test_get_contents_as_bytes(crazyones_pdf_writer): + writer = crazyones_pdf_writer co = writer.pages[0]["/Contents"][0] expected = co.get_data() assert writer.pages[0]._get_contents_as_bytes() == expected @@ -1468,14 +1456,14 @@ def test_get_contents_as_bytes(): assert writer.pages[0]._get_contents_as_bytes() is None -def test_recursive_get_page_from_node(): - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=True) +def 
test_recursive_get_page_from_node(crazyones_pdf_path): + writer = PdfWriter(crazyones_pdf_path, incremental=True) writer.root_object["/Pages"].get_object()[ NameObject("/Parent") ] = writer.root_object["/Pages"].indirect_reference with pytest.raises(PyPdfError): writer.add_page(writer.pages[0]) - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=True) + writer = PdfWriter(crazyones_pdf_path, incremental=True) writer.insert_page(writer.pages[0], -1) with pytest.raises(ValueError): writer.insert_page(writer.pages[0], -10) @@ -1585,9 +1573,8 @@ def test_delete_non_existent_annotations(): assert page.annotations is None -def test_replace_contents_on_reader(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_replace_contents_on_reader(crazyones_pdf_reader): + reader = crazyones_pdf_reader page = reader.pages[0] content_stream = ContentStream(stream=None, pdf=reader) content_stream.set_data(b"Test data") @@ -1617,7 +1604,7 @@ def test_replace_contents_on_reader__indirect_reference(): writer.add_page(lhs) -def test_merge_page__coverage(): +def test_merge_page__coverage(resources_dir): # Test with some otherwise untested cases. # Own resources are missing. @@ -1641,7 +1628,7 @@ def test_merge_page__coverage(): assert page.mediabox == RectangleObject((0.0, 0.0, 20, 10)) # With transformation. - path = RESOURCE_ROOT / "crazyones.pdf" + path = resources_dir / "crazyones.pdf" page = PdfWriter(clone_from=path).pages[0] page.indirect_reference = None page2 = PageObject.create_blank_page(width=20, height=5) diff --git a/tests/test_page_labels.py b/tests/test_page_labels.py index 332b0df0b3..930887fb7a 100644 --- a/tests/test_page_labels.py +++ b/tests/test_page_labels.py @@ -1,6 +1,5 @@ """Test the pypdf._page_labels module.""" from io import BytesIO -from pathlib import Path import pytest @@ -26,10 +25,6 @@ from . 
import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - @pytest.mark.parametrize( ("number", "expected"), @@ -181,11 +176,10 @@ def test_get_label_from_nums__empty_nums_list(): assert get_label_from_nums(dictionary_object, 13) == "14" -def test_index2label__empty_kids_list(): - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") +def test_index2label__empty_kids_list(crazyones_pdf_reader): number_tree = DictionaryObject() number_tree[NameObject("/Kids")] = ArrayObject() - root = reader.root_object + root = crazyones_pdf_reader.root_object root[NameObject("/PageLabels")] = number_tree - assert index2label(reader, 42) == "43" + assert index2label(crazyones_pdf_reader, 42) == "43" diff --git a/tests/test_pdfa.py b/tests/test_pdfa.py index 4ed7a4a7fd..dd8e6f7f11 100644 --- a/tests/test_pdfa.py +++ b/tests/test_pdfa.py @@ -8,11 +8,6 @@ from pypdf import PdfReader, PdfWriter -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" - def is_pdfa1b_compliant(src: BytesIO): """Check if a PDF is PDF/A-1b compliant.""" @@ -36,11 +31,11 @@ def document_information_has_analoguos_xml(src: BytesIO) -> bool: @pytest.mark.parametrize( ("src", "diagnostic_write_name"), [ - (SAMPLE_ROOT / "021-pdfa/crazyones-pdfa.pdf", None), + ("021-pdfa/crazyones-pdfa.pdf", None), ], ) -def test_pdfa(src: Path, diagnostic_write_name: Optional[str]): - with open(src, "rb") as fp: +def test_pdfa(src: Path, diagnostic_write_name: Optional[str], sample_files_dir): + with open(sample_files_dir / src, "rb") as fp: data = BytesIO(fp.read()) reader = PdfReader(src) assert is_pdfa1b_compliant(data) diff --git a/tests/test_reader.py b/tests/test_reader.py index 3852f2177d..fb8653305a 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -48,8 +48,8 @@ ("src", "num_pages"), 
[("selenium-pypdf-issue-177.pdf", 1), ("pdflatex-outline.pdf", 4)], ) -def test_get_num_pages(src, num_pages): - src = RESOURCE_ROOT / src +def test_get_num_pages(src, num_pages, resources_dir): + src = resources_dir / src with PdfReader(src) as reader: assert len(reader.pages) == num_pages # from #1911 @@ -60,7 +60,7 @@ def test_get_num_pages(src, num_pages): ("pdf_path", "expected"), [ ( - RESOURCE_ROOT / "crazyones.pdf", + "crazyones.pdf", { "/CreationDate": "D:20150604133406-06'00'", "/Creator": " XeTeX output 2015.06.04:1334", @@ -68,7 +68,7 @@ def test_get_num_pages(src, num_pages): }, ), ( - RESOURCE_ROOT / "metadata.pdf", + "metadata.pdf", { "/CreationDate": "D:20220415093243+02'00'", "/ModDate": "D:20220415093243+02'00'", @@ -89,8 +89,8 @@ def test_get_num_pages(src, num_pages): ], ids=["crazyones", "metadata"], ) -def test_read_metadata(pdf_path, expected): - with open(pdf_path, "rb") as inputfile: +def test_read_metadata(pdf_path, expected, resources_dir): + with open(resources_dir / pdf_path, "rb") as inputfile: reader = PdfReader(inputfile) docinfo = reader.metadata assert docinfo is not None @@ -117,16 +117,16 @@ def test_read_metadata(pdf_path, expected): assert metadict["/Title"] == docinfo.title -def test_read_metadata_title_is_utf8(): - with open(RESOURCE_ROOT / "bytes.pdf", "rb") as inputfile: +def test_read_metadata_title_is_utf8(resources_dir): + with open(resources_dir / "bytes.pdf", "rb") as inputfile: reader = PdfReader(inputfile) title = reader.metadata.title # Should be a str. 
assert title == "Microsoft Word - トランスバース社買収電話会議英語Final.docx" -def test_iss1943(): - with PdfReader(RESOURCE_ROOT / "crazyones.pdf") as reader: +def test_iss1943(crazyones_pdf_reader): + with crazyones_pdf_reader as reader: docinfo = reader.metadata docinfo.update( { @@ -146,14 +146,14 @@ def test_iss1943(): @pytest.mark.samples @pytest.mark.parametrize( - "pdf_path", [SAMPLE_ROOT / "017-unreadable-meta-data/unreadablemetadata.pdf"] + "pdf_path", ["017-unreadable-meta-data/unreadablemetadata.pdf"] ) -def test_broken_meta_data(pdf_path): - with open(pdf_path, "rb") as f: +def test_broken_meta_data(pdf_path, resources_dir, sample_files_dir): + with open(sample_files_dir / pdf_path, "rb") as f: reader = PdfReader(f) assert reader.metadata is None - with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as f: + with open(resources_dir / "crazyones.pdf", "rb") as f: b = f.read(-1) reader = PdfReader(BytesIO(b.replace(b"/Info 2 0 R", b"/Info 2 "))) with pytest.raises(PdfReadError) as exc: @@ -164,12 +164,12 @@ def test_broken_meta_data(pdf_path): @pytest.mark.parametrize( "src", [ - RESOURCE_ROOT / "crazyones.pdf", - RESOURCE_ROOT / "commented.pdf", + "crazyones.pdf", + "commented.pdf", ], ) -def test_get_annotations(src): - with PdfReader(src) as reader: +def test_get_annotations(src, resources_dir): + with PdfReader(resources_dir / src) as reader: for page in reader.pages: if PG.ANNOTS in page: for annot in page[PG.ANNOTS]: @@ -181,12 +181,12 @@ def test_get_annotations(src): @pytest.mark.parametrize( ("src", "nb_attachments"), [ - (RESOURCE_ROOT / "attachment.pdf", 1), - (RESOURCE_ROOT / "crazyones.pdf", 0), + ("attachment.pdf", 1), + ("crazyones.pdf", 0), ], ) -def test_get_attachments(src, nb_attachments): - reader = PdfReader(src) +def test_get_attachments(src, nb_attachments, resources_dir): + reader = PdfReader(resources_dir / src) attachments = {} for page in reader.pages: @@ -202,12 +202,12 @@ def test_get_attachments(src, nb_attachments): @pytest.mark.parametrize( 
("src", "outline_elements"), [ - (RESOURCE_ROOT / "pdflatex-outline.pdf", 9), - (RESOURCE_ROOT / "crazyones.pdf", 0), + ("pdflatex-outline.pdf", 9), + ("crazyones.pdf", 0), ], ) -def test_get_outline(src, outline_elements): - reader = PdfReader(src) +def test_get_outline(src, outline_elements, resources_dir): + reader = PdfReader(resources_dir / src) outline = reader.outline assert len(outline) == outline_elements @@ -233,10 +233,10 @@ def test_get_outline(src, outline_elements): (SAMPLE_ROOT / "019-grayscale-image/grayscale-image.pdf", ["X0.png"]), ], ) -def test_get_images(src, expected_images): +def test_get_images(src, expected_images, resources_dir): from PIL import Image # noqa: PLC0415 - src_abs = RESOURCE_ROOT / src + src_abs = resources_dir / src reader = PdfReader(src_abs) page = reader.pages[0] images_extracted = page.images @@ -360,8 +360,8 @@ def test_get_images_raw( assert normalize_warnings(caplog.text) == warning_msgs -def test_issue297(caplog): - path = RESOURCE_ROOT / "issue-297.pdf" +def test_issue297(caplog, resources_dir): + path = resources_dir / "issue-297.pdf" with pytest.raises(PdfReadError) as exc: reader = PdfReader(path, strict=True) assert caplog.text == "" @@ -383,14 +383,14 @@ def test_issue297(caplog): ("encrypted-file.pdf", b"qwerty", True), ], ) -def test_get_page_of_encrypted_file(pdffile, password, should_fail): +def test_get_page_of_encrypted_file(pdffile, password, should_fail, resources_dir): """ Check if we can read a page of an encrypted file. 
This is a regression test for issue 327: IndexError for get_page() of decrypted file """ - path = RESOURCE_ROOT / pdffile + path = resources_dir / pdffile if should_fail: with pytest.raises(PdfReadError): PdfReader(path, password=password) @@ -423,9 +423,9 @@ def test_get_page_of_encrypted_file(pdffile, password, should_fail): ) ], ) -def test_get_form(src, expected, expected_get_fields, txt_file_path): +def test_get_form(src, expected, expected_get_fields, txt_file_path, resources_dir): """Check if we can read out form data.""" - src = RESOURCE_ROOT / src + src = resources_dir / src reader = PdfReader(src) fields = reader.get_form_text_fields() assert fields == expected @@ -472,8 +472,8 @@ def test_reading_choice_field_without_opt_key(): ("pdflatex-outline.pdf", 2), ], ) -def test_get_page_number(src, page_number): - src = RESOURCE_ROOT / src +def test_get_page_number(src, page_number, resources_dir): + src = resources_dir / src reader = PdfReader(src) reader.get_page(0) page = reader.pages[page_number] @@ -484,8 +484,8 @@ def test_get_page_number(src, page_number): ("src", "expected"), [("form.pdf", None), ("AutoCad_Simple.pdf", "/SinglePage")], ) -def test_get_page_layout(src, expected): - src = RESOURCE_ROOT / src +def test_get_page_layout(src, expected, resources_dir): + src = resources_dir / src reader = PdfReader(src) assert reader.page_layout == expected @@ -497,8 +497,8 @@ def test_get_page_layout(src, expected): ("crazyones.pdf", None), ], ) -def test_get_page_mode(src, expected): - src = RESOURCE_ROOT / src +def test_get_page_mode(src, expected, resources_dir): + src = resources_dir / src reader = PdfReader(src) assert reader.page_mode == expected @@ -653,16 +653,16 @@ def test_read_unknown_zero_pages(caplog): assert exc.value.args[0] == "Invalid object in /Pages" -def test_read_encrypted_without_decryption(): - src = RESOURCE_ROOT / "libreoffice-writer-password.pdf" +def test_read_encrypted_without_decryption(resources_dir): + src = resources_dir / 
"libreoffice-writer-password.pdf" reader = PdfReader(src) with pytest.raises(FileNotDecryptedError) as exc: len(reader.pages) assert exc.value.args[0] == "File has not been decrypted" -def test_get_destination_page_number(): - src = RESOURCE_ROOT / "pdflatex-outline.pdf" +def test_get_destination_page_number(resources_dir): + src = resources_dir / "pdflatex-outline.pdf" reader = PdfReader(src) outline = reader.outline for outline_item in outline: @@ -686,20 +686,20 @@ def test_do_not_get_stuck_on_large_files_without_start_xref(): @pytest.mark.enable_socket -def test_decrypt_when_no_id(): +def test_decrypt_when_no_id(resources_dir): """ Decrypt an encrypted file that's missing the 'ID' value in its trailer. https://github.com/py-pdf/pypdf/issues/608 """ - with open(RESOURCE_ROOT / "encrypted_doc_no_id.pdf", "rb") as inputfile: + with open(resources_dir / "encrypted_doc_no_id.pdf", "rb") as inputfile: ipdf = PdfReader(inputfile) ipdf.decrypt("") assert ipdf.metadata == {"/Producer": "European Patent Office"} -def test_reader_properties(): - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") +def test_reader_properties(crazyones_pdf_reader): + reader = crazyones_pdf_reader assert reader.outline == [] assert len(reader.pages) == 1 assert reader.page_layout is None @@ -711,9 +711,9 @@ def test_reader_properties(): "strict", [True, False], ) -def test_issue604(caplog, strict): +def test_issue604(caplog, strict, resources_dir): """Test with invalid destinations.""" - with open(RESOURCE_ROOT / "issue-604.pdf", "rb") as f: + with open(resources_dir / "issue-604.pdf", "rb") as f: pdf = None outline = None if strict: @@ -745,8 +745,8 @@ def get_dest_pages(x) -> NestedList: out.append(get_dest_pages(oi)) # noqa: PERF401 -def test_decode_permissions(): - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") +def test_decode_permissions(crazyones_pdf_reader): + reader = crazyones_pdf_reader base = { "accessability": False, # Do not fix typo, as part of official, but deprecated 
API. "annotations": False, @@ -782,17 +782,16 @@ def test_decode_permissions(): @pytest.mark.skipif(not HAS_AES, reason="No AES implementation") -def test_user_access_permissions(): +def test_user_access_permissions(crazyones_pdf_path, crazyones_pdf_reader, resources_dir): # Not encrypted. - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") - assert reader.user_access_permissions is None + assert crazyones_pdf_reader.user_access_permissions is None # Encrypted. - reader = PdfReader(RESOURCE_ROOT / "encryption" / "r6-owner-password.pdf") + reader = PdfReader(resources_dir / "encryption" / "r6-owner-password.pdf") assert reader.user_access_permissions == UAP.all() # Custom writer permissions. - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer = PdfWriter(clone_from=crazyones_pdf_path) writer.encrypt( user_password="", owner_password="abc", @@ -804,7 +803,7 @@ def test_user_access_permissions(): assert reader.user_access_permissions == (UAP.PRINT | UAP.FILL_FORM_FIELDS) # All writer permissions. 
- writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer = PdfWriter(clone_from=crazyones_pdf_path) writer.encrypt( user_password="", owner_password="abc", @@ -816,9 +815,8 @@ def test_user_access_permissions(): assert reader.user_access_permissions == UAP.all() -def test_pages_attribute(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_pages_attribute(crazyones_pdf_reader): + reader = crazyones_pdf_reader # Test if getting as slice throws an error assert len(reader.pages[:]) == 1 @@ -858,14 +856,14 @@ def test_iss925(): annot.get_object() -def test_get_object(): - reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf") +def test_get_object(resources_dir): + reader = PdfReader(resources_dir / "hello-world.pdf") assert reader.get_object(22)["/Type"] == "/Catalog" assert reader._get_indirect_object(22, 0)["/Type"] == "/Catalog" -def test_extract_text_hello_world(): - reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf") +def test_extract_text_hello_world(resources_dir): + reader = PdfReader(resources_dir / "hello-world.pdf") text = reader.pages[0].extract_text().split("\n") assert text == [ "English:", @@ -883,14 +881,12 @@ def test_extract_text_hello_world(): ] -def test_read_path(): - path = Path(RESOURCE_ROOT, "crazyones.pdf") - reader = PdfReader(path) - assert len(reader.pages) == 1 +def test_read_path(crazyones_pdf_reader): + assert len(crazyones_pdf_reader.pages) == 1 -def test_read_not_binary_mode(caplog): - with open(RESOURCE_ROOT / "crazyones.pdf") as f: +def test_read_not_binary_mode(caplog, resources_dir): + with open(resources_dir / "crazyones.pdf") as f: msg = ( "PdfReader stream/file object is not in binary mode. " "It may not be read correctly." 
@@ -911,8 +907,8 @@ def test_read_form_416(): assert len(fields) > 0 -def test_form_topname_with_and_without_acroform(caplog): - r = PdfReader(RESOURCE_ROOT / "crazyones.pdf") +def test_form_topname_with_and_without_acroform(caplog, crazyones_pdf_reader, resources_dir): + r = crazyones_pdf_reader r.add_form_topname("no") r.rename_form_topname("renamed") assert "/AcroForm" not in r.trailer["/Root"] @@ -921,7 +917,7 @@ def test_form_topname_with_and_without_acroform(caplog): r.rename_form_topname("renamed") assert len(r.get_fields()) == 0 - r = PdfReader(RESOURCE_ROOT / "form.pdf") + r = PdfReader(resources_dir / "form.pdf") r.add_form_topname("top") flds = r.get_fields() assert "top" in flds @@ -931,7 +927,7 @@ def test_form_topname_with_and_without_acroform(caplog): assert "renamed" in flds assert "renamed.foo" in flds - r = PdfReader(RESOURCE_ROOT / "form.pdf") + r = PdfReader(resources_dir / "form.pdf") r.get_fields()["foo"].indirect_reference.get_object()[ NameObject("/Parent") ] = DictionaryObject() @@ -1065,12 +1061,12 @@ def test_get_fields_read_write_report(txt_file_path): @pytest.mark.parametrize( "src", [ - RESOURCE_ROOT / "crazyones.pdf", - RESOURCE_ROOT / "commented.pdf", + "crazyones.pdf", + "commented.pdf", ], ) -def test_xfa(src): - reader = PdfReader(src) +def test_xfa(src, resources_dir): + reader = PdfReader(resources_dir / src) assert reader.xfa is None @@ -1092,12 +1088,12 @@ def test_xfa_non_empty(): @pytest.mark.parametrize( ("src", "pdf_header"), [ - (RESOURCE_ROOT / "attachment.pdf", "%PDF-1.5"), - (RESOURCE_ROOT / "crazyones.pdf", "%PDF-1.5"), + ("attachment.pdf", "%PDF-1.5"), + ("crazyones.pdf", "%PDF-1.5"), ], ) -def test_header(src, pdf_header): - reader = PdfReader(src) +def test_header(src, pdf_header, resources_dir): + reader = PdfReader(resources_dir / src) assert reader.pdf_header == pdf_header @@ -1128,8 +1124,8 @@ def get_outline_property(outline, attribute_name: str): @pytest.mark.samples -def test_outline_title_issue_1121(): - 
reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf") +def test_outline_title_issue_1121(sample_files_dir): + reader = PdfReader(sample_files_dir / "014-outlines/mistitled_outlines_example.pdf") assert get_outline_property(reader.outline, "title") == [ "First", @@ -1175,8 +1171,8 @@ def test_outline_title_issue_1121(): @pytest.mark.samples -def test_outline_count(): - reader = PdfReader(SAMPLE_ROOT / "014-outlines/mistitled_outlines_example.pdf") +def test_outline_count(sample_files_dir): + reader = PdfReader(sample_files_dir / "014-outlines/mistitled_outlines_example.pdf") assert get_outline_property(reader.outline, "outline_count") == [ 5, @@ -1221,15 +1217,15 @@ def test_outline_count(): ] -def test_outline_missing_title(caplog): +def test_outline_missing_title(caplog, resources_dir): # Strict - reader = PdfReader(RESOURCE_ROOT / "outline-without-title.pdf", strict=True) + reader = PdfReader(resources_dir / "outline-without-title.pdf", strict=True) with pytest.raises(PdfReadError) as exc: reader.outline assert exc.value.args[0].startswith("Outline Entry Missing /Title attribute:") # Non-strict : no errors - reader = PdfReader(RESOURCE_ROOT / "outline-without-title.pdf", strict=False) + reader = PdfReader(resources_dir / "outline-without-title.pdf", strict=False) assert reader.outline[0]["/Title"] == "" @@ -1275,8 +1271,8 @@ def test_outline_with_empty_action(): assert reader.outline[-4].title == "Tables" -def test_outline_with_invalid_destinations(): - reader = PdfReader(RESOURCE_ROOT / "outlines-with-invalid-destinations.pdf") +def test_outline_with_invalid_destinations(resources_dir): + reader = PdfReader(resources_dir / "outlines-with-invalid-destinations.pdf") # contains 9 outline items, 6 with invalid destinations # caused by different malformations assert len(reader.outline) == 9 @@ -1294,8 +1290,8 @@ def test_pdfreader_multiple_definitions(caplog): ] -def test_wrong_password_error(): - encrypted_pdf_path = RESOURCE_ROOT / 
"encrypted-file.pdf" +def test_wrong_password_error(resources_dir): + encrypted_pdf_path = resources_dir / "encrypted-file.pdf" with pytest.raises(WrongPasswordError): PdfReader( encrypted_pdf_path, @@ -1303,9 +1299,8 @@ def test_wrong_password_error(): ) -def test_get_page_number_by_indirect(): - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") - reader._get_page_number_by_indirect(1) +def test_get_page_number_by_indirect(crazyones_pdf_reader): + crazyones_pdf_reader._get_page_number_by_indirect(1) @pytest.mark.enable_socket @@ -1859,8 +1854,8 @@ def test_infinite_loop_for_length_value(): writer.add_page(reader.pages[0]) -def test_trailer_cannot_be_read(): - path = RESOURCE_ROOT / "crazyones.pdf" +def test_trailer_cannot_be_read(resources_dir): + path = resources_dir / "crazyones.pdf" data = path.read_bytes().replace(b"/Type/XRef", b"/Type/Invalid") with pytest.raises(PdfReadError, match=r"^Trailer cannot be read: Unexpected type '/Invalid'$"): reader = PdfReader(BytesIO(data)) diff --git a/tests/test_text_extraction.py b/tests/test_text_extraction.py index f0e5a759be..0e236ffc18 100644 --- a/tests/test_text_extraction.py +++ b/tests/test_text_extraction.py @@ -7,7 +7,6 @@ import re from dataclasses import asdict from io import BytesIO -from pathlib import Path from unittest.mock import patch import pytest @@ -21,16 +20,11 @@ from . 
import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = PROJECT_ROOT / "sample-files" - @pytest.mark.samples @pytest.mark.parametrize(("visitor_text"), [None, lambda a, b, c, d, e: None]) # noqa: ARG005 -def test_multi_language(visitor_text): - reader = PdfReader(RESOURCE_ROOT / "multilang.pdf") +def test_multi_language(visitor_text, resources_dir, sample_files_dir): + reader = PdfReader(resources_dir / "multilang.pdf") txt = reader.pages[0].extract_text(visitor_text=visitor_text) assert "Hello World" in txt, "English not correctly extracted" # iss #1296 @@ -58,7 +52,7 @@ def test_multi_language(visitor_text): ), "CUSTOM_RTL_MIN/MAX failed" set_custom_rtl(-1, -1, []) # to prevent further errors - reader = PdfReader(SAMPLE_ROOT / "015-arabic/habibi-rotated.pdf") + reader = PdfReader(sample_files_dir / "015-arabic/habibi-rotated.pdf") assert "habibi" in reader.pages[0].extract_text(visitor_text=visitor_text) assert "حَبيبي" in reader.pages[0].extract_text(visitor_text=visitor_text) assert "habibi" in reader.pages[1].extract_text(visitor_text=visitor_text) @@ -82,15 +76,14 @@ def test_multi_language(visitor_text): ) ], ) -def test_visitor_text_matrices(file_name, constraints): +def test_visitor_text_matrices(file_name, constraints, resources_dir): """ Checks if the matrices given to the visitor_text function when calling `extract_text` on the first page of `file_name` match some given constraints. `constraints` is a dictionary mapping a line of text to a constraint that should evaluate to `True` on its expected x,y-coordinates. 
""" - reader = PdfReader(RESOURCE_ROOT / file_name) - + reader = PdfReader(resources_dir / file_name) lines = [] def visitor_text(text, cm, tm, font_dict, font_size) -> None: @@ -172,29 +165,29 @@ def test_uninterpretable_type3_font(mock_logger_warning): @pytest.mark.enable_socket -def test_layout_mode_epic_page_fonts(): +def test_layout_mode_epic_page_fonts(resources_dir): url = "https://github.com/py-pdf/pypdf/files/13836944/Epic.Page.PDF" name = "Epic Page.PDF" reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - expected = (RESOURCE_ROOT / "Epic.Page.layout.txt").read_text(encoding="utf-8") + expected = (resources_dir / "Epic.Page.layout.txt").read_text(encoding="utf-8") assert expected == reader.pages[0].extract_text(extraction_mode="layout") -def test_layout_mode_uncommon_operators(): +def test_layout_mode_uncommon_operators(resources_dir): # Coverage for layout mode Tc, Tz, Ts, ', ", TD, TL, and Tw - reader = PdfReader(RESOURCE_ROOT / "toy.pdf") - expected = (RESOURCE_ROOT / "toy.layout.txt").read_text(encoding="utf-8") + reader = PdfReader(resources_dir / "toy.pdf") + expected = (resources_dir / "toy.layout.txt").read_text(encoding="utf-8") assert expected == reader.pages[0].extract_text(extraction_mode="layout") @pytest.mark.enable_socket -def test_layout_mode_type0_font_widths(): +def test_layout_mode_type0_font_widths(resources_dir): # Cover both the 'int int int' and 'int [int int ...]' formats for Type0 # /DescendantFonts /W array entries. 
url = "https://github.com/py-pdf/pypdf/files/13533204/Claim.Maker.Alerts.Guide_pg2.PDF" name = "Claim Maker Alerts Guide_pg2.PDF" reader = PdfReader(BytesIO(get_data_from_url(url, name=name))) - expected = (RESOURCE_ROOT / "Claim Maker Alerts Guide_pg2.layout.txt").read_text( + expected = (resources_dir / "Claim Maker Alerts Guide_pg2.layout.txt").read_text( encoding="utf-8" ) assert expected == reader.pages[0].extract_text(extraction_mode="layout") @@ -220,9 +213,9 @@ def dummy_visitor_text(text, ctm, tm, fd, fs): @patch("pypdf._page.logger_warning") -def test_layout_mode_warnings(mock_logger_warning): +def test_layout_mode_warnings(mock_logger_warning, resources_dir): # Check that a warning is issued when an argument is ignored - reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf") + reader = PdfReader(resources_dir / "hello-world.pdf") page = reader.pages[0] page.extract_text(extraction_mode="plain", visitor_text=dummy_visitor_text) mock_logger_warning.assert_not_called() @@ -254,23 +247,23 @@ def test_space_position_calculation(): assert "Shortly after the Geneva BOF session, the" in extracted -def test_text_leading_height_unit(): +def test_text_leading_height_unit(resources_dir): """Tests for #2262""" - reader = PdfReader(RESOURCE_ROOT / "toy.pdf") + reader = PdfReader(resources_dir / "toy.pdf") page = reader.pages[0] extracted = page.extract_text() assert "Something[cited]\n" in extracted -def test_layout_mode_space_vertically_font_height_weight(): +def test_layout_mode_space_vertically_font_height_weight(crazyones_pdf_path, resources_dir): """Tests layout mode with vertical space and font height weight (issue #2915)""" - with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as inputfile: + with open(crazyones_pdf_path, "rb") as inputfile: # Load PDF file from file reader = PdfReader(inputfile) page = reader.pages[0] # Normal behaviour - with open(RESOURCE_ROOT / "crazyones_layout_vertical_space.txt", "rb") as pdftext_file: + with open(resources_dir / 
"crazyones_layout_vertical_space.txt", "rb") as pdftext_file: pdftext = pdftext_file.read() text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=True).encode("utf-8") @@ -283,7 +276,7 @@ def test_layout_mode_space_vertically_font_height_weight(): assert text == pdftext # Blank lines are added to truly separate paragraphs - with open(RESOURCE_ROOT / "crazyones_layout_vertical_space_font_height_weight.txt", "rb") as pdftext_file: + with open(resources_dir / "crazyones_layout_vertical_space_font_height_weight.txt", "rb") as pdftext_file: pdftext = pdftext_file.read() text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=True, @@ -412,9 +405,9 @@ def test_layout_mode_warns_on_malformed_content_stream(op, msg, caplog): assert caplog.records[-1].msg == msg -def test_process_operation__cm_multiplication_issue(): +def test_process_operation__cm_multiplication_issue(crazyones_pdf_path): """Test for #3262.""" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer = PdfWriter(clone_from=crazyones_pdf_path) page = writer.pages[0] content = page.get_contents().get_data() content = content.replace(b" 1 0 0 1 72 720 cm ", b" 0.70278 65.3 163.36 cm ") diff --git a/tests/test_utils.py b/tests/test_utils.py index a559af2b09..7dfd1b566a 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -36,10 +36,6 @@ from . 
import is_sublist -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" - @pytest.mark.parametrize( ("stream", "expected"), diff --git a/tests/test_workflows.py b/tests/test_workflows.py index 0df858404d..9df14212f2 100644 --- a/tests/test_workflows.py +++ b/tests/test_workflows.py @@ -36,11 +36,9 @@ sys.path.append(str(PROJECT_ROOT)) -def test_basic_features(tmp_path): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_basic_features(crazyones_pdf_path, crazyones_pdf_reader, tmp_path): + reader = crazyones_pdf_reader writer = PdfWriter() - assert len(reader.pages) == 1 # add page 1 from input1 to output document, unchanged @@ -63,7 +61,7 @@ def test_basic_features(tmp_path): # add page 4 from input1, but first add a watermark from another PDF: page3 = reader.pages[0] page3 = writer.add_page(page3) - watermark_pdf = pdf_path + watermark_pdf = crazyones_pdf_path watermark = PdfReader(watermark_pdf) page3.merge_page(watermark.pages[0]) @@ -84,27 +82,27 @@ def test_basic_features(tmp_path): writer.write(output_stream) -def test_dropdown_items(): - inputfile = RESOURCE_ROOT / "libreoffice-form.pdf" +def test_dropdown_items(resources_dir): + inputfile = resources_dir / "libreoffice-form.pdf" reader = PdfReader(inputfile) fields = reader.get_fields() assert "/Opt" in fields["Nationality"] -def test_pdfreader_file_load(): +def test_pdfreader_file_load(crazyones_pdf_path, resources_dir): """ Test loading and parsing of a file. Extract text of the file and compare to expected textual output. Expected outcome: file loads, text matches expected. 
""" - with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as inputfile: + with open(crazyones_pdf_path, "rb") as inputfile: # Load PDF file from file reader = PdfReader(inputfile) page = reader.pages[0] # Retrieve the text of the PDF - with open(RESOURCE_ROOT / "crazyones.txt", "rb") as pdftext_file: + with open(resources_dir / "crazyones.txt", "rb") as pdftext_file: pdftext = pdftext_file.read() text = page.extract_text().encode("utf-8") @@ -117,19 +115,19 @@ def test_pdfreader_file_load(): assert text == pdftext -def test_pdfreader_jpeg_image(): +def test_pdfreader_jpeg_image(resources_dir): """ Test loading and parsing of a file. Extract the image of the file and compare to expected textual output. Expected outcome: file loads, image matches expected. """ - with open(RESOURCE_ROOT / "jpeg.pdf", "rb") as inputfile: + with open(resources_dir / "jpeg.pdf", "rb") as inputfile: # Load PDF file from file reader = PdfReader(inputfile) # Retrieve the text of the image - with open(RESOURCE_ROOT / "jpeg.txt") as pdftext_file: + with open(resources_dir / "jpeg.txt") as pdftext_file: imagetext = pdftext_file.read() page = reader.pages[0] @@ -140,8 +138,8 @@ def test_pdfreader_jpeg_image(): assert binascii.hexlify(data).decode() == imagetext -def test_decrypt(): - with open(RESOURCE_ROOT / "libreoffice-writer-password.pdf", "rb") as inputfile: +def test_decrypt(resources_dir): + with open(resources_dir / "libreoffice-writer-password.pdf", "rb") as inputfile: reader = PdfReader(inputfile) assert reader.is_encrypted is True reader.decrypt("openpassword") @@ -155,8 +153,8 @@ def test_decrypt(): } -def test_text_extraction_encrypted(): - inputfile = RESOURCE_ROOT / "libreoffice-writer-password.pdf" +def test_text_extraction_encrypted(resources_dir): + inputfile = resources_dir / "libreoffice-writer-password.pdf" reader = PdfReader(inputfile) assert reader.is_encrypted is True reader.decrypt("openpassword") @@ -169,15 +167,15 @@ def test_text_extraction_encrypted(): 
@pytest.mark.parametrize("degree", [0, 90, 180, 270, 360, -90]) -def test_rotate(degree): - with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as inputfile: +def test_rotate(degree, resources_dir): + with open(resources_dir / "crazyones.pdf", "rb") as inputfile: reader = PdfReader(inputfile) page = reader.pages[0] page.rotate(degree) -def test_rotate_45(): - with open(RESOURCE_ROOT / "crazyones.pdf", "rb") as inputfile: +def test_rotate_45(resources_dir): + with open(resources_dir / "crazyones.pdf", "rb") as inputfile: reader = PdfReader(inputfile) page = reader.pages[0] with pytest.raises(ValueError) as exc: @@ -268,8 +266,8 @@ def test_extract_textbench(enable, url, pages): pass -def test_transform_compress_identical_objects(): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "two-different-pages.pdf") +def test_transform_compress_identical_objects(resources_dir): + writer = PdfWriter(clone_from=resources_dir / "two-different-pages.pdf") for page in writer.pages: op = Transformation().scale(sx=0.8, sy=0.8) @@ -285,8 +283,8 @@ def test_transform_compress_identical_objects(): @pytest.mark.slow -def test_orientations(): - p = PdfReader(RESOURCE_ROOT / "test Orient.pdf").pages[0] +def test_orientations(resources_dir): + p = PdfReader(resources_dir / "test Orient.pdf").pages[0] p.extract_text("", "") p.extract_text("", "", 0) p.extract_text("", "", 0, 200) @@ -334,14 +332,14 @@ def test_orientations(): ), ], ) -def test_overlay(pdf_file_path, base_path, overlay_path): +def test_overlay(pdf_file_path, base_path, overlay_path, resources_dir): if base_path.startswith("http"): base_path = BytesIO(get_data_from_url(base_path, name="tika-935981.pdf")) else: base_path = PROJECT_ROOT / base_path writer = PdfWriter(clone_from=base_path) - reader_overlay = PdfReader(PROJECT_ROOT / overlay_path) + reader_overlay = PdfReader(resources_dir / overlay_path) overlay = reader_overlay.pages[0] for page in writer.pages: @@ -594,11 +592,11 @@ def test_scale_rectangle_indirect_object(): 
page.scale(sx=2, sy=3) -def test_merge_output(caplog): +def test_merge_output(caplog, crazyones_pdf_path, resources_dir): # Arrange - base = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR.pdf" - crazy = RESOURCE_ROOT / "crazyones.pdf" - expected = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" + base = resources_dir / "Seige_of_Vicksburg_Sample_OCR.pdf" + crazy = crazyones_pdf_path + expected = resources_dir / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" # Act merger = PdfWriter() @@ -962,9 +960,9 @@ def test_fields_returning_stream(): assert "BtchIssQATit_time" in reader.get_form_text_fields()["TimeStampData"] -def test_replace_image(tmp_path): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "labeled-edges-center-image.pdf") - reader = PdfReader(RESOURCE_ROOT / "jpeg.pdf") +def test_replace_image(resources_dir, tmp_path): + writer = PdfWriter(clone_from=resources_dir / "labeled-edges-center-image.pdf") + reader = PdfReader(resources_dir / "jpeg.pdf") img = reader.pages[0].images[0].image if int(pil_version.split(".")[0]) < 9: img = img.convert("RGB") @@ -1100,11 +1098,11 @@ def test_text_extraction_layout_mode(pdf_path, expected_path): @pytest.mark.enable_socket -def test_layout_mode_space_vertically(): +def test_layout_mode_space_vertically(resources_dir): reader = PdfReader(BytesIO(get_data_from_url(name="iss2138.pdf"))) # remove automatically added final newline expected = ( - (RESOURCE_ROOT / "AEO.1172.layout.txt").read_text(encoding="utf-8").rstrip() + (resources_dir / "AEO.1172.layout.txt").read_text(encoding="utf-8").rstrip() ) assert expected == reader.pages[0].extract_text( extraction_mode="layout", layout_mode_space_vertically=False @@ -1115,14 +1113,14 @@ def test_layout_mode_space_vertically(): @pytest.mark.parametrize( ("rotation", "strip_rotated"), [(90, True), (180, False), (270, True)] ) -def test_layout_mode_rotations(rotation, strip_rotated): +def test_layout_mode_rotations(rotation, strip_rotated, resources_dir): 
writer = PdfWriter(clone_from=BytesIO(get_data_from_url(name="iss2138.pdf"))) rotated_page = writer.pages[0].rotate(rotation) rotated_page.transfer_rotation_to_content() expected = "" if not strip_rotated: expected = ( - (RESOURCE_ROOT / "AEO.1172.layout.rot180.txt") + (resources_dir / "AEO.1172.layout.rot180.txt") .read_text(encoding="utf-8") .rstrip() ) # remove automatically added final newline @@ -1133,11 +1131,9 @@ def test_layout_mode_rotations(rotation, strip_rotated): ) -def test_text_extraction_invalid_mode(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_text_extraction_invalid_mode(crazyones_pdf_reader): with pytest.raises(ValueError, match="Invalid text extraction mode"): - reader.pages[0].extract_text(extraction_mode="foo") # type: ignore + crazyones_pdf_reader.pages[0].extract_text(extraction_mode="foo") # type: ignore @pytest.mark.enable_socket diff --git a/tests/test_writer.py b/tests/test_writer.py index 5940fdb62d..58522c2ea9 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -41,10 +41,6 @@ from . 
import get_data_from_url, is_sublist from .test_images import image_similarity -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files" GHOSTSCRIPT_BINARY = shutil.which("gs") @@ -56,8 +52,8 @@ def _get_write_target(convert) -> Any: return target -def test_writer_exception_non_binary(tmp_path, caplog): - src = RESOURCE_ROOT / "pdflatex-outline.pdf" +def test_writer_exception_non_binary(tmp_path, caplog, resources_dir): + src = resources_dir / "pdflatex-outline.pdf" reader = PdfReader(src) writer = PdfWriter() @@ -69,8 +65,8 @@ def test_writer_exception_non_binary(tmp_path, caplog): assert caplog.text.endswith(ending) -def test_writer_clone(): - src = RESOURCE_ROOT / "pdflatex-outline.pdf" +def test_writer_clone(resources_dir): + src = resources_dir / "pdflatex-outline.pdf" reader = PdfReader(src) writer = PdfWriter(clone_from=reader) @@ -82,8 +78,8 @@ def test_writer_clone(): assert "PageObject" in str(type(writer.pages[0])) -def test_clone_metadata(): - src = RESOURCE_ROOT / "pdflatex-outline.pdf" +def test_clone_metadata(resources_dir): + src = resources_dir / "pdflatex-outline.pdf" reader = PdfReader(src) writer = PdfWriter(clone_from=reader) @@ -110,9 +106,9 @@ def test_clone_metadata(): assert writer.metadata == {"/foo": "bar"} -def test_writer_clone_bookmarks(): +def test_writer_clone_bookmarks(resources_dir): # Arrange - src = RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" + src = resources_dir / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf" reader = PdfReader(src) writer = PdfWriter() @@ -152,7 +148,7 @@ def cat1(p) -> None: assert len(reader2.outline) == 2 -def writer_operate(writer: PdfWriter) -> None: +def writer_operate(writer: PdfWriter, resources_dir: Path) -> None: """ To test the writer that initialized by each of the four usages. 
@@ -160,8 +156,8 @@ def writer_operate(writer: PdfWriter) -> None: writer: A PdfWriter object """ - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - pdf_outline_path = RESOURCE_ROOT / "pdflatex-outline.pdf" + pdf_path = resources_dir / "crazyones.pdf" + pdf_outline_path = resources_dir / "pdflatex-outline.pdf" reader = PdfReader(pdf_path) reader_outline = PdfReader(pdf_outline_path) @@ -263,10 +259,10 @@ def writer_operate(writer: PdfWriter) -> None: (BytesIO(), False), ], ) -def test_writer_operations_by_traditional_usage(convert, needs_cleanup): +def test_writer_operations_by_traditional_usage(convert, needs_cleanup, resources_dir): write_data_here = _get_write_target(convert) writer = PdfWriter() - writer_operate(writer) + writer_operate(writer, resources_dir) # finally, write "output" to pypdf-output.pdf if needs_cleanup: @@ -288,11 +284,11 @@ def test_writer_operations_by_traditional_usage(convert, needs_cleanup): (BytesIO(), False), ], ) -def test_writer_operations_by_semi_traditional_usage(convert, needs_cleanup): +def test_writer_operations_by_semi_traditional_usage(convert, needs_cleanup, resources_dir): write_data_here = _get_write_target(convert) with PdfWriter() as writer: - writer_operate(writer) + writer_operate(writer, resources_dir) # finally, write "output" to pypdf-output.pdf if needs_cleanup: @@ -314,11 +310,11 @@ def test_writer_operations_by_semi_traditional_usage(convert, needs_cleanup): (BytesIO(), False), ], ) -def test_writer_operations_by_semi_new_traditional_usage(convert, needs_cleanup): +def test_writer_operations_by_semi_new_traditional_usage(convert, needs_cleanup, resources_dir): write_data_here = _get_write_target(convert) with PdfWriter() as writer: - writer_operate(writer) + writer_operate(writer, resources_dir) # finally, write "output" to pypdf-output.pdf writer.write(write_data_here) @@ -335,12 +331,12 @@ def test_writer_operations_by_semi_new_traditional_usage(convert, needs_cleanup) (BytesIO(), False), ], ) -def 
test_writer_operation_by_new_usage(convert, needs_cleanup): +def test_writer_operation_by_new_usage(convert, needs_cleanup, resources_dir): write_data_here = _get_write_target(convert) # This includes write "output" to pypdf-output.pdf with PdfWriter(write_data_here) as writer: - writer_operate(writer) + writer_operate(writer, resources_dir) if needs_cleanup: Path(write_data_here).unlink() @@ -353,8 +349,8 @@ def test_writer_operation_by_new_usage(convert, needs_cleanup): "reportlab-inline-image.pdf", ], ) -def test_remove_images(pdf_file_path, input_path): - pdf_path = RESOURCE_ROOT / input_path +def test_remove_images(pdf_file_path, input_path, resources_dir): + pdf_path = resources_dir / input_path reader = PdfReader(pdf_path) writer = PdfWriter() @@ -405,8 +401,8 @@ def test_remove_images_sub_level(): "reportlab-inline-image.pdf", ], ) -def test_remove_text(input_path, pdf_file_path): - pdf_path = RESOURCE_ROOT / input_path +def test_remove_text(input_path, pdf_file_path, resources_dir): + pdf_path = resources_dir / input_path reader = PdfReader(pdf_path) writer = PdfWriter() @@ -482,10 +478,8 @@ def test_remove_text_all_operators(pdf_file_path): writer.write(output_stream) -def test_write_metadata(pdf_file_path): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - - reader = PdfReader(pdf_path) +def test_write_metadata(pdf_file_path, crazyones_pdf_reader): + reader = crazyones_pdf_reader writer = PdfWriter() writer.add_page(reader.pages[0]) @@ -507,12 +501,12 @@ def test_write_metadata(pdf_file_path): assert metadata.get("/Title") == "The Crazy Ones" -def test_fill_form(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "form.pdf") +def test_fill_form(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "form.pdf") writer = PdfWriter() writer.append(reader, [0]) - writer.append(RESOURCE_ROOT / "crazyones.pdf", [0]) + writer.append(resources_dir / "crazyones.pdf", [0]) writer.update_page_form_field_values( writer.pages[0], {"foo": "some filled in 
text"}, flags=1, flatten=True @@ -532,8 +526,8 @@ def test_fill_form(pdf_file_path): writer.write(output_stream) -def test_fill_form_with_qualified(): - reader = PdfReader(RESOURCE_ROOT / "form.pdf") +def test_fill_form_with_qualified(resources_dir): + reader = PdfReader(resources_dir / "form.pdf") reader.add_form_topname("top") writer = PdfWriter() @@ -554,8 +548,8 @@ def test_fill_form_with_qualified(): ("use_128bit", "user_password", "owner_password"), [(True, "userpwd", "ownerpwd"), (False, "userpwd", "ownerpwd")], ) -def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "form.pdf") +def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "form.pdf") writer = PdfWriter() page = reader.pages[0] @@ -608,8 +602,8 @@ def test_encrypt(use_128bit, user_password, owner_password, pdf_file_path): assert new_text == orig_text -def test_add_outline_item(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf") +def test_add_outline_item(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "pdflatex-outline.pdf") writer = PdfWriter() for page in reader.pages: @@ -661,8 +655,8 @@ def test_add_outline_item(pdf_file_path): assert reader.outline[1][0]["/Count"] == 0 -def test_add_named_destination(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf") +def test_add_named_destination(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "pdflatex-outline.pdf") writer = PdfWriter() assert writer.get_named_dest_root() == [] @@ -726,8 +720,8 @@ def test_add_named_destination_sort_order(pdf_file_path): writer.write(output_stream) -def test_add_uri(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf") +def test_add_uri(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "pdflatex-outline.pdf") writer = PdfWriter() for page in reader.pages: @@ 
-763,8 +757,8 @@ def test_add_uri(pdf_file_path): writer.write(output_stream) -def test_link_annotation(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "pdflatex-outline.pdf") +def test_link_annotation(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "pdflatex-outline.pdf") writer = PdfWriter() for page in reader.pages: @@ -817,9 +811,9 @@ def test_link_annotation(pdf_file_path): writer.write(output_stream) -def test_io_streams(): +def test_io_streams(resources_dir): """This is the example from the docs ("Streaming data").""" - filepath = RESOURCE_ROOT / "pdflatex-outline.pdf" + filepath = resources_dir / "pdflatex-outline.pdf" with open(filepath, "rb") as fh: bytes_stream = BytesIO(fh.read()) @@ -833,9 +827,8 @@ def test_io_streams(): writer.write(output_stream) -def test_regression_issue670(pdf_file_path): - filepath = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(filepath, strict=False) +def test_regression_issue670(pdf_file_path, crazyones_pdf_path): + reader = PdfReader(crazyones_pdf_path, strict=False) for _ in range(2): writer = PdfWriter() writer.add_page(reader.pages[0]) @@ -843,9 +836,9 @@ def test_regression_issue670(pdf_file_path): writer.write(f_pdf) -def test_issue301(): +def test_issue301(resources_dir): """Test with invalid stream length object.""" - with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f: + with open(resources_dir / "issue-301.pdf", "rb") as f: reader = PdfReader(f) writer = PdfWriter() writer.append_pages_from_reader(reader) @@ -853,9 +846,9 @@ def test_issue301(): writer.write(b) -def test_append_pages_from_reader_append(): +def test_append_pages_from_reader_append(resources_dir): """Use append_pages_from_reader with a callable.""" - with open(RESOURCE_ROOT / "issue-301.pdf", "rb") as f: + with open(resources_dir / "issue-301.pdf", "rb") as f: reader = PdfReader(f) writer = PdfWriter() writer.append_pages_from_reader(reader, callable) @@ -900,11 +893,11 @@ def test_some_appends(pdf_file_path, url, 
name): merger.write(pdf_file_path) -def test_pdf_header(): +def test_pdf_header(resources_dir): writer = PdfWriter() assert writer.pdf_header == "%PDF-1.3" - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + reader = PdfReader(resources_dir / "crazyones.pdf") writer.add_page(reader.pages[0]) assert writer.pdf_header == "%PDF-1.5" @@ -956,9 +949,8 @@ def test_write_dict_stream_object(pdf_file_path): assert k in objects_hash, f"Missing {v}" -def test_add_single_annotation(pdf_file_path): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_add_single_annotation(pdf_file_path, crazyones_pdf_reader): + reader = crazyones_pdf_reader page = reader.pages[0] writer = PdfWriter() writer.add_page(page) @@ -986,8 +978,8 @@ def test_add_single_annotation(pdf_file_path): @pytest.mark.samples -def test_colors_in_outline_item(pdf_file_path): - reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") +def test_colors_in_outline_item(pdf_file_path, sample_files_dir): + reader = PdfReader(sample_files_dir / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") writer = PdfWriter() writer.clone_document_from_reader(reader) purple_rgb = (0.5019607843137255, 0.0, 0.5019607843137255) @@ -1007,8 +999,8 @@ def test_colors_in_outline_item(pdf_file_path): @pytest.mark.samples -def test_write_empty_stream(): - reader = PdfReader(SAMPLE_ROOT / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") +def test_write_empty_stream(sample_files_dir): + reader = PdfReader(sample_files_dir / "004-pdflatex-4-pages/pdflatex-4-pages.pdf") writer = PdfWriter() writer.clone_document_from_reader(reader) @@ -1017,9 +1009,9 @@ def test_write_empty_stream(): assert exc.value.args[0] == "Output(stream='') is empty." 
-def test_startup_dest(): +def test_startup_dest(resources_dir): pdf_file_writer = PdfWriter() - pdf_file_writer.append_pages_from_reader(PdfReader(RESOURCE_ROOT / "issue-604.pdf")) + pdf_file_writer.append_pages_from_reader(PdfReader(resources_dir / "issue-604.pdf")) assert pdf_file_writer.open_destination is None pdf_file_writer.open_destination = pdf_file_writer.pages[9] @@ -1144,8 +1136,8 @@ def test_append_multiple(): @pytest.mark.samples -def test_set_page_label(pdf_file_path): - src = RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf" # File without labels +def test_set_page_label(pdf_file_path, resources_dir, sample_files_dir): + src = resources_dir / "GeoBase_NHNC1_Data_Model_UML_EN.pdf" # File without labels reader = PdfReader(src) expected = [ @@ -1239,10 +1231,8 @@ def test_set_page_label(pdf_file_path): pdf_file_path.unlink() - src = ( - SAMPLE_ROOT / "009-pdflatex-geotopo/GeoTopo.pdf" - ) # File with pre existing labels - reader = PdfReader(src) + # File with pre existing labels + reader = PdfReader(sample_files_dir / "009-pdflatex-geotopo/GeoTopo.pdf") # Tests adding labels to existing ones expected = ["i", "ii", "A", "B", "1"] @@ -1263,8 +1253,7 @@ def test_set_page_label(pdf_file_path): pdf_file_path.unlink() # Tests prefix and start. 
- src = RESOURCE_ROOT / "issue-604.pdf" # File without page labels - reader = PdfReader(src) + reader = PdfReader(resources_dir / "issue-604.pdf") # File without page labels writer = PdfWriter() writer.clone_document_from_reader(reader) @@ -1510,9 +1499,9 @@ def test_named_dest_page_number(): assert len(writer.root_object["/Names"]["/Dests"]["/Names"]) == 6 -def test_update_form_fields(tmp_path): +def test_update_form_fields(tmp_path, resources_dir): write_data_here = tmp_path / "out.pdf" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf") + writer = PdfWriter(clone_from=resources_dir / "FormTestFromOo.pdf") writer.update_page_form_field_values( writer.pages[0], { @@ -1566,7 +1555,7 @@ def test_update_form_fields(tmp_path): assert all(x in flds["RadioGroup1"]["/_States_"] for x in ["/1", "/2", "/3"]) assert all(x in flds["Liste1"]["/_States_"] for x in ["Liste1", "Liste2", "Liste3"]) - writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf") + writer = PdfWriter(clone_from=resources_dir / "FormTestFromOo.pdf") writer.add_annotation( page_number=0, annotation=Link(target_page_index=1, rect=RectangleObject([0, 0, 100, 100])), @@ -1743,9 +1732,8 @@ def test_iss1862(): writer.pages[0]["/Resources"]["/Font"]["/F1"]["/CharProcs"]["/B"].get_data() -def test_empty_objects_before_cloning(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_empty_objects_before_cloning(crazyones_pdf_reader): + reader = crazyones_pdf_reader writer = PdfWriter(clone_from=reader) nb_obj_reader = len(reader.xref_objStm) + sum( len(reader.xref[i]) for i in reader.xref @@ -1832,14 +1820,14 @@ def test_watermark_rendering(tmp_path): @pytest.mark.samples @pytest.mark.skipif(GHOSTSCRIPT_BINARY is None, reason="Requires Ghostscript") -def test_watermarking_reportlab_rendering(tmp_path): +def test_watermarking_reportlab_rendering(tmp_path, resources_dir, sample_files_dir): """ This test is showing a rotated+mirrored watermark in 
pypdf==3.15.4. Replacing the generate_base with e.g. the crazyones did not show the issue. """ - base_path = SAMPLE_ROOT / "022-pdfkit/pdfkit.pdf" - watermark_path = SAMPLE_ROOT / "013-reportlab-overlay/reportlab-overlay.pdf" + base_path = sample_files_dir / "022-pdfkit/pdfkit.pdf" + watermark_path = sample_files_dir / "013-reportlab-overlay/reportlab-overlay.pdf" reader = PdfReader(base_path) base_page = reader.pages[0] @@ -1849,7 +1837,7 @@ def test_watermarking_reportlab_rendering(tmp_path): base_page = writer.add_page(base_page) base_page.merge_page(watermark) - target_png_path = RESOURCE_ROOT / "test_watermarking_reportlab_rendering.png" + target_png_path = resources_dir / "test_watermarking_reportlab_rendering.png" pdf_path = tmp_path / "out.pdf" png_path = tmp_path / "test_watermarking_reportlab_rendering.png" @@ -1896,8 +1884,8 @@ def test_da_missing_in_annot(): ) -def test_missing_fields(pdf_file_path): - reader = PdfReader(RESOURCE_ROOT / "form.pdf") +def test_missing_fields(pdf_file_path, resources_dir): + reader = PdfReader(resources_dir / "form.pdf") writer = PdfWriter() writer.add_page(reader.pages[0]) @@ -1918,8 +1906,8 @@ def test_missing_fields(pdf_file_path): assert exc.value.args[0] == "No /Fields dictionary in PDF of PdfWriter Object" -def test_missing_info(): - reader = PdfReader(RESOURCE_ROOT / "missing_info.pdf") +def test_missing_info(crazyones_pdf_reader, resources_dir): + reader = PdfReader(resources_dir / "missing_info.pdf") writer = PdfWriter(clone_from=reader) assert len(writer.pages) == len(reader.pages) @@ -1928,7 +1916,7 @@ def test_missing_info(): writer.write(b) assert b"/Info" not in b.getvalue() - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + reader = crazyones_pdf_reader writer.metadata = reader.metadata assert dict(writer._info) == dict(reader._info) assert writer.metadata == reader.metadata @@ -2077,8 +2065,8 @@ def test_viewerpreferences(): assert writer.viewer_preferences is None -def 
test_extra_spaces_in_da_text(caplog): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "form.pdf") +def test_extra_spaces_in_da_text(caplog, resources_dir): + writer = PdfWriter(clone_from=resources_dir / "form.pdf") t = writer.pages[0]["/Annots"][0].get_object()["/DA"] t = t.replace("/Helv", "/Helv ") writer.pages[0]["/Annots"][0].get_object()[NameObject("/DA")] = TextStringObject(t) @@ -2103,8 +2091,8 @@ def test_object_contains_indirect_reference_to_self(): writer.append(reader) -def test_remove_image_per_type(): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "reportlab-inline-image.pdf") +def test_remove_image_per_type(resources_dir): + writer = PdfWriter(clone_from=resources_dir / "reportlab-inline-image.pdf") writer.remove_images(ImageType.INLINE_IMAGES) assert all( @@ -2114,7 +2102,7 @@ def test_remove_image_per_type(): writer.remove_images() - writer = PdfWriter(clone_from=RESOURCE_ROOT / "GeoBase_NHNC1_Data_Model_UML_EN.pdf") + writer = PdfWriter(clone_from=resources_dir / "GeoBase_NHNC1_Data_Model_UML_EN.pdf") writer.remove_images(ImageType.DRAWING_IMAGES) assert all( x not in writer.pages[1].get_contents().get_data() @@ -2311,9 +2299,9 @@ def test_reattach_fields(): assert len(writer.reattach_fields(writer.pages[1])) == 0 -def test_get_pagenumber_from_indirectobject(): +def test_get_pagenumber_from_indirectobject(resources_dir): """Test test_get_pagenumber_from_indirectobject""" - pdf_path = RESOURCE_ROOT / "crazyones.pdf" + pdf_path = resources_dir / "crazyones.pdf" writer = PdfWriter(clone_from=pdf_path) assert writer._get_page_number_by_indirect(None) is None assert writer._get_page_number_by_indirect(NullObject()) is None @@ -2324,9 +2312,8 @@ def test_get_pagenumber_from_indirectobject(): assert writer._get_page_number_by_indirect(ind.idnum + 1) is None -def test_replace_object(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) +def test_replace_object(crazyones_pdf_path, crazyones_pdf_reader): + reader = 
crazyones_pdf_reader writer = PdfWriter(clone_from=reader) with pytest.raises(ValueError): writer._replace_object(reader.pages[0].indirect_reference, reader.pages[0]) @@ -2335,7 +2322,7 @@ def test_replace_object(): writer._replace_object(writer.pages[0].indirect_reference, pg) # mainly for coverage - reader = PdfReader(pdf_path) # reload a new instance + reader = PdfReader(crazyones_pdf_path) # reload a new instance with pytest.raises(ValueError): reader._replace_object(writer.pages[0].indirect_reference, reader.pages[0]) with pytest.raises(ValueError): @@ -2349,30 +2336,26 @@ def test_replace_object(): writer.add_page(pg) -def test_mime_jupyter(): - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - writer = PdfWriter(clone_from=reader) - assert reader._repr_mimebundle_(("include",), ("exclude",)) == {} +def test_mime_jupyter(crazyones_pdf_reader): + writer = PdfWriter(clone_from=crazyones_pdf_reader) + assert crazyones_pdf_reader._repr_mimebundle_(("include",), ("exclude",)) == {} assert writer._repr_mimebundle_(("include",), ("exclude",)) == {} -def test_init_without_named_arg(): +def test_init_without_named_arg(crazyones_pdf_path, crazyones_pdf_reader): """Test to use file_obj argument and not clone_from""" - pdf_path = RESOURCE_ROOT / "crazyones.pdf" - reader = PdfReader(pdf_path) - writer = PdfWriter(clone_from=reader) + writer = PdfWriter(clone_from=crazyones_pdf_reader) nb = len(writer._objects) - writer = PdfWriter(reader) + writer = PdfWriter(crazyones_pdf_reader) assert len(writer._objects) == nb - with open(pdf_path, "rb") as f: + with open(crazyones_pdf_path, "rb") as f: writer = PdfWriter(f) f.seek(0, 0) by = BytesIO(f.read()) assert len(writer._objects) == nb - writer = PdfWriter(pdf_path) + writer = PdfWriter(crazyones_pdf_path) assert len(writer._objects) == nb - writer = PdfWriter(str(pdf_path)) + writer = PdfWriter(str(crazyones_pdf_path)) assert len(writer._objects) == nb writer = PdfWriter(by) assert 
len(writer._objects) == nb @@ -2391,8 +2374,8 @@ def test_i_in_choice_fields(): assert "/I" not in writer.get_fields()["State"].indirect_reference.get_object() -def test_selfont(): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "FormTestFromOo.pdf") +def test_selfont(resources_dir): + writer = PdfWriter(clone_from=resources_dir / "FormTestFromOo.pdf") writer.update_page_form_field_values( writer.pages[0], {"Text1": ("Text_1", "", 5), "Text2": ("Text_2", "/F3", 0)}, @@ -2490,9 +2473,9 @@ def test_set_need_appearances_writer(): writer.set_need_appearances_writer() -def test_utf16_metadata(): +def test_utf16_metadata(crazyones_pdf_writer): """See #2754""" - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf") + writer = crazyones_pdf_writer writer.add_metadata( { "/Subject": "Invoice №AI_047", @@ -2512,10 +2495,10 @@ def test_utf16_metadata(): @pytest.mark.enable_socket -def test_increment_writer(caplog): +def test_increment_writer(caplog, crazyones_pdf_path, resources_dir): """Tests for #2811""" writer = PdfWriter( - RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf", + resources_dir / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf", incremental=True, ) # Contains JBIG2 not decoded for the moment @@ -2525,7 +2508,7 @@ def test_increment_writer(caplog): b = BytesIO() writer.write(b) with open( - RESOURCE_ROOT / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf", "rb" + resources_dir / "Seige_of_Vicksburg_Sample_OCR-crazyones-merged.pdf", "rb" ) as f: assert b.getvalue() == f.read(-1) b.seek(0) @@ -2567,13 +2550,13 @@ def test_increment_writer(caplog): writer = PdfWriter(b, incremental=True) assert writer.list_objects_in_increment() == [] # no flowdown of properties - writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=True) + writer = PdfWriter(crazyones_pdf_path, incremental=True) # 1 object is modified: page 0 inherits MediaBox so is changed assert len(writer.list_objects_in_increment()) == 1 b = BytesIO() writer.write(b) - 
writer = PdfWriter(RESOURCE_ROOT / "crazyones.pdf", incremental=False) + writer = PdfWriter(crazyones_pdf_path, incremental=False) # 1 object is modified: page 0 inherits MediaBox so is changed assert len(writer.list_objects_in_increment()) == len(writer._objects) @@ -2581,7 +2564,7 @@ def test_increment_writer(caplog): url = "https://github.com/py-pdf/pypdf/files/13946477/panda.pdf" name = "iss2343b.pdf" writer = PdfWriter(BytesIO(get_data_from_url(url, name=name)), incremental=True) - reader = PdfReader(RESOURCE_ROOT / "crazyones.pdf") + reader = PdfReader(crazyones_pdf_path) pg = writer.insert_page(reader.pages[0], 4) assert ( pg.raw_get("/Parent") @@ -2592,7 +2575,7 @@ def test_increment_writer(caplog): assert len(writer.flattened_pages) == 285 # clone without info - writer = PdfWriter(RESOURCE_ROOT / "missing_info.pdf", incremental=True) + writer = PdfWriter(resources_dir / "missing_info.pdf", incremental=True) assert len(writer.list_objects_in_increment()) == 0 assert writer.metadata is None writer.metadata = {} @@ -2637,9 +2620,9 @@ def test_destination_page_is_none(): writer.append(reader) -def test_stream_not_closed(): +def test_stream_not_closed(resources_dir): """Tests for #2905""" - src = RESOURCE_ROOT / "pdflatex-outline.pdf" + src = resources_dir / "pdflatex-outline.pdf" with NamedTemporaryFile(suffix=".pdf") as tmp: with PdfReader(src) as reader, PdfWriter() as writer: writer.add_page(reader.pages[0]) @@ -2763,23 +2746,23 @@ def test_incremental_read(): assert len(writer._objects) == 5 -def test_compress_identical_objects__after_remove_images(): +def test_compress_identical_objects__after_remove_images(resources_dir): """Test for #3237""" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "AutoCad_Diagram.pdf") + writer = PdfWriter(clone_from=resources_dir / "AutoCad_Diagram.pdf") writer.remove_images() writer.compress_identical_objects(remove_identicals=True, remove_orphans=True) -def test_merge__process_named_dests__no_dests_in_source_file(): +def 
test_merge__process_named_dests__no_dests_in_source_file(resources_dir): """Test for #3279""" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer = PdfWriter(clone_from=resources_dir / "crazyones.pdf") # Hacky solution to avoid attribute errors. names = DictionaryObject() names.indirect_reference = names writer.root_object[NameObject("/Names")] = names - reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf") + reader = PdfReader(resources_dir / "hello-world.pdf") destination = Destination(title="test.pdf", page=reader.pages[0], fit=Fit("/Fit")) with mock.patch.object(reader, "_get_named_destinations", return_value={"test.pdf": destination}): writer.append(reader) @@ -2789,10 +2772,10 @@ def test_merge__process_named_dests__no_dests_in_source_file(): } -def test_insert_filtered_annotations__link_without_destination(): +def test_insert_filtered_annotations__link_without_destination(resources_dir): """Test for #3211""" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") - reader = PdfReader(RESOURCE_ROOT / "hello-world.pdf") + writer = PdfWriter(clone_from=resources_dir / "crazyones.pdf") + reader = PdfReader(resources_dir / "hello-world.pdf") annotations = [ DictionaryObject({ @@ -2811,7 +2794,7 @@ def test_insert_filtered_annotations__link_without_destination(): ) assert result == [] - writer = PdfWriter(clone_from=RESOURCE_ROOT / "crazyones.pdf") + writer = PdfWriter(clone_from=resources_dir / "crazyones.pdf") del annotations[0]["/A"]["/D"] result = writer._insert_filtered_annotations( annots=annotations, page=writer.pages[0], pages={}, reader=reader @@ -2840,9 +2823,9 @@ def test_insert_filtered_annotations__annotations_are_no_list(caplog): ] -def test_unterminated_object__with_incremental_writer(): +def test_unterminated_object__with_incremental_writer(resources_dir): """Test for #3118""" - reader = PdfReader(RESOURCE_ROOT / "bytes.pdf") + reader = PdfReader(resources_dir / "bytes.pdf") writer = PdfWriter(reader, 
incremental=True) writer.add_blank_page(72, 72) @@ -2853,8 +2836,8 @@ def test_unterminated_object__with_incremental_writer(): assert b[-39:] == b"\nendstream\nendobj\nstartxref\n1240\n%%EOF\n" -def test_wrong_size_in_incremental_pdf(caplog): - source_data = RESOURCE_ROOT.joinpath("crazyones.pdf").read_bytes() +def test_wrong_size_in_incremental_pdf(caplog, resources_dir): + source_data = resources_dir.joinpath("crazyones.pdf").read_bytes() writer = PdfWriter(BytesIO(source_data), incremental=True) writer._add_object(DictionaryObject()) diff --git a/tests/test_xmp.py b/tests/test_xmp.py index 5deae1f127..6cc42dd5b4 100644 --- a/tests/test_xmp.py +++ b/tests/test_xmp.py @@ -1,7 +1,6 @@ """Test the pypdf.xmp module.""" from datetime import datetime, timedelta, timezone from io import BytesIO -from pathlib import Path import pytest @@ -14,21 +13,16 @@ from . import get_data_from_url -TESTS_ROOT = Path(__file__).parent.resolve() -PROJECT_ROOT = TESTS_ROOT.parent -RESOURCE_ROOT = PROJECT_ROOT / "resources" -SAMPLE_ROOT = Path(PROJECT_ROOT) / "sample-files" - @pytest.mark.samples @pytest.mark.parametrize( "src", [ - (SAMPLE_ROOT / "020-xmp/output_with_metadata_pymupdf.pdf"), + ("020-xmp/output_with_metadata_pymupdf.pdf"), ], ) -def test_read_xmp_metadata_samples(src): - reader = PdfReader(src) +def test_read_xmp_metadata_samples(src, sample_files_dir): + reader = PdfReader(sample_files_dir / src) xmp = reader.xmp_metadata assert xmp assert xmp.dc_contributor == [] @@ -45,8 +39,8 @@ def test_read_xmp_metadata_samples(src): @pytest.mark.samples -def test_writer_xmp_metadata_samples(): - writer = PdfWriter(SAMPLE_ROOT / "020-xmp/output_with_metadata_pymupdf.pdf") +def test_writer_xmp_metadata_samples(sample_files_dir): + writer = PdfWriter(sample_files_dir / "020-xmp/output_with_metadata_pymupdf.pdf") xmp = writer.xmp_metadata assert xmp assert xmp.dc_contributor == [] @@ -77,13 +71,13 @@ def test_writer_xmp_metadata_samples(): @pytest.mark.parametrize( ("src", "has_xmp"), 
[ - (RESOURCE_ROOT / "commented-xmp.pdf", True), - (RESOURCE_ROOT / "crazyones.pdf", False), + ("commented-xmp.pdf", True), + ("crazyones.pdf", False), ], ) -def test_read_xmp_metadata(src, has_xmp): +def test_read_xmp_metadata(src, has_xmp, resources_dir): """Read XMP metadata from PDF files.""" - reader = PdfReader(src) + reader = PdfReader(resources_dir / src) xmp = reader.xmp_metadata assert (xmp is None) == (not has_xmp) if has_xmp: @@ -125,13 +119,13 @@ def test_converter_date(): assert date == datetime(2021, 4, 28, 15, 23, 1) -def test_modify_date(): +def test_modify_date(resources_dir): """ xmp_modify_date is extracted correctly. This is a regression test for issue #914. """ - path = RESOURCE_ROOT / "issue-914-xmp-data.pdf" + path = resources_dir / "issue-914-xmp-data.pdf" reader = PdfReader(path) assert reader.xmp_metadata.xmp_modify_date == datetime(2022, 4, 9, 15, 22, 43) @@ -251,10 +245,11 @@ def test_invalid_xmp_information_handling(): reader.xmp_metadata assert exc.value.args[0].startswith("XML in XmpInformation was invalid") + @pytest.mark.samples -def test_pdfa_xmp_metadata_with_values(): +def test_pdfa_xmp_metadata_with_values(sample_files_dir): """Test PDF/A XMP metadata extraction from a file with PDF/A metadata.""" - reader = PdfReader(SAMPLE_ROOT / "021-pdfa" / "crazyones-pdfa.pdf") + reader = PdfReader(sample_files_dir / "021-pdfa" / "crazyones-pdfa.pdf") xmp = reader.xmp_metadata assert xmp is not None @@ -263,9 +258,9 @@ def test_pdfa_xmp_metadata_with_values(): @pytest.mark.samples -def test_pdfa_xmp_metadata_without_values(): +def test_pdfa_xmp_metadata_without_values(sample_files_dir): """Test PDF/A XMP metadata extraction from a file without PDF/A metadata.""" - reader = PdfReader(SAMPLE_ROOT / "020-xmp" / "output_with_metadata_pymupdf.pdf") + reader = PdfReader(sample_files_dir / "020-xmp" / "output_with_metadata_pymupdf.pdf") xmp = reader.xmp_metadata assert xmp is not None @@ -304,9 +299,9 @@ def test_dc_language__no_bag_container(): 
assert reader.xmp_metadata.dc_language == ["x-unknown"] -def test_reading_does_not_destroy_root_object(): +def test_reading_does_not_destroy_root_object(resources_dir): """Test for #3391.""" - writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf") + writer = PdfWriter(clone_from=resources_dir / "commented-xmp.pdf") xmp = writer.xmp_metadata assert xmp is not None assert not isinstance(writer.root_object["/Metadata"], XmpInformation) @@ -318,8 +313,8 @@ def test_reading_does_not_destroy_root_object(): assert b"\n/Metadata 27 0 R\n" in output_bytes -def test_xmp_information__write_to_stream(): - writer = PdfWriter(clone_from=RESOURCE_ROOT / "commented-xmp.pdf") +def test_xmp_information__write_to_stream(resources_dir): + writer = PdfWriter(clone_from=resources_dir / "commented-xmp.pdf") xmp = writer.xmp_metadata output = BytesIO() @@ -335,9 +330,9 @@ def test_xmp_information__write_to_stream(): assert output_bytes.startswith(b"<<\n/Type /Metadata\n/Subtype /XML\n/Length 2786\n>>\nstream\n