diff --git a/src/huggingface_hub/cli/lfs.py b/src/huggingface_hub/cli/lfs.py
index 524f43fda1..1ccec9b923 100644
--- a/src/huggingface_hub/cli/lfs.py
+++ b/src/huggingface_hub/cli/lfs.py
@@ -28,7 +28,6 @@
 from huggingface_hub.lfs import LFS_MULTIPART_UPLOAD_COMMAND
 
 from ..utils import get_session, hf_raise_for_status, logging
-from ..utils._lfs import SliceFileObj
 
 
 logger = logging.get_logger(__name__)
@@ -139,29 +138,32 @@ def lfs_multipart_upload() -> None:
     parts = []
     with open(filepath, "rb") as file:
         for i, presigned_url in enumerate(presigned_urls):
-            with SliceFileObj(
-                file,
-                seek_from=i * chunk_size,
-                read_limit=chunk_size,
-            ) as data:
-                r = get_session().put(presigned_url, data=data)
-                hf_raise_for_status(r)
-                parts.append(
-                    {
-                        "etag": r.headers.get("etag"),
-                        "partNumber": i + 1,
-                    }
-                )
-            # In order to support progress reporting while data is uploading / downloading,
-            # the transfer process should post messages to stdout
-            write_msg(
-                {
-                    "event": "progress",
-                    "oid": oid,
-                    "bytesSoFar": (i + 1) * chunk_size,
-                    "bytesSinceLast": chunk_size,
-                }
+            # Read chunk sequentially instead of using SliceFileObj + seek.
+            # On Windows, file.seek() with offset >= 2GB can fail due to 32-bit
+            # signed integer limits in some APIs (see issue #3871).
+            chunk_data = file.read(chunk_size)
+            if not chunk_data:
+                raise ValueError(
+                    f"Unexpected end of file while reading part {i + 1} of multipart upload"
                 )
+            r = get_session().put(presigned_url, data=chunk_data)
+            hf_raise_for_status(r)
+            parts.append(
+                {
+                    "etag": r.headers.get("etag"),
+                    "partNumber": i + 1,
+                }
+            )
+            # In order to support progress reporting while data is uploading / downloading,
+            # the transfer process should post messages to stdout
+            write_msg(
+                {
+                    "event": "progress",
+                    "oid": oid,
+                    "bytesSoFar": (i + 1) * chunk_size,
+                    "bytesSinceLast": chunk_size,
+                }
+            )
 
     r = get_session().post(
         completion_url,
diff --git a/src/huggingface_hub/lfs.py b/src/huggingface_hub/lfs.py
index 8e5df3a9f5..e7180ffbac 100644
--- a/src/huggingface_hub/lfs.py
+++ b/src/huggingface_hub/lfs.py
@@ -32,7 +32,6 @@
     logging,
     validate_hf_hub_args,
 )
-from .utils._lfs import SliceFileObj
 from .utils.sha import sha256, sha_fileobj
 
 
@@ -383,13 +382,16 @@
     headers = []
     with operation.as_file(with_tqdm=True) as fileobj:
         for part_idx, part_upload_url in enumerate(sorted_parts_urls):
-            with SliceFileObj(
-                fileobj,
-                seek_from=chunk_size * part_idx,
-                read_limit=chunk_size,
-            ) as fileobj_slice:
-                # S3 might raise a transient 500 error -> let's retry if that happens
-                part_upload_res = http_backoff("PUT", part_upload_url, data=fileobj_slice)
-                hf_raise_for_status(part_upload_res)
-                headers.append(part_upload_res.headers)
+            # Read chunk sequentially instead of using SliceFileObj + seek.
+            # On Windows, file.seek() with offset >= 2GB can fail due to 32-bit
+            # signed integer limits in some APIs (see issue #3871).
+            chunk_data = fileobj.read(chunk_size)
+            if not chunk_data:
+                raise ValueError(
+                    f"Unexpected end of file while reading part {part_idx + 1} of multipart upload"
+                )
+            # S3 might raise a transient 500 error -> let's retry if that happens
+            part_upload_res = http_backoff("PUT", part_upload_url, data=chunk_data)
+            hf_raise_for_status(part_upload_res)
+            headers.append(part_upload_res.headers)
     return headers  # type: ignore