diff --git a/onetl/_util/file.py b/onetl/_util/file.py index 3668a1296..eb3fbf3cb 100644 --- a/onetl/_util/file.py +++ b/onetl/_util/file.py @@ -6,8 +6,11 @@ import io import os from datetime import datetime -from pathlib import Path, PurePath +from pathlib import Path +from typing import TypeVar +from onetl.base.path_protocol import PathProtocol +from onetl.base.pure_path_protocol import PurePathProtocol from onetl.exception import NotAFileError from onetl.impl import path_repr @@ -50,7 +53,10 @@ def is_file_readable(path: str | os.PathLike) -> Path: return path -def generate_temp_path(root: PurePath) -> PurePath: +T = TypeVar("T", PurePathProtocol, PathProtocol) + + +def generate_temp_path(root: T) -> T: """ Returns prefix which will be used for creating temp directory diff --git a/onetl/base/path_protocol.py b/onetl/base/path_protocol.py index 14ede608d..a071be9d8 100644 --- a/onetl/base/path_protocol.py +++ b/onetl/base/path_protocol.py @@ -2,45 +2,51 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from typing_extensions import Protocol, runtime_checkable +from pathlib import Path +from typing import TYPE_CHECKING + +from typing_extensions import Protocol, TypeAlias, runtime_checkable from onetl.base.path_stat_protocol import PathStatProtocol from onetl.base.pure_path_protocol import PurePathProtocol +if TYPE_CHECKING: + PathProtocol: TypeAlias = Path + PathWithStatsProtocol: TypeAlias = PathProtocol +else: -@runtime_checkable -class PathProtocol(PurePathProtocol, Protocol): - """ - Generic protocol for `pathlib.Path` like objects. - - Includes only minimal set of methods which allow to determine path type (file, directory) and existence - """ - - def is_dir(self) -> bool: - """ - Checks if this path is a directory - """ - - def is_file(self) -> bool: - """ - Checks if this path is a file + @runtime_checkable + class PathProtocol(PurePathProtocol, Protocol): """ + Generic protocol for `pathlib.Path` like objects. - def exists(self) -> bool: - """ - Checks if this path exists + Includes only minimal set of methods which allow to determine path type (file, directory) and existence """ + def is_dir(self) -> bool: + """ + Checks if this path is a directory + """ -@runtime_checkable -class PathWithStatsProtocol(PathProtocol, Protocol): - """ - Protocol for `pathlib.Path`-like file objects. + def is_file(self) -> bool: + """ + Checks if this path is a file + """ - Includes only minimal set of methods which allow to determine if file exists, or get stats, e.g. size - """ + def exists(self) -> bool: + """ + Checks if this path exists + """ - def stat(self) -> PathStatProtocol: + @runtime_checkable + class PathWithStatsProtocol(PathProtocol, Protocol): """ - Returns stats object with file information + Protocol for `pathlib.Path`-like file objects. + + Includes only minimal set of methods which allow to determine if file exists, or get stats, e.g. size """ + + def stat(self) -> PathStatProtocol: + """ + Returns stats object with file information + """ diff --git a/onetl/base/pure_path_protocol.py b/onetl/base/pure_path_protocol.py index fc9aec3b8..f00f7b002 100644 --- a/onetl/base/pure_path_protocol.py +++ b/onetl/base/pure_path_protocol.py @@ -2,91 +2,94 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from typing import Sequence, TypeVar - -from typing_extensions import Protocol, runtime_checkable - -T = TypeVar("T", bound="PurePathProtocol", covariant=True) # noqa: PLC0105 - - -@runtime_checkable -class PurePathProtocol(Protocol[T]): - """ - Generic protocol for `pathlib.PurePath` like objects. - - Includes only minimal set of methods which allow to get path items, like parent, name, etc - """ - - def __fspath__(self) -> str: - """ - Get string representation of path - """ - - def __eq__(self, other) -> bool: - """ - Check if two paths are equal - """ - - def __hash__(self) -> int: - """ - Get hash value for path - """ - - def __truediv__(self, key) -> T: - """ - Add items to path - """ - - def __rtruediv__(self, key) -> T: - """ - Add items to path - """ - - @property - def name(self) -> str: - """ - Get path name - """ - - @property - def parent(self) -> T: - """ - Get parent path - """ - - @property - def parents(self) -> Sequence[T]: - """ - Get parent paths - """ - - @property - def parts(self) -> Sequence[str]: - """ - Get path parts - """ - - def is_absolute(self) -> bool: - """ - Checks if this path is absolute - """ - - def match(self, path_pattern) -> bool: - """ - Checks if path matches a glob pattern - """ - - def relative_to(self, *other) -> T: - """ - Return the relative path to another path - """ - - def as_posix(self) -> str: - """ - Get POSIX representation of path - """ - - def joinpath(self, *args) -> T: - """ - Add items to path - """ +from pathlib import PurePath +from typing import TYPE_CHECKING, Sequence, TypeVar + +from typing_extensions import Protocol, TypeAlias, runtime_checkable + +if TYPE_CHECKING: + PurePathProtocol: TypeAlias = PurePath +else: + T = TypeVar("T", bound="PurePathProtocol", covariant=True) # noqa: PLC0105 + + @runtime_checkable + class PurePathProtocol(Protocol[T]): + """ + Generic protocol for `pathlib.PurePath` like objects. + + Includes only minimal set of methods which allow to get path items, like parent, name, etc + """ + + def __fspath__(self) -> str: + """ + Get string representation of path + """ + + def __eq__(self, other) -> bool: + """ + Check if two paths are equal + """ + + def __hash__(self) -> int: + """ + Get hash value for path + """ + + def __truediv__(self, key) -> T: + """ + Add items to path + """ + + def __rtruediv__(self, key) -> T: + """ + Add items to path + """ + + @property + def name(self) -> str: + """ + Get path name + """ + + @property + def parent(self) -> T: + """ + Get parent path + """ + + @property + def parents(self) -> Sequence[T]: + """ + Get parent paths + """ + + @property + def parts(self) -> Sequence[str]: + """ + Get path parts + """ + + def is_absolute(self) -> bool: + """ + Checks if this path is absolute + """ + + def match(self, path_pattern) -> bool: + """ + Checks if path matches a glob pattern + """ + + def relative_to(self, *other) -> T: + """ + Return the relative path to another path + """ + + def as_posix(self) -> str: + """ + Get POSIX representation of path + """ + + def joinpath(self, *args) -> T: + """ + Add items to path + """ diff --git a/onetl/connection/file_connection/file_connection.py b/onetl/connection/file_connection/file_connection.py index 9d06d3c54..d69e4e8f7 100644 --- a/onetl/connection/file_connection/file_connection.py +++ b/onetl/connection/file_connection/file_connection.py @@ -198,7 +198,7 @@ def resolve_file(self, path: os.PathLike | str) -> RemoteFile: return RemoteFile(path=remote_path, stats=stat) @slot - def read_text(self, path: os.PathLike | str, encoding: str = "utf-8", **kwargs) -> str: + def read_text(self, path: os.PathLike | str, encoding: str = "utf-8", **kwargs) -> str: # type: ignore[override] log.debug( "|%s| Reading string with encoding %r and options %r from '%s'", self.__class__.__name__, @@ -211,14 +211,14 @@ def read_text(self, path: os.PathLike | str, encoding: str = "utf-8", **kwargs) return self._read_text(remote_path, encoding=encoding, **kwargs) @slot - def read_bytes(self, path: os.PathLike | str, **kwargs) -> bytes: + def read_bytes(self, path: os.PathLike | str, **kwargs) -> bytes: # type: ignore[override] log.debug("|%s| Reading bytes with options %r from '%s'", self.__class__.__name__, kwargs, path) remote_path = self.resolve_file(path) return self._read_bytes(remote_path, **kwargs) @slot - def write_text(self, path: os.PathLike | str, content: str, encoding: str = "utf-8", **kwargs) -> RemoteFile: + def write_text(self, path: os.PathLike | str, content: str, encoding: str = "utf-8", **kwargs) -> RemoteFile: # type: ignore[override] if not isinstance(content, str): msg = f"content must be str, not '{content.__class__.__name__}'" raise TypeError(msg) @@ -248,7 +248,7 @@ def write_text(self, path: os.PathLike | str, content: str, encoding: str = "utf return self.resolve_file(remote_path) @slot - def write_bytes(self, path: os.PathLike | str, content: bytes, **kwargs) -> RemoteFile: + def write_bytes(self, path: os.PathLike | str, content: bytes, **kwargs) -> RemoteFile: # type: ignore[override] if not isinstance(content, bytes): msg = f"content must be bytes, not '{content.__class__.__name__}'" raise TypeError(msg) @@ -420,7 +420,7 @@ def rename_file( return self.resolve_file(target_file) @slot - def list_dir( + def list_dir( # type: ignore[override] self, path: os.PathLike | str, filters: Iterable[BaseFileFilter] | None = None, @@ -453,7 +453,7 @@ def list_dir( return result @slot - def walk( + def walk( # type: ignore[override] self, root: os.PathLike | str, *, @@ -515,7 +515,8 @@ def _walk( # noqa: C901 return log.debug("|%s| Walking through directory '%s'", self.__class__.__name__, root) - dirs, files = [], [] + dirs: list[RemoteDirectory] = [] + files: list[RemoteFile] = [] for entry in self._scan_entries(root): if limits_reached(limits): @@ -539,8 +540,8 @@ def _walk( # noqa: C901 files.append(file) if topdown and not limits_reached(limits): - for name in dirs: - yield from self._walk(root=root / name, topdown=topdown, filters=filters, limits=limits) + for directory in dirs: + yield from self._walk(root=directory, topdown=topdown, filters=filters, limits=limits) log.debug( "|%s| Directory '%s' contains %d nested directories and %d files", diff --git a/onetl/connection/file_connection/ftps.py b/onetl/connection/file_connection/ftps.py index 5f7a38766..8de5abce9 100644 --- a/onetl/connection/file_connection/ftps.py +++ b/onetl/connection/file_connection/ftps.py @@ -12,7 +12,7 @@ from pydantic import Field # type: ignore[no-redef, assignment] try: - from onetl.connection.file_connection.ftp import FTP + from onetl.connection.file_connection.ftp import FTP, FTPExtra except (ImportError, NameError) as e: raise ImportError( textwrap.dedent( @@ -46,12 +46,12 @@ def ntransfercmd(self, cmd, rest=None): return conn, size -class FTPSExtra(FTP.Extra): - __doc__ = FTP.Extra.__doc__.replace("FTP", "FTPS") +class FTPSExtra(FTPExtra): + __doc__ = FTPExtra.__doc__.replace("FTP", "FTPS") # type: ignore[union-attr] class FTPS(FTP): - __doc__ = FTP.__doc__.replace("FTP", "FTPS") + __doc__ = FTP.__doc__.replace("FTP", "FTPS") # type: ignore[union-attr] extra: FTPSExtra = Field(default_factory=FTPSExtra) diff --git a/onetl/connection/file_connection/hdfs/connection.py b/onetl/connection/file_connection/hdfs/connection.py index 5eff2897d..a91c35647 100644 --- a/onetl/connection/file_connection/hdfs/connection.py +++ b/onetl/connection/file_connection/hdfs/connection.py @@ -7,7 +7,7 @@ import warnings from contextlib import suppress from logging import getLogger -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING, Optional, Tuple, cast from etl_entities.instance import Cluster, Host @@ -263,7 +263,7 @@ class HDFS(FileConnection, RenameDirMixin): Extra = HDFSExtra - _active_host: Optional[Host] = PrivateAttr(default=None) + _active_host: Optional[str] = PrivateAttr(default=None) @slot @classmethod @@ -459,49 +459,52 @@ def _timeout_fallback(cls, values): def _get_active_namenode(self) -> str: class_name = self.__class__.__name__ - log.info("|%s| Detecting active namenode of cluster %r ...", class_name, self.cluster) + cluster = cast("str", self.cluster) + log.info("|%s| Detecting active namenode of cluster %r ...", class_name, cluster) - namenodes = self.Slots.get_cluster_namenodes(self.cluster) + namenodes = self.Slots.get_cluster_namenodes(cast("str", cluster)) if not namenodes: - msg = f"Cannot get list of namenodes for a cluster {self.cluster!r}" + msg = f"Cannot get list of namenodes for a cluster {cluster!r}" raise RuntimeError(msg) nodes_len = len(namenodes) for i, namenode in enumerate(namenodes, start=1): log.debug("|%s| Trying namenode %r (%d of %d) ...", class_name, namenode, i, nodes_len) - if self.Slots.is_namenode_active(namenode, self.cluster): + if self.Slots.is_namenode_active(namenode, cluster): log.info("|%s| Node %r is active!", class_name, namenode) return namenode log.debug("|%s| Node %r is not active, skipping", class_name, namenode) - msg = f"Cannot detect active namenode for cluster {self.cluster!r}" + msg = f"Cannot detect active namenode for cluster {cluster!r}" raise RuntimeError(msg) def _get_host(self) -> str: - if not self.host and self.cluster: + host = cast("str", self.host) + + if not host and self.cluster: return self._get_active_namenode() # host is passed explicitly or cluster not set class_name = self.__class__.__name__ if self.cluster: - log.info("|%s| Detecting if namenode %r of cluster %r is active...", class_name, self.host, self.cluster) + log.info("|%s| Detecting if namenode %r of cluster %r is active...", class_name, host, self.cluster) else: - log.info("|%s| Detecting if namenode %r is active...", class_name, self.host) + log.info("|%s| Detecting if namenode %r is active...", class_name, host) - is_active = self.Slots.is_namenode_active(self.host, self.cluster) + is_active = self.Slots.is_namenode_active(cast("str", host), self.cluster) if is_active: - log.info("|%s| Namenode %r is active!", class_name, self.host) - return self.host + log.info("|%s| Namenode %r is active!", class_name, host) + return host if is_active is None: log.debug("|%s| No hooks, skip validation", class_name) - return self.host + return host if self.cluster: - msg = f"Host {self.host!r} is not an active namenode of cluster {self.cluster!r}" + msg = f"Host {host!r} is not an active namenode of cluster {self.cluster!r}" raise RuntimeError(msg) - msg = f"Host {self.host!r} is not an active namenode" + msg = f"Host {host!r} is not an active namenode" raise RuntimeError(msg) def _get_conn_str(self) -> str: diff --git a/onetl/connection/file_connection/mixins/rename_dir_mixin.py b/onetl/connection/file_connection/mixins/rename_dir_mixin.py index a84762a12..311edf03f 100644 --- a/onetl/connection/file_connection/mixins/rename_dir_mixin.py +++ b/onetl/connection/file_connection/mixins/rename_dir_mixin.py @@ -5,6 +5,7 @@ import os from abc import abstractmethod from logging import getLogger +from typing import cast from onetl.base import BaseFileConnection from onetl.exception import DirectoryExistsError @@ -85,7 +86,7 @@ def rename_dir( self._rename_dir(source_dir, target_dir) log.info("|%s| Successfully renamed directory '%s' to '%s'", self.__class__.__name__, source_dir, target_dir) - return self.resolve_dir(target_dir) + return cast("RemoteDirectory", self.resolve_dir(target_dir)) @abstractmethod def _rename_dir(self, source: RemotePath, target: RemotePath) -> None: ... diff --git a/onetl/connection/file_connection/s3.py b/onetl/connection/file_connection/s3.py index 633ef1d66..637c9b01a 100644 --- a/onetl/connection/file_connection/s3.py +++ b/onetl/connection/file_connection/s3.py @@ -7,8 +7,9 @@ import os import textwrap import warnings +from pathlib import Path from pprint import pformat -from typing import Iterable, Optional, Union +from typing import Iterable, Optional, Union, cast from onetl.exception import DirectoryNotEmptyError from onetl.hooks import slot, support_hooks @@ -329,7 +330,7 @@ def _scan_entries_recursive(root: RemotePath) -> Iterable[Object]: log.debug("|%s| Directory to remove: %s", self.__class__.__name__, directory_info) objects_to_delete = ( - DeleteObject(obj.object_name) + DeleteObject(obj.object_name) # type: ignore[arg-type] for obj in _scan_entries_recursive(remote_dir) # type: ignore[arg-type] ) errors = list( @@ -389,19 +390,13 @@ def _get_connection_pool(self) -> PoolManager: cert_reqs="CERT_NONE", ) - if self.extra.ssl_verify.is_dir(): - return PoolManager( - timeout=self.extra.timeout, - retries=self.extra.retry, - cert_reqs="CERT_REQUIRED", - ca_cert_dir=os.fspath(self.extra.ssl_verify), - ) - + ca_cert_path = cast("Path", self.extra.ssl_verify) return PoolManager( timeout=self.extra.timeout, retries=self.extra.retry, cert_reqs="CERT_REQUIRED", - ca_certs=os.fspath(self.extra.ssl_verify), + ca_cert_dir=os.fspath(ca_cert_path) if ca_cert_path.is_dir() else None, + ca_certs=os.fspath(ca_cert_path) if not ca_cert_path.is_dir() else None, ) def _get_client(self) -> Minio: @@ -506,7 +501,7 @@ def _scan_entries(self, path: RemotePath) -> Iterable[Object]: return (obj for obj in objects if obj.object_name != directory_path_str) def _extract_name_from_entry(self, entry: Object) -> str: - return RemotePath(entry.object_name).name + return RemotePath(entry.object_name).name # type: ignore[arg-type] def _is_dir_entry(self, top: RemotePath, entry: Object) -> bool: return entry.is_dir diff --git a/onetl/connection/file_connection/sftp.py b/onetl/connection/file_connection/sftp.py index abf71e773..75d8f7216 100644 --- a/onetl/connection/file_connection/sftp.py +++ b/onetl/connection/file_connection/sftp.py @@ -238,7 +238,7 @@ def _is_client_closed(self, client: SFTPClient) -> bool: def _close_client(self, client: SFTPClient) -> None: client.close() - def _parse_user_ssh_config(self) -> tuple[str | None, str | None]: + def _parse_user_ssh_config(self) -> tuple[ProxyCommand | None, str | None]: host_proxy = None key_file = os.fspath(self.key_file) if self.key_file else None @@ -249,11 +249,15 @@ def _parse_user_ssh_config(self) -> tuple[str | None, str | None]: ssh_conf = SSHConfig() ssh_conf.parse(SSH_CONFIG_PATH.read_text()) host_info = ssh_conf.lookup(self.host) or {} - if host_info.get("proxycommand"): - host_proxy = ProxyCommand(host_info.get("proxycommand")) - if not (self.password or key_file) and host_info.get("identityfile"): - key_file = host_info.get("identityfile")[0] + proxycommand = host_info.get("proxycommand") + if proxycommand: + host_proxy = ProxyCommand(proxycommand) + + if not (self.password or key_file): + identityfile = host_info.get("identityfile") + if identityfile: + key_file = identityfile[0] except ConfigParseError: log.exception("Failed to parse SSH config") @@ -310,7 +314,7 @@ def _get_stat(self, path: RemotePath) -> SFTPAttributes: return self.client.stat(os.fspath(path)) def _extract_name_from_entry(self, entry: SFTPAttributes) -> str: - return entry.filename + return entry.filename # type: ignore[attr-defined] def _is_dir_entry(self, top: RemotePath, entry: SFTPAttributes) -> bool: return S_ISDIR(entry.st_mode) diff --git a/onetl/connection/file_connection/webdav.py b/onetl/connection/file_connection/webdav.py index 1cab8fe7d..b09b4bb03 100644 --- a/onetl/connection/file_connection/webdav.py +++ b/onetl/connection/file_connection/webdav.py @@ -12,7 +12,6 @@ from typing import Any, Optional, Union from etl_entities.instance import Host -from pydantic import root_validator from onetl.impl.generic_options import GenericOptions diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 92ec13394..6def30aaf 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -6,7 +6,6 @@ import logging import os from contextlib import suppress -from pathlib import Path from typing import TYPE_CHECKING, Optional from etl_entities.instance import Cluster, Host @@ -152,10 +151,10 @@ class SparkHDFS(SparkFileDFConnection): host: Optional[Host] = None ipc_port: int = Field(default=8020, alias=avoid_alias("port")) # type: ignore[literal-required] - _active_host: Optional[Host] = PrivateAttr(default=None) + _active_host: Optional[str] = PrivateAttr(default=None) @slot - def path_from_string(self, path: os.PathLike | str) -> Path: + def path_from_string(self, path: os.PathLike | str) -> RemotePath: return RemotePath(os.fspath(path)) @property @@ -253,7 +252,7 @@ def get_current(cls, spark: SparkSession): raise RuntimeError(msg) log.info("|%s| Got %r", cls.__name__, current_cluster) - return cls(cluster=current_cluster, spark=spark) + return cls(cluster=current_cluster, spark=spark) # type: ignore[arg-type] @validator("cluster") def _validate_cluster_name(cls, cluster): diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index a4720c1c5..f9475e749 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py @@ -396,7 +396,7 @@ def _check_java_class_imported(cls, spark: SparkSession) -> SparkSession: spark_version = get_spark_version(spark).format("{0}.{1}.{2}") msg = MISSING_JVM_CLASS_MSG.format( java_class=java_class, - package_source=cls.__name__, + package_source=cls.__name__, # type: ignore[attr-defined] args=f"spark_version='{spark_version}'", ) raise ValueError(msg) from e diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index 12ffec3a3..cb78ae724 100644 --- a/onetl/db/db_reader/db_reader.py +++ b/onetl/db/db_reader/db_reader.py @@ -315,9 +315,11 @@ class DBReader(FrozenModel): _connection_checked: bool = PrivateAttr(default=False) @validator("source", always=True) - def validate_source(cls, source, values): + def validate_source(cls, value: str, values): + if "connection" not in values: + return value connection: BaseDBConnection = values["connection"] - return connection.dialect.validate_name(source) + return connection.dialect.validate_name(value) @validator("columns", always=True, pre=True) def validate_columns(cls, value: str | list[str] | None, values: dict) -> list[str] | None: @@ -327,25 +329,31 @@ def validate_columns(cls, value: str | list[str] | None, values: dict) -> list[s return connection.dialect.validate_columns(value) @validator("where", always=True) - def validate_where(cls, where: Any, values: dict) -> Any: + def validate_where(cls, value: Any, values: dict) -> Any: + if "connection" not in values: + return value # type: ignore[return-value] connection: BaseDBConnection = values["connection"] - result = connection.dialect.validate_where(where) + result = connection.dialect.validate_where(value) if isinstance(result, dict): return frozendict.frozendict(result) # type: ignore[attr-defined, operator] return result @validator("hint", always=True) - def validate_hint(cls, hint: Any, values: dict) -> Any: + def validate_hint(cls, value: Any, values: dict) -> Any: + if "connection" not in values: + return value # type: ignore[return-value] connection: BaseDBConnection = values["connection"] - result = connection.dialect.validate_hint(hint) + result = connection.dialect.validate_hint(value) if isinstance(result, dict): return frozendict.frozendict(result) # type: ignore[attr-defined, operator] return result @validator("df_schema", always=True) - def validate_df_schema(cls, df_schema: StructType | None, values: dict) -> StructType | None: + def validate_df_schema(cls, value: StructType | None, values: dict) -> StructType | None: + if "connection" not in values: + return value # type: ignore[return-value] connection: BaseDBConnection = values["connection"] - return connection.dialect.validate_df_schema(df_schema) + return connection.dialect.validate_df_schema(value) @root_validator(skip_on_failure=True) def validate_hwm(cls, values: dict) -> dict: diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index 3263f05a0..3d0cfb794 100644 --- a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -106,12 +106,12 @@ class DBWriter(FrozenModel): _connection_checked: bool = PrivateAttr(default=False) - @validator("target", pre=True, always=True) - def validate_target(cls, target, values): + @validator("target", always=True) + def validate_target(cls, value: str, values): if "connection" not in values: - return target + return value connection: BaseDBConnection = values["connection"] - return connection.dialect.validate_name(target) + return connection.dialect.validate_name(value) @validator("options", pre=True, always=True) def validate_options(cls, options, values): diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index 0047feafe..e0a793849 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py @@ -9,7 +9,7 @@ import warnings from concurrent.futures import ThreadPoolExecutor, as_completed from enum import Enum -from typing import Generator, Iterable, List, Optional, Tuple, Type, Union +from typing import Generator, Iterable, List, Optional, Tuple, Type, Union, cast from etl_entities.hwm import FileHWM, FileListHWM from etl_entities.instance import AbsolutePath @@ -502,11 +502,11 @@ def view_files(self) -> FileSet[RemoteFile]: if self.hwm: filters.append(FileHWMFilter(hwm=self._init_hwm(self.hwm))) - result = FileSet() + result: FileSet[RemoteFile] = FileSet() try: for _root, _dirs, files in self.connection.walk(self.source_path, filters=filters, limits=self.limits): for file in files: - result.append(file) + result.append(cast("RemoteFile", file)) except Exception as e: msg = f"Couldn't read directory tree from remote dir '{self.source_path}'" @@ -641,12 +641,12 @@ def _check_strategy(self): raise ValueError(msg) def _init_hwm(self, hwm: FileHWM) -> FileHWM: - strategy: HWMStrategy = StrategyManager.get_current() + strategy = cast("HWMStrategy", StrategyManager.get_current()) if not strategy.hwm: strategy.hwm = self.hwm strategy.fetch_hwm() - return strategy.hwm + return cast("FileHWM", strategy.hwm) if not isinstance(strategy.hwm, FileHWM) or strategy.hwm.name != hwm.name: # exception raised when inside one strategy >1 processes on the same table but with different hwm columns @@ -776,17 +776,18 @@ def _download_files( try: for status, source_file, target_file in self._bulk_download(to_download): if status == FileDownloadStatus.SUCCESSFUL: - result.successful.add(target_file) + result.successful.add(target_file) # type: ignore[arg-type] source_files.append(source_file) elif status == FileDownloadStatus.FAILED: - result.failed.add(source_file) + result.failed.add(source_file) # type: ignore[arg-type] elif status == FileDownloadStatus.SKIPPED: - result.skipped.add(source_file) + result.skipped.add(source_file) # type: ignore[arg-type] elif status == FileDownloadStatus.MISSING: result.missing.add(source_file) finally: if self.hwm: # always update HWM in HWM store, even if downloader is interrupted + strategy = cast("HWMStrategy", strategy) strategy.update_hwm(source_files) strategy.save_hwm() return result @@ -799,7 +800,7 @@ def _create_dirs( Create all parent paths before downloading files This is required to avoid errors then multiple threads create the same dir """ - parent_paths = OrderedSet() + parent_paths: OrderedSet[LocalPath] = OrderedSet() for _, target_file, tmp_file in to_download: parent_paths.add(target_file.parent) if tmp_file: @@ -865,7 +866,7 @@ def _download_file( # noqa: PLR0912, C901 return FileDownloadStatus.MISSING, source_file, None try: - remote_file = self.connection.resolve_file(source_file) + remote_file = cast("RemoteFile", self.connection.resolve_file(source_file)) replace = False if local_file.exists(): diff --git a/onetl/file/file_downloader/result.py b/onetl/file/file_downloader/result.py index 302258e7f..7644cd6ca 100644 --- a/onetl/file/file_downloader/result.py +++ b/onetl/file/file_downloader/result.py @@ -58,14 +58,14 @@ class DownloadResult(FileResult): ``` """ - successful: FileSet[LocalPath] = Field(default_factory=FileSet) + successful: FileSet[LocalPath] = Field(default_factory=lambda: FileSet({})) "File paths (local) which were downloaded successfully" - failed: FileSet[FailedRemoteFile] = Field(default_factory=FileSet) + failed: FileSet[FailedRemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which were not downloaded because of some failure" - skipped: FileSet[RemoteFile] = Field(default_factory=FileSet) + skipped: FileSet[RemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which were skipped because of some reason" - missing: FileSet[RemotePath] = Field(default_factory=FileSet) + missing: FileSet[RemotePath] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which are not present in the remote file system" diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py index c6e6b52ab..8b65218a1 100644 --- a/onetl/file/file_mover/file_mover.py +++ b/onetl/file/file_mover/file_mover.py @@ -6,7 +6,7 @@ import os from concurrent.futures import ThreadPoolExecutor, as_completed from enum import Enum -from typing import Iterable, List, Optional, Tuple +from typing import Generator, Iterable, List, Optional, Tuple, Union, cast from ordered_set import OrderedSet @@ -16,8 +16,7 @@ from pydantic import Field, PrivateAttr, validator # type: ignore[no-redef, assignment] from onetl.base import BaseFileConnection, BaseFileFilter, BaseFileLimit -from onetl.base.path_protocol import PathProtocol, PathWithStatsProtocol -from onetl.base.pure_path_protocol import PurePathProtocol +from onetl.base.path_protocol import PathProtocol from onetl.file.file_mover.options import FileMoverOptions from onetl.file.file_mover.result import MoveResult from onetl.file.file_set import FileSet @@ -41,7 +40,7 @@ log = logging.getLogger(__name__) # source, target -MOVE_ITEMS_TYPE = OrderedSet[Tuple[RemotePath, RemotePath]] +MOVE_ITEMS_TYPE = OrderedSet[Tuple[Union[RemotePath, RemoteFile], RemotePath]] class FileMoveStatus(Enum): @@ -360,12 +359,12 @@ def view_files(self) -> FileSet[RemoteFile]: if not self._connection_checked: self._check_source_path() - result = FileSet() + result: FileSet[RemoteFile] = FileSet() try: for _root, _dirs, files in self.connection.walk(self.source_path, filters=self.filters, limits=self.limits): for file in files: - result.append(file) + result.append(cast("RemoteFile", file)) except Exception as e: msg = f"Couldn't read directory tree from remote dir '{self.source_path}'" @@ -395,7 +394,7 @@ def _validate_files( self, remote_files: Iterable[os.PathLike | str], ) -> MOVE_ITEMS_TYPE: - result = OrderedSet() + result: MOVE_ITEMS_TYPE = OrderedSet() for file in remote_files: remote_file_path = file if isinstance(file, PathProtocol) else RemotePath(file) @@ -424,7 +423,7 @@ def _validate_files( raise ValueError(msg) if not isinstance(old_file, PathProtocol) and self.connection.path_exists(old_file): - old_file = self.connection.resolve_file(old_file) + old_file = cast("RemoteFile", self.connection.resolve_file(old_file)) result.add((old_file, new_file)) @@ -459,11 +458,11 @@ def _move_files( result = MoveResult() for status, file in self._bulk_move(to_move): if status == FileMoveStatus.SUCCESSFUL: - result.successful.add(file) + result.successful.add(file) # type: ignore[arg-type] elif status == FileMoveStatus.FAILED: - result.failed.add(file) + result.failed.add(file) # type: ignore[arg-type] elif status == FileMoveStatus.SKIPPED: - result.skipped.add(file) + result.skipped.add(file) # type: ignore[arg-type] elif status == FileMoveStatus.MISSING: result.missing.add(file) @@ -484,10 +483,9 @@ def _create_dirs( def _bulk_move( self, to_move: MOVE_ITEMS_TYPE, - ) -> list[tuple[FileMoveStatus, PurePathProtocol | PathWithStatsProtocol]]: + ) -> Generator[tuple[FileMoveStatus, RemoteFile | FailedRemoteFile | RemotePath], None, None]: workers = self.options.workers files_count = len(to_move) - result = [] real_workers = workers if files_count < workers: @@ -508,30 +506,24 @@ def _bulk_move( futures = [ executor.submit(self._move_file, source_file, target_file) for source_file, target_file in to_move ] - result = [future.result() for future in as_completed(futures)] + yield from (future.result() for future in as_completed(futures)) else: log.debug("|%s| Using plain old for-loop", self.__class__.__name__) - for source_file, target_file in to_move: - result.append( - self._move_file( - source_file, - target_file, - ), - ) - - return result + yield from (self._move_file(source_file, target_file) for source_file, target_file in to_move) def _move_file( self, - source_file: RemotePath, + source_file: RemotePath | RemoteFile, target_file: RemotePath, - ) -> tuple[FileMoveStatus, PurePathProtocol | PathWithStatsProtocol]: + ) -> tuple[FileMoveStatus, RemoteFile | FailedRemoteFile | RemotePath]: log.info("|%s| Moving file '%s' to '%s'", self.__class__.__name__, source_file, target_file) if not self.connection.path_exists(source_file): log.warning("|%s| Missing file '%s', skipping", self.__class__.__name__, source_file) return FileMoveStatus.MISSING, source_file + source_file = cast("RemoteFile", source_file) + try: replace = False if self.connection.path_exists(target_file): diff --git a/onetl/file/file_mover/result.py b/onetl/file/file_mover/result.py index 53c43bc6e..d192b9db4 100644 --- a/onetl/file/file_mover/result.py +++ b/onetl/file/file_mover/result.py @@ -58,14 +58,14 @@ class MoveResult(FileResult): ``` """ - successful: FileSet[RemoteFile] = Field(default_factory=FileSet) + successful: FileSet[RemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (local) which were moved successfully" - failed: FileSet[FailedRemoteFile] = Field(default_factory=FileSet) + failed: FileSet[FailedRemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which were not moved because of some failure" - skipped: FileSet[RemoteFile] = Field(default_factory=FileSet) + skipped: FileSet[RemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which were skipped because of some reason" - missing: FileSet[RemotePath] = Field(default_factory=FileSet) + missing: FileSet[RemotePath] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which are not present in the remote file system" diff --git a/onetl/file/file_result.py b/onetl/file/file_result.py index 6dde23ea2..0b4ae9f77 100644 --- a/onetl/file/file_result.py +++ b/onetl/file/file_result.py @@ -3,10 +3,12 @@ from __future__ import annotations import os -from typing import Iterable +from typing import Generic, Iterable, TypeVar from humanize import naturalsize +from onetl.base.path_protocol import PathProtocol + try: from pydantic.v1 import Field, validator except (ImportError, AttributeError): @@ -24,8 +26,12 @@ INDENT = " " * 4 +SuccessfulPath_co = TypeVar("SuccessfulPath_co", PurePathProtocol, PathProtocol, covariant=True) +FailedPath_co = TypeVar("FailedPath_co", PurePathProtocol, PathProtocol, covariant=True) +MissingPath_co = TypeVar("MissingPath_co", PurePathProtocol, PathProtocol, covariant=True) + -class FileResult(BaseModel): +class FileResult(BaseModel, Generic[SuccessfulPath_co, FailedPath_co, MissingPath_co]): """ Result of some file manipulation process, e.g. download, upload, etc. @@ -37,16 +43,16 @@ class FileResult(BaseModel): * :obj`missing` """ - successful: FileSet[PurePathProtocol] = Field(default_factory=FileSet) + successful: FileSet[SuccessfulPath_co] = Field(default_factory=lambda: FileSet({})) "Successfully handled files" - failed: FileSet[PurePathProtocol] = Field(default_factory=FileSet) + failed: FileSet[FailedPath_co] = Field(default_factory=lambda: FileSet({})) "File paths which were handled with some failures" - skipped: FileSet[PurePathProtocol] = Field(default_factory=FileSet) + skipped: FileSet[SuccessfulPath_co] = Field(default_factory=lambda: FileSet({})) "File paths which were skipped because of some reason" - missing: FileSet[PurePathProtocol] = Field(default_factory=FileSet) + missing: FileSet[MissingPath_co] = Field(default_factory=lambda: FileSet({})) "Unknown paths which cannot be handled" @validator("successful", "failed", "skipped", "missing") diff --git a/onetl/file/file_set.py b/onetl/file/file_set.py index 9d3f8e355..3a1bfb064 100644 --- a/onetl/file/file_set.py +++ b/onetl/file/file_set.py @@ -11,9 +11,10 @@ from onetl.exception import EmptyFilesError, ZeroFileSizeError from onetl.impl import path_repr -T = TypeVar("T", bound=PurePathProtocol) INDENT = " " * 4 +T = TypeVar("T", bound=PurePathProtocol) + class FileSet(OrderedSet[T], Generic[T]): """ diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index 94603fa48..6456bda19 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -6,7 +6,7 @@ import os from concurrent.futures import ThreadPoolExecutor, as_completed from enum import Enum -from typing import Iterable, Optional, Tuple +from typing import Iterable, Optional, Tuple, cast from ordered_set import OrderedSet @@ -17,8 +17,6 @@ from onetl._util.file import generate_temp_path from onetl.base import BaseFileConnection -from onetl.base.path_protocol import PathWithStatsProtocol -from onetl.base.pure_path_protocol import PurePathProtocol from onetl.exception import DirectoryNotFoundError, NotAFileError from onetl.file.file_set import FileSet from onetl.file.file_uploader.options import FileUploaderOptions @@ -29,6 +27,7 @@ FileExistBehavior, FrozenModel, LocalPath, + RemoteFile, RemotePath, path_repr, ) @@ -364,12 +363,12 @@ def view_files(self) -> FileSet[LocalPath]: if not self._connection_checked: self._check_local_path() - result = FileSet() + result: FileSet[LocalPath] = FileSet() try: for root, dirs, files in os.walk(self.local_path): log.debug("|Local FS| Listing dir '%s': %d dirs, %d files", root, len(dirs), len(files)) - result.update(LocalPath(root) / file for file in files) + result.update({LocalPath(root) / file for file in files}) except Exception as e: msg = f"Couldn't read directory tree from local dir '{self.local_path}'" raise RuntimeError(msg) from e @@ -412,7 +411,7 @@ def _validate_files( local_files: Iterable[os.PathLike | str], current_temp_dir: RemotePath | None, ) -> UPLOAD_ITEMS_TYPE: - result = OrderedSet() + result: UPLOAD_ITEMS_TYPE = OrderedSet() for file in local_files: local_file_path = LocalPath(file) @@ -475,9 +474,9 @@ def _upload_files(self, to_upload: UPLOAD_ITEMS_TYPE) -> UploadResult: result = UploadResult() for status, file in self._bulk_upload(to_upload): if status == FileUploadStatus.SUCCESSFUL: - result.successful.add(file) + result.successful.add(file) # type: ignore[arg-type] elif status == FileUploadStatus.FAILED: - result.failed.add(file) + result.failed.add(file) # type: ignore[arg-type] elif status == FileUploadStatus.SKIPPED: result.skipped.add(file) elif status == FileUploadStatus.MISSING: @@ -493,7 +492,7 @@ def _create_dirs( Create all parent paths before uploading files This is required to avoid errors then multiple threads create the same dir """ - parent_paths = OrderedSet() + parent_paths: OrderedSet[RemotePath] = OrderedSet() for _, target_file, tmp_file in to_upload: parent_paths.add(target_file.parent) if tmp_file: @@ -505,7 +504,7 @@ def _create_dirs( def _bulk_upload( self, to_upload: UPLOAD_ITEMS_TYPE, - ) -> list[tuple[FileUploadStatus, PurePathProtocol | PathWithStatsProtocol]]: + ) -> list[tuple[FileUploadStatus, LocalPath | RemoteFile | FailedLocalFile]]: workers = self.options.workers files_count = len(to_upload) result = [] @@ -549,7 +548,7 @@ def _upload_file( # noqa: PLR0912, C901 local_file: LocalPath, target_file: RemotePath, tmp_file: RemotePath | None, - ) -> tuple[FileUploadStatus, PurePathProtocol | PathWithStatsProtocol]: + ) -> tuple[FileUploadStatus, LocalPath | RemoteFile | FailedLocalFile]: if tmp_file: log.info( "|%s| Uploading file '%s' to '%s' (via tmp '%s')", @@ -589,6 +588,8 @@ def _upload_file( # noqa: PLR0912, C901 # Direct upload uploaded_file = self.connection.upload_file(local_file, target_file, replace=replace) + uploaded_file = cast("RemoteFile", uploaded_file) + if self.options.delete_local: local_file.unlink() log.warning("|Local FS| Successfully removed file %s", local_file) diff --git a/onetl/file/file_uploader/result.py b/onetl/file/file_uploader/result.py index 573d4e241..065eb169e 100644 --- a/onetl/file/file_uploader/result.py +++ b/onetl/file/file_uploader/result.py @@ -58,14 +58,14 @@ class UploadResult(FileResult): ``` """ - successful: FileSet[RemoteFile] = Field(default_factory=FileSet) + successful: FileSet[RemoteFile] = Field(default_factory=lambda: FileSet({})) "File paths (remote) which were uploaded successfully" - failed: FileSet[FailedLocalFile] = Field(default_factory=FileSet) + failed: FileSet[FailedLocalFile] = Field(default_factory=lambda: FileSet({})) "File paths (local) which were not uploaded because of some failure" - skipped: FileSet[LocalPath] = Field(default_factory=FileSet) + skipped: FileSet[LocalPath] = Field(default_factory=lambda: FileSet({})) "File paths (local) which were skipped because of some reason" - missing: FileSet[LocalPath] = Field(default_factory=FileSet) + missing: FileSet[LocalPath] = Field(default_factory=lambda: FileSet({})) "File paths (local) which are not present in the local file system" diff --git a/onetl/file/format/avro.py b/onetl/file/format/avro.py index 28df742f8..008c12fb2 100644 --- a/onetl/file/format/avro.py +++ b/onetl/file/format/avro.py @@ -5,7 +5,7 @@ import json import logging import warnings -from typing import TYPE_CHECKING, ClassVar, Optional, Union +from typing import TYPE_CHECKING, ClassVar, Optional, Union, cast from typing_extensions import Literal @@ -246,7 +246,7 @@ class Avro(ReadWriteFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() prohibited_options = PROHIBITED_OPTIONS extra = "allow" @@ -409,7 +409,8 @@ def parse_column(self, column: str | Column) -> Column: from pyspark.sql import Column, SparkSession from pyspark.sql.functions import col - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_parse_options() from pyspark.sql.avro.functions import from_avro @@ -509,7 +510,8 @@ def serialize_column(self, column: str | Column) -> Column: from pyspark.sql import Column, SparkSession from pyspark.sql.functions import col - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_serialization_options() from pyspark.sql.avro.functions import to_avro diff --git a/onetl/file/format/csv.py b/onetl/file/format/csv.py index f6fdc7b37..201c09435 100644 --- a/onetl/file/format/csv.py +++ b/onetl/file/format/csv.py @@ -3,7 +3,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, ClassVar, Optional, Union +from typing import TYPE_CHECKING, ClassVar, Optional, Union, cast from typing_extensions import Literal @@ -452,7 +452,7 @@ class CSV(ReadWriteFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() extra = "allow" @slot @@ -531,7 +531,8 @@ def parse_column(self, column: str | Column, schema: StructType) -> Column: from pyspark.sql import Column, SparkSession - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_serialization_options() from pyspark.sql.functions import col, from_csv @@ -604,7 +605,8 @@ def serialize_column(self, column: str | Column) -> Column: from pyspark.sql import Column, SparkSession - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_serialization_options() from pyspark.sql.functions import col, to_csv diff --git a/onetl/file/format/excel.py b/onetl/file/format/excel.py index 79dc341d3..49619c0c9 100644 --- a/onetl/file/format/excel.py +++ b/onetl/file/format/excel.py @@ -212,7 +212,7 @@ class Excel(ReadWriteFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() extra = "allow" @slot diff --git a/onetl/file/format/json.py b/onetl/file/format/json.py index cf59c87ce..4e0f98685 100644 --- a/onetl/file/format/json.py +++ b/onetl/file/format/json.py @@ -3,7 +3,7 @@ from __future__ import annotations import warnings -from typing import TYPE_CHECKING, ClassVar, Optional +from typing import TYPE_CHECKING, ClassVar, Optional, cast from typing_extensions import Literal @@ -313,7 +313,7 @@ class JSON(ReadOnlyFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() extra = "allow" @slot @@ -393,7 +393,8 @@ def parse_column(self, column: str | Column, schema: StructType | ArrayType | Ma from pyspark.sql import Column, SparkSession from pyspark.sql.functions import col, from_json - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_serialization_options() if isinstance(column, Column): @@ -402,7 +403,7 @@ def parse_column(self, column: str | Column, schema: StructType | ArrayType | Ma column_name, column = column, col(column).cast("string") options = stringify(self.dict(by_alias=True, exclude_none=True)) - return from_json(column, schema, options).alias(column_name) + return from_json(column, schema, options).alias(column_name) # type: ignore[arg-type] def serialize_column(self, column: str | Column) -> Column: """ @@ -459,7 +460,8 @@ def serialize_column(self, column: str | Column) -> Column: from pyspark.sql import Column, SparkSession from pyspark.sql.functions import col, to_json - self.check_if_supported(SparkSession._instantiatedSession) # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 + self.check_if_supported(spark) self._check_unsupported_serialization_options() if isinstance(column, Column): diff --git a/onetl/file/format/jsonline.py b/onetl/file/format/jsonline.py index 05740396e..3fb3a8811 100644 --- a/onetl/file/format/jsonline.py +++ b/onetl/file/format/jsonline.py @@ -309,7 +309,7 @@ class JSONLine(ReadWriteFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() extra = "allow" @slot diff --git a/onetl/file/format/xml.py b/onetl/file/format/xml.py index 2abc4e95e..aa7f5cf17 100644 --- a/onetl/file/format/xml.py +++ b/onetl/file/format/xml.py @@ -4,7 +4,7 @@ import logging import warnings -from typing import TYPE_CHECKING, ClassVar, Optional, Union +from typing import TYPE_CHECKING, ClassVar, Optional, Union, cast from typing_extensions import Literal @@ -326,7 +326,7 @@ class XML(ReadWriteFileFormat): """ class Config: - known_options = frozenset() + known_options: frozenset[str] = frozenset() prohibited_options = frozenset(("path",)) # filled by FileDFReader/FileDFWriter extra = "allow" @@ -532,7 +532,7 @@ def parse_column(self, column: str | Column, schema: StructType) -> Column: """ from pyspark.sql import Column, SparkSession - spark = SparkSession._instantiatedSession # noqa: SLF001 + spark = cast("SparkSession", SparkSession._instantiatedSession) # noqa: SLF001 self.check_if_supported(spark) self._check_unsupported_serialization_options() @@ -546,7 +546,7 @@ def parse_column(self, column: str | Column, schema: StructType) -> Column: options = self.dict(by_alias=True, exclude_none=True) version = get_spark_version(spark) if version.major >= 4: # noqa: PLR2004 - from pyspark.sql.functions import from_xml + from pyspark.sql.functions import from_xml # type: ignore[attr-defined] return from_xml(column, schema, stringify(options)).alias(column_name) @@ -554,10 +554,10 @@ def parse_column(self, column: str | Column, schema: StructType) -> Column: java_column = _to_java_column(column) java_schema = spark._jsparkSession.parseDataType(schema.json()) # noqa: SLF001 - scala_options = spark._jvm.org.apache.spark.api.python.PythonUtils.toScalaMap( # noqa: SLF001 + scala_options = spark._jvm.org.apache.spark.api.python.PythonUtils.toScalaMap( # type: ignore[union-attr] # noqa: SLF001 stringify(options), ) - jc = spark._jvm.com.databricks.spark.xml.functions.from_xml( # noqa: SLF001 + jc = spark._jvm.com.databricks.spark.xml.functions.from_xml( # type: ignore[union-attr] # noqa: SLF001 java_column, java_schema, scala_options, diff --git a/onetl/impl/failed_local_file.py b/onetl/impl/failed_local_file.py index acad346ab..7514a301c 100644 --- a/onetl/impl/failed_local_file.py +++ b/onetl/impl/failed_local_file.py @@ -4,38 +4,47 @@ import os from dataclasses import dataclass +from typing import TYPE_CHECKING from onetl.impl.local_path import LocalPath from onetl.impl.path_container import PathContainer +if TYPE_CHECKING: -@dataclass(eq=False, frozen=True) -class FailedLocalFile(PathContainer[LocalPath]): - """ - Representation of existing local file with stat and attached exception object - """ + class FailedLocalFile(LocalPath): + def __init__(self, path: LocalPath, exception: Exception): ... - exception: Exception + @property + def exception(self) -> Exception: ... +else: - def __post_init__(self): - # frozen=True does not allow to change any field in __post_init__, small hack here - object.__setattr__(self, "path", LocalPath(self.path)) + @dataclass(eq=False, frozen=True) + class FailedLocalFile(PathContainer[LocalPath]): + """ + Representation of existing local file with stat and attached exception object + """ - def __repr__(self) -> str: - return f"{self.__class__.__name__}({os.fspath(self.path)!r}, {self.exception!r})" + exception: Exception - def exists(self) -> bool: - return self.path.exists() + def __post_init__(self): + # frozen=True does not allow to change any field in __post_init__, small hack here + object.__setattr__(self, "path", LocalPath(self.path)) - def is_file(self) -> bool: - return self.path.is_file() + def __repr__(self) -> str: + return f"{self.__class__.__name__}({os.fspath(self.path)!r}, {self.exception!r})" - def is_dir(self) -> bool: - return self.path.is_dir() + def exists(self) -> bool: + return self.path.exists() - def stat(self) -> os.stat_result: - return self.path.stat() + def is_file(self) -> bool: + return self.path.is_file() - # exceptions are not allowed to compare, another small hack - def _compare_tuple(self, args) -> tuple: - return tuple(str(arg) if isinstance(arg, Exception) else arg for arg in args) + def is_dir(self) -> bool: + return self.path.is_dir() + + def stat(self) -> os.stat_result: + return self.path.stat() + + # exceptions are not allowed to compare, another small hack + def _compare_tuple(self, args) -> tuple: + return tuple(str(arg) if isinstance(arg, Exception) else arg for arg in args) diff --git a/onetl/impl/local_path.py b/onetl/impl/local_path.py index 4de5067f0..fc483b047 100644 --- a/onetl/impl/local_path.py +++ b/onetl/impl/local_path.py @@ -3,20 +3,24 @@ import os import sys from pathlib import Path, PurePosixPath, PureWindowsPath +from typing import TYPE_CHECKING +from typing_extensions import TypeAlias -class LocalPath(Path): - def __new__(cls, *args, **kwargs): - if cls is LocalPath: - cls = LocalWindowsPath if os.name == "nt" else LocalPosixPath # noqa: PLW0642 - if sys.version_info < (3, 12): - return cls._from_parts(args) - return object.__new__(cls) +if TYPE_CHECKING: + LocalPath: TypeAlias = Path +else: + class LocalPath(Path): + def __new__(cls, *args, **kwargs): + if cls is LocalPath: + cls = LocalWindowsPath if os.name == "nt" else LocalPosixPath # noqa: PLW0642 + if sys.version_info < (3, 12): + return cls._from_parts(args) + return object.__new__(cls) -class LocalPosixPath(LocalPath, PurePosixPath): - pass + class LocalPosixPath(LocalPath, PurePosixPath): + pass - -class LocalWindowsPath(LocalPath, PureWindowsPath): - pass + class LocalWindowsPath(LocalPath, PureWindowsPath): + pass diff --git a/onetl/impl/remote_directory.py b/onetl/impl/remote_directory.py index 2403d5bc1..d57c57d2a 100644 --- a/onetl/impl/remote_directory.py +++ b/onetl/impl/remote_directory.py @@ -6,45 +6,55 @@ from dataclasses import dataclass, field from typing import TYPE_CHECKING +from onetl.base import PathProtocol, PathStatProtocol from onetl.impl.path_container import PathContainer from onetl.impl.remote_path import RemotePath from onetl.impl.remote_path_stat import RemotePathStat if TYPE_CHECKING: - from onetl.base import PathStatProtocol + class RemoteDirectory(PathProtocol, RemotePath): + def __init__(self, path: RemotePath, stats: PathStatProtocol): ... -@dataclass(eq=False, frozen=True) -class RemoteDirectory(PathContainer[RemotePath]): - """ - Representation of existing remote directory - """ + @property + def path(self) -> RemotePath: ... - stats: PathStatProtocol = field(default_factory=RemotePathStat) + @property + def stats(self) -> PathStatProtocol: ... - def __post_init__(self): - # frozen=True does not allow to change any field in __post_init__, small hack here - object.__setattr__(self, "path", RemotePath(self.path)) +else: - def is_dir(self) -> bool: - return True + @dataclass(eq=False, frozen=True) + class RemoteDirectory(PathContainer[RemotePath]): + """ + Representation of existing remote directory + """ - def is_file(self) -> bool: - return False + stats: PathStatProtocol = field(default_factory=RemotePathStat) - def exists(self) -> bool: - return True + def __post_init__(self): + # frozen=True does not allow to change any field in __post_init__, small hack here + object.__setattr__(self, "path", RemotePath(self.path)) - def stat(self) -> PathStatProtocol: - return self.stats + def is_dir(self) -> bool: + return True - @property - def parent(self) -> RemoteDirectory: - return RemoteDirectory(self.path.parent) + def is_file(self) -> bool: + return False - @property - def parents(self) -> list[RemoteDirectory]: - return [RemoteDirectory(parent) for parent in self.path.parents] + def exists(self) -> bool: + return True - def __repr__(self) -> str: - return f"{self.__class__.__name__}({os.fspath(self.path)!r})" + def stat(self) -> PathStatProtocol: + return self.stats + + @property + def parent(self) -> RemoteDirectory: + return RemoteDirectory(self.path.parent) + + @property + def parents(self) -> list[RemoteDirectory]: + return [RemoteDirectory(parent) for parent in self.path.parents] + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({os.fspath(self.path)!r})" diff --git a/onetl/impl/remote_file.py b/onetl/impl/remote_file.py index 0871f9052..15a43bc53 100644 --- a/onetl/impl/remote_file.py +++ b/onetl/impl/remote_file.py @@ -6,61 +6,75 @@ from dataclasses import dataclass from typing import TYPE_CHECKING +from onetl.base import PathProtocol, PathStatProtocol from onetl.impl.path_container import PathContainer from onetl.impl.remote_directory import RemoteDirectory from onetl.impl.remote_path import RemotePath if TYPE_CHECKING: - from onetl.base import PathStatProtocol + class RemoteFile(PathProtocol, RemotePath): + def __init__(self, path: RemotePath, stats: PathStatProtocol): ... -@dataclass(eq=False, frozen=True) -class RemoteFile(PathContainer[RemotePath]): - """ - Representation of existing remote file with stat - """ + @property + def path(self) -> RemotePath: ... - stats: PathStatProtocol + @property + def stats(self) -> PathStatProtocol: ... - def __post_init__(self): - # frozen=True does not allow to change any field in __post_init__, small hack here - object.__setattr__(self, "path", RemotePath(self.path)) + class FailedRemoteFile(RemoteFile): + def __init__(self, path: RemotePath, stats: PathStatProtocol, exception: Exception): ... - def __repr__(self) -> str: - return f"{self.__class__.__name__}({os.fspath(self.path)!r})" + @property + def exception(self) -> Exception: ... +else: - def is_dir(self) -> bool: - return False + @dataclass(eq=False, frozen=True) + class RemoteFile(PathContainer[RemotePath]): + """ + Representation of existing remote file with stat + """ - def is_file(self) -> bool: - return True + stats: PathStatProtocol - def exists(self) -> bool: - return True + def __post_init__(self): + # frozen=True does not allow to change any field in __post_init__, small hack here + object.__setattr__(self, "path", RemotePath(self.path)) - def stat(self) -> PathStatProtocol: - return self.stats + def __repr__(self) -> str: + return f"{self.__class__.__name__}({os.fspath(self.path)!r})" - @property - def parent(self) -> RemoteDirectory: - return RemoteDirectory(self.path.parent) + def is_dir(self) -> bool: + return False - @property - def parents(self) -> list[RemoteDirectory]: - return [RemoteDirectory(parent) for parent in self.path.parents] + def is_file(self) -> bool: + return True + def exists(self) -> bool: + return True -@dataclass(eq=False, frozen=True) -class FailedRemoteFile(RemoteFile): - """ - Representation of existing remote file with stat and attached exception object - """ + def stat(self) -> PathStatProtocol: + return self.stats - exception: Exception + @property + def parent(self) -> RemoteDirectory: + return RemoteDirectory(self.path.parent) - def __repr__(self) -> str: - return f"{self.__class__.__name__}({os.fspath(self.path)!r}, {self.exception!r})" + @property + def parents(self) -> list[RemoteDirectory]: + return [RemoteDirectory(parent) for parent in self.path.parents] - # exceptions are not allowed to compare, small hack here - def _compare_tuple(self, args) -> tuple: - return tuple(str(arg) if isinstance(arg, Exception) else arg for arg in args) + @dataclass(eq=False, frozen=True) + class FailedRemoteFile(RemoteFile): + """ + Representation of existing remote file with stat and attached exception object + """ + + exception: Exception + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({os.fspath(self.path)!r}, {self.exception!r})" + + # exceptions are not allowed to compare, small hack here + def _compare_tuple(self, args) -> tuple: + return tuple(str(arg) if isinstance(arg, Exception) else arg for arg in args) diff --git a/onetl/impl/remote_path.py b/onetl/impl/remote_path.py index 8120afb82..cf2d21ea5 100644 --- a/onetl/impl/remote_path.py +++ b/onetl/impl/remote_path.py @@ -1,7 +1,14 @@ # SPDX-FileCopyrightText: 2022-present MTS PJSC # SPDX-License-Identifier: Apache-2.0 + from pathlib import PurePosixPath +from typing import TYPE_CHECKING + +from typing_extensions import TypeAlias +from onetl.base.pure_path_protocol import PurePathProtocol -class RemotePath(PurePosixPath): - pass +if TYPE_CHECKING: + RemotePath: TypeAlias = PurePathProtocol +else: + RemotePath: TypeAlias = PurePosixPath diff --git a/onetl/strategy/incremental_strategy.py b/onetl/strategy/incremental_strategy.py index 0e44d1ee7..b59829d86 100644 --- a/onetl/strategy/incremental_strategy.py +++ b/onetl/strategy/incremental_strategy.py @@ -14,7 +14,7 @@ class IncrementalStrategy(HWMStrategy): """Incremental strategy for [db-reader][]/[file-downloader][]. Used for fetching only new rows/files from a source - by filtering items not covered by the previous [HWM][] value. + by filtering items not covered by the previous [hwm][] value. For [db-reader][]: First incremental run is just the same as diff --git a/pyproject.toml b/pyproject.toml index 40b14a2ff..a938e8e04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -198,6 +198,7 @@ dev = [ "prek~=0.3.1", "types-Deprecated~=1.3.1", "types-PyYAML~=6.0.12", + "types-requests~=2.33.0", ] docs = [ "mkdocs>=1.6,<2", @@ -303,12 +304,15 @@ ignore = [ [tool.mypy] python_version = "3.7" plugins = ["pydantic.mypy"] -exclude = "^(?=.*file).*" strict_optional = true ignore_missing_imports = true follow_imports = "silent" show_error_codes = true +[[tool.mypy.overrides]] +module=["paramiko.*"] +follow_untyped_imports = true + [tool.codespell] ignore-words-list = ["INOUT", "inout", "thirdparty"] diff --git a/tests/tests_unit/test_db/test_db_reader_unit/test_common_reader_unit.py b/tests/tests_unit/test_db/test_db_reader_unit/test_common_reader_unit.py index 5ded9a23d..e85c18a13 100644 --- a/tests/tests_unit/test_db/test_db_reader_unit/test_common_reader_unit.py +++ b/tests/tests_unit/test_db/test_db_reader_unit/test_common_reader_unit.py @@ -29,6 +29,11 @@ def test_reader_deprecated_import(): assert OldDBReader is DBReader +def test_reader_no_connection(): + with pytest.raises(ValueError, match="field required"): + DBReader(target="schema.table") + + def test_reader_source_alias(spark_mock): reader1 = DBReader( connection=Hive(cluster="rnd-dwh", spark=spark_mock), diff --git a/tests/tests_unit/test_db/test_db_writer_unit/test_common_writer_unit.py b/tests/tests_unit/test_db/test_db_writer_unit/test_common_writer_unit.py index b19453a71..d95052589 100644 --- a/tests/tests_unit/test_db/test_db_writer_unit/test_common_writer_unit.py +++ b/tests/tests_unit/test_db/test_db_writer_unit/test_common_writer_unit.py @@ -36,3 +36,8 @@ def test_writer_target_alias(spark_mock): ) assert writer1.target == writer2.target + + +def test_writer_no_connection(): + with pytest.raises(ValueError, match="field required"): + DBWriter(target="schema.table") diff --git a/uv.lock b/uv.lock index 3e45aeebf..7f7005328 100644 --- a/uv.lock +++ b/uv.lock @@ -11456,6 +11456,7 @@ dev = [ { name = "ruff", marker = "python_full_version >= '3.12' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, { name = "types-deprecated", marker = "python_full_version >= '3.12' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, { name = "types-pyyaml", version = "6.0.12.20260510", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, + { name = "types-requests", marker = "python_full_version >= '3.12' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, ] docs = [ { name = "griffe-inherited-docstrings", marker = "python_full_version >= '3.11' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, @@ -11680,6 +11681,7 @@ dev = [ { name = "ruff", marker = "python_full_version >= '3.12'", specifier = "~=0.15.4" }, { name = "types-deprecated", marker = "python_full_version >= '3.12'", specifier = "~=1.3.1" }, { name = "types-pyyaml", marker = "python_full_version >= '3.12'", specifier = "~=6.0.12" }, + { name = "types-requests", marker = "python_full_version >= '3.12'", specifier = "~=2.33.0" }, ] docs = [ { name = "griffe-inherited-docstrings", marker = "python_full_version >= '3.11'", specifier = ">=1.0,<2" }, @@ -23062,6 +23064,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d3/ad/fd618a218925daada7b8a5e7326e662599fa5fdff4a4c44ab2795bd2d9ca/types_pyyaml-6.0.12.20260510-py3-none-any.whl", hash = "sha256:3492eb9ba4d9d833473214c4d5736cccf5f37d93f5854059721e1c84f785309d", size = 20304, upload-time = "2026-05-10T05:26:26.981Z" }, ] +[[package]] +name = "types-requests" +version = "2.33.0.20260508" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12' or (extra == 'group-5-onetl-test-pydantic-1' and extra == 'group-5-onetl-test-pydantic-2') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-3') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-2' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-4') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-3' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-3-5') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-4' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-0') or (extra == 'group-5-onetl-test-spark-3-5' and extra == 'group-5-onetl-test-spark-4-1') or (extra == 'group-5-onetl-test-spark-4-0' and extra == 'group-5-onetl-test-spark-4-1')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c7/6b/eb226bdd61a982c9a03e02c657fb4ab001733506e6423906ac142331f2e3/types_requests-2.33.0.20260508.tar.gz", hash = "sha256:81b2ae5f0d20967714a6aa5ef9284c05570d7cb06b7de8f2a77b918b63ddd411", size = 23991, upload-time = "2026-05-08T04:50:56.818Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/96/080db0afdf2c5cc5fe512b41354e8d114fe8f65e9510c56ff8dfd40216ce/types_requests-2.33.0.20260508-py3-none-any.whl", hash = "sha256:fa01459cca184229713df03709db46a905325906d27e042cd4fd7ea3d15d3400", size = 20722, upload-time = "2026-05-08T04:50:55.548Z" }, +] + [[package]] name = "typing-extensions" version = "4.7.1"