Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions providers/sftp/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ dependencies = [
# TODO: Bump to >= 4.0.0 once https://github.com/apache/airflow/issues/54079 is handled
"paramiko>=3.5.1,<4.0.0",
"asyncssh>=2.12.0; python_version < '3.14'",
"asgiref>=3.5.2",
"asyncssh>=2.22.0; python_version >= '3.14'",
]

Expand Down
28 changes: 28 additions & 0 deletions providers/sftp/src/airflow/providers/sftp/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constants for the SFTP provider."""

from __future__ import annotations


class SFTPOperation:
    """Names of the operations that can be requested over SFTP."""

    # Values are lower-case; callers compare them against ``operation.lower()``.
    PUT = "put"
    GET = "get"
    DELETE = "delete"
93 changes: 93 additions & 0 deletions providers/sftp/src/airflow/providers/sftp/hooks/sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,75 @@ def get_files_by_pattern(self, path, fnmatch_pattern) -> list[str]:

return matched_files

def transfer(
    self,
    operation: str,
    local_filepath: str | list[str] | None,
    remote_filepath: str | list[str],
    confirm: bool = True,
    create_intermediate_dirs: bool = False,
    concurrency: int = 1,
    prefetch: bool = True,
) -> None:
    """
    Perform a synchronous SFTP transfer operation (GET, PUT, or DELETE).

    Centralises transfer logic so both the operator and the trigger
    can delegate to the hook, in line with the DRY principle.

    For GET and PUT the local and remote paths are paired positionally, so
    both must contain the same number of entries. Each pair is transferred
    as a directory or as a single file depending on what the source path is.
    For DELETE only ``remote_filepath`` is used; remote paths that no longer
    exist are logged and skipped so that the operation stays idempotent.

    :param operation: The SFTP operation - put, get, or delete (case-insensitive).
    :param local_filepath: Local file path(s).
    :param remote_filepath: Remote file path(s).
    :param confirm: Whether to confirm file size after PUT (default: True).
    :param create_intermediate_dirs: Create missing intermediate directories (default: False).
    :param concurrency: Number of threads for directory transfers (default: 1).
    :param prefetch: Whether to prefetch during GET (default: True).
    :raises ValueError: If ``operation`` is not supported, or if the number of
        local and remote paths differs for GET/PUT.
    """
    import errno

    from airflow.providers.sftp.constants import SFTPOperation

    op = operation.lower()
    supported = (SFTPOperation.GET, SFTPOperation.PUT, SFTPOperation.DELETE)
    if op not in supported:
        # Fail loudly: silently doing nothing would report a successful transfer.
        raise ValueError(f"Unsupported operation value {operation}, expected one of {supported}.")

    # Normalise both arguments to lists; an empty/None local path means "no local paths".
    if isinstance(local_filepath, str):
        local_paths = [local_filepath] if local_filepath else []
    else:
        local_paths = list(local_filepath or [])
    remote_paths = [remote_filepath] if isinstance(remote_filepath, str) else list(remote_filepath)

    if op == SFTPOperation.DELETE:
        for remote in remote_paths:
            try:
                if self.isdir(remote):
                    self.delete_directory(remote, include_files=True)
                else:
                    self.delete_file(remote)
            except OSError as exc:
                # A path that is already gone is not an error for a delete.
                if isinstance(exc, FileNotFoundError) or exc.errno == errno.ENOENT:
                    self.log.warning("Remote path %s does not exist. Skipping delete.", remote)
                    continue
                raise
        return

    if len(local_paths) != len(remote_paths):
        # zip() would silently drop the unmatched paths.
        raise ValueError(
            f"{len(local_paths)} paths in local_filepath != {len(remote_paths)} paths in remote_filepath"
        )

    if op == SFTPOperation.GET:
        for local, remote in zip(local_paths, remote_paths):
            if create_intermediate_dirs:
                Path(os.path.dirname(local)).mkdir(parents=True, exist_ok=True)
            if not self.isdir(remote):
                self.retrieve_file(remote, local, prefetch=prefetch)
            elif concurrency > 1:
                self.retrieve_directory_concurrently(remote, local, workers=concurrency, prefetch=prefetch)
            else:
                self.retrieve_directory(remote, local)
        return

    # Remaining case: PUT.
    for local, remote in zip(local_paths, remote_paths):
        if create_intermediate_dirs:
            self.create_directory(os.path.dirname(remote))
        if not os.path.isdir(local):
            self.store_file(remote, local, confirm=confirm)
        elif concurrency > 1:
            self.store_directory_concurrently(remote, local, confirm=confirm, workers=concurrency)
        else:
            self.store_directory(remote, local, confirm=confirm)


class SFTPHookAsync(BaseHook):
"""
Expand Down Expand Up @@ -1040,3 +1109,27 @@ async def get_mod_time(self, path: str) -> str: # type: ignore[return]
return mod_time
except asyncssh.SFTPNoSuchFile:
raise AirflowException("No files matching")

async def transfer(
    self,
    operation: str,
    local_filepath: str | list[str] | None,
    remote_filepath: str | list[str],
    confirm: bool = True,
    create_intermediate_dirs: bool = False,
    concurrency: int = 1,
    prefetch: bool = True,
) -> None:
    """
    Perform an SFTP transfer operation (GET, PUT, or DELETE) via a thread executor.

    A synchronous ``SFTPHook`` is created from this hook's ``sftp_conn_id`` and
    its ``transfer`` method is awaited through ``asgiref``'s ``sync_to_async``.
    All arguments are forwarded to that method unchanged.

    :param operation: The SFTP operation to run.
    :param local_filepath: Local file path(s).
    :param remote_filepath: Remote file path(s).
    :param confirm: Forwarded to ``SFTPHook.transfer``.
    :param create_intermediate_dirs: Forwarded to ``SFTPHook.transfer``.
    :param concurrency: Forwarded to ``SFTPHook.transfer``.
    :param prefetch: Forwarded to ``SFTPHook.transfer``.
    """
    from asgiref.sync import sync_to_async

    blocking_transfer = SFTPHook(ssh_conn_id=self.sftp_conn_id).transfer
    forwarded = {
        "operation": operation,
        "local_filepath": local_filepath,
        "remote_filepath": remote_filepath,
        "confirm": confirm,
        "create_intermediate_dirs": create_intermediate_dirs,
        "concurrency": concurrency,
        "prefetch": prefetch,
    }
    await sync_to_async(blocking_transfer)(**forwarded)
118 changes: 48 additions & 70 deletions providers/sftp/src/airflow/providers/sftp/operators/sftp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
Expand All @@ -15,29 +14,20 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module contains SFTP operator."""

from __future__ import annotations

import errno
import os
import socket
from collections.abc import Sequence
from pathlib import Path
from typing import Any

import paramiko

from airflow.providers.common.compat.sdk import AirflowException, BaseOperator
from airflow.providers.common.compat.sdk import AirflowException, BaseOperator, conf
from airflow.providers.sftp.constants import SFTPOperation
from airflow.providers.sftp.hooks.sftp import SFTPHook


class SFTPOperation:
    """Names of the operations that can be requested over SFTP."""

    # Values are lower-case; callers compare them against ``operation.lower()``.
    PUT = "put"
    GET = "get"
    DELETE = "delete"
from airflow.providers.sftp.triggers.sftp import SFTPTrigger


class SFTPOperator(BaseOperator):
Expand Down Expand Up @@ -95,6 +85,7 @@ def __init__(
create_intermediate_dirs: bool = False,
concurrency: int = 1,
prefetch: bool = True,
deferrable: bool = conf.getboolean("operators", "default_deferrable", fallback=False),
**kwargs,
) -> None:
super().__init__(**kwargs)
Expand All @@ -108,8 +99,25 @@ def __init__(
self.remote_filepath = remote_filepath
self.concurrency = concurrency
self.prefetch = prefetch
self.deferrable = deferrable

def execute(self, context: Any) -> str | list[str] | None:
if self.deferrable:
self.defer(
trigger=SFTPTrigger(
ssh_conn_id=self.ssh_conn_id,
local_filepath=self.local_filepath,
remote_filepath=self.remote_filepath,
operation=self.operation,
confirm=self.confirm,
create_intermediate_dirs=self.create_intermediate_dirs,
remote_host=self.remote_host,
concurrency=self.concurrency,
prefetch=self.prefetch,
),
method_name="execute_complete",
)

if self.local_filepath is None:
Comment thread
sunildataengineer marked this conversation as resolved.
local_filepath_array = []
elif isinstance(self.local_filepath, str):
Expand Down Expand Up @@ -163,62 +171,17 @@ def execute(self, context: Any) -> str | list[str] | None:
if not self.sftp_hook:
raise AirflowException("Cannot operate without sftp_hook or ssh_conn_id.")

if self.operation.lower() in (SFTPOperation.GET, SFTPOperation.PUT):
for _local_filepath, _remote_filepath in zip(local_filepath_array, remote_filepath_array):
if self.operation.lower() == SFTPOperation.GET:
local_folder = os.path.dirname(_local_filepath)
if self.create_intermediate_dirs:
Path(local_folder).mkdir(parents=True, exist_ok=True)
file_msg = f"from {_remote_filepath} to {_local_filepath}"
self.log.info("Starting to transfer %s", file_msg)
if self.sftp_hook.isdir(_remote_filepath):
if self.concurrency > 1:
self.sftp_hook.retrieve_directory_concurrently(
_remote_filepath,
_local_filepath,
workers=self.concurrency,
prefetch=self.prefetch,
)
elif self.concurrency == 1:
self.sftp_hook.retrieve_directory(_remote_filepath, _local_filepath)
else:
self.sftp_hook.retrieve_file(_remote_filepath, _local_filepath)
elif self.operation.lower() == SFTPOperation.PUT:
remote_folder = os.path.dirname(_remote_filepath)
if self.create_intermediate_dirs:
self.sftp_hook.create_directory(remote_folder)
file_msg = f"from {_local_filepath} to {_remote_filepath}"
self.log.info("Starting to transfer file %s", file_msg)
if os.path.isdir(_local_filepath):
if self.concurrency > 1:
self.sftp_hook.store_directory_concurrently(
_remote_filepath,
_local_filepath,
confirm=self.confirm,
workers=self.concurrency,
)
elif self.concurrency == 1:
self.sftp_hook.store_directory(
_remote_filepath, _local_filepath, confirm=self.confirm
)
else:
self.sftp_hook.store_file(_remote_filepath, _local_filepath, confirm=self.confirm)
elif self.operation.lower() == SFTPOperation.DELETE:
for _remote_filepath in remote_filepath_array:
file_msg = f"{_remote_filepath}"
self.log.info("Starting to delete %s", file_msg)
try:
if self.sftp_hook.isdir(_remote_filepath):
self.sftp_hook.delete_directory(_remote_filepath, include_files=True)
else:
self.sftp_hook.delete_file(_remote_filepath)
except OSError as exc:
if self._is_missing_path_error(exc):
self.log.warning(
"Remote path %s does not exist. Skipping delete.", _remote_filepath
)
continue
raise
file_msg = f"{self.operation.upper()} {self.local_filepath} <-> {self.remote_filepath}"
self.log.info("Starting to transfer %s", file_msg)
self.sftp_hook.transfer(
operation=self.operation,
local_filepath=self.local_filepath,
remote_filepath=self.remote_filepath,
confirm=self.confirm,
create_intermediate_dirs=self.create_intermediate_dirs,
concurrency=self.concurrency,
prefetch=self.prefetch,
)

except Exception as e:
raise AirflowException(
Expand All @@ -227,6 +190,21 @@ def execute(self, context: Any) -> str | list[str] | None:

return self.local_filepath

def execute_complete(self, context: Any, event: dict) -> str | list[str] | None:
"""
Execute when the trigger fires in deferrable mode.

:param context: The task context.
:param event: The event yielded by SFTPTrigger.
:return: The local filepath(s).
"""
if event.get("status") == "error":
raise AirflowException(
f"Error during deferrable SFTP {self.operation.upper()} operation: {event.get('message')}"
)
self.log.info("File transfer completed successfully via deferrable mode.")
return event.get("local_filepath")

@staticmethod
def _is_missing_path_error(exc: Exception) -> bool:
if isinstance(exc, FileNotFoundError):
Expand Down Expand Up @@ -316,4 +294,4 @@ def get_openlineage_facets_on_start(self):
def _get_namespace(self, scheme, host, port, path) -> str:
port = port or paramiko.config.SSH_PORT
authority = f"{host}:{port}"
return f"{scheme}://{authority}"
return f"{scheme}://{authority}"
Loading
Loading