Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/snowflake/snowpark/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7027,6 +7027,33 @@ def print_schema(self, level: Optional[int] = None) -> None:
# naturalJoin = natural_join
# withColumns = with_columns

def pipe(self, f, *args, **kwargs):
    """
    Applies a function to the DataFrame and returns the result.

    The DataFrame is passed to ``f`` as its first positional argument, followed
    by ``*args`` and ``**kwargs``. This lets user-defined transformations be
    chained in the same fluent style as built-in methods, for example
    ``df.pipe(step_one).pipe(step_two, some_arg)``.

    Args:
        f: A function to apply to the DataFrame. It is called as
            ``f(self, *args, **kwargs)``.
        *args: Additional positional arguments to pass to the function.
        **kwargs: Additional keyword arguments to pass to the function.

    Returns:
        The result of applying the function to the DataFrame. This is whatever
        ``f`` returns; it is handed back unchanged.

    Examples::

        >>> from snowflake.snowpark.functions import lit
        >>> df = session.create_dataframe([(1, "a"), (2, "b")], schema=["col1", "col2"])
        >>> def add_column(df):
        ...     return df.with_column("col3", lit(3))
        >>> new_df = df.pipe(add_column)
        >>> new_df.show()
        ----------------------------
        |"COL1"  |"COL2"  |"COL3"  |
        ----------------------------
        |1       |a       |3       |
        |2       |b       |3       |
        ----------------------------
        <BLANKLINE>
    """
    # Pure delegation: no validation or wrapping, so any exception raised by
    # ``f`` propagates to the caller unchanged.
    return f(self, *args, **kwargs)


def map(
dataframe: DataFrame,
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,24 @@ def test_dataFrame_printSchema(capfd, mock_server_connection):
)


def test_dataFrame_pipe(mock_server_connection):
Comment thread
Tijoxa marked this conversation as resolved.
Outdated
session = snowflake.snowpark.session.Session(mock_server_connection)
df = session.create_dataframe([[1, ""], [3, None]])
df._plan._metadata = PlanMetadata(
attributes=[
Attribute("A", IntegerType(), False),
Attribute("B", StringType()),
],
quoted_identifiers=None,
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like you copied this example from test_dataFrame_printSchema in the same file, but we should avoid modifying internal dataframe attributes during tests whenever possible. I don't think this is necessary for this test regardless.

Also, please modify test_func to actually do something to the dataframe, since right now it's a no-op and not really a meaningful test.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the new commits, I've updated the tests and tried to put the test in the right place. However, I'm not sure whether it belongs in tests/unit/test_dataframe.py, tests/integ/test_dataframe.py, or some other directory.


def test_func(df):
return df

result_df, expected_result = df.pipe(test_func), test_func(df)
assert result_df == expected_result


def test_session():
fake_session = mock.create_autospec(Session, _session_id=123456)
fake_session._analyzer = mock.Mock()
Expand Down
Loading