-
Notifications
You must be signed in to change notification settings - Fork 201
Lazily load spooled result set segments #597
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,6 +39,7 @@ | |
| import base64 | ||
| import copy | ||
| import functools | ||
| import itertools | ||
| import os | ||
| import random | ||
| import re | ||
|
|
@@ -904,9 +905,26 @@ def execute(self, additional_http_headers=None) -> TrinoResult: | |
| rows = self._row_mapper.map(status.rows) if self._row_mapper else status.rows | ||
| self._result = TrinoResult(self, rows) | ||
|
|
||
| # Execute should block until at least one row is received or query is finished or cancelled | ||
| while not self.finished and not self.cancelled and len(self._result.rows) == 0: | ||
| self._result.rows += self.fetch() | ||
| # Block until rows are available, the query finishes, or it is canceled. | ||
| # Rows start as an empty list. Early responses often contain only stats, | ||
| # so we keep fetching until actual data arrives. | ||
| # | ||
| # Two protocols produce rows differently: | ||
| # - Direct: fetch() returns a list - accumulate into the existing list. | ||
| # - Spooling: fetch() returns a lazy iterator - replace rows and stop, | ||
| # because we cannot cheaply check iterator length. | ||
|
wendigo marked this conversation as resolved.
|
||
| while not self.finished and not self.cancelled and self._result.rows == []: | ||
| new_rows = self.fetch() | ||
| if isinstance(new_rows, list): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. do we also need this check in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why? I don't think we do
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
| self._result.rows += new_rows | ||
| else: | ||
| try: | ||
| first_row = next(new_rows) | ||
| self._result.rows = itertools.chain([first_row], new_rows) | ||
| break | ||
| except StopIteration: | ||
| self._result.rows = [] | ||
|
|
||
| return self._result | ||
|
|
||
| def _update_state(self, status): | ||
|
|
@@ -920,7 +938,7 @@ def _update_state(self, status): | |
| if status.columns: | ||
| self._columns = status.columns | ||
|
|
||
| def fetch(self) -> List[Union[List[Any]], Any]: | ||
| def fetch(self) -> Union[List[Union[List[Any], Any]], Iterator[List[Any]]]: | ||
| """Continue fetching data for the current query_id""" | ||
| try: | ||
| response = self._request.get(self._request.next_uri) | ||
|
|
@@ -941,7 +959,8 @@ def fetch(self) -> List[Union[List[Any]], Any]: | |
| spooled = self._to_segments(rows) | ||
| if self._fetch_mode == "segments": | ||
| return spooled | ||
| return list(SegmentIterator(spooled, self._row_mapper)) | ||
| # Return iterator directly, do NOT materialize with list() | ||
| return SegmentIterator(spooled, self._row_mapper) | ||
| elif isinstance(status.rows, list): | ||
| return self._row_mapper.map(rows) | ||
| else: | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.