Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE-NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* Fixed `SELECT *` output being corrupted when joined tables share column names. Duplicate column names are now disambiguated by appending a numeric suffix (e.g. `NAME`, `NAME_2`).
* Fixed `snow connection generate-jwt` and `snow connection generate-workload-identity-token` failing with `Connection None is not configured` when used with `--temporary-connection`.
* The internal connection cache now remembers failed connect attempts and re-raises the original exception on subsequent accesses within the same process, instead of re-dialing Snowflake every time a command accesses the shared connection. This fixes, among other cases, the customer-visible duplicate `LOGIN_HISTORY` events (and `OVERFLOW_FAILURE_EVENTS_ELIDED`) previously emitted when a `snow` invocation was rejected by an authentication policy.
* `snow sql -f` and the `!source` directive now read SQL files as UTF-8 regardless of the process default text encoding, instead of relying on the platform's locale. This fixes `UnicodeDecodeError` crashes when reading UTF-8 SQL files on systems whose default encoding is not UTF-8 (for example Japanese Windows, which defaults to cp932).


# v3.17.0
Expand Down
4 changes: 2 additions & 2 deletions src/snowflake/cli/_plugins/sql/statement_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def from_file(cls, path_part: str, raw_source: str) -> "ParsedStatement":
path = SecurePath(stripped_comments_path_part)

if path.is_file():
payload = path.read_text(file_size_limit_mb=UNLIMITED)
payload = path.read_text(file_size_limit_mb=UNLIMITED, encoding="utf-8")
return cls(payload, StatementType.FILE, path.as_posix())

error_msg = f"Could not read: {path_part}"
Expand Down Expand Up @@ -334,7 +334,7 @@ def files_reader(

Returns a generator with statements."""
for path in paths:
with path.open(read_file_limit_mb=UNLIMITED) as f:
with path.open(read_file_limit_mb=UNLIMITED, encoding="utf-8") as f:
content = f.read()
if pre_render:
content = pre_render(content)
Expand Down
65 changes: 65 additions & 0 deletions tests/sql/test_statement_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,71 @@ def test_read_files(tmp_path_factory: pytest.TempPathFactory):
]


def _force_default_encoding(monkeypatch, encoding: str) -> None:
"""Make pathlib.Path.open / read_text default to *encoding* in text mode.

Mirrors what happens on a system whose default text encoding is not UTF-8
(e.g. Japanese Windows where ``locale.getencoding() == 'cp932'``).
Monkeypatching ``locale.getpreferredencoding`` alone is not enough, because
pathlib resolves the default via ``io.text_encoding`` which consults the
C-level locale — we have to inject the encoding at the open() call site.
"""
import pathlib

original_open = pathlib.Path.open

def patched_open(self, mode="r", *args, **kwargs):
if "b" not in mode and kwargs.get("encoding") in (None, "locale"):
# "locale" is the sentinel io.text_encoding returns when the caller
# did not specify an encoding; on Japanese Windows open() would
# resolve it to cp932. Override it so this box behaves the same.
kwargs["encoding"] = encoding
return original_open(self, mode, *args, **kwargs)

monkeypatch.setattr(pathlib.Path, "open", patched_open)


def test_read_utf8_file_on_non_utf8_locale(
    tmp_path_factory: pytest.TempPathFactory, monkeypatch
):
    """Reading a UTF-8 SQL file must not depend on the default text encoding.

    Regression test for https://github.com/snowflakedb/snowflake-cli/issues/2759:
    on Japanese Windows the default encoding is cp932, and UTF-8 SQL files
    containing non-ASCII characters crashed with UnicodeDecodeError.
    """
    # Write raw bytes so the on-disk content is UTF-8 whatever the platform.
    sql_file = tmp_path_factory.mktemp("utf8") / "f1.sql"
    sql_file.write_bytes("-- コメント\nselect 1;".encode("utf-8"))

    # From here on, text-mode opens without an explicit encoding use cp932.
    _force_default_encoding(monkeypatch, "cp932")

    statements = files_reader(
        (SecurePath(sql_file),), WORKING_OPERATOR_FUNCS, remove_comments=True
    )
    errors, cnt, compiled = compile_statements(statements)

    # The comment line is stripped, leaving exactly one statement.
    expected = [CompiledStatement(statement="select 1;")]
    assert not errors, errors
    assert cnt == 1
    assert compiled == expected


def test_source_utf8_file_on_non_utf8_locale(
    tmp_path_factory: pytest.TempPathFactory, monkeypatch
):
    """``!source`` must read the sourced SQL file as UTF-8 on non-UTF-8 locales."""
    # Write raw bytes so the on-disk content is UTF-8 whatever the platform.
    sql_file = tmp_path_factory.mktemp("utf8_src") / "sourced.sql"
    sql_file.write_bytes("-- 日本語\nselect 42;".encode("utf-8"))

    # From here on, text-mode opens without an explicit encoding use cp932.
    _force_default_encoding(monkeypatch, "cp932")

    parsed = parse_statement(
        f"!source {sql_file.as_posix()};", WORKING_OPERATOR_FUNCS
    )

    assert parsed.statement_type == StatementType.FILE
    assert parsed.error is None
    sourced_sql = parsed.statement.read()
    assert "select 42;" in sourced_sql


def test_parsed_source_repr():
query = "select 1;"

Expand Down
Loading