diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 609d0e39a9..197f34d532 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -26,6 +26,7 @@ * Fixed `SELECT *` output being corrupted when joined tables share column names. Duplicate column names are now disambiguated by appending a numeric suffix (e.g. `NAME`, `NAME_2`). * Fixed `snow connection generate-jwt` and `snow connection generate-workload-identity-token` failing with `Connection None is not configured` when used with `--temporary-connection`. * The internal connection cache now remembers failed connect attempts and re-raises the original exception on subsequent accesses within the same process, instead of re-dialing Snowflake every time a command accesses the shared connection. This fixes, among other cases, the customer-visible duplicate `LOGIN_HISTORY` events (and `OVERFLOW_FAILURE_EVENTS_ELIDED`) previously emitted when a `snow` invocation was rejected by an authentication policy. +* `snow sql -f` and the `!source` directive now read SQL files as UTF-8 regardless of the process default text encoding, instead of relying on the platform's locale. This fixes `UnicodeDecodeError` crashes when reading UTF-8 SQL files on systems whose default encoding is not UTF-8 (for example Japanese Windows, which defaults to cp932). 
# v3.17.0 diff --git a/src/snowflake/cli/_plugins/sql/statement_reader.py b/src/snowflake/cli/_plugins/sql/statement_reader.py index 2a72d98b46..27af732547 100644 --- a/src/snowflake/cli/_plugins/sql/statement_reader.py +++ b/src/snowflake/cli/_plugins/sql/statement_reader.py @@ -214,7 +214,7 @@ def from_file(cls, path_part: str, raw_source: str) -> "ParsedStatement": path = SecurePath(stripped_comments_path_part) if path.is_file(): - payload = path.read_text(file_size_limit_mb=UNLIMITED) + payload = path.read_text(file_size_limit_mb=UNLIMITED, encoding="utf-8") return cls(payload, StatementType.FILE, path.as_posix()) error_msg = f"Could not read: {path_part}" @@ -334,7 +334,7 @@ def files_reader( Returns a generator with statements.""" for path in paths: - with path.open(read_file_limit_mb=UNLIMITED) as f: + with path.open(read_file_limit_mb=UNLIMITED, encoding="utf-8") as f: content = f.read() if pre_render: content = pre_render(content) diff --git a/tests/sql/test_statement_reader.py b/tests/sql/test_statement_reader.py index 1ffd413634..8eb9625a3d 100644 --- a/tests/sql/test_statement_reader.py +++ b/tests/sql/test_statement_reader.py @@ -184,6 +184,71 @@ def test_read_files(tmp_path_factory: pytest.TempPathFactory): ] +def _force_default_encoding(monkeypatch, encoding: str) -> None: + """Make pathlib.Path.open / read_text default to *encoding* in text mode. + + Mirrors what happens on a system whose default text encoding is not UTF-8 + (e.g. Japanese Windows where ``locale.getencoding() == 'cp932'``). + Monkeypatching ``locale.getpreferredencoding`` alone is not enough, because + pathlib forwards the ``"locale"`` sentinel from ``io.text_encoding`` to open(), + which reads the C-level locale — so we inject the encoding at the open() call site. 
+ """ + import pathlib + + original_open = pathlib.Path.open + + def patched_open(self, mode="r", *args, **kwargs): + if "b" not in mode and kwargs.get("encoding") in (None, "locale"): + # "locale" is the sentinel io.text_encoding returns when the caller + # did not specify an encoding; on Japanese Windows open() would + # resolve it to cp932. Override it so this box behaves the same. + kwargs["encoding"] = encoding + return original_open(self, mode, *args, **kwargs) + + monkeypatch.setattr(pathlib.Path, "open", patched_open) + + +def test_read_utf8_file_on_non_utf8_locale( + tmp_path_factory: pytest.TempPathFactory, monkeypatch +): + """UTF-8 SQL files must be readable regardless of the process default encoding. + + Regression test for https://github.com/snowflakedb/snowflake-cli/issues/2759 + where Japanese Windows defaults to cp932 and non-ASCII characters in UTF-8 + SQL files crash with UnicodeDecodeError. + """ + f1 = tmp_path_factory.mktemp("utf8") / "f1.sql" + f1.write_bytes("-- コメント\nselect 1;".encode("utf-8")) + + _force_default_encoding(monkeypatch, "cp932") + + files = (SecurePath(f1),) + errors, cnt, compiled = compile_statements( + files_reader(files, WORKING_OPERATOR_FUNCS, remove_comments=True), + ) + + assert not errors, errors + assert cnt == 1 + assert compiled == [CompiledStatement(statement="select 1;")] + + +def test_source_utf8_file_on_non_utf8_locale( + tmp_path_factory: pytest.TempPathFactory, monkeypatch +): + """!source must also read sourced SQL files as UTF-8 on non-UTF-8 locales.""" + sourced = tmp_path_factory.mktemp("utf8_src") / "sourced.sql" + sourced.write_bytes("-- 日本語\nselect 42;".encode("utf-8")) + + _force_default_encoding(monkeypatch, "cp932") + + query = f"!source {sourced.as_posix()};" + source = parse_statement(query, WORKING_OPERATOR_FUNCS) + + assert source.statement_type == StatementType.FILE + assert source.error is None + assert "select 42;" in source.statement.read() + + def test_parsed_source_repr(): query = "select 
1;"