1 change: 1 addition & 0 deletions RELEASE-NOTES.md
@@ -26,6 +26,7 @@
* Fixed `SELECT *` output being corrupted when joined tables share column names. Duplicate column names are now disambiguated by appending a numeric suffix (e.g. `NAME`, `NAME_2`).
* Fixed `snow connection generate-jwt` and `snow connection generate-workload-identity-token` failing with `Connection None is not configured` when used with `--temporary-connection`.
* The internal connection cache now remembers failed connect attempts and re-raises the original exception on subsequent accesses within the same process, instead of re-dialing Snowflake every time a command accesses the shared connection. This fixes, among other cases, the customer-visible duplicate `LOGIN_HISTORY` events (and `OVERFLOW_FAILURE_EVENTS_ELIDED`) previously emitted when a `snow` invocation was rejected by an authentication policy.
* `snow sql -f <file>` and the `!source <file>` include directive now honor a UTF-8 / UTF-16 / UTF-32 byte-order mark at the start of a SQL file and decode it using the matching encoding. This fixes `snow sql` on Windows when the input was produced by a PowerShell `>` redirect (which writes UTF-16 LE with a BOM) or by an editor that prepends a UTF-8 BOM; previously these files crashed with `UnicodeDecodeError` or leaked U+FEFF into the first statement.


# v3.17.0
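As context for the BOM entry in the release-notes diff above, here is a minimal, hypothetical repro sketch (not CLI code) of the failure mode it describes: PowerShell's `>` redirect writes UTF-16 with a BOM, which a naive UTF-8 read cannot decode, while the matching codec consumes the BOM itself.

# Hypothetical repro of the pre-fix failure mode; assumes nothing from the CLI.
payload = "select 1;".encode("utf-16")  # BOM + native-order UTF-16 bytes

try:
    payload.decode("utf-8")  # pre-fix behavior: every SQL file was read as UTF-8
except UnicodeDecodeError as exc:
    print(exc)  # 0xff/0xfe are never valid UTF-8 bytes, so the decode fails

print(payload.decode("utf-16"))  # 'select 1;', with the BOM consumed by the codec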
55 changes: 42 additions & 13 deletions src/snowflake/cli/_plugins/sql/statement_reader.py
@@ -1,3 +1,4 @@
import codecs
import enum
import io
import re
@@ -24,6 +25,35 @@

ASYNC_SUFFIX = ";>"

# Byte-order marks checked longest-first so that the 4-byte UTF-32 LE mark
# (b"\xff\xfe\x00\x00") is matched before the 2-byte UTF-16 LE mark
# (b"\xff\xfe") that prefixes it.
_BOM_ENCODINGS: tuple[tuple[bytes, str], ...] = (
(codecs.BOM_UTF32_LE, "utf-32"),
(codecs.BOM_UTF32_BE, "utf-32"),
(codecs.BOM_UTF8, "utf-8-sig"),
(codecs.BOM_UTF16_LE, "utf-16"),
(codecs.BOM_UTF16_BE, "utf-16"),
)


def _read_sql_file(path: SecurePath) -> str:
"""Read a SQL text file, honoring a UTF-8/16/32 BOM if present.

PowerShell's ``>`` redirect and many Windows editors write files with a
byte-order mark. Without BOM handling those files either crash with a
UnicodeDecodeError (UTF-16) or leak a stray U+FEFF into the first
statement (UTF-8 BOM). When no BOM is present we fall back to UTF-8,
matching how the CLI writes files elsewhere.
"""
with path.open("rb", read_file_limit_mb=UNLIMITED) as f:
raw = f.read()
for bom, encoding in _BOM_ENCODINGS:
if raw.startswith(bom):
# ``utf-16``/``utf-32`` consume the BOM themselves; ``utf-8-sig``
# strips the UTF-8 BOM. Decode the full payload in each case.
return raw.decode(encoding)
return raw.decode("utf-8")


# Regex that recognises SQL tokens whose contents must not be scanned for
# comment syntax. Alternatives are tried left-to-right:
@@ -214,7 +244,7 @@ def from_file(cls, path_part: str, raw_source: str) -> "ParsedStatement":
path = SecurePath(stripped_comments_path_part)

if path.is_file():
payload = path.read_text(file_size_limit_mb=UNLIMITED)
payload = _read_sql_file(path)
return cls(payload, StatementType.FILE, path.as_posix())

error_msg = f"Could not read: {path_part}"
@@ -334,18 +364,17 @@ def files_reader(

Returns a generator with statements."""
for path in paths:
with path.open(read_file_limit_mb=UNLIMITED) as f:
content = f.read()
if pre_render:
content = pre_render(content)
stmts = split_statements(io.StringIO(content), remove_comments)
yield from recursive_statement_reader(
stmts,
[path.as_posix()],
operators,
remove_comments,
pre_render,
)
content = _read_sql_file(path)
if pre_render:
content = pre_render(content)
stmts = split_statements(io.StringIO(content), remove_comments)
yield from recursive_statement_reader(
stmts,
[path.as_posix()],
operators,
remove_comments,
pre_render,
)


def query_reader(
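A side note on the ordering of `_BOM_ENCODINGS` above, sketched assuming a little-endian platform: the UTF-32 LE mark begins with the UTF-16 LE mark, so probing the 2-byte mark first would silently mis-decode UTF-32 files rather than raise.

import codecs

# BOM_UTF32_LE (b"\xff\xfe\x00\x00") starts with BOM_UTF16_LE (b"\xff\xfe"),
# which is why the 4-byte marks must be probed first.
assert codecs.BOM_UTF32_LE.startswith(codecs.BOM_UTF16_LE)

raw = "select 1;".encode("utf-32")  # BOM + UTF-32 payload
print(repr(raw.decode("utf-16")))   # NUL-interleaved garbage; the wrong codec "succeeds"
print(raw.decode("utf-32"))         # 'select 1;', the intended decode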
93 changes: 93 additions & 0 deletions tests/sql/test_statement_reader.py
@@ -1,3 +1,4 @@
import codecs
from functools import partial

import pytest
@@ -9,6 +10,7 @@
ParsedStatement,
StatementType,
_protect_sql_comments,
_read_sql_file,
compile_statements,
files_reader,
parse_statement,
@@ -541,3 +543,94 @@ def test_protect_comments_roundtrip_through_jinja():
)
restored = saved.restore(rendered)
assert restored == "-- {{ not_a_var }}\nSELECT /* {{ also_not }} */ 1 WHERE x = 42;"


# ---------------------------------------------------------------------------
# BOM / encoding tests — regression coverage for SNOW-1528909
#
# PowerShell's ``>`` redirect writes UTF-16 LE by default, and many Windows
# editors add a UTF-8 BOM. Without BOM handling the CLI either crashes with a
# UnicodeDecodeError on UTF-16 or leaks a U+FEFF into the first statement.
# ---------------------------------------------------------------------------


@pytest.mark.parametrize(
("encoding", "bom"),
[
# BOM-emitting codec names — Python appends the BOM itself.
("utf-16", b""),
("utf-8-sig", b""),
("utf-32", b""),
# No-BOM codec names — we prepend the BOM manually to mimic what
# PowerShell / Notepad actually write on Windows.
("utf-16-le", codecs.BOM_UTF16_LE),
("utf-16-be", codecs.BOM_UTF16_BE),
("utf-32-le", codecs.BOM_UTF32_LE),
("utf-32-be", codecs.BOM_UTF32_BE),
],
)
def test_read_sql_file_decodes_bom_prefixed_file(
tmp_path_factory: pytest.TempPathFactory, encoding, bom
):
f = tmp_path_factory.mktemp("a") / "f.sql"
# Non-ASCII content ensures a naive UTF-8 decode of a UTF-16/UTF-32 file
# would raise UnicodeDecodeError — before the fix this was exactly the
# failure mode reported in the issue for PowerShell-redirected files.
text = "-- コメント\nselect 1;"
f.write_bytes(bom + text.encode(encoding))

assert _read_sql_file(SecurePath(f)) == text


def test_read_sql_file_utf8_without_bom(tmp_path_factory: pytest.TempPathFactory):
# Regression guard: plain UTF-8 (the common case) stays on the fast path.
f = tmp_path_factory.mktemp("a") / "f.sql"
text = "-- ascii only\nselect 3;"
f.write_bytes(text.encode("utf-8"))

assert _read_sql_file(SecurePath(f)) == text


def test_read_sql_file_strips_utf8_bom(tmp_path_factory: pytest.TempPathFactory):
# If the UTF-8 BOM leaked through, the first statement would start with
# U+FEFF and Snowflake would reject it as a syntax error.
f = tmp_path_factory.mktemp("a") / "f.sql"
f.write_bytes(b"\xef\xbb\xbfselect 1;")

result = _read_sql_file(SecurePath(f))

assert result == "select 1;"
assert "" not in result


@pytest.mark.parametrize("encoding", ["utf-16", "utf-8-sig"])
def test_files_reader_decodes_bom_prefixed_sql(
tmp_path_factory: pytest.TempPathFactory, encoding
):
f = tmp_path_factory.mktemp("a") / "f.sql"
f.write_bytes("select 1;".encode(encoding))

errors, cnt, compiled = compile_statements(
files_reader((SecurePath(f),), WORKING_OPERATOR_FUNCS),
)

assert not errors, errors
assert cnt == 1
assert compiled == [CompiledStatement(statement="select 1;")]


@pytest.mark.parametrize("encoding", ["utf-16", "utf-8-sig"])
def test_source_directive_decodes_bom_prefixed_file(
tmp_path_factory: pytest.TempPathFactory, encoding
):
# ``!source <file>`` goes through ParsedStatement.from_file, a different
# code path from files_reader. Both must honor BOMs.
included = tmp_path_factory.mktemp("a") / "included.sql"
included.write_bytes("select 2;\n".encode(encoding))

query = f"!source {included.as_posix()};"
source = parse_statement(query, WORKING_OPERATOR_FUNCS)

assert source.error is None
assert source.statement_type == StatementType.FILE
assert source.statement.read() == "select 2;\n"
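One last aside on the codec names in the parametrized test above, since the split is easy to trip over: Python's generic "utf-16"/"utf-32" encoders emit the BOM themselves, while the endian-specific names do not, which is why the test prepends the mark manually for the latter group. A minimal check of that assumption:

import codecs

# Generic codec names write a BOM on encode...
with_bom = "select 1;".encode("utf-16")
assert with_bom[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)

# ...endian-specific names do not, so the test mimics PowerShell/Notepad-style
# writers by prepending the mark by hand.
without_bom = "select 1;".encode("utf-16-le")
assert not without_bom.startswith(codecs.BOM_UTF16_LE)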