diff --git a/ibis/_tstring.py b/ibis/_tstring.py new file mode 100644 index 000000000000..b57c203fa8c4 --- /dev/null +++ b/ibis/_tstring.py @@ -0,0 +1,249 @@ +"""A Backport of PEP 750 Template Strings (t-strings).""" + +from __future__ import annotations + +import re +import sys +from dataclasses import dataclass +from itertools import zip_longest +from typing import TYPE_CHECKING, Literal, NoReturn, Protocol, runtime_checkable + +if TYPE_CHECKING: + from collections.abc import Iterator + +__all__ = [ + "Interpolation", + "Template", + "t", +] + +# Regex to find and parse an f-string-like interpolation. +# It captures: +# 1. The main expression. +# 2. An optional debug specifier (=). +# 3. An optional conversion specifier (!r, !s, or !a). +# 4. An optional format specifier (:...). +_INTERPOLATION_RE = re.compile( + r""" + \{ + # The core expression, non-greedy + (?P<expression>.+?) + # Optional debug specifier + (?P<debug>=)? + # Optional conversion, one of !r, !s, or !a + (?P<conversion>![rsa])? + # Optional format spec, starting with a colon, non-greedy until } + (?P<format_spec>:[^}]*)? + } + """, + re.VERBOSE | re.DOTALL, +) + +if sys.version_info >= (3, 10): + dataclass_extra_args = {"slots": True} +else: + dataclass_extra_args = {} + + +@runtime_checkable +class PInterpolation(Protocol): + """Protocol for an object that can be interpreted as a PEP 750 t-string Interpolation.""" + + @property + def value(self) -> object: ... + @property + def expression(self) -> str: ... + @property + def conversion(self) -> Literal["a", "r", "s"] | None: ... + @property + def format_spec(self) -> str: ... + + +@dataclass(frozen=True, eq=False, **dataclass_extra_args) +class Interpolation: + """Emulates the string.templatelib.Interpolation class from PEP 750. + + Represents an expression inside a template string. 
+ """ + + value: object + expression: str + conversion: Literal["a", "r", "s"] | None = None + format_spec: str = "" + + def __eq__(self, value: object) -> bool: + """Template and Interpolation instances compare with object identity (is).""" + return self is value + + def __hash__(self) -> int: + """Hash based on identity.""" + return id(self) + + +@runtime_checkable +class PTemplate(Protocol): + """Protocol for an object that can be interpreted as a PEP 750 t-string Template.""" + + @property + def strings(self) -> tuple[str, ...]: ... + @property + def interpolations(self) -> tuple[PInterpolation, ...]: ... + + +@dataclass(frozen=True, eq=False, **dataclass_extra_args) +class Template: + """Emulates the string.templatelib.Template class from PEP 750. + + Represents a parsed t-string literal. + """ + + strings: tuple[str, ...] + """ + A non-empty tuple of the string parts of the template, + with N+1 items, where N is the number of interpolations + in the template. + """ + interpolations: tuple[Interpolation, ...] + """ + A tuple of the interpolation parts of the template. + This will be an empty tuple if there are no interpolations. + """ + + @property + def values(self) -> tuple[object, ...]: + """A tuple of the `value` attributes of each Interpolation in the template. + + This will be an empty tuple if there are no interpolations. + """ + return tuple(interp.value for interp in self.interpolations) + + def __iter__(self) -> Iterator[str | Interpolation]: + """Iterate over the string parts and interpolations in the template. + + These may appear in any order. Empty strings will not be included. 
+ """ + for s, i in zip_longest(self.strings, self.interpolations): + if s: + yield s + if i: + yield i + + def __add__(self, other: Template) -> Template: + """Adds two templates together.""" + # lazy duck-typing isinstance check + if not hasattr(other, "strings") or not hasattr(other, "interpolations"): + return NotImplemented + *first, final = self.strings + other_first, *other_rest = other.strings + return self.__class__( + strings=(*first, final + other_first, *other_rest), + interpolations=self.interpolations + other.interpolations, + ) + + def __eq__(self, value: object) -> bool: + """Template and Interpolation instances compare with object identity (is).""" + return self is value + + def __hash__(self) -> int: + """Hash based on identity.""" + return id(self) + + def __str__(self) -> NoReturn: + """Explicitly disallowed.""" + raise TypeError("Template instances cannot be converted to strings directly.") + + +def t(template_string: str, /, frame: int | None = None) -> Template: + # Get the execution frame of the caller to evaluate expressions in their scope. + # sys._getframe(0) is the current frame + # sys._getframe(1) is the frame of the caller + # So if we called t("foo"), we want the frame where t("foo") was called, eg one above this + # If we called t("foo", frame=1), we want 1 frame above where t("foo", frame=1) was called, + # which is ACTUALLY frame=2 in this function + if frame is None: + frame = 1 + else: + frame = frame + 1 + caller_frame = sys._getframe(frame) + caller_globals = caller_frame.f_globals + caller_locals = caller_frame.f_locals + + strings = [] + interpolations = [] + last_end = 0 + + for match in _INTERPOLATION_RE.finditer(template_string): + # Add the static string part before this interpolation + strings.append(template_string[last_end : match.start()]) + last_end = match.end() + + groups = match.groupdict() + + # The debug specifier is syntactic sugar. It modifies both the + # preceding string part and the interpolation itself. 
+ if groups["debug"]: + # t'{value=}' becomes t'value={value!r}' + # t'{value=:fmt}' becomes t'value={value!s:fmt}' + + # Find the position of the '=' in the original match string + # so we can split the expression and the '=' (with whitespace) + expr_with_possible_ws = groups["expression"] + # Find the '=' at the end (possibly with whitespace before/after) + eq_index = expr_with_possible_ws.rfind("=") + if eq_index != -1: + expr_for_static = expr_with_possible_ws[: eq_index + 1] + # Remove trailing whitespace and the '=' for evaluation + expr_for_eval = expr_with_possible_ws[:eq_index] + # Strip all whitespace from both ends for evaluation + expr_for_eval = expr_for_eval.strip() + # Remove any trailing '=' if present (shouldn't be, but for safety) + if expr_for_eval.endswith("="): + expr_for_eval = expr_for_eval[:-1].rstrip() + else: + expr_for_static = expr_with_possible_ws + "=" + expr_for_eval = expr_with_possible_ws.strip() + + # Prepend 'expression=' (with whitespace) to the *current* static string. + strings[-1] += expr_for_static + + # For debug specifier, strip trailing '=' and whitespace for evaluation + # (already done above) + + if groups["conversion"]: + raise SyntaxError("f-string: cannot specify both conversion and '='") + + # If a format spec is present, conversion becomes 's'. Otherwise, 'r'. 
+ conv_char = "s" if groups["format_spec"] else "r" + expression_to_eval = expr_for_eval + else: + conv_char = groups["conversion"][1] if groups["conversion"] else None + expression_to_eval = groups["expression"] + + fmt_spec = groups["format_spec"][1:] if groups["format_spec"] else "" + + # Dedent multiline expressions for evaluation + import textwrap + + expr_eval_str = textwrap.dedent(expression_to_eval) + + # Evaluate the expression to get its value using the caller's context + try: + value = eval(expr_eval_str, caller_globals, caller_locals) # noqa: S307 + except Exception as e: + # Re-raise with more context + msg = f"Failed to evaluate expression '{expression_to_eval}': {e}" + raise RuntimeError(msg) from e + + interpolations.append( + Interpolation( + value=value, + expression=expression_to_eval, + conversion=conv_char, + format_spec=fmt_spec, + ) + ) + + # Add the final static string part after the last interpolation + strings.append(template_string[last_end:]) + + return Template(strings=tuple(strings), interpolations=tuple(interpolations)) diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index 030f8cdefa44..b2aed7525b97 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -1608,6 +1608,28 @@ def visit_DropColumns(self, op, *, parent, columns_to_drop): ) return sg.select(*columns_to_keep).from_(parent) + def visit_TemplateSQLValue( + self, + op: ops.TemplateSQLValue, + *, + strings: tuple[str], + values: tuple[sge.Expression], + dialect: str, + dtype: dt.DataType, + ): + def iter(): + for s, i in itertools.zip_longest(strings, values): + if s: + yield s + if i: + yield i + + str_parts = [ + part if isinstance(part, str) else part.sql(dialect) for part in iter() + ] + sql = "".join(str_parts) + return sg.parse_one(sql, read=dialect) + def add_query_to_expr(self, *, name: str, table: ir.Table, query: str) -> str: dialect = self.dialect diff --git 
a/ibis/backends/sqlite/converter.py b/ibis/backends/sqlite/converter.py index 63516dc591a4..2bdc042a3c1e 100644 --- a/ibis/backends/sqlite/converter.py +++ b/ibis/backends/sqlite/converter.py @@ -1,13 +1,9 @@ from __future__ import annotations -import pandas as pd from packaging.version import parse as vparse from ibis.formats.pandas import PandasData -# The "mixed" format was added in pandas 2 -_DATETIME_FORMAT = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None - class SQLitePandasData(PandasData): @classmethod @@ -20,5 +16,8 @@ def convert_Timestamp(cls, s, dtype, pandas_type): try: return super().convert_Timestamp(s, dtype, pandas_type) except ValueError: + import pandas as pd + # Parsing failed, try a more relaxed parser - return pd.to_datetime(s, format=_DATETIME_FORMAT, utc=True) + format = "mixed" if vparse(pd.__version__) >= vparse("2.0.0") else None + return pd.to_datetime(s, format=format, utc=True) diff --git a/ibis/backends/tests/test_template.py b/ibis/backends/tests/test_template.py new file mode 100644 index 000000000000..2d8ac4a4d846 --- /dev/null +++ b/ibis/backends/tests/test_template.py @@ -0,0 +1,232 @@ +from __future__ import annotations + +import contextlib + +import pytest + +import ibis +from ibis.common import exceptions as exc +from ibis.expr import datatypes as dt +from ibis.tstring import t + +five = ibis.literal(5) +world = ibis.literal("world") + + +@contextlib.contextmanager +def set_default_backend(backend: str): + """Context manager to set the default backend temporarily. + + eg + with set_default_backend('duckdb'): + ... 
+ """ + original = ibis.get_backend() + ibis.set_backend(backend) + try: + yield + finally: + ibis.set_backend(original) + + +@pytest.mark.notimpl(["polars"]) +@pytest.mark.parametrize( + ("template", "expected_result"), + [ + (t("{five} + 3"), 8), + (t("{five:.2f} + 3"), 8), # format strings ignored + (t("'hello ' || {world}"), "hello world"), + (t("'hello ' || {world!r}"), "hello world"), # conversion strings ignored + ], +) +def test_scalar(con, template, expected_result): + """Test that scalar template expressions execute correctly.""" + expr = ibis.sql_value(template) + result = con.execute(expr) + assert result == expected_result + + +@pytest.mark.parametrize( + "typ", + [None, "timestamp('America/Anchorage')"], +) +def test_uninferrable_dtype(typ): + """Test behavior when a template's dtype can't be inferred using sqlglot""" + # parse a UTC timestamp into alaska local time, eg "8/1/2024 21:44:00" into 2024-08-01 13:44:00 (8 hours before UTC). + con = ibis.duckdb.connect() + timestamp = ibis.timestamp("2024-08-01 21:44:00") # noqa: F841 + template = t("{timestamp} AT TIME ZONE 'UTC' AT TIME ZONE 'America/Anchorage'") + val = ibis.sql_value(template, type=typ) + if typ is None: + assert val.type().is_unknown() + else: + assert val.type() == ibis.dtype(typ) + + # Still, even if the type couldn't be inferred, we can still cast it to string later + # and everything works. 
+ in_ak_string = val.cast(str).name("in_ak_time") + assert isinstance(in_ak_string, ibis.ir.StringScalar) + + expected_sql = '''SELECT + CAST(MAKE_TIMESTAMP(2024, 8, 1, 21, 44, 0.0) AT TIME ZONE 'UTC' AT TIME ZONE 'America/Anchorage' AS TEXT) AS "in_ak_time"''' + actual_sql = in_ak_string.to_sql() + assert actual_sql == expected_sql + + result = con.execute(in_ak_string) + expected = "2024-08-01 13:44:00" + assert result == expected + + +@pytest.mark.notimpl(["polars"]) +def test_column(con, alltypes, backend): + """Test template with column interpolation.""" + c = alltypes.int_col # noqa: F841 + template = t("{c + 2} - 1") + expr = ibis.sql_value(template) + assert isinstance(expr, ibis.ir.IntegerColumn) + assert expr.type() == dt.int64 + result = con.execute(expr) + expected = con.execute(alltypes.int_col + 1) + backend.assert_series_equal(result, expected, check_names=False) + + +@pytest.mark.notimpl(["polars"]) +def test_deferred(con, alltypes, backend): + """Test template with a Deferred interpolation that is bound to a table later.""" + i = ibis._.int_col # noqa: F841 + template = t("{i + 2} - 1") + expr = ibis.sql_value(template) + assert isinstance(expr, ibis.Deferred) + with pytest.raises(TypeError): + # We can't execute a Deferred directly, we need to bind it to the table first + con.execute(expr) + (bound,) = alltypes.bind(expr) + result = con.execute(bound) + expected = con.execute(alltypes.int_col + 1) + backend.assert_series_equal(result, expected, check_names=False) + + +@pytest.mark.notimpl(["polars"]) +def test_direct_select(con, alltypes, backend): + """Test passing templates directly to Table.select without calling sql_value.""" + i = ibis._.int_col # noqa: F841 + five = 5 # noqa: F841 + selected = alltypes.select( + scalar=t("{five} - 1"), + col=t("{alltypes.int_col + 2} - 1"), + deferred=t("cast({i + 2} as varchar)"), + ) + expected = alltypes.select( + scalar=ibis.literal(4).cast("int32"), + col=(alltypes.int_col + 1).cast("int64"), + deferred=(alltypes.int_col + 2).cast("string"), + ) + actual_schema = 
selected.schema() + expected_schema = expected.schema() + assert expected_schema == actual_schema + result = con.execute(selected) + expected_result = con.execute(expected) + backend.assert_frame_equal(result, expected_result) + + +def test_sqlite_template_correctly_executed_on_duckdb(): + pa = pytest.importorskip("pyarrow") + five = ibis.literal(5) # noqa: F841 + template = t("CAST({five} AS REAL)") + + expr_sqlite = ibis.sql_value(template, dialect="sqlite") + expr_default = ibis.sql_value(template) + + con_sqlite = ibis.sqlite.connect() + result = con_sqlite.to_pyarrow(expr_default) + assert result.type == pa.float32() + assert result.as_py() == 5.0 + result = con_sqlite.to_pyarrow(expr_sqlite) + assert result.type == pa.float64() + assert result.as_py() == 5.0 + + con_duckdb = ibis.duckdb.connect() + result = con_duckdb.to_pyarrow(expr_default) + assert result.type == pa.float32() + assert result.as_py() == 5.0 + result = con_duckdb.to_pyarrow(expr_sqlite) + assert result.type == pa.float64() + assert result.as_py() == 5.0 + + +@pytest.mark.parametrize( + "template_dialect,dialect_override,default_backend,expected_dialect", + [ + # If the template doesn't rely on a backend... + (None, None, "sqlite", "sqlite"), + (None, None, "duckdb", "duckdb"), + (None, "sqlite", "duckdb", "sqlite"), + (None, "sqlite", "sqlite", "sqlite"), + (None, "duckdb", "duckdb", "duckdb"), + (None, "duckdb", "sqlite", "duckdb"), + # If the template relies on a backend... 
+ ("sqlite", None, "sqlite", "sqlite"), + ("sqlite", None, "duckdb", "sqlite"), + ("sqlite", "sqlite", "duckdb", "sqlite"), + ("sqlite", "sqlite", "sqlite", "sqlite"), + ("sqlite", "duckdb", "duckdb", "duckdb"), + ("sqlite", "duckdb", "sqlite", "duckdb"), + ], +) +def test_dialect_inferrence( + template_dialect, dialect_override, default_backend, expected_dialect +): + if template_dialect is None: + templ = ibis.t("4 + 5") + elif template_dialect == "sqlite": + con = ibis.sqlite.connect() + table = con.create_table("t1", {"i": [1, 2, 3]}) # noqa: F841 + templ = ibis.t("{table.i} + 5") + else: + raise ValueError(f"Unexpected template_dialect: {template_dialect}") + + with set_default_backend(default_backend): + expr = ibis.sql_value(templ, dialect=dialect_override) + actual = expr.op().dialect # ty:ignore[possibly-missing-attribute] + assert actual == expected_dialect + + +def test_multiple_backends_errors(): + """If you try to create a sql_value that relies on multiple backends, raise.""" + sqlite1 = ibis.sqlite.connect() + sqlite2 = ibis.sqlite.connect() + t1 = sqlite1.create_table("t1", {"i": [1, 2, 3]}) + t2 = sqlite2.create_table("t2", {"i": [4, 5, 6]}) + scalar1 = t1.i.sum() # noqa: F841 + scalar2 = t2.i.sum() # noqa: F841 + + template_same_backend = ibis.t("{scalar1} + {scalar1}") + actual = ibis.sql_value(template_same_backend).execute() + expected = sqlite1.execute(t1.i.sum() + t1.i.sum()) + assert actual == expected + + template_different_backends = ibis.t("{scalar1} + {scalar2}") + with pytest.raises( + exc.IbisInputError, + match="A SQL value can only depend on a single relation, got 2", + ): + ibis.sql_value(template_different_backends) + + +def test_multiple_relations_errors(): + """If you try to create a sql_value that relies on multiple relations, raise.""" + con = ibis.sqlite.connect() + t1 = con.create_table("t1", {"i": [1, 2, 3]}) + t2 = t1.mutate(i2=t1.i + 10) + + template_same_relation = ibis.t("{t2.i.sum()} + {t2.i2}") + actual = 
ibis.sql_value(template_same_relation).execute() + expected = con.execute(t2.i.sum() + t2.i2) + assert (actual == expected).all() + + template_different_relations = ibis.t("{t1.i.sum()} + {t2.i2}") + with pytest.raises( + exc.IbisInputError, + match="A SQL value can only depend on a single relation, got 2", + ): + ibis.sql_value(template_different_relations) diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 66fa1372cf13..d9010f3bfb1f 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -16,7 +16,8 @@ import ibis.expr.operations as ops import ibis.expr.schema as sch import ibis.expr.types as ir -from ibis import selectors, util +from ibis import selectors, tstring, util +from ibis._tstring import PTemplate, t from ibis.backends import BaseBackend, connect from ibis.common.deferred import Deferred, _, deferrable from ibis.common.dispatch import lazy_singledispatch @@ -54,6 +55,7 @@ import polars as pl import pyarrow as pa import pyarrow.dataset as ds + import sqlglot as sg from ibis.expr.schema import SchemaLike @@ -120,7 +122,9 @@ "schema", "selectors", "set_backend", + "sql_value", "struct", + "t", "table", "time", "timestamp", @@ -128,6 +132,7 @@ "today", "trailing_range_window", "trailing_window", + "tstring", "union", "uuid", "watermark", @@ -728,6 +733,134 @@ def or_(*predicates: ir.BooleanValue | bool) -> ir.BooleanValue | bool: return functools.reduce(operator.or_, predicates) +def sql_value( + template: PTemplate, + /, + *, + dialect: str | sg.Dialect | None = None, + type: dt.IntoDtype | None = None, +) -> ir.Value | Deferred: + """Create an ibis value from a t-string. + + t-strings, or Template Strings, were added as builtin syntax in Python 3.14. + For more information, see https://docs.python.org/3.14/library/string.templatelib.html + + This function allows you to create an ibis value expression from a t-string. + It does NOT support generic SELECT statements, only expressions that + represent a single value, such as `my_table.my_column + 5`. 
+ + Parameters + ---------- + template + The template to use for creating the SQL expression. + dialect + The SQL dialect to use for the expression. + If not provided, will be inferred from the backend of any expressions in the template. + If the template contains no expressions-with-backends, defaults to `ibis.options.sql.default_dialect`. + type + The datatype to use for the value. + If not given, will be inferred using SQLGlot. + + Returns + ------- + Value | Deferred + If any of the interpolations in the template are Deferred, returns a + Deferred expression. + Otherwise, returns a Value expression. + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> con = ibis.duckdb.connect() + >>> table = con.create_table("my_table", {"a": [1, 2, 3], "b": [4, 5, 6]}) + + If you are using python 3.14+, you can replace the lines + below with `t"{table.b} + 3 - {table.a / 10}"`. + Here, since we are testing on older versions, + we use a tiny implementation of t-strings included in ibis that works as a replacement. + + >>> # use `t"{table.b} + 3 - {table.a / 10}"` in Python 3.14+ + >>> template = ibis.t("{table.b} + 3 - {table.a / 10}") + >>> expr = ibis.sql_value(template) + >>> print(expr.to_sql()) + SELECT + "t0"."b" + 3 - "t0"."a" / 10 AS "TemplateSQL((), (b, Divide(a, 10)))" + FROM "memory"."main"."my_table" AS "t0" + >>> table.mutate(expr=expr, s=expr.cast(str) + "!") + ┏━━━━━━━┳━━━━━━━┳━━━━━━━━━┳━━━━━━━━┓ + ┃ a ┃ b ┃ expr ┃ s ┃ + ┡━━━━━━━╇━━━━━━━╇━━━━━━━━━╇━━━━━━━━┩ + │ int64 │ int64 │ float64 │ string │ + ├───────┼───────┼─────────┼────────┤ + │ 1 │ 4 │ 6.9 │ 6.9! │ + │ 2 │ 5 │ 7.8 │ 7.8! │ + │ 3 │ 6 │ 8.7 │ 8.7! 
│ + └───────┴───────┴─────────┴────────┘ + + If you don't need the `dialect` or `type` parameters, you can also use the template + string directly wherever an ibis expression is expected, for example in Table.mutate: + + >>> # use `table.mutate(b2=t"{table.b} * 2")` in Python 3.14+ + >>> table.mutate(b2=ibis.t("{table.b} * 2")) + ┏━━━━━━━┳━━━━━━━┳━━━━━━━┓ + ┃ a ┃ b ┃ b2 ┃ + ┡━━━━━━━╇━━━━━━━╇━━━━━━━┩ + │ int64 │ int64 │ int64 │ + ├───────┼───────┼───────┤ + │ 1 │ 4 │ 8 │ + │ 2 │ 5 │ 10 │ + │ 3 │ 6 │ 12 │ + └───────┴───────┴───────┘ + + You can provide a `dialect` parameter if you pass in a template written in + a specific SQL dialect, and then this will be transpiled to + the correct dialect upon execution. + + For example, write a template in sqlite syntax (with datatype REAL) + and then execute it on duckdb (where REAL will be interpreted as DOUBLE). + + >>> # use `t"CAST({table.a} AS REAL)"` in Python 3.14+ + >>> expr = ibis.sql_value(ibis.t("CAST({table.a} AS REAL)"), dialect="sqlite") + >>> arr = con.to_pyarrow(expr) + >>> arr.type + DataType(double) + >>> arr.to_pylist() + [1.0, 2.0, 3.0] + + If the template contains any Deferred interpolations, + the resulting expression will also be Deferred, not a concrete ibis Value expression, + since we can't infer the datatype until the Deferreds are resolved: + + >>> # use `t"{ibis._.a} % 2 = 0"` in Python 3.14+ + >>> is_even = ibis.sql_value(ibis.t("{ibis._.a} % 2 = 0")) + >>> type(is_even) + <class 'ibis.common.deferred.Deferred'> + >>> is_odd = (~is_even).name("is_odd") + >>> table.mutate(is_odd, is_even=is_even) + ┏━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━┓ + ┃ a ┃ b ┃ is_odd ┃ is_even ┃ + ┡━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━┩ + │ int64 │ int64 │ bool │ bool │ + ├───────┼───────┼────────┼─────────┤ + │ 1 │ 4 │ True │ False │ + │ 2 │ 5 │ False │ True │ + │ 3 │ 6 │ True │ False │ + └───────┴───────┴────────┴─────────┘ + """ + if any(isinstance(i.value, Deferred) for i in template.interpolations): + from ibis.expr.operations.template_deferred import 
TemplateValueResolver + + resolver = TemplateValueResolver(template=template, dialect=dialect, dtype=type) + return Deferred(resolver) + else: + from ibis.expr.operations.template import TemplateSQLValue + + return TemplateSQLValue.from_template( + template, dialect=dialect, dtype=type + ).to_expr() + + def random() -> ir.FloatingScalar: """Return a random floating point number in the range [0.0, 1.0). diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index 4264b00168f6..e0521d305a00 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -39,6 +39,17 @@ import pyarrow as pa from pandas.api.extensions import ExtensionDtype + IntoDtype = Union[ + "DataType", + str, + type, + pa.DataType, + ExtensionDtype, + pl.DataType, + ] + """Something that can get converted to an ibis.DataType with `ibis.dtype(x)`""" + public(IntoDtype=IntoDtype) + @overload def dtype(value: type[int] | Literal["int"], nullable: bool = True) -> Int64: ... diff --git a/ibis/expr/operations/__init__.py b/ibis/expr/operations/__init__.py index ba89a1d4d2d4..34fd7c904ce8 100644 --- a/ibis/expr/operations/__init__.py +++ b/ibis/expr/operations/__init__.py @@ -16,6 +16,7 @@ from ibis.expr.operations.strings import * # noqa: F403 from ibis.expr.operations.structs import * # noqa: F403 from ibis.expr.operations.subqueries import * # noqa: F403 +from ibis.expr.operations.template import TemplateSQLValue # noqa: F401 from ibis.expr.operations.temporal import * # noqa: F403 from ibis.expr.operations.temporal_windows import * # noqa: F403 from ibis.expr.operations.udf import * # noqa: F403 diff --git a/ibis/expr/operations/template.py b/ibis/expr/operations/template.py new file mode 100644 index 000000000000..e0b5741d0e58 --- /dev/null +++ b/ibis/expr/operations/template.py @@ -0,0 +1,196 @@ +"""Operations for template strings (t-strings).""" + +from __future__ import annotations + +from itertools import zip_longest +from typing import TYPE_CHECKING, Any + +import 
sqlglot as sg +import sqlglot.expressions as sge +from public import public +from sqlglot.optimizer.annotate_types import annotate_types + +import ibis +import ibis.expr.datashape as ds +import ibis.expr.datatypes as dt +import ibis.expr.rules as rlz +from ibis.common.annotations import attribute +from ibis.common.deferred import Deferred +from ibis.common.exceptions import IbisInputError +from ibis.common.typing import VarTuple # noqa: TC001 +from ibis.expr import operations as ops +from ibis.expr.operations.util import find_backend + +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + + from ibis.backends.sql.datatypes import SqlglotType + from ibis.expr.operations.relations import Relation + from ibis.tstring import PTemplate + + +Dialect = str + + +@public +class TemplateSQLValue(ops.Value): + strings: VarTuple[str] + values: VarTuple[ops.Value] + dialect: Dialect + """The SQL dialect the template was written in. + + eg if t'CAST({val} AS REAL)', you should use 'sqlite', + since REAL is a sqlite-specific concept. 
+ """ + dtype: dt.DataType + + @classmethod + def from_template( + cls, + template: PTemplate, + /, + *, + dialect: Dialect | None = None, + dtype: dt.IntoDtype | None = None, + ) -> TemplateSQLValue: + raw_values = [interp.value for interp in template.interpolations] + resolved_values = ensure_values(raw_values) + if dialect is None: + backend = find_backend(resolved_values) + if backend is None: + dialect = ibis.options.sql.default_dialect + else: + # Check for eg polars backends + from ibis.backends.sql import SQLBackend + + if not isinstance(backend, SQLBackend): + raise IbisInputError( + f"Expected a SQL backend, got {type(backend)}: {backend}" + ) + dialect = backend.name + if dtype is None: + parts = interleave(template.strings, resolved_values) + sql = sql_from_parts(parts, dialect=dialect) + dtype = dtype_from_sql(dialect, sql) + dtype = dt.dtype(dtype) + return cls( + strings=template.strings, + values=resolved_values, + dialect=dialect, + dtype=dtype, + ) + + @attribute + def shape(self): + if not self.values: + return ds.scalar + return rlz.highest_precedence_shape(self.values) + + @attribute + def relations(self) -> frozenset[Relation]: + return relations_of_vals(self.values) + + @property + def sql_for_inference(self) -> str: + parts = interleave(self.strings, self.values) + return sql_from_parts(parts, dialect=self.dialect) + + @property + def type_mapper(self) -> SqlglotType: + return get_type_mapper(self.dialect) + + +def relations_of_vals(vals: Iterable[ops.Value]) -> frozenset[Relation]: + children = (v.relations for v in vals) + return frozenset().union(*children) + + +def ensure_values(raw: Iterable[Any]) -> tuple[ops.Value, ...]: + raw = [_try_to_op_value(x) for x in raw] + already_values = [v for v in raw if isinstance(v, ops.Value)] + relations = relations_of_vals(already_values) + if len(relations) > 1: + raise IbisInputError( + f"A SQL value can only depend on a single relation, got {len(relations)}" + ) + relation = next(iter(relations), 
None) + return tuple(ensure_value(r, relation) for r in raw) + + +def _try_to_op_value(x): + from ibis.expr import types as ir + + if isinstance(x, ops.Value): + return x + if isinstance(x, ir.Value): + return x.op() + return x + + +def ensure_value(raw: Any, relation: Relation | None) -> ops.Value: + result = _ensure_value(raw, relation) + if not isinstance(result, ops.Value): + raise TypeError( + f"Could not convert object {raw} of type {type(raw)} to Value in context of relation {relation}" + ) + return result + + +def _ensure_value(raw: Any, relation: Relation | None) -> ops.Value: + if isinstance(raw, ops.Value): + return raw + if relation is None: + return ibis.literal(raw).op() + if isinstance(raw, Deferred): + return raw.resolve(relation).op() + if callable(raw): + called = raw(relation) + return ensure_value(called, relation) + return ibis.literal(raw).op() + + +def interleave( + strings: Iterable[str], values: Iterable[ops.Value] +) -> tuple[str | ops.Value, ...]: + FILL = object() + + def iter() -> Iterator[str | ops.Value]: + for s, v in zip_longest(strings, values, fillvalue=FILL): + if s is not FILL: + yield s + if v is not FILL: + yield v + + return tuple(iter()) + + +def sql_from_parts(parts: tuple[str | ops.Value, ...], dialect: Dialect) -> str: + result: list[str] = [] + for part in parts: + if isinstance(part, str): + result.append(part) + else: + ibis_type: dt.DataType = part.dtype + null_sqlglot_value = sge.cast( + sge.null(), get_type_mapper(dialect).from_ibis(ibis_type) + ) + result.append(null_sqlglot_value.sql(dialect)) + return "".join(result) + + +def dtype_from_sql(dialect: Dialect, sql: str) -> dt.DataType: + try: + parsed = sg.parse_one(sql, dialect=dialect) + except sg.errors.ParseError as e: + raise IbisInputError(f"failed to parse {sql}") from e + annotated = annotate_types(parsed, dialect=dialect) + sqlglot_type = annotated.type + type_mapper = get_type_mapper(dialect) + return type_mapper.to_ibis(sqlglot_type) + + +def 
get_type_mapper(dialect: Dialect) -> SqlglotType: + """Get the type mapper for the given SQL dialect.""" + from ibis.backends.sql.datatypes import TYPE_MAPPERS + + return TYPE_MAPPERS[dialect.lower()] diff --git a/ibis/expr/operations/template_deferred.py b/ibis/expr/operations/template_deferred.py new file mode 100644 index 000000000000..2ae6d3e8eeb2 --- /dev/null +++ b/ibis/expr/operations/template_deferred.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from ibis._tstring import Interpolation, PInterpolation, PTemplate, Template +from ibis.common.bases import FrozenSlotted +from ibis.common.deferred import Resolver, resolver +from ibis.expr import datatypes as dt +from ibis.expr.operations.template import TemplateSQLValue, interleave + +if TYPE_CHECKING: + from ibis.expr import types as ir + from ibis.expr.operations.template import Dialect + + +class TemplateValueResolver(FrozenSlotted, Resolver): + __slots__ = ("dialect", "dtype", "template") + template: PTemplate + dialect: Dialect + dtype: dt.DataType | None + + def __init__( + self, + template, + dialect: Dialect | None = None, + dtype: dt.IntoDtype | None = None, + ): + if dialect is None: + dialect = "duckdb" + dtype = dt.dtype(dtype) if dtype is not None else None + super().__init__(template=template, dialect=dialect, dtype=dtype) + + def __repr__(self): + vals = [i.value for i in self.template.interpolations] + parts = interleave(self.template.strings, vals) + repr_parts = [str(part) for part in parts] + template = "".join(repr_parts) + return ( + f"{type(self).__name__}(" + f"template={template!r}, " + f"dialect={self.dialect!r}, " + f"dtype={self.dtype!r})" + ) + + def resolve(self, context: dict[str, Any]) -> ir.Value: + resolved_template = resolve_template_values(self.template, context) + sql_value_op = TemplateSQLValue.from_template( + resolved_template, + dialect=self.dialect, + dtype=self.dtype, + ) + return sql_value_op.to_expr() + + +def 
resolve_template_values(template: PTemplate, context: dict[str, Any]) -> Template: + """Take a PTemplate, and return a Template with all of the interpolation values resolved.""" + + def ensure_resolved(i: PInterpolation) -> Interpolation: + resolver_obj = resolver(i.value) + resolved = resolver_obj.resolve(context) + return Interpolation( + value=resolved, + expression=i.expression, + conversion=i.conversion, + format_spec=i.format_spec, + ) + + resolved_interpolations = tuple(ensure_resolved(i) for i in template.interpolations) + return Template(strings=template.strings, interpolations=resolved_interpolations) diff --git a/ibis/expr/operations/tests/test_template.py b/ibis/expr/operations/tests/test_template.py new file mode 100644 index 000000000000..44757bb8f6f2 --- /dev/null +++ b/ibis/expr/operations/tests/test_template.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +import pytest + +import ibis +import ibis.expr.datatypes as dt +from ibis.expr.operations.template import TemplateSQLValue +from ibis.tstring import t + + +def test_set_backend(con, monkeypatch): + monkeypatch.setattr(ibis.options, "default_backend", None) + ibis.set_backend(con) + assert ibis.get_backend() is con + + +@pytest.mark.parametrize( + "five", + [ + pytest.param(5, id="int"), + pytest.param(ibis.literal(5), id="literal"), + pytest.param(ibis.literal(5).op(), id="value"), + ], +) +def test_scalar(five): # noqa: ARG001 + template = t("{five} + 4") + op = TemplateSQLValue.from_template(template) + assert op.dialect == "duckdb" + assert op.shape.is_scalar() + assert op.dtype == dt.int32 + + +def test_column(): + col = ibis.memtable({"c": ["a", "b"]}).c # noqa: F841 + template = t("{col} || 'c'") + op = TemplateSQLValue.from_template(template) + assert op.dialect == "duckdb" + assert op.shape.is_columnar() + assert op.dtype == dt.string + + +def test_dialect(): + # When parsed in sqlite dialect, REAL is interpreted as float64, + # in default duckdb dialect, REAL is interpreted as 
float32 + five = ibis.literal(5) # noqa: F841 + template = t("CAST({five} AS REAL)") + + op = TemplateSQLValue.from_template(template, dialect="sqlite") + assert op.dialect == "sqlite" + assert op.shape.is_scalar() + assert op.dtype == dt.float64 + + op = TemplateSQLValue.from_template(template) + assert op.dialect == "duckdb" + assert op.shape.is_scalar() + assert op.dtype == dt.float32 + + +def test_no_interpolations(): + template = t("5 + 4") + op = TemplateSQLValue.from_template(template) + assert op.dialect == "duckdb" + assert op.shape.is_scalar() + assert op.dtype == dt.int32 + + +def test_select_errors(): + five = ibis.literal(5) # noqa: F841 + template = t("SELECT {five}") + with pytest.raises(TypeError, match=r".*SELECT CAST\(NULL AS TINYINT\)"): + TemplateSQLValue.from_template(template) + + +def test_api(): + five = ibis.literal(5) # noqa: F841 + template = t("{five} + 4") + expr = ibis.sql_value(template) + assert isinstance(expr, ibis.Value) + assert expr.type().is_integer() + assert expr.type().nullable + + +def test_name(): + five = ibis.literal(5) # noqa: F841 + template = t("{five} + 4") + expr = ibis.sql_value(template) + actual = expr.get_name() + assert actual + # explicitly not tested + # expected_name = "TemplateSQL((), (5,))" + # assert actual == expected_name diff --git a/ibis/expr/operations/tests/test_template_deferred.py b/ibis/expr/operations/tests/test_template_deferred.py new file mode 100644 index 000000000000..0dd26da33a0e --- /dev/null +++ b/ibis/expr/operations/tests/test_template_deferred.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +import ibis +from ibis.tstring import t + + +def test_repr(): + five = 5 # noqa: F841 + d = ibis.sql_value(t("{ibis._.foo + 3} * {five}")) + r = repr(d) + expected = """TemplateValueResolver(template='(_.foo + 3) * 5', dialect='duckdb', dtype=None)""" + assert r == expected diff --git a/ibis/expr/operations/util.py b/ibis/expr/operations/util.py new file mode 100644 index 
000000000000..5c9a08468603 --- /dev/null +++ b/ibis/expr/operations/util.py @@ -0,0 +1,102 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import ibis.expr.operations as ops +from ibis.common.exceptions import IbisError + +if TYPE_CHECKING: + from collections.abc import Iterable + + import ibis.expr.types as ir + from ibis.backends import BaseBackend + + +def find_backends(nodish: ir.Expr | ops.Node, /) -> tuple[list[BaseBackend], bool]: + """Return the possible backends for an expression. + + Returns + ------- + tuple[list[BaseBackend], bool] + A list of the backends found, and a boolean indicating whether there + are any unbound tables in the expression. + """ + node = ensure_node(nodish) + backends = set() + has_unbound = False + node_types = (ops.UnboundTable, ops.DatabaseTable, ops.SQLQueryResult) + for table in node.find(node_types): + if isinstance(table, ops.UnboundTable): + has_unbound = True + else: + backends.add(table.source) + + return list(backends), has_unbound + + +def find_backend( + nodes: ir.Expr | ops.Node | Iterable[ir.Expr | ops.Node], / +) -> BaseBackend | None: + """Find the backend attached to some expressions, if any. + + Parameters + ---------- + nodes : Expr or Node, or Iterable[Expr or Node] + The expressions to find the backend for. + + Returns + ------- + BaseBackend | None + A backend that is attached to one of the expressions, or `None` if no backend + is found. + + Raises + ------ + IbisError + If multiple backends are found. 
+ """ + import ibis.expr.types as ir + + if isinstance(nodes, ir.Expr): + n = [nodes.op()] + elif isinstance(nodes, ops.Node): + n = [nodes] + else: + n = [ensure_node(node) for node in nodes] + + raw_backends = {_find_backend(node) for node in n} + backends = {b for b in raw_backends if b is not None} + if not backends: + return None + if len(backends) > 1: + raise IbisError( + f"Cannot determine backend from values with multiple backends: {backends}" + ) + result = next(iter(backends)) + return result + + +def _find_backend(node: ops.Node, /) -> BaseBackend | None: + backends, has_unbound = find_backends(node) + if not backends: + if has_unbound: + raise IbisError( + "Expression contains unbound tables and therefore cannot " + "be executed. Use `.execute(expr)` to execute " + "against an explicit backend, or rebuild the expression " + "using bound tables instead." + ) + return None + if len(backends) > 1: + raise IbisError("Multiple backends found for this expression") + return backends[0] + + +def ensure_node(raw: ir.Expr | ops.Node) -> ops.Node: + if isinstance(raw, ops.Node): + return raw + import ibis.expr.types as ir + + if isinstance(raw, ir.Expr): + return raw.op() + raise TypeError(f"Could not convert object {raw} of type {type(raw)} to Node") diff --git a/ibis/expr/tests/test_api.py b/ibis/expr/tests/test_api.py index f58e58471614..ba2d5e6fae3b 100644 --- a/ibis/expr/tests/test_api.py +++ b/ibis/expr/tests/test_api.py @@ -1,6 +1,7 @@ from __future__ import annotations import operator +import sys from datetime import datetime import pytest @@ -195,3 +196,29 @@ def test_unbound_table_namespace(): with pytest.raises(ValueError, match="A catalog-only namespace is invalid in Ibis"): ibis.table(name="bork", schema=(("a", "int"), ("b", "int")), catalog="bork") + + +def test_sql_value_deferred(): + five = ibis.literal(5) # noqa: F841 + expr = ibis.sql_value("{ibis._.age} + {five} + {ibis._.age + five}") + assert isinstance(expr, ibis.Deferred) + table = 
ibis.table(name="t", schema={"age": "int64"}) + (col,) = table.bind(expr) + assert isinstance(col, ibis.ir.IntegerColumn) + assert col.type() == dt.int64 + + +@pytest.mark.skipif( + sys.version_info < (3, 14), + reason="t'string' literals are only available in Python 3.14+", +) +def test_t_string_literal_equivalence(): + code = """ +import ibis +table = ibis.table(schema={"int_col": "int64"}) +template = t"{ibis._.int_col + 2} - {table.int_col * 3}" +col = table.select(template=template).template +assert isinstance(col, ibis.ir.IntegerColumn) +assert col.type() == ibis.dtype("int64") +""".strip() + exec(code) # noqa: S102 diff --git a/ibis/expr/types/core.py b/ibis/expr/types/core.py index 0ba927b599e1..5482e4ea97d3 100644 --- a/ibis/expr/types/core.py +++ b/ibis/expr/types/core.py @@ -16,6 +16,7 @@ from ibis.config import _default_backend from ibis.config import options as opts from ibis.expr.format import pretty +from ibis.expr.operations.util import find_backend from ibis.expr.types.rich import capture_rich_renderable, to_rich from ibis.util import experimental @@ -286,26 +287,6 @@ def pipe(self, f, /, *args: Any, **kwargs: Any) -> Expr: def op(self) -> ops.Node: return self._arg - def _find_backends(self) -> tuple[list[BaseBackend], bool]: - """Return the possible backends for an expression. - - Returns - ------- - list[BaseBackend] - A list of the backends found. - """ - - backends = set() - has_unbound = False - node_types = (ops.UnboundTable, ops.DatabaseTable, ops.SQLQueryResult) - for table in self.op().find(node_types): - if isinstance(table, ops.UnboundTable): - has_unbound = True - else: - backends.add(table.source) - - return list(backends), has_unbound - def _find_backend(self, *, use_default: bool = False) -> BaseBackend: """Find the backend attached to an expression. 
@@ -322,27 +303,17 @@ def _find_backend(self, *, use_default: bool = False) -> BaseBackend: BaseBackend A backend that is attached to the expression """ - backends, has_unbound = self._find_backends() - - if not backends: - if has_unbound: + backend = find_backend(self) + if backend is None: + if not use_default: raise IbisError( - "Expression contains unbound tables and therefore cannot " - "be executed. Use `.execute(expr)` to execute " + "Expression depends on no backends, please execute " "against an explicit backend, or rebuild the expression " "using bound tables instead." ) - default = _default_backend() if use_default else None - if default is None: - raise IbisError( - "Expression depends on no backends, and found no default" - ) - return default - - if len(backends) > 1: - raise IbisError("Multiple backends found for this expression") - - return backends[0] + else: + backend = _default_backend() + return backend def get_backend(self) -> BaseBackend: """Get the current Ibis backend of the expression. 
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 966ba75cac7a..d24faab6e459 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -25,6 +25,7 @@ from ibis.expr.types.generic import Value, literal from ibis.expr.types.rich import FixedTextJupyterMixin, to_rich from ibis.expr.types.temporal import TimestampColumn +from ibis.tstring import PTemplate from ibis.util import deprecated, experimental if TYPE_CHECKING: @@ -482,6 +483,9 @@ def bind(table: Table, value) -> Iterator[ir.Value]: yield value.resolve(table) elif isinstance(value, Resolver): yield value.resolve({"_": table}) + elif isinstance(value, PTemplate): + sql_value = ibis.sql_value(value) + yield from bind(table, sql_value) elif isinstance(value, Expandable): yield from value.expand(table) elif callable(value): diff --git a/ibis/tests/expr/test_template.py b/ibis/tests/expr/test_template.py new file mode 100644 index 000000000000..ec581e7d8957 --- /dev/null +++ b/ibis/tests/expr/test_template.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import pytest +import sqlglot.expressions as sge + +import ibis +from ibis import Deferred, _ +from ibis.common import exceptions as exc +from ibis.expr import datatypes as dt +from ibis.tstring import t + +my_scalar_int8 = ibis.literal(7, type="int8") +my_scalar_uint64 = ibis.literal(7, type="uint64") + +unknown_dtype = dt.Unknown(raw_type=sge.DataType(this=sge.DataType.Type.UNKNOWN)) + + +@pytest.mark.parametrize("arg_type", ["str", "template"]) +@pytest.mark.parametrize( + ("inp", "exp"), + [ + # This is just what sqlglot infers + ("5", dt.Int32()), + ("'foo'", dt.String()), + ("true", dt.Boolean()), + ("null", unknown_dtype), + ("'foo'::JSON", dt.JSON()), + # This is weird that these ints are parsed as int8 + # but the plain "5" is parsed as int32? 
+ pytest.param( + "{{1,2,3}}", + dt.Array(value_type=dt.Int8()), + marks=pytest.mark.xfail(), + ), + pytest.param( + "{'x':1, 'y':'foo'}", + dt.Struct({"x": dt.Int8(), "y": dt.String()}), + marks=pytest.mark.xfail(), + ), + ("5 as my_column", dt.Int32()), + ("null as my_column", unknown_dtype), + # Should this error instead? + ("x", unknown_dtype), + ("this is not valid SQL", exc.IbisInputError), + ], +) +def test_valueless(arg_type, inp: str, exp): + if arg_type == "str": + arg = inp + elif arg_type == "template": + arg = t(inp) + else: + raise AssertionError(arg_type) + if isinstance(exp, type) and issubclass(exp, Exception): + with pytest.raises(exp): + ibis.sql_value(arg) + else: + expr = ibis.sql_value(arg) + assert expr.type() == exp + assert isinstance(expr, ibis.Scalar) + + +def test_int_simple(): + five = 5 # noqa: F841 + expr = ibis.sql_value(ibis.t("3 + {five}")) + # Should this be int64? + assert expr.type().is_int32() + assert isinstance(expr, ibis.Scalar) + op = expr.op() + assert op.strings == ("3 + ", "") + (val,) = op.values + assert val.equals(ibis.literal(5).op()) + + +def test_int_complex(): + five = 5 # noqa: F841 + expr = ibis.sql_value(ibis.t("3 + {five + 8}")) + # Should this be int64? + assert expr.type().is_int32() + assert isinstance(expr, ibis.Scalar) + op = expr.op() + assert op.strings == ("3 + ", "") + (val,) = op.values + assert val.equals((ibis.literal(13)).op()) + + +def test_literal_int_simple(): + five = ibis.literal(5) + expr = ibis.sql_value(ibis.t("3 + {five}")) + # Should this be int64? + assert expr.type().is_int32() + assert isinstance(expr, ibis.Scalar) + op = expr.op() + assert op.strings == ("3 + ", "") + (val,) = op.values + assert val.equals(five.op()) + + +def test_literal_int_complex(): + five = ibis.literal(5) + expr = ibis.sql_value(ibis.t("3 + {five + 8}")) + # Should this be int64? 
+ assert expr.type().is_int32() + assert isinstance(expr, ibis.Scalar) + op = expr.op() + assert op.strings == ("3 + ", "") + (val,) = op.values + assert val.equals((five + 8).op()) + + +@pytest.mark.xfail( + reason="Need to detect the presence of Deffereds as values in sql_value() before constructing the op" +) +def test_deferred(): + v = _.my_int_col.sum() # noqa: F841 + expr = ibis.sql_value(ibis.t("3 + {v}")) + assert isinstance(expr, Deferred) + + +def test_dialect_dtype(): + assert ibis.sql_value(ibis.t("5::DOUBLE")).type().is_float64() + assert ibis.sql_value(ibis.t("5::DOUBLE"), dialect="duckdb").type().is_float64() + assert ibis.sql_value(ibis.t("5::DOUBLE"), dialect="sqlite").type().is_float64() + + assert ibis.sql_value(ibis.t("5::DOUBLE"), type=int).type().is_int64() + assert ( + ibis.sql_value(ibis.t("5::DOUBLE"), type=int, dialect="duckdb") + .type() + .is_int64() + ) + assert ( + ibis.sql_value(ibis.t("5::DOUBLE"), type=int, dialect="sqlite") + .type() + .is_int64() + ) + + assert ibis.sql_value(ibis.t("5::REAL")).type().is_float32() + assert ibis.sql_value(ibis.t("5::REAL"), dialect="duckdb").type().is_float32() + assert ibis.sql_value(ibis.t("5::REAL"), dialect="sqlite").type().is_float64() + + assert ibis.sql_value(ibis.t("5::REAL"), type=int).type().is_int64() + assert ( + ibis.sql_value(ibis.t("5::REAL"), type=int, dialect="duckdb").type().is_int64() + ) + assert ( + ibis.sql_value(ibis.t("5::REAL"), type=int, dialect="sqlite").type().is_int64() + ) + + +def test_multiple_relations(): + t1 = ibis.table({"i": int}) # noqa: F841 + t2 = ibis.table({"i": int}) # noqa: F841 + with pytest.raises(exc.IbisInputError): + ibis.sql_value(ibis.t("{t1.i} + {t2.i}")) + with pytest.raises(exc.IbisInputError): + ibis.sql_value(ibis.t("{t1.i + t2.i}")) diff --git a/ibis/tstring.py b/ibis/tstring.py new file mode 100644 index 000000000000..35c1c51e7c38 --- /dev/null +++ b/ibis/tstring.py @@ -0,0 +1,60 @@ +from __future__ import annotations + +from ibis._tstring 
import Interpolation as Interpolation # noqa: PLC0414 +from ibis._tstring import PInterpolation as PInterpolation # noqa: PLC0414 +from ibis._tstring import PTemplate as PTemplate # noqa: PLC0414 +from ibis._tstring import Template as Template # noqa: PLC0414 +from ibis._tstring import t as _t + + +def t(template_string: str, /) -> Template: + """Emulates a PEP 750 t-string literal for Python < 3.14. + + This function parses a string with f-string-like syntax and returns + a `Template` object, correctly evaluating expressions in the caller's + scope. + + Args: + template_string: The string to parse, e.g., "Hello {name!r}". + + Returns: + A `Template` instance containing the parsed static strings and + evaluated interpolations. + + Example: + >>> temp, unit = 22.43, "C" + >>> template = t("Temperature: {temp:.1f} degrees {unit!s}") + >>> template.strings + ('Temperature: ', ' degrees ', '') + >>> len(template.interpolations) + 2 + >>> template.interpolations[0] + Interpolation(value=22.43, expression='temp', conversion=None, format_spec='.1f') + >>> template.interpolations[1] + Interpolation(value='C', expression='unit', conversion='s', format_spec='') + """ + return _t(template_string, frame=1) + + +# @runtime_checkable +# class PInterpolation(Protocol): +# """Protocol for an object that can be interpreted as an Interpolation.""" + +# @property +# def value(self) -> object: ... +# @property +# def expression(self) -> str: ... +# @property +# def conversion(self) -> Literal["a", "r", "s"] | None: ... +# @property +# def format_spec(self) -> str: ... + + +# @runtime_checkable +# class PTemplateString(Protocol): +# """Protocol for an object that can be interpreted as a TemplateString.""" + +# @property +# def strings(self) -> tuple[str, ...]: ... +# @property +# def values(self) -> tuple[PInterpolation, ...]: ...