Skip to content

Commit ff3544b

Browse files
cpsievert and claude committed
feat(python): support data= dict parameter on execute() for inline DataFrames
Allows passing a dict of DataFrames to reader.execute() and the module-level execute() function. The DataFrames are registered as tables before query execution and unregistered afterward (cleanup happens even on error).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 71e8f83 commit ff3544b

3 files changed

Lines changed: 210 additions & 11 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,12 @@ class DuckDBReader:
9494
"""
9595
...
9696

97-
def execute(self, query: str) -> Spec:
97+
def execute(
98+
self,
99+
query: str,
100+
*,
101+
data: dict[str, pl.DataFrame] | None = None,
102+
) -> Spec:
98103
"""Execute a ggsql query and return the visualization specification.
99104
100105
This is the main entry point for creating visualizations. It parses
@@ -105,6 +110,10 @@ class DuckDBReader:
105110
----------
106111
query
107112
The ggsql query (SQL + VISUALISE clause).
113+
data
114+
Optional dictionary mapping table names to DataFrames. Tables are
115+
registered before execution and unregistered afterward (even on
116+
error).
108117
109118
Returns
110119
-------
@@ -385,7 +394,12 @@ def validate(query: str) -> Validated:
385394
"""
386395
...
387396

388-
def execute(query: str, reader: object) -> Spec:
397+
def execute(
398+
query: str,
399+
reader: object,
400+
*,
401+
data: dict[str, pl.DataFrame] | None = None,
402+
) -> Spec:
389403
"""Execute a ggsql query with a reader (native or custom Python object).
390404
391405
This is a convenience function for custom readers. For native readers,
@@ -399,6 +413,10 @@ def execute(query: str, reader: object) -> Spec:
399413
The database reader to execute SQL against. Can be a native
400414
``DuckDBReader`` for optimal performance, or any Python object with
401415
an ``execute_sql(sql: str) -> polars.DataFrame`` method.
416+
data
417+
Optional dictionary mapping table names to DataFrames. Tables are
418+
registered before execution and unregistered afterward (even on
419+
error).
402420
403421
Returns
404422
-------

ggsql-python/src/lib.rs

Lines changed: 113 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -337,6 +337,9 @@ impl PyDuckDBReader {
337337
/// ----------
338338
/// query : str
339339
/// The ggsql query (SQL + VISUALISE clause).
340+
/// data : dict[str, polars.DataFrame] | None
341+
/// Optional dictionary mapping table names to DataFrames. Tables are
342+
/// registered before execution and unregistered afterward (even on error).
340343
///
341344
/// Returns
342345
/// -------
@@ -354,11 +357,48 @@ impl PyDuckDBReader {
354357
/// >>> spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point")
355358
/// >>> writer = VegaLiteWriter()
356359
/// >>> json_output = writer.render(spec)
357-
fn execute(&self, query: &str) -> PyResult<PySpec> {
358-
self.inner
360+
#[pyo3(signature = (query, *, data=None))]
361+
fn execute(&self, py: Python<'_>, query: &str, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
362+
// Register DataFrames from data dict
363+
let registered_names = if let Some(data_dict) = data {
364+
self.register_data_dict(py, data_dict)?
365+
} else {
366+
vec![]
367+
};
368+
369+
// Execute query (capture result, don't return early)
370+
let result = self.inner
359371
.execute(query)
360372
.map(|s| PySpec { inner: s })
361-
.map_err(ggsql_err_to_py)
373+
.map_err(ggsql_err_to_py);
374+
375+
// Cleanup: unregister temporary tables (even on error)
376+
for name in &registered_names {
377+
let _ = self.inner.unregister(name);
378+
}
379+
380+
result
381+
}
382+
}
383+
384+
impl PyDuckDBReader {
385+
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
386+
/// This is a private Rust helper, not exposed to Python.
387+
fn register_data_dict(
388+
&self,
389+
py: Python<'_>,
390+
data: &Bound<'_, PyDict>,
391+
) -> PyResult<Vec<String>> {
392+
let mut names = Vec::new();
393+
for (key, value) in data.iter() {
394+
let name: String = key.extract()?;
395+
let df = py_to_polars(py, &value)?;
396+
self.inner
397+
.register(&name, df, true)
398+
.map_err(ggsql_err_to_py)?;
399+
names.push(name);
400+
}
401+
Ok(names)
362402
}
363403
}
364404

@@ -729,6 +769,9 @@ fn validate(query: &str) -> PyResult<PyValidated> {
729769
/// The database reader to execute SQL against. Can be a native Reader
730770
/// for optimal performance, or any Python object with an
731771
/// `execute_sql(sql: str) -> polars.DataFrame` method.
772+
/// data : dict[str, polars.DataFrame] | None
773+
/// Optional dictionary mapping table names to DataFrames. Tables are
774+
/// registered before execution and unregistered afterward (even on error).
732775
///
733776
/// Returns
734777
/// -------
@@ -755,19 +798,80 @@ fn validate(query: &str) -> PyResult<PyValidated> {
755798
/// >>> reader = MyReader()
756799
/// >>> spec = execute("SELECT * FROM data VISUALISE x, y DRAW point", reader)
757800
#[pyfunction]
758-
fn execute(query: &str, reader: &Bound<'_, PyAny>) -> PyResult<PySpec> {
759-
// Fast path: try all known native reader types
760-
// Add new native readers to this list as they're implemented
761-
try_native_readers!(query, reader, PyDuckDBReader);
801+
#[pyo3(signature = (query, reader, *, data=None))]
802+
fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<&Bound<'_, PyDict>>) -> PyResult<PySpec> {
803+
// Native reader fast path: DuckDBReader
804+
// Note: we can't use the try_native_readers! macro here because it uses `return`
805+
// which would skip cleanup of registered tables.
806+
if let Ok(native) = reader.downcast::<PyDuckDBReader>() {
807+
// Register DataFrames if provided
808+
let registered_names = if let Some(data_dict) = data {
809+
native.borrow().register_data_dict(py, data_dict)?
810+
} else {
811+
vec![]
812+
};
813+
814+
// Execute (capture result for cleanup)
815+
let result = native.borrow().inner.execute(query)
816+
.map(|s| PySpec { inner: s })
817+
.map_err(ggsql_err_to_py);
818+
819+
// Cleanup: unregister temporary tables (even on error)
820+
for name in &registered_names {
821+
let _ = native.borrow().inner.unregister(name);
822+
}
823+
824+
return result;
825+
}
762826

763827
// Bridge path: wrap Python object as Reader
828+
// Register DataFrames if provided
829+
let registered_names = if let Some(data_dict) = data {
830+
register_data_on_reader(py, reader, data_dict)?
831+
} else {
832+
vec![]
833+
};
834+
764835
let bridge = PyReaderBridge {
765836
obj: reader.clone().unbind(),
766837
};
767-
bridge
838+
let result = bridge
768839
.execute(query)
769840
.map(|s| PySpec { inner: s })
770-
.map_err(ggsql_err_to_py)
841+
.map_err(ggsql_err_to_py);
842+
843+
// Cleanup for bridge path
844+
for name in &registered_names {
845+
let _ = call_unregister(py, reader, name);
846+
}
847+
848+
result
849+
}
850+
851+
/// Register DataFrames from a Python dict onto a Python reader object.
852+
/// Returns list of registered names for cleanup.
853+
fn register_data_on_reader(
854+
py: Python<'_>,
855+
reader: &Bound<'_, PyAny>,
856+
data: &Bound<'_, PyDict>,
857+
) -> PyResult<Vec<String>> {
858+
let mut names = Vec::new();
859+
for (key, value) in data.iter() {
860+
let name: String = key.extract()?;
861+
let df = py_to_polars(py, &value)?;
862+
let py_df = polars_to_py(py, &df)?;
863+
reader.call_method("register", (&name, py_df, true), None)?;
864+
names.push(name);
865+
}
866+
Ok(names)
867+
}
868+
869+
/// Call unregister on a reader if the method exists.
870+
fn call_unregister(_py: Python<'_>, reader: &Bound<'_, PyAny>, name: &str) -> PyResult<()> {
871+
if reader.hasattr("unregister")? {
872+
reader.call_method1("unregister", (name,))?;
873+
}
874+
Ok(())
771875
}
772876

773877
// ============================================================================

ggsql-python/tests/test_ggsql.py

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -656,6 +656,83 @@ def test_render_chart_facet(self):
656656
assert isinstance(chart, altair.FacetChart)
657657

658658

659+
class TestExecuteWithData:
    """Checks for the ``data=`` keyword of ``execute()``.

    Inline DataFrames must be queryable for the duration of the call and no
    longer registered on the reader once the call has returned or raised.
    """

    def test_execute_with_single_dataframe(self):
        """A single DataFrame is queryable under its dict key."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"

        spec = reader.execute(query, data={"mydata": frame})

        assert spec.metadata()["rows"] == 3

    def test_execute_with_multiple_dataframes(self):
        """Several DataFrames can be supplied at once and joined in the query."""
        left = pl.DataFrame({"id": [1, 2, 3], "y": [10, 20, 30]})
        right = pl.DataFrame({"id": [2, 3], "category": ["A", "B"]})
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = (
            "SELECT t1.id AS x, t1.y FROM t1 JOIN t2 ON t1.id = t2.id "
            "VISUALISE x, y DRAW point"
        )

        spec = reader.execute(query, data={"t1": left, "t2": right})

        # Only ids 2 and 3 appear in both frames.
        assert spec.metadata()["rows"] == 2

    def test_execute_with_data_cleans_up(self):
        """After a successful call the inline table is no longer registered."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        reader = ggsql.DuckDBReader("duckdb://memory")

        reader.execute(
            "SELECT * FROM temp VISUALISE x, y DRAW point",
            data={"temp": frame},
        )

        # The table is gone, so selecting from it has to raise.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            reader.execute_sql("SELECT * FROM temp")

    def test_execute_with_data_cleans_up_on_error(self):
        """A failing query still leaves no inline table behind."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        reader = ggsql.DuckDBReader("duckdb://memory")
        bad_query = "SELECT * FROM temp VISUALISE DRAW not_a_geom"

        with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)):
            reader.execute(bad_query, data={"temp": frame})

        # Cleanup must have run despite the failure above.
        with pytest.raises((ggsql.ReaderError, ValueError)):
            reader.execute_sql("SELECT * FROM temp")

    def test_execute_without_data_still_works(self):
        """Omitting ``data=`` keeps the pre-existing behaviour."""
        reader = ggsql.DuckDBReader("duckdb://memory")

        spec = reader.execute("SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point")

        assert spec.metadata()["rows"] == 1

    def test_execute_with_empty_data(self):
        """An empty ``data=`` dict is accepted."""
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT 1 AS x, 2 AS y VISUALISE x, y DRAW point"

        spec = reader.execute(query, data={})

        assert spec.metadata()["rows"] == 1

    def test_module_execute_with_data(self):
        """The module-level ``ggsql.execute()`` accepts ``data=`` as well."""
        frame = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
        reader = ggsql.DuckDBReader("duckdb://memory")
        query = "SELECT * FROM mydata VISUALISE x, y DRAW point"

        spec = ggsql.execute(query, reader, data={"mydata": frame})

        assert spec.metadata()["rows"] == 3
734+
735+
659736
class TestTypeStubs:
660737
"""Tests for type stub presence and correctness."""
661738

0 commit comments

Comments
 (0)