Skip to content

Commit 5a4a2df

Browse files
cpsievertclaude
andcommitted
fix(python): improve type stubs, data= cleanup safety, and exception specificity
- Update type stubs to document typed exceptions (ParseError, ReaderError, etc.) instead of generic ValueError - Guard data= cleanup against destroying pre-existing tables by checking table existence before registration - Pass replace as keyword arg in bridge register_data_on_reader for compatibility with custom readers that omit the parameter - Tighten test assertions to expect specific exception types - Remove unused try_native_readers! macro Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ff3544b commit 5a4a2df

3 files changed

Lines changed: 69 additions & 37 deletions

File tree

ggsql-python/python/ggsql/_ggsql.pyi

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class DuckDBReader:
3131
3232
Raises
3333
------
34-
ValueError
34+
ReaderError
3535
If the connection string is invalid or the database cannot be opened.
3636
"""
3737

@@ -51,7 +51,7 @@ class DuckDBReader:
5151
5252
Raises
5353
------
54-
ValueError
54+
ReaderError
5555
If the SQL is invalid or execution fails.
5656
"""
5757
...
@@ -74,7 +74,7 @@ class DuckDBReader:
7474
7575
Raises
7676
------
77-
ValueError
77+
ReaderError
7878
If registration fails or the table name is invalid.
7979
"""
8080
...
@@ -89,7 +89,7 @@ class DuckDBReader:
8989
9090
Raises
9191
------
92-
ValueError
92+
ReaderError
9393
If the table was not registered or unregistration fails.
9494
"""
9595
...
@@ -122,9 +122,12 @@ class DuckDBReader:
122122
123123
Raises
124124
------
125-
ValueError
126-
If the query syntax is invalid, has no VISUALISE clause, or
127-
SQL execution fails.
125+
ParseError
126+
If the query syntax is invalid.
127+
ValidationError
128+
If the query has no VISUALISE clause or fails semantic checks.
129+
ReaderError
130+
If SQL execution fails.
128131
"""
129132
...
130133

@@ -154,7 +157,7 @@ class VegaLiteWriter:
154157
155158
Raises
156159
------
157-
ValueError
160+
WriterError
158161
If rendering fails.
159162
"""
160163
...
@@ -388,7 +391,7 @@ def validate(query: str) -> Validated:
388391
389392
Raises
390393
------
391-
ValueError
394+
ParseError
392395
If validation fails unexpectedly (syntax errors are captured in
393396
the returned ``Validated`` object, not raised).
394397
"""
@@ -425,7 +428,11 @@ def execute(
425428
426429
Raises
427430
------
428-
ValueError
429-
If parsing, validation, or SQL execution fails.
431+
ParseError
432+
If the query syntax is invalid.
433+
ValidationError
434+
If semantic validation fails.
435+
ReaderError
436+
If SQL execution fails.
430437
"""
431438
...

ggsql-python/src/lib.rs

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -191,24 +191,6 @@ impl Reader for PyReaderBridge {
191191
}
192192
}
193193

194-
// ============================================================================
195-
// Native Reader Detection Macro
196-
// ============================================================================
197-
198-
/// Macro to try native readers and fall back to bridge.
199-
/// Adding new native readers = add to the macro invocation list.
200-
macro_rules! try_native_readers {
201-
($query:expr, $reader:expr, $($native_type:ty),*) => {{
202-
$(
203-
if let Ok(native) = $reader.downcast::<$native_type>() {
204-
return native.borrow().inner.execute($query)
205-
.map(|s| PySpec { inner: s })
206-
.map_err(ggsql_err_to_py);
207-
}
208-
)*
209-
}};
210-
}
211-
212194
// ============================================================================
213195
// PyDuckDBReader
214196
// ============================================================================
@@ -382,8 +364,16 @@ impl PyDuckDBReader {
382364
}
383365

384366
impl PyDuckDBReader {
385-
/// Register DataFrames from a Python dict. Returns list of registered names for cleanup.
386-
/// This is a private Rust helper, not exposed to Python.
367+
/// Check whether a table already exists in the reader.
368+
fn table_exists(&self, name: &str) -> bool {
369+
// A lightweight probe: try to select zero rows from the table.
370+
self.inner
371+
.execute_sql(&format!("SELECT 1 FROM {name} LIMIT 0"))
372+
.is_ok()
373+
}
374+
375+
/// Register DataFrames from a Python dict. Returns list of *newly created*
376+
/// table names for cleanup (pre-existing tables are not tracked).
387377
fn register_data_dict(
388378
&self,
389379
py: Python<'_>,
@@ -392,11 +382,14 @@ impl PyDuckDBReader {
392382
let mut names = Vec::new();
393383
for (key, value) in data.iter() {
394384
let name: String = key.extract()?;
385+
let existed = self.table_exists(&name);
395386
let df = py_to_polars(py, &value)?;
396387
self.inner
397388
.register(&name, df, true)
398389
.map_err(ggsql_err_to_py)?;
399-
names.push(name);
390+
if !existed {
391+
names.push(name);
392+
}
400393
}
401394
Ok(names)
402395
}
@@ -850,6 +843,15 @@ fn execute(py: Python<'_>, query: &str, reader: &Bound<'_, PyAny>, data: Option<
850843

851844
/// Register DataFrames from a Python dict onto a Python reader object.
852845
/// Returns list of registered names for cleanup.
846+
/// Check whether a table exists via a Python reader's execute_sql method.
847+
fn reader_table_exists(reader: &Bound<'_, PyAny>, name: &str) -> bool {
848+
reader
849+
.call_method1("execute_sql", (format!("SELECT 1 FROM {name} LIMIT 0"),))
850+
.is_ok()
851+
}
852+
853+
/// Register DataFrames from a Python dict onto a Python reader object.
854+
/// Returns list of *newly created* table names for cleanup.
853855
fn register_data_on_reader(
854856
py: Python<'_>,
855857
reader: &Bound<'_, PyAny>,
@@ -858,10 +860,15 @@ fn register_data_on_reader(
858860
let mut names = Vec::new();
859861
for (key, value) in data.iter() {
860862
let name: String = key.extract()?;
863+
let existed = reader_table_exists(reader, &name);
861864
let df = py_to_polars(py, &value)?;
862865
let py_df = polars_to_py(py, &df)?;
863-
reader.call_method("register", (&name, py_df, true), None)?;
864-
names.push(name);
866+
let kwargs = PyDict::new(py);
867+
kwargs.set_item("replace", true)?;
868+
reader.call_method("register", (&name, py_df), Some(&kwargs))?;
869+
if !existed {
870+
names.push(name);
871+
}
865872
}
866873
Ok(names)
867874
}

ggsql-python/tests/test_ggsql.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,20 +690,20 @@ def test_execute_with_data_cleans_up(self):
690690
data={"temp": df},
691691
)
692692
# Table should be cleaned up — querying it should fail
693-
with pytest.raises((ggsql.ReaderError, ValueError)):
693+
with pytest.raises(ggsql.ReaderError):
694694
reader.execute_sql("SELECT * FROM temp")
695695

696696
def test_execute_with_data_cleans_up_on_error(self):
697697
"""DataFrames are unregistered even if execution fails."""
698698
reader = ggsql.DuckDBReader("duckdb://memory")
699699
df = pl.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
700-
with pytest.raises((ggsql.ParseError, ggsql.ValidationError, ValueError)):
700+
with pytest.raises(ggsql.ParseError):
701701
reader.execute(
702702
"SELECT * FROM temp VISUALISE DRAW not_a_geom",
703703
data={"temp": df},
704704
)
705705
# Table should still be cleaned up
706-
with pytest.raises((ggsql.ReaderError, ValueError)):
706+
with pytest.raises(ggsql.ReaderError):
707707
reader.execute_sql("SELECT * FROM temp")
708708

709709
def test_execute_without_data_still_works(self):
@@ -721,6 +721,24 @@ def test_execute_with_empty_data(self):
721721
)
722722
assert spec.metadata()["rows"] == 1
723723

724+
def test_execute_with_data_preserves_preexisting_table(self):
725+
"""data= does not unregister a table that existed before the call."""
726+
reader = ggsql.DuckDBReader("duckdb://memory")
727+
existing = pl.DataFrame({"x": [1, 2], "y": [10, 20]})
728+
reader.register("mytable", existing)
729+
730+
# Pass same name via data= — should replace temporarily, then NOT unregister
731+
override = pl.DataFrame({"x": [3, 4, 5], "y": [30, 40, 50]})
732+
spec = reader.execute(
733+
"SELECT * FROM mytable VISUALISE x, y DRAW point",
734+
data={"mytable": override},
735+
)
736+
assert spec.metadata()["rows"] == 3
737+
738+
# The pre-existing table should still be queryable (not unregistered)
739+
result = reader.execute_sql("SELECT * FROM mytable")
740+
assert result.shape[0] > 0
741+
724742
def test_module_execute_with_data(self):
725743
"""Module-level execute() also supports data= parameter."""
726744
reader = ggsql.DuckDBReader("duckdb://memory")

0 commit comments

Comments
 (0)