diff --git a/.changes/unreleased/Fixes-20250611-140823.yaml b/.changes/unreleased/Fixes-20250611-140823.yaml new file mode 100644 index 00000000..bfd1e287 --- /dev/null +++ b/.changes/unreleased/Fixes-20250611-140823.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Snapshot staging table is recreated every time +time: 2025-06-11T14:08:23.526502764+02:00 +custom: + Author: adam-tokarski + Issue: "488" + PR: "489" diff --git a/dbt/include/trino/macros/materializations/snapshot.sql b/dbt/include/trino/macros/materializations/snapshot.sql index 433ce6ce..d063a166 100644 --- a/dbt/include/trino/macros/materializations/snapshot.sql +++ b/dbt/include/trino/macros/materializations/snapshot.sql @@ -48,3 +48,25 @@ {%- endfor %}) {% endmacro %} + +/* + Overridden macro which builds a staging table for snapshot. + + As there is no such thing as a temporary table in Trino, such staging table is removed + on cleanup (see trino__post_snapshot macro above). But it may happen that something goes + wrong (like in `merge` statement), dbt fails and this 'temporary' table still exists. + This macro takes care of it. + */ +{% macro build_snapshot_staging_table(strategy, sql, target_relation) %} + {% set temp_relation = make_temp_relation(target_relation) %} + + {{ drop_relation_if_exists(temp_relation) }} + + {% set select = snapshot_staging_table(strategy, sql, target_relation) %} + + {% call statement('build_snapshot_staging_relation') %} + {{ create_table_as(True, temp_relation, select) }} + {% endcall %} + + {% do return(temp_relation) %} +{% endmacro %} diff --git a/tests/functional/adapter/test_simple_snapshot.py b/tests/functional/adapter/test_simple_snapshot.py index 7a9af586..34065910 100644 --- a/tests/functional/adapter/test_simple_snapshot.py +++ b/tests/functional/adapter/test_simple_snapshot.py @@ -3,6 +3,7 @@ BaseSimpleSnapshot, BaseSnapshotCheck, ) +from dbt.tests.fixtures.project import TestProjInfo from dbt.tests.util import run_dbt iceberg_macro_override_sql = """ @@ -13,7 +14,7 @@ class TrinoSimpleSnapshot(BaseSimpleSnapshot): - def test_updates_are_captured_by_snapshot(self, project): + def test_updates_are_captured_by_snapshot(self, project: TestProjInfo) -> None: """ Update the last 5 records. Show that all ids are current, but the last 5 reflect updates. """ @@ -26,7 +27,7 @@ def test_updates_are_captured_by_snapshot(self, project): ids_with_closed_out_snapshot_records=range(16, 21), ) - def test_new_column_captured_by_snapshot(self, project): + def test_new_column_captured_by_snapshot(self, project: TestProjInfo) -> None: """ Add a column to `fact` and populate the last 10 records with a non-null value. Show that all ids are current, but the last 10 reflect updates and the first 10 don't @@ -47,8 +48,20 @@ def test_new_column_captured_by_snapshot(self, project): ) + def test_staging_table_is_recreated(self, project: TestProjInfo) -> None: + """ + It may happen that staging temporary table still exists, from previous run. + Snapshot should work properly even in such situation. + """ + + project.run_sql(f"create table {project.test_schema}.snapshot__dbt_tmp (id int)") + + run_dbt(["snapshot"]) + + + class TrinoSnapshotCheck(BaseSnapshotCheck): - def test_column_selection_is_reflected_in_snapshot(self, project): + def test_column_selection_is_reflected_in_snapshot(self, project: TestProjInfo) -> None: """ Update the first 10 records on a non-tracked column. Update the middle 10 records on a tracked column. (hence records 6-10 are updated on both)