Skip to content

Commit bbbbadf

Browse files
fix(#1429): cascade through FK chain for part_integrity="cascade"
Diagram.cascade(part_integrity="cascade") used to derive each Part's Master restriction by joining `master_ft.proj() & child_ft.proj()` on shared attribute names. This failed when the Part referenced its Master indirectly — through another Part with renamed FK columns (`.proj()` in the definition) or via a Part-of-Part chain that does not directly inherit Master's PK names. The intermediate Parts' restrictions were also skipped. Replace the proj-join shortcut with an upward walk of the actual FK graph from the Part to its Master, applying symmetric (upward) counterparts of the existing propagation rules at each edge. Key changes in src/datajoint/diagram.py: - New Diagram._apply_propagation_rule_upward — mirror of the existing forward propagation method. Same three rules (shared-PK copy, aliased reverse-rename, non-aliased projection) applied in the reverse direction (child → parent). - New Diagram._propagate_part_to_master — walks nx.shortest_path (Master → Part) and applies the upward rules along each real edge, transparently skipping the integer-named alias nodes that the graph inserts for aliased FKs. Restricts intermediate Parts too (the chain case from #1429 Case 2). Materializes the Master's restriction via to_arrays() so the subsequent forward cascade back down to Master's other Parts produces literal `WHERE ... IN (values)` clauses rather than self-referential subqueries (avoiding MySQL error 1093). - New Diagram._find_real_edge_props — looks up edge props for parent → child via the direct edge OR through an alias node. - _propagate_restrictions: seed-is-Part case. When the cascade starts at a Part (e.g. `Master.PartB.delete(part_integrity="cascade")`), the main loop's part_integrity block — nested inside the out_edges iteration — cannot fire because a leaf Part has no out-edges. Trigger the upward propagation explicitly for the seed before the main loop. - Diagram.cascade: expand nodes_to_show to include any node that the part_integrity propagation pulled in (the master and its descendants), so counts() and __iter__ report the full cascade subgraph. Tests in tests/integration/test_cascade_delete.py — three new mysql tests covering both #1429 cases plus an end-to-end delete. Full regression: 8 + 15 + 33 mysql tests pass. Slated for DataJoint 2.3.
1 parent 6077074 commit bbbbadf

2 files changed

Lines changed: 325 additions & 20 deletions

File tree

src/datajoint/diagram.py

Lines changed: 189 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,17 @@ def cascade(cls, table_expr, part_integrity="enforce"):
365365
# Propagate downstream
366366
result._propagate_restrictions(node, mode="cascade", part_integrity=part_integrity)
367367

368+
# part_integrity="cascade" may pull in nodes that aren't descendants of
369+
# the seed (e.g. the master of a seed Part, plus the master's other
370+
# Parts). Expand nodes_to_show to include any restricted node and the
371+
# descendants of any newly-restricted ancestor. See #1429.
372+
restricted_nodes = set(result._cascade_restrictions)
373+
expanded = set(result.nodes_to_show) | restricted_nodes
374+
for n in restricted_nodes - result.nodes_to_show:
375+
expanded.update(nx.descendants(result, n))
376+
result.nodes_to_show = expanded & set(result.nodes())
377+
result._expanded_nodes = set(result.nodes_to_show)
378+
368379
# Trim graph to cascade subgraph: only restricted tables
369380
# (seed + descendants) plus alias nodes connecting them.
370381
keep = set(result._cascade_restrictions)
@@ -443,7 +454,6 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
443454
propagation rules at each edge. Only processes descendants of
444455
start_node to avoid duplicate propagation when chaining.
445456
"""
446-
from .table import FreeTable
447457

448458
sorted_nodes = topo_sort(self)
449459
# Only propagate through descendants of start_node
@@ -453,6 +463,18 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
453463

454464
restrictions = self._cascade_restrictions if mode == "cascade" else self._restrict_conditions
455465

466+
# Seed-is-Part case: when the seed itself is a Part and part_integrity="cascade",
467+
# the main loop's part_integrity block (which fires inside `out_edges`)
468+
# cannot trigger from the seed because a leaf Part has no out-edges.
469+
# Trigger the upward propagation explicitly for the seed. See #1429.
470+
if part_integrity == "cascade" and mode == "cascade":
471+
seed_master = extract_master(start_node)
472+
if seed_master and seed_master in self.nodes() and seed_master not in visited_masters:
473+
visited_masters.add(seed_master)
474+
if self._propagate_part_to_master(start_node, seed_master, mode, restrictions, propagated_edges):
475+
allowed_nodes.add(seed_master)
476+
allowed_nodes.update(nx.descendants(self, seed_master))
477+
456478
# Multiple passes to handle part_integrity="cascade" upward propagation.
457479
# When a part table triggers its master to join the cascade, the master's
458480
# other descendants need processing in a subsequent pass. The loop
@@ -512,29 +534,21 @@ def _propagate_restrictions(self, start_node, mode, part_integrity="enforce"):
512534
any_new = True
513535

514536
# part_integrity="cascade": propagate up from part to master
537+
# via the actual FK graph path, applying upward propagation
538+
# rules at each edge. Handles Part-of-Part chains and
539+
# renamed FKs (via .proj()), unlike the prior implementation
540+
# which assumed shared PK attribute names. See #1429.
515541
if part_integrity == "cascade" and mode == "cascade":
516542
master_name = extract_master(target)
517-
if (
518-
master_name
519-
and master_name in self.nodes()
520-
and master_name not in restrictions
521-
and master_name not in visited_masters
522-
):
543+
if master_name and master_name in self.nodes() and master_name not in visited_masters:
523544
visited_masters.add(master_name)
524-
child_ft = self._restricted_table(target)
525-
master_ft = FreeTable(self._connection, master_name)
526-
from .condition import make_condition
527-
528-
master_restr = make_condition(
529-
master_ft,
530-
(master_ft.proj() & child_ft.proj()).to_arrays(),
531-
master_ft.restriction_attributes,
545+
propagated = self._propagate_part_to_master(
546+
target, master_name, mode, restrictions, propagated_edges
532547
)
533-
restrictions[master_name] = [master_restr]
534-
self._restriction_attrs[master_name] = set()
535-
allowed_nodes.add(master_name)
536-
allowed_nodes.update(nx.descendants(self, master_name))
537-
any_new = True
548+
if propagated:
549+
allowed_nodes.add(master_name)
550+
allowed_nodes.update(nx.descendants(self, master_name))
551+
any_new = True
538552

539553
def _apply_propagation_rule(
540554
self,
@@ -590,6 +604,161 @@ def _apply_propagation_rule(
590604

591605
self._restriction_attrs.setdefault(child_node, set()).update(child_attrs)
592606

607+
def _apply_propagation_rule_upward(self, child_ft, child_attrs, parent_node, attr_map, aliased, mode, restrictions):
608+
"""
609+
Apply the symmetric (upward) propagation rule to a parent←child edge.
610+
611+
Inverts `_apply_propagation_rule`: derives a restriction on the parent
612+
from a restriction on the child, following the FK chain in reverse.
613+
Used by part_integrity="cascade" to propagate a Part's restriction up
614+
to its Master, transparently handling renamed FKs (via .proj()) and
615+
Part-of-Part chains. See #1429.
616+
617+
Edge metadata convention (matches `_apply_propagation_rule`):
618+
- `attr_map`: dict mapping child column → parent (referenced) column.
619+
- `aliased`: True iff any column was renamed across the FK.
620+
621+
Rules (symmetric to the forward rules in `_apply_propagation_rule`):
622+
623+
1. Non-aliased AND child restriction attrs ⊆ parent PK:
624+
Copy child restriction directly (attrs are shared by name).
625+
2. Aliased FK (attr_map renames columns):
626+
``child.proj(**{parent: child for child, parent in attr_map.items()})``
627+
— reverses the renaming so the result has parent's column names.
628+
3. Non-aliased AND child restriction attrs ⊄ parent PK:
629+
``child.proj()`` — project child to parent's PK columns.
630+
"""
631+
parent_pk = self.nodes[parent_node].get("primary_key", set())
632+
633+
if not aliased and child_attrs and child_attrs <= parent_pk:
634+
# Backward Rule 1: copy child restriction directly
635+
child_restr = restrictions.get(
636+
child_ft.full_table_name,
637+
[] if mode == "cascade" else AndList(),
638+
)
639+
if mode == "cascade":
640+
restrictions.setdefault(parent_node, []).extend(child_restr)
641+
else:
642+
restrictions.setdefault(parent_node, AndList()).extend(child_restr)
643+
parent_attrs = set(child_attrs)
644+
elif aliased:
645+
# Backward Rule 2: reverse rename
646+
parent_item = child_ft.proj(**{pk: fk for fk, pk in attr_map.items()})
647+
if mode == "cascade":
648+
restrictions.setdefault(parent_node, []).append(parent_item)
649+
else:
650+
restrictions.setdefault(parent_node, AndList()).append(parent_item)
651+
parent_attrs = set(attr_map.values()) # parent's PK column names
652+
else:
653+
# Backward Rule 3: project child to parent PK
654+
parent_item = child_ft.proj()
655+
if mode == "cascade":
656+
restrictions.setdefault(parent_node, []).append(parent_item)
657+
else:
658+
restrictions.setdefault(parent_node, AndList()).append(parent_item)
659+
parent_attrs = set(attr_map.values())
660+
661+
self._restriction_attrs.setdefault(parent_node, set()).update(parent_attrs)
662+
663+
def _propagate_part_to_master(self, part_node, master_name, mode, restrictions, propagated_edges):
664+
"""
665+
Walk the FK graph from `part_node` up to `master_name`, applying
666+
`_apply_propagation_rule_upward` at each real edge along the path.
667+
668+
Returns True if any propagation occurred. Handles Part-of-Part chains
669+
by walking the full path (intermediate Parts get restricted too) and
670+
renamed FKs via the upward rules.
671+
672+
Alias nodes (integer-named graph nodes inserted for aliased edges)
673+
are transparent — both half-edges carry the same `attr_map` props,
674+
so we read props from one and skip the alias node when walking.
675+
676+
After the walk, the master's restriction is **materialized** to a
677+
literal value tuple via ``to_arrays()``. Without materialization, a
678+
subsequent forward cascade from the master back down to its parts
679+
would produce a self-referential subquery (MySQL error 1093, since
680+
the master's restriction depends on the same Part being deleted).
681+
Materializing converts the restriction into a static value set, so
682+
the forward cascade generates ``WHERE ... IN (literal-list)`` rather
683+
than ``WHERE ... IN (SELECT ... FROM <part>)``.
684+
"""
685+
try:
686+
path = nx.shortest_path(self, master_name, part_node)
687+
except (nx.NetworkXNoPath, nx.NodeNotFound):
688+
return False
689+
690+
# Strip alias nodes; what remains is the sequence of real tables.
691+
real_path = [n for n in path if not (isinstance(n, str) and n.isdigit())]
692+
if len(real_path) < 2 or real_path[-1] != part_node or real_path[0] != master_name:
693+
return False
694+
695+
# Walk real_path in reverse (child → parent direction). For each
696+
# adjacent (parent, child) pair, look up the edge props — direct
697+
# edge if non-aliased, via alias node if aliased.
698+
any_propagated = False
699+
for i in range(len(real_path) - 1, 0, -1):
700+
child = real_path[i]
701+
parent = real_path[i - 1]
702+
edge_props = self._find_real_edge_props(parent, child)
703+
if edge_props is None:
704+
return any_propagated # Path broken (shouldn't happen if shortest_path succeeded)
705+
706+
attr_map = edge_props.get("attr_map", {})
707+
aliased = edge_props.get("aliased", False)
708+
child_ft = self._restricted_table(child)
709+
child_attrs = self._restriction_attrs.get(child, set())
710+
711+
self._apply_propagation_rule_upward(
712+
child_ft,
713+
child_attrs,
714+
parent,
715+
attr_map,
716+
aliased,
717+
mode,
718+
restrictions,
719+
)
720+
any_propagated = True
721+
722+
# Materialize the master's restriction so subsequent forward cascade
723+
# doesn't produce self-referential subqueries. Replace the master's
724+
# accumulated query restrictions with a literal value tuple.
725+
if any_propagated and master_name in restrictions:
726+
from .condition import make_condition
727+
from .table import FreeTable
728+
729+
master_ft = self._restricted_table(master_name)
730+
master_pk_values = master_ft.proj().to_arrays()
731+
if mode == "cascade":
732+
bare_master = FreeTable(self._connection, master_name)
733+
if len(master_pk_values) > 0:
734+
materialized = make_condition(
735+
bare_master,
736+
master_pk_values,
737+
bare_master.restriction_attributes,
738+
)
739+
restrictions[master_name] = [materialized]
740+
else:
741+
# No matching master rows — false restriction so master is
742+
# included with zero matches in counts/iter.
743+
restrictions[master_name] = [False]
744+
self._restriction_attrs.setdefault(master_name, set())
745+
746+
return any_propagated
747+
748+
def _find_real_edge_props(self, parent, child):
749+
"""
750+
Return edge props for parent → child, transparently traversing the
751+
integer-named alias node that the graph inserts for aliased FKs.
752+
Returns None if no such edge or alias-mediated edge exists.
753+
"""
754+
if self.has_edge(parent, child):
755+
return self.edges[parent, child]
756+
for _, mid, _ in self.out_edges(parent, data=True):
757+
if isinstance(mid, str) and mid.isdigit() and self.has_edge(mid, child):
758+
# Both half-edges carry the same attr_map / aliased props
759+
return self.edges[parent, mid]
760+
return None
761+
593762
def counts(self):
594763
"""
595764
Return affected row counts per table without modifying data.

tests/integration/test_cascade_delete.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,139 @@ class Child(dj.Manual):
292292
connection_by_backend.query(f"DROP DATABASE IF EXISTS {qi(name)}")
293293
except Exception:
294294
pass
295+
296+
297+
# =========================================================================
298+
# Issue #1429: cascade with part_integrity="cascade" must traverse the FK
299+
# chain through intermediate Parts (and renamed FKs), not assume that the
300+
# Part shares PK attribute names with its Master.
301+
# =========================================================================
302+
303+
304+
def test_cascade_part_of_part_no_master_reference(schema_by_backend):
305+
"""
306+
Case 2 from #1429: PartB references PartA directly (no -> Master).
307+
Restricting PartB with part_integrity="cascade" must restrict both
308+
PartA and Master (PartA via the direct FK, Master via the master-part
309+
FK chained through PartA).
310+
"""
311+
312+
@schema_by_backend
313+
class Master(dj.Manual):
314+
definition = """
315+
master_id : int32
316+
"""
317+
318+
class PartA(dj.Part):
319+
definition = """
320+
-> master
321+
part_a_id : int32
322+
"""
323+
324+
class PartB(dj.Part):
325+
definition = """
326+
-> Master.PartA
327+
part_b_id : int32
328+
"""
329+
330+
Master.insert([(1,), (2,)])
331+
Master.PartA.insert([(1, 10), (1, 11), (2, 20)])
332+
Master.PartB.insert([(1, 10, 100), (1, 10, 101), (1, 11, 110), (2, 20, 200)])
333+
334+
# Cascade preview: deleting one PartB row must propagate up to PartA and Master.
335+
counts = dj.Diagram.cascade(
336+
Master.PartB & {"master_id": 1, "part_a_id": 10, "part_b_id": 100},
337+
part_integrity="cascade",
338+
).counts()
339+
340+
# Master row (1,) is the originating Part's master — must appear with count 1
341+
assert counts.get(Master.full_table_name, 0) == 1, (
342+
f"Master restricted by 1 row; got {counts.get(Master.full_table_name)}. "
343+
"Indicates the Part→Master upward propagation did not reach the Master "
344+
"through the intermediate PartA."
345+
)
346+
# Master cascades back down to ALL of master_id=1's Parts
347+
assert counts.get(Master.PartA.full_table_name, 0) == 2 # rows 10, 11
348+
assert counts.get(Master.PartB.full_table_name, 0) == 3 # rows under master_id=1
349+
350+
351+
def test_cascade_part_of_part_renamed_fk(schema_by_backend):
352+
"""
353+
Case 1 from #1429: PartB references PartA via a renamed FK (`.proj()`).
354+
PartB has no attribute named `master_id` (renamed to `src_master`). The
355+
upward propagation must use the FK metadata, not assume shared attribute
356+
names.
357+
"""
358+
359+
@schema_by_backend
360+
class Master(dj.Manual):
361+
definition = """
362+
master_id : int32
363+
"""
364+
365+
class PartA(dj.Part):
366+
definition = """
367+
-> master
368+
part_a_id : int32
369+
"""
370+
371+
class PartB(dj.Part):
372+
definition = """
373+
-> Master.PartA.proj(src_master='master_id', src_part='part_a_id')
374+
part_b_id : int32
375+
"""
376+
377+
Master.insert([(1,), (2,)])
378+
Master.PartA.insert([(1, 10), (2, 20)])
379+
Master.PartB.insert([(1, 10, 100), (2, 20, 200)])
380+
381+
# PartB has columns: src_master, src_part, part_b_id — NOT master_id.
382+
counts = dj.Diagram.cascade(
383+
Master.PartB & {"src_master": 1, "src_part": 10, "part_b_id": 100},
384+
part_integrity="cascade",
385+
).counts()
386+
387+
assert counts.get(Master.full_table_name, 0) == 1, (
388+
f"Master restricted by 1 row; got {counts.get(Master.full_table_name)}. "
389+
"Renamed FK was not reversed when propagating up to Master."
390+
)
391+
assert counts.get(Master.PartA.full_table_name, 0) == 1
392+
assert counts.get(Master.PartB.full_table_name, 0) == 1
393+
394+
395+
def test_cascade_part_of_part_actual_delete(schema_by_backend):
396+
"""
397+
End-to-end: actually run delete() with part_integrity="cascade" through
398+
a Part-of-Part chain. Verifies the upward propagation produces SQL that
399+
executes (no MySQL 1093 self-reference; correct row removal).
400+
"""
401+
402+
@schema_by_backend
403+
class Master(dj.Manual):
404+
definition = """
405+
master_id : int32
406+
"""
407+
408+
class PartA(dj.Part):
409+
definition = """
410+
-> master
411+
part_a_id : int32
412+
"""
413+
414+
class PartB(dj.Part):
415+
definition = """
416+
-> Master.PartA
417+
part_b_id : int32
418+
"""
419+
420+
Master.insert([(1,), (2,)])
421+
Master.PartA.insert([(1, 10), (2, 20)])
422+
Master.PartB.insert([(1, 10, 100), (2, 20, 200)])
423+
424+
(Master.PartB & {"master_id": 1}).delete(part_integrity="cascade")
425+
426+
# master_id=1 chain is entirely gone; master_id=2 chain intact.
427+
assert len(Master()) == 1
428+
assert Master().fetch1("master_id") == 2
429+
assert len(Master.PartA()) == 1
430+
assert len(Master.PartB()) == 1

0 commit comments

Comments
 (0)