From 31237a15611595bbf8d7a65b12e2772294e21b12 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 16:23:30 -0500 Subject: [PATCH 01/10] fix wrong op type check, resulting in stuck pending/remove profiles --- server/datastore/mysql/apple_mdm.go | 2 +- server/datastore/mysql/apple_mdm_ddm_test.go | 95 ++++++++++++++++++++ 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index 8b654574d6e..d09ee45238e 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -6042,7 +6042,7 @@ ON DUPLICATE KEY UPDATE for _, c := range current { // Skip updates for 'remove' operations because it is possible that IT admin removed a profile and then re-added it. // Pending removes are cleaned up after we update status of installs. - if u, ok := updatesByToken[c.Token]; ok && u.OperationType != fleet.MDMOperationTypeRemove { + if u, ok := updatesByToken[c.Token]; ok && c.OperationType != fleet.MDMOperationTypeRemove { insertVals.WriteString("(?, ?, ?, ?, ?, ?, ?, UNHEX(?), ?),") args = append(args, hostUUID, c.DeclarationUUID, u.Status, u.OperationType, u.Detail, c.Identifier, c.Name, c.Token, c.SecretsUpdatedAt) diff --git a/server/datastore/mysql/apple_mdm_ddm_test.go b/server/datastore/mysql/apple_mdm_ddm_test.go index 0a412635fc5..61a13e1c971 100644 --- a/server/datastore/mysql/apple_mdm_ddm_test.go +++ b/server/datastore/mysql/apple_mdm_ddm_test.go @@ -21,6 +21,7 @@ func TestMDMDDMApple(t *testing.T) { fn func(t *testing.T, ds *Datastore) }{ {"TestMDMAppleBatchSetHostDeclarationState", testMDMAppleBatchSetHostDeclarationState}, + {"StoreDDMStatusReportSkipsRemoveRows", testStoreDDMStatusReportSkipsRemoveRows}, } for _, c := range cases { @@ -276,3 +277,97 @@ func testMDMAppleBatchSetHostDeclarationState(t *testing.T, ds *Datastore) { } }) } + +func testStoreDDMStatusReportSkipsRemoveRows(t *testing.T, ds *Datastore) { + ctx := t.Context() + + // Create a test host + host, err := ds.NewHost(ctx, &fleet.Host{ + Hostname: "test-host-ddm-status", + UUID: "test-host-uuid-ddm-status", + HardwareSerial: "ABC123-DDM-STATUS", + PrimaryIP: "192.168.1.50", + PrimaryMac: "00:00:00:00:00:50", + OsqueryHostID: ptr.String("test-host-uuid-ddm-status"), + NodeKey: ptr.String("test-host-uuid-ddm-status"), + DetailUpdatedAt: time.Now(), + Platform: "darwin", + }) + require.NoError(t, err) + + setupMDMDeviceAndEnrollment(t, ds, ctx, host.UUID, host.HardwareSerial) + + // Insert two rows into host_mdm_apple_declarations: + // Row A: a remove-operation row (simulating a declaration pending removal) + // Row B: a normal install-operation row + insertHostDeclaration(t, ds, ctx, host.UUID, "decl-remove", "shared-token", "pending", "remove", "com.example.remove") + insertHostDeclaration(t, ds, ctx, host.UUID, "decl-install", "install-token", "pending", "install", "com.example.install") + + // Query back the HEX tokens from the DB (MDMAppleStoreDDMStatusReport reads tokens as HEX) + type tokenRow struct { + DeclarationUUID string `db:"declaration_uuid"` + Token string `db:"token"` + } + var tokens []tokenRow + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.SelectContext(ctx, q, &tokens, ` + SELECT declaration_uuid, HEX(token) as token + FROM host_mdm_apple_declarations + WHERE host_uuid = ? + ORDER BY declaration_uuid`, host.UUID) + }) + require.Len(t, tokens, 2) + + tokenByDecl := make(map[string]string, 2) + for _, tok := range tokens { + tokenByDecl[tok.DeclarationUUID] = tok.Token + } + tokenA := tokenByDecl["decl-remove"] + tokenB := tokenByDecl["decl-install"] + require.NotEmpty(t, tokenA) + require.NotEmpty(t, tokenB) + + // Build the updates slice as if the device is reporting status. + // The device always reports operation_type='install' for all declarations. + updates := []*fleet.MDMAppleHostDeclaration{ + { + Token: tokenA, + Status: new(fleet.MDMDeliveryStatus), + OperationType: fleet.MDMOperationTypeInstall, + }, + { + Token: tokenB, + Status: new(fleet.MDMDeliveryStatus), + OperationType: fleet.MDMOperationTypeInstall, + }, + } + *updates[0].Status = fleet.MDMDeliveryVerified + *updates[1].Status = fleet.MDMDeliveryVerified + + // Call the method under test + err = ds.MDMAppleStoreDDMStatusReport(ctx, host.UUID, updates) + require.NoError(t, err) + + // Assert the end state. + type resultRow struct { + DeclarationUUID string `db:"declaration_uuid"` + Token string `db:"token"` + Status string `db:"status"` + OperationType string `db:"operation_type"` + } + var remaining []resultRow + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.SelectContext(ctx, q, &remaining, ` + SELECT declaration_uuid, HEX(token) as token, status, operation_type + FROM host_mdm_apple_declarations + WHERE host_uuid = ? + ORDER BY declaration_uuid`, host.UUID) + }) + + // With the fix: only the install row should remain (remove row was skipped then deleted) + // With the bug: both rows remain, and the remove row was flipped to install/verified + require.Len(t, remaining, 1, "expected remove row to not survive as install/verified — this is the token collision bug") + assert.Equal(t, "decl-install", remaining[0].DeclarationUUID) + assert.Equal(t, "install", remaining[0].OperationType) + assert.Equal(t, "verified", remaining[0].Status) +} From 022595fa3f7e1a384035b0e4bb13942a450d4d2f Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:17:33 -0500 Subject: [PATCH 02/10] Ensure we batch process remove declarations first so we can cleanup --- server/datastore/mysql/apple_mdm.go | 30 ++--- server/datastore/mysql/apple_mdm_ddm_test.go | 135 +++++++++++++++++++ 2 files changed, 150 insertions(+), 15 deletions(-) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index d09ee45238e..b8d4842dbce 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -5965,19 +5965,6 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC entitiesToInstallQuery, entitiesToInstallArgs := generateEntitiesToInstallQuery("declaration", "") entitiesToRemoveQuery, entitiesToRemoveArgs := generateEntitiesToRemoveQuery("declaration") stmt := fmt.Sprintf(` - ( - SELECT - ds.host_uuid, - 'install' as operation_type, - ds.token, - ds.secrets_updated_at, - ds.declaration_uuid, - ds.declaration_identifier, - ds.declaration_name - FROM - %s - ) - UNION ALL ( SELECT hmae.host_uuid, @@ -5990,13 +5977,26 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC FROM %s ) + UNION ALL + ( + SELECT + ds.host_uuid, + 'install' as operation_type, + ds.token, + ds.secrets_updated_at, + ds.declaration_uuid, + ds.declaration_identifier, + ds.declaration_name + FROM + %s + ) `, - entitiesToInstallQuery, entitiesToRemoveQuery, + entitiesToInstallQuery, ) var decls []*fleet.MDMAppleHostDeclaration - args := slices.Concat(entitiesToInstallArgs, entitiesToRemoveArgs) + args := slices.Concat(entitiesToRemoveArgs, entitiesToInstallArgs) if err := sqlx.SelectContext(ctx, tx, &decls, stmt, args...); err != nil { return nil, ctxerr.Wrap(ctx, err, "running sql statement") } diff --git a/server/datastore/mysql/apple_mdm_ddm_test.go b/server/datastore/mysql/apple_mdm_ddm_test.go index 61a13e1c971..c611f2de415 100644 --- a/server/datastore/mysql/apple_mdm_ddm_test.go +++ b/server/datastore/mysql/apple_mdm_ddm_test.go @@ -22,6 +22,7 @@ func TestMDMDDMApple(t *testing.T) { }{ {"TestMDMAppleBatchSetHostDeclarationState", testMDMAppleBatchSetHostDeclarationState}, {"StoreDDMStatusReportSkipsRemoveRows", testStoreDDMStatusReportSkipsRemoveRows}, + {"CleanUpDuplicateRemoveInstallAcrossBatches", testCleanUpDuplicateRemoveInstallAcrossBatches}, } for _, c := range cases { @@ -371,3 +372,137 @@ func testStoreDDMStatusReportSkipsRemoveRows(t *testing.T, ds *Datastore) { assert.Equal(t, "install", remaining[0].OperationType) assert.Equal(t, "verified", remaining[0].Status) } + +func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) { + ctx := t.Context() + + // Create 2 declarations with the same raw_json (so they produce the same token). + // D1 simulates the "old" declaration that was already installed on hosts. + // D2 simulates the "new" declaration (same content, different UUID — e.g. after a name change caused delete+reinsert). + declJSON := []byte(`{"Type":"com.apple.configuration.test","Identifier":"com.example.cleanup"}`) + + d1, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ + DeclarationUUID: "decl-old", + Name: "Old Declaration", + Identifier: "com.example.cleanup", + RawJSON: declJSON, + }) + require.NoError(t, err) + + // D2 has different name but same content/identifier — same token + d2, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ + DeclarationUUID: "decl-new", + Name: "New Declaration", + Identifier: "com.example.cleanup.new", + RawJSON: declJSON, + }) + require.NoError(t, err) + + // Query the token for D1 from mdm_apple_declarations (generated column) + var d1Token string + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.GetContext(ctx, q, &d1Token, + "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", d1.DeclarationUUID) + }) + var d2Token string + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.GetContext(ctx, q, &d2Token, + "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", d2.DeclarationUUID) + }) + require.Equal(t, d1Token, d2Token, "declarations with same raw_json should have the same token") + + // Create 3 hosts, all enrolled + hosts := make([]*fleet.Host, 3) + for i := 0; i < 3; i++ { + hostUUID := fmt.Sprintf("cleanup-host-%d", i) + hosts[i], err = ds.NewHost(ctx, &fleet.Host{ + Hostname: fmt.Sprintf("cleanup-host-%d", i), + UUID: hostUUID, + HardwareSerial: fmt.Sprintf("CLEANUP-%d", i), + PrimaryIP: fmt.Sprintf("192.168.10.%d", i+1), + PrimaryMac: fmt.Sprintf("00:00:00:00:10:%02d", i+1), + OsqueryHostID: ptr.String(hostUUID), + NodeKey: ptr.String(hostUUID), + DetailUpdatedAt: time.Now(), + Platform: "darwin", + }) + require.NoError(t, err) + setupMDMDeviceAndEnrollment(t, ds, ctx, hostUUID, hosts[i].HardwareSerial) + } + + // For each host, insert a host_declaration row for D1 with status=verified, operation_type=install. + // This simulates D1 being currently installed on each host. + // NOTE: We use UNHEX(?) with the hex token directly (not insertHostDeclaration which does + // UNHEX(MD5(?))) because we need the token to match the generated column in mdm_apple_declarations exactly. + for _, h := range hosts { + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + _, err := q.ExecContext(ctx, ` + INSERT INTO host_mdm_apple_declarations + (host_uuid, declaration_uuid, status, operation_type, token, declaration_identifier) + VALUES (?, ?, 'verified', 'install', UNHEX(?), ?)`, + h.UUID, d1.DeclarationUUID, d1Token, d1.Identifier) + return err + }) + } + + // Now delete D1 from mdm_apple_declarations (simulating IT admin removing the old declaration). + // The host_mdm_apple_declarations rows for D1 remain — the reconciler will mark them for removal. + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + _, err := q.ExecContext(ctx, "DELETE FROM mdm_apple_declarations WHERE declaration_uuid = ?", d1.DeclarationUUID) + return err + }) + + // Force a small batch size so installs and removes end up in different batches. + // The UNION ALL query returns installs first, then removes. With 3 hosts: + // - 3 install rows (D2 for each host) + 3 remove rows (D1 for each host) = 6 rows + // - batch size 2 → batch 1: 2 installs, batch 2: 1 install + 1 remove, batch 3: 2 removes + // When batch 1 runs cleanUpDuplicateRemoveInstall, the removes haven't been upserted to + // status='pending' yet — they still have status='verified' — so the cleanup finds no match. + ds.testUpsertMDMDesiredProfilesBatchSize = 2 + t.Cleanup(func() { ds.testUpsertMDMDesiredProfilesBatchSize = 0 }) + + // Run the reconciler + _, err = ds.MDMAppleBatchSetHostDeclarationState(ctx) + require.NoError(t, err) + + // Assert: for each host, the cleanup should have: + // 1. Deleted the D1 remove row (same token as D2 install — duplicate remove/install) + // 2. Marked D2 install as verified with resync=1 + // With the bug: the D1 remove row survives because cleanup ran per-batch and missed + // cross-batch matches. Both D1 (remove/pending) and D2 (install/pending) exist. + type declRow struct { + DeclarationUUID string `db:"declaration_uuid"` + OperationType string `db:"operation_type"` + Status string `db:"status"` + Resync bool `db:"resync"` + Token *string `db:"token"` + } + for _, h := range hosts { + var rows []declRow + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.SelectContext(ctx, q, &rows, ` + SELECT declaration_uuid, operation_type, COALESCE(status, '') as status, resync, HEX(token) as token + FROM host_mdm_apple_declarations + WHERE host_uuid = ? + ORDER BY declaration_uuid`, h.UUID) + }) + + // Build a map by declaration UUID for easier assertions + rowByDecl := make(map[string]declRow, len(rows)) + for _, r := range rows { + rowByDecl[r.DeclarationUUID] = r + } + + // D1 remove row should NOT exist (cleaned up because same token as D2 install) + _, d1Exists := rowByDecl[d1.DeclarationUUID] + assert.False(t, d1Exists, "host %s: D1 remove row should have been cleaned up (same token as D2 install)", h.UUID) + + // D2 install row should exist as verified with resync=1 + d2Row, d2Exists := rowByDecl[d2.DeclarationUUID] + if assert.True(t, d2Exists, "host %s: D2 install row should exist", h.UUID) { + assert.Equal(t, "install", d2Row.OperationType, "host %s: D2 should be install", h.UUID) + assert.Equal(t, "verified", d2Row.Status, "host %s: D2 should be marked verified by cleanup", h.UUID) + assert.True(t, d2Row.Resync, "host %s: D2 should have resync=1", h.UUID) + } + } +} From ca02a507abad64b75a11f8b9498309413fcecf32 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:32:48 -0500 Subject: [PATCH 03/10] add cleanup function to reconciler to self-heal stuck pending/remove with verified install --- server/datastore/mysql/apple_mdm.go | 28 +++++- server/datastore/mysql/apple_mdm_ddm_test.go | 95 ++++++++++++++++++++ 2 files changed, 121 insertions(+), 2 deletions(-) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index b8d4842dbce..1ee2b43784d 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -5725,7 +5725,14 @@ func (ds *Datastore) MDMAppleBatchSetHostDeclarationState(ctx context.Context) ( err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error { var err error uuids, _, err = mdmAppleBatchSetHostDeclarationStateDB(ctx, tx, batchSize, &fleet.MDMDeliveryPending) - return err + if err != nil { + return err + } + + // Safety net: always clean up orphaned remove/pending rows, even when + // there are no changed declarations. This handles stuck rows from + // previous runs that can't self-heal via device status reports. + return cleanUpOrphanedPendingRemoves(ctx, tx) }) return uuids, ctxerr.Wrap(ctx, err, "upserting host declaration state") @@ -5878,6 +5885,23 @@ func mdmAppleBatchSetPendingHostDeclarationsDB( return updatedDB, ctxerr.Wrap(ctx, err, "inserting changed host declaration state") } +// cleanUpOrphanedPendingRemoves deletes remove/pending rows from +// host_mdm_apple_declarations where a matching install row already exists with +// the same host, token, and identifier in a verified or verifying state. This +// means the declaration content is already on the device — the remove is stale. +func cleanUpOrphanedPendingRemoves(ctx context.Context, tx sqlx.ExtContext) error { + _, err := tx.ExecContext(ctx, ` + DELETE r FROM host_mdm_apple_declarations r + INNER JOIN host_mdm_apple_declarations i + ON r.host_uuid = i.host_uuid + AND r.token = i.token + AND r.declaration_identifier = i.declaration_identifier + WHERE r.operation_type = 'remove' AND r.status = 'pending' + AND i.operation_type = 'install' + AND i.status IN ('verified', 'verifying')`) + return ctxerr.Wrap(ctx, err, "deleting orphaned remove/pending rows") +} + func cleanUpDuplicateRemoveInstall(ctx context.Context, tx sqlx.ExtContext, profilesToInsert map[string]*fleet.MDMAppleHostDeclaration) error { // If we are inserting a declaration that has a matching pending "remove" declaration (same hash), // we will mark the insert as verified. Why? Because there is nothing for the host to do if the same @@ -5966,7 +5990,7 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC entitiesToRemoveQuery, entitiesToRemoveArgs := generateEntitiesToRemoveQuery("declaration") stmt := fmt.Sprintf(` ( - SELECT + SELECT hmae.host_uuid, 'remove' as operation_type, hmae.token, diff --git a/server/datastore/mysql/apple_mdm_ddm_test.go b/server/datastore/mysql/apple_mdm_ddm_test.go index c611f2de415..1e64ca3edfb 100644 --- a/server/datastore/mysql/apple_mdm_ddm_test.go +++ b/server/datastore/mysql/apple_mdm_ddm_test.go @@ -23,6 +23,7 @@ func TestMDMDDMApple(t *testing.T) { {"TestMDMAppleBatchSetHostDeclarationState", testMDMAppleBatchSetHostDeclarationState}, {"StoreDDMStatusReportSkipsRemoveRows", testStoreDDMStatusReportSkipsRemoveRows}, {"CleanUpDuplicateRemoveInstallAcrossBatches", testCleanUpDuplicateRemoveInstallAcrossBatches}, + {"CleanUpOrphanedPendingRemoves", testCleanUpOrphanedPendingRemoves}, } for _, c := range cases { @@ -506,3 +507,97 @@ func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) } } } + +func testCleanUpOrphanedPendingRemoves(t *testing.T, ds *Datastore) { + ctx := t.Context() + + // This test simulates the "already stuck" scenario: a host has an orphaned + // remove/pending row and a matching install/verified row with the same token + // and identifier. No new declarations are changed, so the reconciler's + // changedDeclarations is empty. The cleanUpOrphanedPendingRemoves safety net + // in MDMAppleBatchSetHostDeclarationState should still clean it up. + + host, err := ds.NewHost(ctx, &fleet.Host{ + Hostname: "test-host-orphan", + UUID: "test-host-uuid-orphan", + HardwareSerial: "ORPHAN-001", + PrimaryIP: "192.168.20.1", + PrimaryMac: "00:00:00:00:20:01", + OsqueryHostID: ptr.String("test-host-uuid-orphan"), + NodeKey: ptr.String("test-host-uuid-orphan"), + DetailUpdatedAt: time.Now(), + Platform: "darwin", + }) + require.NoError(t, err) + setupMDMDeviceAndEnrollment(t, ds, ctx, host.UUID, host.HardwareSerial) + + // Create a declaration so we have a valid token to work with. + decl, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ + DeclarationUUID: "decl-orphan-test", + Name: "Orphan Test Declaration", + Identifier: "com.example.orphan", + RawJSON: []byte(`{"Type":"com.apple.configuration.test","Identifier":"com.example.orphan"}`), + }) + require.NoError(t, err) + + // Get the token from mdm_apple_declarations + var hexToken string + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.GetContext(ctx, q, &hexToken, + "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", decl.DeclarationUUID) + }) + + // Simulate the stuck state: insert both an install/verified row (new UUID) + // and a remove/pending row (old UUID) with the same token and identifier. + // Use UNHEX(?) directly to get the exact same binary token. + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + _, err := q.ExecContext(ctx, ` + INSERT INTO host_mdm_apple_declarations + (host_uuid, declaration_uuid, status, operation_type, token, declaration_identifier) + VALUES (?, ?, 'verified', 'install', UNHEX(?), ?)`, + host.UUID, decl.DeclarationUUID, hexToken, decl.Identifier) + return err + }) + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + _, err := q.ExecContext(ctx, ` + INSERT INTO host_mdm_apple_declarations + (host_uuid, declaration_uuid, status, operation_type, token, declaration_identifier) + VALUES (?, ?, 'pending', 'remove', UNHEX(?), ?)`, + host.UUID, "decl-orphan-old-uuid", hexToken, decl.Identifier) + return err + }) + + // Verify both rows exist before the reconciler runs. + var countBefore int + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.GetContext(ctx, q, &countBefore, ` + SELECT COUNT(*) FROM host_mdm_apple_declarations WHERE host_uuid = ?`, host.UUID) + }) + require.Equal(t, 2, countBefore) + + // Run the reconciler. There are no changed declarations (the install row + // matches the desired state, and the remove row is excluded by the + // reconciler's query filter). The safety net should still clean up. + _, err = ds.MDMAppleBatchSetHostDeclarationState(ctx) + require.NoError(t, err) + + // Assert: the orphaned remove/pending row should be deleted. + type resultRow struct { + DeclarationUUID string `db:"declaration_uuid"` + OperationType string `db:"operation_type"` + Status string `db:"status"` + } + var remaining []resultRow + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.SelectContext(ctx, q, &remaining, ` + SELECT declaration_uuid, operation_type, status + FROM host_mdm_apple_declarations + WHERE host_uuid = ? + ORDER BY declaration_uuid`, host.UUID) + }) + + require.Len(t, remaining, 1, "orphaned remove/pending row should have been cleaned up") + assert.Equal(t, decl.DeclarationUUID, remaining[0].DeclarationUUID) + assert.Equal(t, "install", remaining[0].OperationType) + assert.Equal(t, "verified", remaining[0].Status) +} From 7d62c981a8abde56613c0059e4d396ad2b8baea4 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:35:27 -0500 Subject: [PATCH 04/10] add changelog --- changes/40332-fix-ddm-pending-issues | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changes/40332-fix-ddm-pending-issues diff --git a/changes/40332-fix-ddm-pending-issues b/changes/40332-fix-ddm-pending-issues new file mode 100644 index 00000000000..eb214be9252 --- /dev/null +++ b/changes/40332-fix-ddm-pending-issues @@ -0,0 +1,3 @@ +- Fixed an issue where the DDM reconciler would not self-heal for stuck remove/pending profiles due to resend with update. +- Fixed an issue where a host DDM cleanup function was not executed for stale remove/pending profiles that wasn't reported by the device. +- Fixed an issue where batch processing many DDM profile changes would result in stuck remove/pending profiles. \ No newline at end of file From 09ebc4fa502b6480794d09317ecd243b2bb54f92 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:37:03 -0500 Subject: [PATCH 05/10] add comment --- server/datastore/mysql/apple_mdm.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index 1ee2b43784d..8de4ed420e4 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -6015,6 +6015,9 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC %s ) `, + // We specifically want Removals first, since we later batch process + // and rely on removals being updated so the matching install entry + // can find it and clean it up avoiding leaving stale pending removals in the database. entitiesToRemoveQuery, entitiesToInstallQuery, ) From aeffad7f7138a7221768d7cc687a7588a6b9fa72 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:39:33 -0500 Subject: [PATCH 06/10] fix changelog file name --- ...{40332-fix-ddm-pending-issues => 40322-fix-ddm-pending-issues} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename changes/{40332-fix-ddm-pending-issues => 40322-fix-ddm-pending-issues} (100%) diff --git a/changes/40332-fix-ddm-pending-issues b/changes/40322-fix-ddm-pending-issues similarity index 100% rename from changes/40332-fix-ddm-pending-issues rename to changes/40322-fix-ddm-pending-issues From 43447eb40809838f4628c94bd883c41c1ffb781c Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:46:41 -0500 Subject: [PATCH 07/10] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- server/datastore/mysql/apple_mdm_ddm_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/datastore/mysql/apple_mdm_ddm_test.go b/server/datastore/mysql/apple_mdm_ddm_test.go index 1e64ca3edfb..28bf112b0dc 100644 --- a/server/datastore/mysql/apple_mdm_ddm_test.go +++ b/server/datastore/mysql/apple_mdm_ddm_test.go @@ -394,7 +394,7 @@ func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) d2, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ DeclarationUUID: "decl-new", Name: "New Declaration", - Identifier: "com.example.cleanup.new", + Identifier: "com.example.cleanup", RawJSON: declJSON, }) require.NoError(t, err) @@ -454,10 +454,10 @@ func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) }) // Force a small batch size so installs and removes end up in different batches. - // The UNION ALL query returns installs first, then removes. With 3 hosts: - // - 3 install rows (D2 for each host) + 3 remove rows (D1 for each host) = 6 rows - // - batch size 2 → batch 1: 2 installs, batch 2: 1 install + 1 remove, batch 3: 2 removes - // When batch 1 runs cleanUpDuplicateRemoveInstall, the removes haven't been upserted to + // The UNION ALL query returns removes first, then installs. With 3 hosts: + // - 3 remove rows (D1 for each host) + 3 install rows (D2 for each host) = 6 rows + // - batch size 2 → batch 1: 2 removes, batch 2: 1 remove + 1 install, batch 3: 2 installs + // When batch 1 runs cleanUpDuplicateRemoveInstall, the matching installs haven't been upserted to // status='pending' yet — they still have status='verified' — so the cleanup finds no match. ds.testUpsertMDMDesiredProfilesBatchSize = 2 t.Cleanup(func() { ds.testUpsertMDMDesiredProfilesBatchSize = 0 }) From 22eaabfbcbd78444b64ce39df4270e67a9def265 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:47:03 -0500 Subject: [PATCH 08/10] Update changes/40322-fix-ddm-pending-issues Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- changes/40322-fix-ddm-pending-issues | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changes/40322-fix-ddm-pending-issues b/changes/40322-fix-ddm-pending-issues index eb214be9252..fa5a6e6a1af 100644 --- a/changes/40322-fix-ddm-pending-issues +++ b/changes/40322-fix-ddm-pending-issues @@ -1,3 +1,3 @@ - Fixed an issue where the DDM reconciler would not self-heal for stuck remove/pending profiles due to resend with update. -- Fixed an issue where a host DDM cleanup function was not executed for stale remove/pending profiles that wasn't reported by the device. +- Fixed an issue where a host DDM cleanup function was not executed for stale remove/pending profiles that weren't reported by the device. - Fixed an issue where batch processing many DDM profile changes would result in stuck remove/pending profiles. \ No newline at end of file From 57382d365c6cbae0ab2dbbe4a3a2597542cc81ad Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 17:52:11 -0500 Subject: [PATCH 09/10] gate cleanup in reconciler with exists check to avoid expensive delete locks --- server/datastore/mysql/apple_mdm.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/server/datastore/mysql/apple_mdm.go b/server/datastore/mysql/apple_mdm.go index 8de4ed420e4..501f0f1c141 100644 --- a/server/datastore/mysql/apple_mdm.go +++ b/server/datastore/mysql/apple_mdm.go @@ -5890,7 +5890,26 @@ func mdmAppleBatchSetPendingHostDeclarationsDB( // the same host, token, and identifier in a verified or verifying state. This // means the declaration content is already on the device — the remove is stale. func cleanUpOrphanedPendingRemoves(ctx context.Context, tx sqlx.ExtContext) error { - _, err := tx.ExecContext(ctx, ` + var found bool + err := sqlx.GetContext(ctx, tx, &found, ` + SELECT EXISTS ( + SELECT 1 FROM host_mdm_apple_declarations r + INNER JOIN host_mdm_apple_declarations i + ON r.host_uuid = i.host_uuid + AND r.token = i.token + AND r.declaration_identifier = i.declaration_identifier + WHERE r.operation_type = 'remove' AND r.status = 'pending' + AND i.operation_type = 'install' + AND i.status IN ('verified', 'verifying') + LIMIT 1)`) + if err != nil { + return ctxerr.Wrap(ctx, err, "checking for orphaned remove/pending rows") + } + if !found { + return nil + } + + _, err = tx.ExecContext(ctx, ` DELETE r FROM host_mdm_apple_declarations r INNER JOIN host_mdm_apple_declarations i ON r.host_uuid = i.host_uuid From 3aa45c955bc503de45532df22382d1bf3c3a2389 Mon Sep 17 00:00:00 2001 From: Magnus Jensen Date: Thu, 9 Apr 2026 18:57:49 -0500 Subject: [PATCH 10/10] fix test order --- server/datastore/mysql/apple_mdm_ddm_test.go | 39 ++++++++++---------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/server/datastore/mysql/apple_mdm_ddm_test.go b/server/datastore/mysql/apple_mdm_ddm_test.go index 28bf112b0dc..b128e4d6cc1 100644 --- a/server/datastore/mysql/apple_mdm_ddm_test.go +++ b/server/datastore/mysql/apple_mdm_ddm_test.go @@ -377,9 +377,9 @@ func testStoreDDMStatusReportSkipsRemoveRows(t *testing.T, ds *Datastore) { func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) { ctx := t.Context() - // Create 2 declarations with the same raw_json (so they produce the same token). // D1 simulates the "old" declaration that was already installed on hosts. - // D2 simulates the "new" declaration (same content, different UUID — e.g. after a name change caused delete+reinsert). + // D2 simulates the "new" declaration (same content/identifier, different UUID — e.g. after a name change caused delete+reinsert). + // They share the same identifier, so D1 must be deleted before D2 is created (unique constraint on team_id+identifier). declJSON := []byte(`{"Type":"com.apple.configuration.test","Identifier":"com.example.cleanup"}`) d1, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ @@ -390,31 +390,16 @@ func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) }) require.NoError(t, err) - // D2 has different name but same content/identifier — same token - d2, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ - DeclarationUUID: "decl-new", - Name: "New Declaration", - Identifier: "com.example.cleanup", - RawJSON: declJSON, - }) - require.NoError(t, err) - // Query the token for D1 from mdm_apple_declarations (generated column) var d1Token string ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { return sqlx.GetContext(ctx, q, &d1Token, "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", d1.DeclarationUUID) }) - var d2Token string - ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { - return sqlx.GetContext(ctx, q, &d2Token, - "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", d2.DeclarationUUID) - }) - require.Equal(t, d1Token, d2Token, "declarations with same raw_json should have the same token") // Create 3 hosts, all enrolled hosts := make([]*fleet.Host, 3) - for i := 0; i < 3; i++ { + for i := range 3 { hostUUID := fmt.Sprintf("cleanup-host-%d", i) hosts[i], err = ds.NewHost(ctx, &fleet.Host{ Hostname: fmt.Sprintf("cleanup-host-%d", i), @@ -446,13 +431,29 @@ func testCleanUpDuplicateRemoveInstallAcrossBatches(t *testing.T, ds *Datastore) }) } - // Now delete D1 from mdm_apple_declarations (simulating IT admin removing the old declaration). + // Delete D1 from mdm_apple_declarations (simulating IT admin removing the old declaration). // The host_mdm_apple_declarations rows for D1 remain — the reconciler will mark them for removal. ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { _, err := q.ExecContext(ctx, "DELETE FROM mdm_apple_declarations WHERE declaration_uuid = ?", d1.DeclarationUUID) return err }) + // Now create D2 with the same identifier (possible since D1 was just deleted) and same raw_json → same token. + d2, err := ds.NewMDMAppleDeclaration(ctx, &fleet.MDMAppleDeclaration{ + DeclarationUUID: "decl-new", + Name: "New Declaration", + Identifier: "com.example.cleanup", + RawJSON: declJSON, + }) + require.NoError(t, err) + + var d2Token string + ExecAdhocSQL(t, ds, func(q sqlx.ExtContext) error { + return sqlx.GetContext(ctx, q, &d2Token, + "SELECT HEX(token) FROM mdm_apple_declarations WHERE declaration_uuid = ?", d2.DeclarationUUID) + }) + require.Equal(t, d1Token, d2Token, "declarations with same raw_json should have the same token") + // Force a small batch size so installs and removes end up in different batches. // The UNION ALL query returns removes first, then installs. With 3 hosts: // - 3 remove rows (D1 for each host) + 3 install rows (D2 for each host) = 6 rows