Skip to content
3 changes: 3 additions & 0 deletions changes/40322-fix-ddm-pending-issues
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
- Fixed an issue where the DDM reconciler would not self-heal for stuck remove/pending profiles due to resend with update.
- Fixed an issue where a host DDM cleanup function was not executed for stale remove/pending profiles that weren't reported by the device.
- Fixed an issue where batch processing many DDM profile changes would result in stuck remove/pending profiles.
82 changes: 64 additions & 18 deletions server/datastore/mysql/apple_mdm.go
Original file line number Diff line number Diff line change
Expand Up @@ -5725,7 +5725,14 @@ func (ds *Datastore) MDMAppleBatchSetHostDeclarationState(ctx context.Context) (
err := ds.withRetryTxx(ctx, func(tx sqlx.ExtContext) error {
var err error
uuids, _, err = mdmAppleBatchSetHostDeclarationStateDB(ctx, tx, batchSize, &fleet.MDMDeliveryPending)
return err
if err != nil {
return err
}

// Safety net: always clean up orphaned remove/pending rows, even when
// there are no changed declarations. This handles stuck rows from
// previous runs that can't self-heal via device status reports.
return cleanUpOrphanedPendingRemoves(ctx, tx)
})

return uuids, ctxerr.Wrap(ctx, err, "upserting host declaration state")
Expand Down Expand Up @@ -5878,6 +5885,42 @@ func mdmAppleBatchSetPendingHostDeclarationsDB(
return updatedDB, ctxerr.Wrap(ctx, err, "inserting changed host declaration state")
}

// cleanUpOrphanedPendingRemoves deletes remove/pending rows from
// host_mdm_apple_declarations where a matching install row already exists with
// the same host, token, and identifier in a verified or verifying state. This
// means the declaration content is already on the device — the remove is stale.
func cleanUpOrphanedPendingRemoves(ctx context.Context, tx sqlx.ExtContext) error {
var found bool
err := sqlx.GetContext(ctx, tx, &found, `
SELECT EXISTS (
SELECT 1 FROM host_mdm_apple_declarations r
INNER JOIN host_mdm_apple_declarations i
ON r.host_uuid = i.host_uuid
AND r.token = i.token
AND r.declaration_identifier = i.declaration_identifier
WHERE r.operation_type = 'remove' AND r.status = 'pending'
AND i.operation_type = 'install'
AND i.status IN ('verified', 'verifying')
LIMIT 1)`)
if err != nil {
return ctxerr.Wrap(ctx, err, "checking for orphaned remove/pending rows")
}
if !found {
return nil
}

_, err = tx.ExecContext(ctx, `
DELETE r FROM host_mdm_apple_declarations r
INNER JOIN host_mdm_apple_declarations i
ON r.host_uuid = i.host_uuid
AND r.token = i.token
AND r.declaration_identifier = i.declaration_identifier
WHERE r.operation_type = 'remove' AND r.status = 'pending'
AND i.operation_type = 'install'
AND i.status IN ('verified', 'verifying')`)
return ctxerr.Wrap(ctx, err, "deleting orphaned remove/pending rows")
}

func cleanUpDuplicateRemoveInstall(ctx context.Context, tx sqlx.ExtContext, profilesToInsert map[string]*fleet.MDMAppleHostDeclaration) error {
// If we are inserting a declaration that has a matching pending "remove" declaration (same hash),
// we will mark the insert as verified. Why? Because there is nothing for the host to do if the same
Expand Down Expand Up @@ -5966,20 +6009,7 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC
entitiesToRemoveQuery, entitiesToRemoveArgs := generateEntitiesToRemoveQuery("declaration")
stmt := fmt.Sprintf(`
(
SELECT
ds.host_uuid,
'install' as operation_type,
ds.token,
ds.secrets_updated_at,
ds.declaration_uuid,
ds.declaration_identifier,
ds.declaration_name
FROM
%s
)
UNION ALL
(
SELECT
SELECT
hmae.host_uuid,
'remove' as operation_type,
hmae.token,
Expand All @@ -5990,13 +6020,29 @@ func mdmAppleGetHostsWithChangedDeclarationsDB(ctx context.Context, tx sqlx.ExtC
FROM
%s
)
UNION ALL
(
SELECT
ds.host_uuid,
'install' as operation_type,
ds.token,
ds.secrets_updated_at,
ds.declaration_uuid,
ds.declaration_identifier,
ds.declaration_name
FROM
%s
)
`,
entitiesToInstallQuery,
// We specifically want Removals first, since we later batch process
// and rely on removals being updated so the matching install entry
// can find it and clean it up avoiding leaving stale pending removals in the database.
entitiesToRemoveQuery,
entitiesToInstallQuery,
)

var decls []*fleet.MDMAppleHostDeclaration
args := slices.Concat(entitiesToInstallArgs, entitiesToRemoveArgs)
args := slices.Concat(entitiesToRemoveArgs, entitiesToInstallArgs)
if err := sqlx.SelectContext(ctx, tx, &decls, stmt, args...); err != nil {
return nil, ctxerr.Wrap(ctx, err, "running sql statement")
}
Expand Down Expand Up @@ -6042,7 +6088,7 @@ ON DUPLICATE KEY UPDATE
for _, c := range current {
// Skip updates for 'remove' operations because it is possible that IT admin removed a profile and then re-added it.
// Pending removes are cleaned up after we update status of installs.
if u, ok := updatesByToken[c.Token]; ok && u.OperationType != fleet.MDMOperationTypeRemove {
if u, ok := updatesByToken[c.Token]; ok && c.OperationType != fleet.MDMOperationTypeRemove {
insertVals.WriteString("(?, ?, ?, ?, ?, ?, ?, UNHEX(?), ?),")
args = append(args, hostUUID, c.DeclarationUUID, u.Status, u.OperationType, u.Detail, c.Identifier, c.Name, c.Token,
c.SecretsUpdatedAt)
Expand Down
Loading
Loading