From 9d29084cf01687417ed7289fdbfb212605dbb695 Mon Sep 17 00:00:00 2001 From: Nagadeesh Nagaraja Date: Tue, 25 Nov 2025 19:59:51 +0100 Subject: [PATCH 1/3] Fix unwanted reconcile in Server and enqueue logic in maintenance --- bmc/bmc.go | 1 + bmc/mock/server/server.go | 5 + bmc/redfish.go | 1 + .../biossettings_controller_test.go | 53 ++ .../biossettingsset_controller_test.go | 18 + .../controller/biosversion_controller_test.go | 16 + .../biosversionset_controller_test.go | 18 + .../controller/bmcsettings_controller_test.go | 25 +- .../controller/bmcversion_controller_test.go | 10 + .../controller/bmcversionset_controller.go | 10 +- .../bmcversionset_controller_test.go | 70 ++- internal/controller/server_controller.go | 460 ++++++++++-------- internal/controller/server_controller_test.go | 30 ++ .../servermaintenance_controller.go | 19 +- 14 files changed, 521 insertions(+), 215 deletions(-) diff --git a/bmc/bmc.go b/bmc/bmc.go index e7bd1cd0a..ef743b3eb 100644 --- a/bmc/bmc.go +++ b/bmc/bmc.go @@ -309,6 +309,7 @@ type SystemInfo struct { SerialNumber string SKU string IndicatorLED string + BIOSVersion string } // Manager represents the manager information. diff --git a/bmc/mock/server/server.go b/bmc/mock/server/server.go index c5e6b5241..840a3a5dc 100644 --- a/bmc/mock/server/server.go +++ b/bmc/mock/server/server.go @@ -326,6 +326,11 @@ func handleSystemReset(s *MockServer, r *http.Request, urlPath string, body []by if val, ok := base[ResourceLockKey]; ok && val == LockedResourceState { s.mu.Unlock() s.log.Info("System resource is locked, cannot perform reset", base) + go func() { + // unlock after waiting period incase of stuck lock + time.Sleep(300 * time.Millisecond) + delete(base, ResourceLockKey) + }() return errors.New("system resource locked, cannot perform reset") } base[ResourceLockKey] = LockedResourceState diff --git a/bmc/redfish.go b/bmc/redfish.go index 37699ec65..a6d47a5cc 100644 --- a/bmc/redfish.go +++ b/bmc/redfish.go @@ -283,6 +283,7 @@ func (r *RedfishBMC) GetSystemInfo(ctx context.Context, systemURI string) (Syste SKU: system.SKU, IndicatorLED: string(system.IndicatorLED), TotalSystemMemory: quantity, + BIOSVersion: system.BIOSVersion, }, nil } diff --git a/internal/controller/biossettings_controller_test.go b/internal/controller/biossettings_controller_test.go index cecc5479a..7a77a2809 100644 --- a/internal/controller/biossettings_controller_test.go +++ b/internal/controller/biossettings_controller_test.go @@ -178,6 +178,14 @@ var _ = Describe("BIOSSettings Controller", func() { Eventually(Object(server)).Should( HaveField("Spec.BIOSSettingsRef.Name", biosSettingsV2.Name), ) + By("Deleting the BIOSSettings V2 (new)") + Expect(k8sClient.Delete(ctx, biosSettingsV2)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Spec.BIOSSettingsRef", BeNil()), + ) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("should move to completed if no bios setting changes to referred server", func(ctx SpecContext) { @@ -239,6 +247,9 @@ var _ = Describe("BIOSSettings Controller", func() { Eventually(Object(server)).Should( HaveField("Spec.BIOSSettingsRef", BeNil()), ) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should request maintenance when changing power status of server, even if bios settings update does not need it", func(ctx SpecContext) { @@ -386,6 +397,10 @@ var _ = Describe("BIOSSettings Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).Should(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) It("should create maintenance if setting update needs reboot", func(ctx SpecContext) { @@ -519,7 +534,12 @@ var _ = Describe("BIOSSettings Controller", func() { Consistently(Get(serverMaintenance)).Should(Satisfy(apierrors.IsNotFound)) // cleanup + Expect(k8sClient.Delete(ctx, biosSettings)).Should(Succeed()) Expect(k8sClient.Delete(ctx, serverClaim)).Should(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) It("should update setting if server is in available state", func(ctx SpecContext) { @@ -612,6 +632,9 @@ var _ = Describe("BIOSSettings Controller", func() { Eventually(Object(server)).Should( HaveField("Spec.BIOSSettingsRef", BeNil()), ) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("should wait for upgrade and reconcile when biosSettings version is correct", func(ctx SpecContext) { @@ -735,6 +758,9 @@ var _ = Describe("BIOSSettings Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("should allow retry using annotation", func(ctx SpecContext) { @@ -787,6 +813,9 @@ var _ = Describe("BIOSSettings Controller", func() { Expect(k8sClient.Delete(ctx, biosSettings)).To(Succeed()) Eventually(Get(biosSettings)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) @@ -996,6 +1025,10 @@ var _ = Describe("BIOSSettings Controller with BMCRef BMC", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).Should(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) }) @@ -1098,6 +1131,9 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { By("Deleting the BIOSSetting") Expect(k8sClient.Delete(ctx, biosSettings)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should fail if duplicate keys in names or settings found", func(ctx SpecContext) { @@ -1202,6 +1238,11 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { By("Deleting the biosSettings2") Expect(k8sClient.Delete(ctx, biosSettings2)).To(Succeed()) + By("Deleting the biosSettings") + Expect(k8sClient.Delete(ctx, biosSettings)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully apply sequence of different settings and reconcile from applied state", func(ctx SpecContext) { @@ -1243,6 +1284,12 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { By("Ensuring that the BIOSSettings conditions are updated") ensureBiosSettingsFlowCondition(biosSettings) + // move server back to available state (to avoid initial/discovery state loop) + By("Ensure that the Server is in available state") + Eventually(UpdateStatus(server, func() { + server.Status.State = metalv1alpha1.ServerStateAvailable + })).Should(Succeed()) + // should reconcile again from the Applied state when the settings has been changed Eventually(Update(biosSettings, func() { biosSettings.Spec.SettingsFlow[1].Settings = map[string]string{"PowerProfile": "OsDbpm"} @@ -1261,6 +1308,9 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { By("Deleting the BIOSSettings") Expect(k8sClient.Delete(ctx, biosSettings)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("should successfully apply sequence of settings when the names and priority changed, before the settings update was issued on server", func(ctx SpecContext) { @@ -1354,6 +1404,9 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { By("Deleting the BIOSSetting") Expect(k8sClient.Delete(ctx, biosSettings)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/biossettingsset_controller_test.go b/internal/controller/biossettingsset_controller_test.go index fb1f02cb3..6b6d30793 100644 --- a/internal/controller/biossettingsset_controller_test.go +++ b/internal/controller/biossettingsset_controller_test.go @@ -216,6 +216,15 @@ var _ = Describe("BIOSSettingsSet Controller", func() { Eventually(Get(biosSettings03)).Should(Satisfy(apierrors.IsNotFound)) Expect(k8sClient.Delete(ctx, biosSettings02)).To(Succeed()) Eventually(Get(biosSettings02)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully reconcile the resource when server are deleted/created", func(ctx SpecContext) { @@ -386,5 +395,14 @@ var _ = Describe("BIOSSettingsSet Controller", func() { Eventually(Get(biosSettings02)).Should(Satisfy(apierrors.IsNotFound)) Expect(k8sClient.Delete(ctx, biosSettings03)).To(Succeed()) Eventually(Get(biosSettings03)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/biosversion_controller_test.go b/internal/controller/biosversion_controller_test.go index 5944a293e..28ec0afed 100644 --- a/internal/controller/biosversion_controller_test.go +++ b/internal/controller/biosversion_controller_test.go @@ -123,6 +123,9 @@ var _ = Describe("BIOSVersion Controller", func() { By("Ensuring that the BiosVersion has been removed") Eventually(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) Consistently(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully Start and monitor Upgrade task to completion", func(ctx SpecContext) { @@ -223,6 +226,9 @@ var _ = Describe("BIOSVersion Controller", func() { By("Ensuring that the BiosVersion has been removed") Eventually(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) Consistently(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should upgrade servers BIOS when in reserved state", func(ctx SpecContext) { @@ -361,6 +367,10 @@ var _ = Describe("BIOSVersion Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).To(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) It("Should allow retry using annotation", func(ctx SpecContext) { @@ -402,6 +412,9 @@ var _ = Describe("BIOSVersion Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, biosVersion)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) @@ -568,6 +581,9 @@ var _ = Describe("BIOSVersion Controller with BMCRef BMC", func() { By("Ensuring that the BiosVersion has been removed") Eventually(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) Consistently(Get(biosVersion)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/biosversionset_controller_test.go b/internal/controller/biosversionset_controller_test.go index ac0dc7563..c19c278d2 100644 --- a/internal/controller/biosversionset_controller_test.go +++ b/internal/controller/biosversionset_controller_test.go @@ -224,6 +224,15 @@ var _ = Describe("BIOSVersionSet Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, biosVersion02)).To(Succeed()) Expect(k8sClient.Delete(ctx, biosVersion03)).To(Succeed()) + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully reconcile the resource when BMC are deleted/created", func(ctx SpecContext) { @@ -385,5 +394,14 @@ var _ = Describe("BIOSVersionSet Controller", func() { Expect(k8sClient.Delete(ctx, biosVersion01)).To(Succeed()) Expect(k8sClient.Delete(ctx, biosVersion02)).To(Succeed()) Expect(k8sClient.Delete(ctx, biosVersion03)).To(Succeed()) + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/bmcsettings_controller_test.go b/internal/controller/bmcsettings_controller_test.go index f363f1da6..acebba339 100644 --- a/internal/controller/bmcsettings_controller_test.go +++ b/internal/controller/bmcsettings_controller_test.go @@ -202,6 +202,14 @@ var _ = Describe("BMCSettings Controller", func() { HaveField("Status.State", metalv1alpha1.BMCSettingsStateApplied), )) + By("Ensuring that the Maintenance resource has been deleted") + var serverMaintenanceList metalv1alpha1.ServerMaintenanceList + Eventually(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", BeEmpty())) + Consistently(ObjectList(&serverMaintenanceList)).Should(HaveField("Items", BeEmpty())) + Consistently(Object(bmcSettings)).Should(SatisfyAll( + HaveField("Spec.ServerMaintenanceRefs", BeNil()), + )) + By("Deleting the BMCSettings") Expect(k8sClient.Delete(ctx, bmcSettings)).To(Succeed()) @@ -211,9 +219,9 @@ var _ = Describe("BMCSettings Controller", func() { )) // cleanup - Eventually(UpdateStatus(server, func() { - server.Status.State = metalv1alpha1.ServerStateAvailable - })).Should(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should create maintenance and wait for its approval before applying settings", func(ctx SpecContext) { @@ -336,6 +344,10 @@ var _ = Describe("BMCSettings Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).To(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) It("Should wait for upgrade and reconcile BMCSettings version is correct", func(ctx SpecContext) { @@ -428,6 +440,10 @@ var _ = Describe("BMCSettings Controller", func() { Eventually(Object(bmc)).Should(SatisfyAll( HaveField("Spec.BMCSettingRef", BeNil()), )) + + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should allow retry using annotation", func(ctx SpecContext) { @@ -478,5 +494,8 @@ var _ = Describe("BMCSettings Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, bmcSettings)).To(Succeed()) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/bmcversion_controller_test.go b/internal/controller/bmcversion_controller_test.go index ce4241178..7ec5ce4a4 100644 --- a/internal/controller/bmcversion_controller_test.go +++ b/internal/controller/bmcversion_controller_test.go @@ -134,6 +134,9 @@ var _ = Describe("BMCVersion Controller", func() { By("Ensuring that the BMCVersion has been removed") Eventually(Get(bmcVersion)).Should(Satisfy(apierrors.IsNotFound)) Consistently(Get(bmcVersion)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully Start and monitor Upgrade task to completion", func(ctx SpecContext) { @@ -231,6 +234,9 @@ var _ = Describe("BMCVersion Controller", func() { By("Ensuring that the BMCVersion has been removed") Eventually(Get(bmcVersion)).Should(Satisfy(apierrors.IsNotFound)) Consistently(Get(bmcVersion)).Should(Satisfy(apierrors.IsNotFound)) + Eventually(Object(server)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should upgrade servers BMC when server in reserved state", func(ctx SpecContext) { @@ -364,6 +370,10 @@ var _ = Describe("BMCVersion Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, serverClaim)).To(Succeed()) + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateReserved))), + )) }) It("Should allow retry using annotation", func(ctx SpecContext) { diff --git a/internal/controller/bmcversionset_controller.go b/internal/controller/bmcversionset_controller.go index 2cbb3db43..0436b01c9 100644 --- a/internal/controller/bmcversionset_controller.go +++ b/internal/controller/bmcversionset_controller.go @@ -280,7 +280,15 @@ func (r *BMCVersionSetReconciler) patchBMCVersionfromTemplate( continue } opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, &bmcVersion, func() error { - bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy() + if bmcVersionTemplate.ServerMaintenanceRefs != nil { + bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy() + } else { + // preserve existing serverMaintenanceRefs if not set in the template + // temporary solution to avoid accidental deletion of Refs and unit test failure until PR #515 is merged + existingServerMaintenanceRefs := bmcVersion.Spec.ServerMaintenanceRefs + bmcVersion.Spec.BMCVersionTemplate = *bmcVersionTemplate.DeepCopy() + bmcVersion.Spec.ServerMaintenanceRefs = existingServerMaintenanceRefs + } return nil }) //nolint:errcheck if err != nil { diff --git a/internal/controller/bmcversionset_controller_test.go b/internal/controller/bmcversionset_controller_test.go index 112232583..7e1363a99 100644 --- a/internal/controller/bmcversionset_controller_test.go +++ b/internal/controller/bmcversionset_controller_test.go @@ -31,6 +31,9 @@ var _ = Describe("BMCVersionSet Controller", func() { bmc01 *metalv1alpha1.BMC bmc02 *metalv1alpha1.BMC bmc03 *metalv1alpha1.BMC + server01 *metalv1alpha1.Server + server02 *metalv1alpha1.Server + server03 *metalv1alpha1.Server bmcSecret *metalv1alpha1.BMCSecret upgradeServerBMCVersion string ) @@ -75,6 +78,17 @@ var _ = Describe("BMCVersionSet Controller", func() { } Expect(k8sClient.Create(ctx, bmc01)).To(Succeed()) + server01 = &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc01), + }, + } + + By("Ensuring that the server01 is in available state") + Eventually(UpdateStatus(server01, func() { + server01.Status.State = metalv1alpha1.ServerStateAvailable + })).Should(Succeed()) + By("Creating a bmc02") bmc02 = &metalv1alpha1.BMC{ ObjectMeta: metav1.ObjectMeta{ @@ -100,6 +114,17 @@ var _ = Describe("BMCVersionSet Controller", func() { } Expect(k8sClient.Create(ctx, bmc02)).To(Succeed()) + server02 = &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc02), + }, + } + + By("Ensuring that the server02 is in available state") + Eventually(UpdateStatus(server02, func() { + server02.Status.State = metalv1alpha1.ServerStateAvailable + })).Should(Succeed()) + By("Creating a bmc03") bmc03 = &metalv1alpha1.BMC{ ObjectMeta: metav1.ObjectMeta{ @@ -124,6 +149,16 @@ var _ = Describe("BMCVersionSet Controller", func() { }, } Expect(k8sClient.Create(ctx, bmc03)).To(Succeed()) + server03 = &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc03), + }, + } + + By("Ensuring that the server03 is in available state") + Eventually(UpdateStatus(server03, func() { + server03.Status.State = metalv1alpha1.ServerStateAvailable + })).Should(Succeed()) }) AfterEach(func(ctx SpecContext) { @@ -135,11 +170,6 @@ var _ = Describe("BMCVersionSet Controller", func() { Expect(k8sClient.Delete(ctx, &maintenance)).To(Succeed()) } - server01 := &metalv1alpha1.Server{ - ObjectMeta: metav1.ObjectMeta{ - Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc01), - }, - } Eventually(UpdateStatus(server01, func() { server01.Status.State = metalv1alpha1.ServerStateAvailable })).Should(Succeed()) @@ -147,11 +177,6 @@ var _ = Describe("BMCVersionSet Controller", func() { Expect(k8sClient.Delete(ctx, server01)).To(Succeed()) Eventually(Get(server01)).Should(Satisfy(apierrors.IsNotFound)) - server02 := &metalv1alpha1.Server{ - ObjectMeta: metav1.ObjectMeta{ - Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc02), - }, - } Eventually(UpdateStatus(server02, func() { server02.Status.State = metalv1alpha1.ServerStateAvailable })).Should(Succeed()) @@ -159,11 +184,6 @@ var _ = Describe("BMCVersionSet Controller", func() { Expect(k8sClient.Delete(ctx, server02)).To(Succeed()) Eventually(Get(server02)).Should(Satisfy(apierrors.IsNotFound)) - server03 := &metalv1alpha1.Server{ - ObjectMeta: metav1.ObjectMeta{ - Name: bmcutils.GetServerNameFromBMCandIndex(0, bmc03), - }, - } Eventually(UpdateStatus(server03, func() { server03.Status.State = metalv1alpha1.ServerStateAvailable })).Should(Succeed()) @@ -269,6 +289,15 @@ var _ = Describe("BMCVersionSet Controller", func() { // cleanup Expect(k8sClient.Delete(ctx, bmcVersion01)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcVersion02)).To(Succeed()) + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) It("Should successfully reconcile the resource when BMC are deleted/created", func(ctx SpecContext) { @@ -455,6 +484,8 @@ var _ = Describe("BMCVersionSet Controller", func() { HaveField("Status.InProgressBMCVersion", BeNumerically("==", 0)), HaveField("Status.FailedBMCVersion", BeNumerically("==", 0)), )) + var serverMaintainceList metalv1alpha1.ServerMaintenanceList + Eventually(ObjectList(&serverMaintainceList)).Should(HaveField("Items", HaveLen(0))) // cleanup Expect(k8sClient.Delete(ctx, bmcVersionSet)).Should(Succeed()) @@ -467,5 +498,14 @@ var _ = Describe("BMCVersionSet Controller", func() { } Expect(k8sClient.Delete(ctx, bmcVersion)).To(Succeed()) } + Eventually(Object(server01)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server02)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) + Eventually(Object(server03)).Should( + HaveField("Status.State", Not(Equal(metalv1alpha1.ServerStateMaintenance))), + ) }) }) diff --git a/internal/controller/server_controller.go b/internal/controller/server_controller.go index 9bf2e8e70..ae69910d2 100644 --- a/internal/controller/server_controller.go +++ b/internal/controller/server_controller.go @@ -227,7 +227,7 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve } log.V(1).Info("Updated Server BIOS boot order") - _, err = r.ensureServerStateTransition(ctx, log, bmcClient, server) + modified, err := r.ensureServerStateTransition(ctx, log, bmcClient, server) if err != nil { return ctrl.Result{}, fmt.Errorf("failed to ensure server state transition: %w", err) } @@ -236,6 +236,11 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve if err := r.updateServerStatus(ctx, log, bmcClient, server); err != nil { return ctrl.Result{}, fmt.Errorf("failed to update server status: %w", err) } + // Server has beem modified during state transition, requeue to process the new state + // no need to requeAfter duration as there there as an event to process the new state + if modified { + return ctrl.Result{}, nil + } log.V(1).Info("Reconciled Server") return ctrl.Result{RequeueAfter: r.ResyncInterval}, nil @@ -292,18 +297,18 @@ func (r *ServerReconciler) handleInitialState(ctx context.Context, log logr.Logg } log.V(1).Info("Initial conditions for Server met") - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to ensure server power state: %w", err) + if modified, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } log.V(1).Info("Ensured power state for Server") - if err := r.updateServerStatusFromSystemInfo(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to update server status system info: %w", err) + if modified, err := r.updateServerStatusFromSystemInfo(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } log.V(1).Info("Updated Server status system info") - if err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, log, server); err != nil { - return false, fmt.Errorf("failed to apply server boot configuration: %w", err) + if modified, err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, log, server); err != nil || modified { + return modified, err } log.V(1).Info("Applied Server boot configuration") @@ -313,7 +318,7 @@ func (r *ServerReconciler) handleInitialState(ctx context.Context, log logr.Logg log.V(1).Info("Set PXE Boot for Server") if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateDiscovery); err != nil || modified { - return false, err + return modified, err } return false, nil } @@ -321,73 +326,86 @@ func (r *ServerReconciler) handleInitialState(ctx context.Context, log logr.Logg func (r *ServerReconciler) handleDiscoveryState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready { log.V(1).Info("Server boot configuration is not ready. Retrying ...") - return true, err + return false, err } log.V(1).Info("Server boot configuration is ready") - serverBase := server.DeepCopy() - server.Spec.Power = metalv1alpha1.PowerOn - if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Spec.Power = metalv1alpha1.PowerOn + return nil + }) + if err != nil { return false, fmt.Errorf("failed to update server power state: %w", err) } - log.V(1).Info("Updated Server power state", "PowerState", metalv1alpha1.PowerOn) - - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to ensure server power state: %w", err) + if opResult != controllerutil.OperationResultNone { + log.V(1).Info("Updated Server power state", "PowerState", metalv1alpha1.PowerOn) + return true, nil } - log.V(1).Info("Server state set to power on") - if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { - return false, fmt.Errorf("failed to patch Server status: %w", err) + if modified, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } + log.V(1).Info("Server state set to power on") if r.checkLastStatusUpdateAfter(r.DiscoveryTimeout, server) { log.V(1).Info("Server did not post info to registry in time, back to initial state") if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateInitial); err != nil || modified { - return false, err + return modified, err } } - ready, err := r.extractServerDetailsFromRegistry(ctx, log, server) - if !ready && err == nil { - log.V(1).Info("Server agent did not post info to registry") - return true, nil - } + nics, err := r.extractServerDetailsFromRegistry(log, server) if err != nil { - log.V(1).Info("Could not get server details from registry.") + log.V(1).Info("Could not get server details from registry.", "error", err) return false, err } + if nics == nil { + return false, nil + } log.V(1).Info("Extracted Server details") + opResult, err = controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Status.NetworkInterfaces = nics + server.Status.State = metalv1alpha1.ServerStateAvailable + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to patch server status: %w", err) + } if err := r.invalidateRegistryEntryForServer(log, server); err != nil { return false, fmt.Errorf("failed to invalidate registry entry for server: %w", err) } log.V(1).Info("Removed Server from Registry") - - log.V(1).Info("Setting Server state set to available") - if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified { - return false, err + // make sure to update the Status before reconciling again + if opResult != controllerutil.OperationResultNone { + log.V(1).Info("Patched Server status with network interfaces") + return true, nil } return false, nil } func (r *ServerReconciler) handleAvailableState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { - serverBase := server.DeepCopy() if server.Status.PowerState != metalv1alpha1.ServerOffPowerState { - server.Spec.Power = metalv1alpha1.PowerOff - if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Spec.Power = metalv1alpha1.PowerOff + return nil + }) + if err != nil { return false, fmt.Errorf("failed to update server power state: %w", err) } + if opResult != controllerutil.OperationResultNone { + return true, nil + } log.V(1).Info("Updated Server power state", "PowerState", metalv1alpha1.PowerOff) - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to ensure server power state: %w", err) + if modified, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } - log.V(1).Info("Server state set to power off") } + log.V(1).Info("Server state set to power off") - if err := r.ensureInitialBootConfigurationIsDeleted(ctx, server); err != nil { - return false, fmt.Errorf("failed to ensure server initial boot configuration is deleted: %w", err) + if modified, err := r.ensureInitialBootConfigurationIsDeleted(ctx, server); err != nil || modified { + return false, err } log.V(1).Info("Ensured initial boot configuration is deleted") @@ -396,11 +414,11 @@ func (r *ServerReconciler) handleAvailableState(ctx context.Context, log logr.Lo } if server.Spec.ServerClaimRef != nil { if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateReserved); err != nil || modified { - return true, err + return modified, err } } log.V(1).Info("Reconciled available state") - return true, nil + return false, nil } func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { @@ -408,12 +426,12 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Log // back to available state. if server.Spec.ServerClaimRef == nil { if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified { - return true, err + return modified, err } } if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready { log.V(1).Info("Server boot configuration is not ready. Retrying ...") - return true, err + return false, err } log.V(1).Info("Server boot configuration is ready") @@ -428,10 +446,16 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Log "ServerClaim not found, removing ServerClaimRef", "Server", server.Name, "ServerClaim", server.Spec.ServerClaimRef.Name) - serverBase := server.DeepCopy() - server.Spec.ServerClaimRef = nil - if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { - return false, fmt.Errorf("failed to remove ServerClaimRef: %w", err) + + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Spec.ServerClaimRef = nil + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to patch Server to remove ServerClaimRef: %w", err) + } + if opResult != controllerutil.OperationResultNone { + return true, nil } return false, nil } @@ -445,47 +469,55 @@ func (r *ServerReconciler) handleReservedState(ctx context.Context, log logr.Log } log.V(1).Info("Server is powered off, booting Server in PXE") } - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to ensure server power state: %w", err) + if modified, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } if err := r.ensureIndicatorLED(ctx, log, server); err != nil { return false, fmt.Errorf("failed to ensure server indicator led: %w", err) } log.V(1).Info("Reconciled reserved state") - return true, nil + return false, nil } func (r *ServerReconciler) handleMaintenanceState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { if server.Spec.ServerMaintenanceRef == nil { log.V(1).Info("Server is in Maintenance state, but no ServerMaintenanceRef is set, transitioning back to previous state") // update system info in case the server was changed during Maintenance state (hardwere changes, biosVersion etc.) - if err := r.updateServerStatusFromSystemInfo(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to update server status system info: %w", err) + if modified, err := r.updateServerStatusFromSystemInfo(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } if server.Spec.ServerClaimRef == nil { return r.patchServerState(ctx, server, metalv1alpha1.ServerStateInitial) } return r.patchServerState(ctx, server, metalv1alpha1.ServerStateReserved) } - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { - return false, fmt.Errorf("failed to ensure server power state: %w", err) + if modified, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil || modified { + return modified, err } - log.V(1).Info("Reconciled maintenance state") return false, nil } -func (r *ServerReconciler) ensureServerBootConfigRef(ctx context.Context, server *metalv1alpha1.Server, config *metalv1alpha1.ServerBootConfiguration) error { - serverBase := server.DeepCopy() - server.Spec.BootConfigurationRef = &v1.ObjectReference{ - Namespace: config.Namespace, - Name: config.Name, - UID: config.UID, - APIVersion: "metal.ironcore.dev/v1alpha1", - Kind: "ServerBootConfiguration", - } - return r.Patch(ctx, server, client.MergeFrom(serverBase)) +func (r *ServerReconciler) ensureServerBootConfigRef(ctx context.Context, server *metalv1alpha1.Server, config *metalv1alpha1.ServerBootConfiguration) (bool, error) { + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Spec.BootConfigurationRef = &v1.ObjectReference{ + Namespace: config.Namespace, + Name: config.Name, + UID: config.UID, + APIVersion: "metal.ironcore.dev/v1alpha1", + Kind: "ServerBootConfiguration", + } + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to create or patch Server BootConfigurationRef: %w", err) + } + + if opResult != controllerutil.OperationResultNone { + return true, nil + } + return false, nil } // updates the Server status which can be changed via Spec @@ -508,84 +540,88 @@ func (r *ServerReconciler) updateServerStatus(ctx context.Context, log logr.Logg return nil } -func (r *ServerReconciler) updateServerStatusFromSystemInfo(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) error { - serverBase := server.DeepCopy() +func (r *ServerReconciler) updateServerStatusFromSystemInfo(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { systemInfo, err := bmcClient.GetSystemInfo(ctx, server.Spec.SystemURI) if err != nil { - return fmt.Errorf("failed to get system info for Server: %w", err) + return false, fmt.Errorf("failed to get system info for Server: %w", err) } - biosVersion, err := bmcClient.GetBiosVersion(ctx, server.Spec.SystemURI) - if err != nil { - return fmt.Errorf("failed to get BIOS version for Server: %w", err) - } - server.Status.BIOSVersion = biosVersion - server.Status.PowerState = metalv1alpha1.ServerPowerState(systemInfo.PowerState) - server.Status.SerialNumber = systemInfo.SerialNumber - server.Status.SKU = systemInfo.SKU - server.Status.Manufacturer = systemInfo.Manufacturer - server.Status.Model = systemInfo.Model - server.Status.TotalSystemMemory = &systemInfo.TotalSystemMemory - processors, err := bmcClient.GetProcessors(ctx, server.Spec.SystemURI) if err != nil { - return fmt.Errorf("failed to get processors for Server: %w", err) - } - server.Status.Processors = make([]metalv1alpha1.Processor, 0, len(processors)) - for _, processor := range processors { - server.Status.Processors = append(server.Status.Processors, metalv1alpha1.Processor{ - ID: processor.ID, - Type: processor.Type, - Architecture: processor.Architecture, - InstructionSet: processor.InstructionSet, - Manufacturer: processor.Manufacturer, - Model: processor.Model, - MaxSpeedMHz: processor.MaxSpeedMHz, - TotalCores: processor.TotalCores, - TotalThreads: processor.TotalThreads, - }) + return false, fmt.Errorf("failed to get processors for Server: %w", err) } storages, err := bmcClient.GetStorages(ctx, server.Spec.SystemURI) if err != nil { - return fmt.Errorf("failed to get storages for Server: %w", err) - } - server.Status.Storages = nil - for _, storage := range storages { - metalStorage := metalv1alpha1.Storage{ - Name: storage.Name, - State: metalv1alpha1.StorageState(storage.State), - } - for _, drive := range storage.Drives { - metalStorage.Drives = append(metalStorage.Drives, metalv1alpha1.StorageDrive{ - Name: drive.Name, - Model: drive.Model, - Vendor: drive.Vendor, - Capacity: resource.NewQuantity(drive.SizeBytes, resource.BinarySI), - Type: string(drive.Type), - State: metalv1alpha1.StorageState(drive.State), - MediaType: drive.MediaType, + return false, fmt.Errorf("failed to get storages for Server: %w", err) + } + + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Status.BIOSVersion = systemInfo.BIOSVersion + server.Status.PowerState = metalv1alpha1.ServerPowerState(systemInfo.PowerState) + server.Status.SerialNumber = systemInfo.SerialNumber + server.Status.SKU = systemInfo.SKU + server.Status.Manufacturer = systemInfo.Manufacturer + server.Status.Model = systemInfo.Model + server.Status.TotalSystemMemory = &systemInfo.TotalSystemMemory + + server.Status.Processors = make([]metalv1alpha1.Processor, 0, len(processors)) + for _, processor := range processors { + server.Status.Processors = append(server.Status.Processors, metalv1alpha1.Processor{ + ID: processor.ID, + Type: processor.Type, + Architecture: processor.Architecture, + InstructionSet: processor.InstructionSet, + Manufacturer: processor.Manufacturer, + Model: processor.Model, + MaxSpeedMHz: processor.MaxSpeedMHz, + TotalCores: processor.TotalCores, + TotalThreads: processor.TotalThreads, }) } - metalStorage.Volumes = make([]metalv1alpha1.StorageVolume, 0, len(storage.Volumes)) - for _, volume := range storage.Volumes { - metalStorage.Volumes = append(metalStorage.Volumes, metalv1alpha1.StorageVolume{ - Name: volume.Name, - Capacity: resource.NewQuantity(volume.SizeBytes, resource.BinarySI), - State: metalv1alpha1.StorageState(volume.State), - RAIDType: string(volume.RAIDType), - VolumeUsage: volume.VolumeUsage, - }) + + server.Status.Storages = nil + for _, storage := range storages { + metalStorage := metalv1alpha1.Storage{ + Name: storage.Name, + State: metalv1alpha1.StorageState(storage.State), + } + for _, drive := range storage.Drives { + metalStorage.Drives = append(metalStorage.Drives, metalv1alpha1.StorageDrive{ + Name: drive.Name, + Model: drive.Model, + Vendor: drive.Vendor, + Capacity: resource.NewQuantity(drive.SizeBytes, resource.BinarySI), + Type: string(drive.Type), + State: metalv1alpha1.StorageState(drive.State), + MediaType: drive.MediaType, + }) + } + metalStorage.Volumes = make([]metalv1alpha1.StorageVolume, 0, len(storage.Volumes)) + for _, volume := range storage.Volumes { + metalStorage.Volumes = append(metalStorage.Volumes, metalv1alpha1.StorageVolume{ + Name: volume.Name, + Capacity: resource.NewQuantity(volume.SizeBytes, resource.BinarySI), + State: metalv1alpha1.StorageState(volume.State), + RAIDType: string(volume.RAIDType), + VolumeUsage: volume.VolumeUsage, + }) + } + server.Status.Storages = append(server.Status.Storages, metalStorage) } - server.Status.Storages = append(server.Status.Storages, metalStorage) + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to create or patch Server Status SystemInfo: %w", err) } - if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { - return fmt.Errorf("failed to patch Server status: %w", err) + if opResult != controllerutil.OperationResultNone { + return true, nil } + log.V(1).Info("Updated Server status", "Status", server.Status.State, "powerState", server.Status.PowerState) - return nil + return false, nil } -func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error { +func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) { bootConfig := &metalv1alpha1.ServerBootConfiguration{} bootConfig.Name = server.Name bootConfig.Namespace = r.ManagerNamespace @@ -601,15 +637,19 @@ func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx con return nil }) if err != nil { - return fmt.Errorf("failed to create or patch ServerBootConfiguration: %w", err) + return false, fmt.Errorf("failed to create or patch ServerBootConfiguration: %w", err) + } + if opResult != controllerutil.OperationResultNone { + log.V(1).Info("Created or patched", "ServerBootConfiguration", bootConfig.Name, "Namespace", bootConfig.Namespace, "Operation", opResult) + return true, nil } log.V(1).Info("Created or patched", "ServerBootConfiguration", bootConfig.Name, "Namespace", bootConfig.Namespace, "Operation", opResult) - if err := r.ensureServerBootConfigRef(ctx, server, bootConfig); err != nil { - return err + if modified, err := r.ensureServerBootConfigRef(ctx, server, bootConfig); err != nil || modified { + return modified, err } - return r.applyDefaultIgnitionForServer(ctx, log, server, bootConfig, r.RegistryURL) + return false, r.applyDefaultIgnitionForServer(ctx, log, server, bootConfig, r.RegistryURL) } func (r *ServerReconciler) applyDefaultIgnitionForServer(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, bootConfig *metalv1alpha1.ServerBootConfiguration, registryURL string) error { @@ -754,8 +794,8 @@ func (r *ServerReconciler) setAndPatchServerPowerState(ctx context.Context, log return false, fmt.Errorf("failed to patch Server: %w", err) } if op == controllerutil.OperationResultUpdated { - log.V(1).Info("Server updated to power off state.") - if err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { + log.V(1).Info("Server updated", "PowerState", powerState) + if _, err := r.ensureServerPowerState(ctx, log, bmcClient, server); err != nil { log.V(1).Info("ensuring power state failed.") } return true, nil @@ -790,28 +830,27 @@ func (r *ServerReconciler) pxeBootServer(ctx context.Context, log logr.Logger, b return nil } -func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) { +func (r *ServerReconciler) extractServerDetailsFromRegistry(log logr.Logger, server *metalv1alpha1.Server) ([]metalv1alpha1.NetworkInterface, error) { resp, err := http.Get(fmt.Sprintf("%s/systems/%s", r.RegistryURL, server.Spec.SystemUUID)) if resp != nil && resp.StatusCode == http.StatusNotFound { log.V(1).Info("Did not find server information in registry") - return false, nil + return nil, nil } if resp == nil { - return false, fmt.Errorf("failed to find server information in registry") + return nil, fmt.Errorf("failed to find server information in registry") } if err != nil { - return false, fmt.Errorf("failed to fetch server details: %w", err) + return nil, fmt.Errorf("failed to fetch server details: %w", err) } serverDetails := ®istry.Server{} if err := json.NewDecoder(resp.Body).Decode(serverDetails); err != nil { - return false, fmt.Errorf("failed to decode server details: %w", err) + return nil, fmt.Errorf("failed to decode server details: %w", err) } - serverBase := server.DeepCopy() - // update network interfaces + // extracted network interfaces nics := make([]metalv1alpha1.NetworkInterface, 0, len(serverDetails.NetworkInterfaces)) for _, s := range serverDetails.NetworkInterfaces { nics = append(nics, metalv1alpha1.NetworkInterface{ @@ -820,25 +859,26 @@ func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, MACAddress: s.MACAddress, }) } - server.Status.NetworkInterfaces = nics - - if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { - return false, fmt.Errorf("failed to patch server status: %w", err) - } - - return true, nil + return nics, nil } func (r *ServerReconciler) patchServerState(ctx context.Context, server *metalv1alpha1.Server, state metalv1alpha1.ServerState) (bool, error) { if server.Status.State == state { return false, nil } - serverBase := server.DeepCopy() server.Status.State = state - if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { + + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Status.State = state + return nil + }) + if err != nil { return false, fmt.Errorf("failed to patch server state: %w", err) } - return true, nil + if opResult != controllerutil.OperationResultNone { + return true, nil + } + return false, nil } func (r *ServerReconciler) patchServerURI(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { @@ -869,10 +909,10 @@ func (r *ServerReconciler) patchServerURI(ctx context.Context, log logr.Logger, return true, nil } -func (r *ServerReconciler) ensureServerPowerState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) error { +func (r *ServerReconciler) ensureServerPowerState(ctx context.Context, log logr.Logger, bmcClient bmc.BMC, server *metalv1alpha1.Server) (bool, error) { if server.Spec.Power == "" { // no desired power state set - return nil + return false, nil } powerOp := powerOpNoOP @@ -890,63 +930,98 @@ func (r *ServerReconciler) ensureServerPowerState(ctx context.Context, log logr. if powerOp == powerOpNoOP { log.V(1).Info("Server already in target power state", "powerState", server.Status.PowerState) - return nil + return false, nil } switch powerOp { case powerOpOn: log.V(1).Info("Server Power On") if err := bmcClient.PowerOn(ctx, server.Spec.SystemURI); err != nil { - return fmt.Errorf("failed to power on server: %w", err) + return false, fmt.Errorf("failed to power on server: %w", err) } if err := bmcClient.WaitForServerPowerState(ctx, server.Spec.SystemURI, redfish.OnPowerState); err != nil { - return fmt.Errorf("failed to wait for server power on server: %w", err) + return false, fmt.Errorf("failed to wait for server power on server: %w", err) } - if err := r.updatePowerOnCondition(ctx, server); err != nil { - return fmt.Errorf("failed to update power on condition: %w", err) + if modified, err := r.updatePowerOnCondition(ctx, server); err != nil || modified { + return modified, err } case powerOpOff: log.V(1).Info("Server Power Off") powerOffType := bmcClient.PowerOff if err := powerOffType(ctx, server.Spec.SystemURI); err != nil { - return fmt.Errorf("failed to power off server: %w", err) + return false, fmt.Errorf("failed to power off server: %w", err) } if err := bmcClient.WaitForServerPowerState(ctx, server.Spec.SystemURI, redfish.OffPowerState); err != nil { if r.EnforcePowerOff { log.V(1).Info("Failed to wait for server graceful shutdown, retrying with force power off") powerOffType = bmcClient.ForcePowerOff if err := powerOffType(ctx, server.Spec.SystemURI); err != nil { - return fmt.Errorf("failed to power off server: %w", err) + return false, fmt.Errorf("failed to power off server: %w", err) } if err := bmcClient.WaitForServerPowerState(ctx, server.Spec.SystemURI, redfish.OffPowerState); err != nil { - return fmt.Errorf("failed to wait for server force power off: %w", err) + return false, fmt.Errorf("failed to wait for server force power off: %w", err) } } else { - return fmt.Errorf("failed to wait for server power off: %w", err) + return false, fmt.Errorf("failed to wait for server power off: %w", err) } } + if modified, err := r.updatePowerOffCondition(ctx, server); err != nil || modified { + return modified, err + } } log.V(1).Info("Ensured server power state", "PowerState", server.Spec.Power) + return false, nil +} - return nil +func (r *ServerReconciler) updatePowerOnCondition(ctx context.Context, server *metalv1alpha1.Server) (bool, error) { + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + acc := conditionutils.NewAccessor(conditionutils.AccessorOptions{}) + err := acc.UpdateSlice( + &server.Status.Conditions, + PoweringOnCondition, + conditionutils.UpdateStatus(metav1.ConditionTrue), + conditionutils.UpdateReason("ServerPowerOn"), + conditionutils.UpdateMessage("Server is powering on"), + conditionutils.UpdateObserved(server), + ) + if err != nil { + return fmt.Errorf("failed to update powering on condition: %w", err) + } + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to create or patch server powering on condition: %w", err) + } + if opResult == controllerutil.OperationResultUpdatedStatus || opResult == controllerutil.OperationResultUpdatedStatusOnly { + return true, nil + } + return false, nil } -func (r *ServerReconciler) updatePowerOnCondition(ctx context.Context, server *metalv1alpha1.Server) error { - original := server.DeepCopy() - acc := conditionutils.NewAccessor(conditionutils.AccessorOptions{}) - err := acc.UpdateSlice( - &server.Status.Conditions, - PoweringOnCondition, - conditionutils.UpdateStatus(metav1.ConditionTrue), - conditionutils.UpdateReason("ServerPowerOn"), - conditionutils.UpdateMessage("Server is powering on"), - conditionutils.UpdateObserved(server), - ) +func (r *ServerReconciler) updatePowerOffCondition(ctx context.Context, server *metalv1alpha1.Server) (bool, error) { + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + acc := conditionutils.NewAccessor(conditionutils.AccessorOptions{}) + err := acc.UpdateSlice( + &server.Status.Conditions, + PoweringOnCondition, + conditionutils.UpdateStatus(metav1.ConditionTrue), + conditionutils.UpdateReason("ServerPowerOff"), + conditionutils.UpdateMessage("Server is powering off"), + conditionutils.UpdateObserved(server), + ) + if err != nil { + return fmt.Errorf("failed to update powering off condition: %w", err) + } + return nil + }) if err != nil { - return fmt.Errorf("failed to update powering on condition: %w", err) + return false, fmt.Errorf("failed to create or patch server powering off condition: %w", err) + } + if opResult == controllerutil.OperationResultUpdatedStatus || opResult == controllerutil.OperationResultUpdatedStatusOnly { + return true, nil } - return r.Status().Patch(ctx, server, client.MergeFrom(original)) + return false, nil } func (r *ServerReconciler) ensureIndicatorLED(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error { @@ -954,32 +1029,37 @@ func (r *ServerReconciler) ensureIndicatorLED(ctx context.Context, log logr.Logg return nil } -func (r *ServerReconciler) ensureInitialBootConfigurationIsDeleted(ctx context.Context, server *metalv1alpha1.Server) error { +func (r *ServerReconciler) ensureInitialBootConfigurationIsDeleted(ctx context.Context, server *metalv1alpha1.Server) (bool, error) { if server.Spec.BootConfigurationRef == nil { - return nil + return false, nil } config := &metalv1alpha1.ServerBootConfiguration{} - if err := r.Get(ctx, client.ObjectKey{Namespace: server.Spec.BootConfigurationRef.Namespace, Name: server.Spec.BootConfigurationRef.Name}, config); err != nil { - return err + err := r.Get(ctx, client.ObjectKey{Namespace: server.Spec.BootConfigurationRef.Namespace, Name: server.Spec.BootConfigurationRef.Name}, config) + if err != nil && !apierrors.IsNotFound(err) { + return false, fmt.Errorf("failed to get the initial bootConfiguration %w", err) } - - if val, ok := config.Annotations[InternalAnnotationTypeKeyName]; !ok || val != InternalAnnotationTypeValue { - // hit a non-initial boot config - return nil + if err == nil { + if val, ok := config.Annotations[InternalAnnotationTypeKeyName]; !ok || val != InternalAnnotationTypeValue { + // hit a non-initial boot config + return false, nil + } + if err := r.Delete(ctx, config); err != nil { + return false, fmt.Errorf("failed to delete the initial bootConfiguration %w", err) + } } - if err := r.Delete(ctx, config); err != nil { - return err + opResult, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error { + server.Spec.BootConfigurationRef = nil + return nil + }) + if err != nil { + return false, fmt.Errorf("failed to patch server to remove bootConfiguration ref: %w", err) } - - serverBase := server.DeepCopy() - server.Spec.BootConfigurationRef = nil - if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { - return err + if opResult != controllerutil.OperationResultNone { + return true, nil } - - return nil + return false, nil } func (r *ServerReconciler) invalidateRegistryEntryForServer(log logr.Logger, server *metalv1alpha1.Server) error { diff --git a/internal/controller/server_controller_test.go b/internal/controller/server_controller_test.go index 8d99f19ea..10239c70c 100644 --- a/internal/controller/server_controller_test.go +++ b/internal/controller/server_controller_test.go @@ -562,6 +562,17 @@ var _ = Describe("Server Controller", func() { HaveField("Status.State", metalv1alpha1.ServerBootConfigurationStatePending), )) + go func(ctx SpecContext) { + for { + select { + case <-ctx.Done(): + return + default: + deleteRegistrySystemIfExists(server.Spec.SystemUUID) + } + } + }(ctx) + By("Patching the boot configuration to a Ready state") Eventually(UpdateStatus(bootConfig, func() { bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady @@ -790,3 +801,22 @@ var _ = Describe("Server Controller", func() { Eventually(Get(&bootConfig)).Should(Satisfy(apierrors.IsNotFound)) }) }) + +func deleteRegistrySystemIfExists(systemUUID string) { + response, err := http.Get(registryURL + "/systems/" + systemUUID) + if err != nil { + return + } + if response.StatusCode == http.StatusOK { + req, err := http.NewRequest(http.MethodDelete, registryURL+"/systems/"+systemUUID, nil) + if err != nil { + return + } + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return + } + defer resp.Body.Close() //nolint:errcheck + } +} diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go index c04e16a54..62440a555 100644 --- a/internal/controller/servermaintenance_controller.go +++ b/internal/controller/servermaintenance_controller.go @@ -425,19 +425,26 @@ func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.E server := object.(*metalv1alpha1.Server) var req []reconcile.Request + if server.Status.State == metalv1alpha1.ServerStateInitial { + return nil + } + maintenanceList := &metalv1alpha1.ServerMaintenanceList{} if err := r.List(ctx, maintenanceList); err != nil { log.Error(err, "failed to list host serverMaintenances") return nil } for _, maintenance := range maintenanceList.Items { - if server.Spec.ServerMaintenanceRef != nil && maintenance.Spec.ServerRef.Name == server.Spec.ServerMaintenanceRef.Name { - req = append(req, reconcile.Request{ - NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, - }) - return req + if server.Spec.ServerMaintenanceRef != nil { + if server.Spec.ServerMaintenanceRef.Name == maintenance.Name { + req = append(req, reconcile.Request{ + NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, + }) + return req + } + continue } - if server.Spec.ServerMaintenanceRef == nil { + if maintenance.Spec.ServerRef.Name == server.Name { req = append(req, reconcile.Request{ NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, }) From 099672cd51a012bab4a33e5198c2a2fe25ed60d3 Mon Sep 17 00:00:00 2001 From: Nagadeesh Nagaraja Date: Thu, 4 Dec 2025 15:35:34 +0100 Subject: [PATCH 2/3] Fix ServerMaintenance Controller enqueue logic will remove this commit once PR 549 merges --- .../servermaintenance_controller.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go index 4f410edfd..bd1f79b93 100644 --- a/internal/controller/servermaintenance_controller.go +++ b/internal/controller/servermaintenance_controller.go @@ -424,19 +424,26 @@ func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.E server := object.(*metalv1alpha1.Server) var req []reconcile.Request + if server.Status.State == metalv1alpha1.ServerStateInitial { + return nil + } + maintenanceList := &metalv1alpha1.ServerMaintenanceList{} if err := r.List(ctx, maintenanceList); err != nil { log.Error(err, "failed to list host serverMaintenances") return nil } for _, maintenance := range maintenanceList.Items { - if server.Spec.ServerMaintenanceRef != nil && maintenance.Spec.ServerRef.Name == server.Spec.ServerMaintenanceRef.Name { - req = append(req, reconcile.Request{ - NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, - }) - return req + if server.Spec.ServerMaintenanceRef != nil { + if server.Spec.ServerMaintenanceRef.Name == maintenance.Name { + req = append(req, reconcile.Request{ + NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, + }) + return req + } + continue } - if server.Spec.ServerMaintenanceRef == nil { + if maintenance.Spec.ServerRef.Name == server.Name { req = append(req, reconcile.Request{ NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, }) From 9f791d3201a8806df072e5435ea7810431a7e70f Mon Sep 17 00:00:00 2001 From: Nagadeesh Nagaraja Date: Fri, 5 Dec 2025 09:36:00 +0100 Subject: [PATCH 3/3] Revert "Fix ServerMaintenance Controller enqueue logic" This reverts commit 099672cd51a012bab4a33e5198c2a2fe25ed60d3. --- .../servermaintenance_controller.go | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/internal/controller/servermaintenance_controller.go b/internal/controller/servermaintenance_controller.go index bd1f79b93..4f410edfd 100644 --- a/internal/controller/servermaintenance_controller.go +++ b/internal/controller/servermaintenance_controller.go @@ -424,26 +424,19 @@ func (r *ServerMaintenanceReconciler) enqueueMaintenanceByServerRefs() handler.E server := object.(*metalv1alpha1.Server) var req []reconcile.Request - if server.Status.State == metalv1alpha1.ServerStateInitial { - return nil - } - maintenanceList := &metalv1alpha1.ServerMaintenanceList{} if err := r.List(ctx, maintenanceList); err != nil { log.Error(err, "failed to list host serverMaintenances") return nil } for _, maintenance := range maintenanceList.Items { - if server.Spec.ServerMaintenanceRef != nil { - if server.Spec.ServerMaintenanceRef.Name == maintenance.Name { - req = append(req, reconcile.Request{ - NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, - }) - return req - } - continue + if server.Spec.ServerMaintenanceRef != nil && maintenance.Spec.ServerRef.Name == server.Spec.ServerMaintenanceRef.Name { + req = append(req, reconcile.Request{ + NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, + }) + return req } - if maintenance.Spec.ServerRef.Name == server.Name { + if server.Spec.ServerMaintenanceRef == nil { req = append(req, reconcile.Request{ NamespacedName: types.NamespacedName{Namespace: maintenance.Namespace, Name: maintenance.Name}, })