diff --git a/api/v1alpha1/constants.go b/api/v1alpha1/constants.go index a97431db1..905404100 100644 --- a/api/v1alpha1/constants.go +++ b/api/v1alpha1/constants.go @@ -51,6 +51,9 @@ const ( // OperationAnnotationRotateCredentials is used to indicate that credentials should be rotated. OperationAnnotationRotateCredentials = "rotate-credentials" + + // OperationAnnotationRediscover deletes the ServerMetadata and re-triggers discovery. + OperationAnnotationRediscover = "rediscover" ) const ( diff --git a/api/v1alpha1/servermetadata_types.go b/api/v1alpha1/servermetadata_types.go new file mode 100644 index 000000000..76309c064 --- /dev/null +++ b/api/v1alpha1/servermetadata_types.go @@ -0,0 +1,173 @@ +// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +kubebuilder:object:root=true +// +kubebuilder:resource:scope=Cluster,shortName=smd +// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// ServerMetadata is a flat data object (no spec/status) that persists the full +// probe agent discovery payload. Similar to how Endpoints or ConfigMap store +// data directly at the root level. The relationship to its Server is +// established by using the same name and an owner reference. +type ServerMetadata struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // SystemInfo contains BIOS, system, and board information from DMI/SMBIOS. + SystemInfo MetaDataSystemInfo `json:"systemInfo,omitempty"` + + // CPU is a list of CPUs discovered on the server. + CPU []MetaDataCPU `json:"cpu,omitempty"` + + // NetworkInterfaces is a list of network interfaces discovered on the server. + NetworkInterfaces []MetaDataNetworkInterface `json:"networkInterfaces,omitempty"` + + // LLDP contains LLDP neighbor information per interface. 
+ LLDP []MetaDataLLDPInterface `json:"lldp,omitempty"` + + // Storage is a list of block devices discovered on the server. + Storage []MetaDataBlockDevice `json:"storage,omitempty"` + + // Memory is a list of memory devices discovered on the server. + Memory []MetaDataMemoryDevice `json:"memory,omitempty"` + + // NICs is a list of raw NIC details (PCI address, speed, firmware). + NICs []MetaDataNIC `json:"nics,omitempty"` + + // PCIDevices is a list of PCI devices discovered on the server. + PCIDevices []MetaDataPCIDevice `json:"pciDevices,omitempty"` +} + +// +kubebuilder:object:root=true + +// ServerMetadataList contains a list of ServerMetadata. +type ServerMetadataList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []ServerMetadata `json:"items"` +} + +type MetaDataSystemInfo struct { + BIOSInformation MetaDataBIOSInformation `json:"biosInformation,omitempty"` + SystemInformation MetaDataServerInformation `json:"systemInformation,omitempty"` + BoardInformation MetaDataBoardInformation `json:"boardInformation,omitempty"` +} + +type MetaDataBIOSInformation struct { + Vendor string `json:"vendor,omitempty"` + Version string `json:"version,omitempty"` + Date string `json:"date,omitempty"` +} + +type MetaDataServerInformation struct { + Manufacturer string `json:"manufacturer,omitempty"` + ProductName string `json:"productName,omitempty"` + Version string `json:"version,omitempty"` + SerialNumber string `json:"serialNumber,omitempty"` + UUID string `json:"uuid,omitempty"` + SKUNumber string `json:"skuNumber,omitempty"` + Family string `json:"family,omitempty"` +} + +type MetaDataBoardInformation struct { + Manufacturer string `json:"manufacturer,omitempty"` + Product string `json:"product,omitempty"` + Version string `json:"version,omitempty"` + SerialNumber string `json:"serialNumber,omitempty"` + AssetTag string `json:"assetTag,omitempty"` +} + +type MetaDataCPU struct { + ID int `json:"id"` + TotalCores uint32 
`json:"totalCores,omitempty"` + TotalHardwareThreads uint32 `json:"totalHardwareThreads,omitempty"` + Vendor string `json:"vendor,omitempty"` + Model string `json:"model,omitempty"` + Capabilities []string `json:"capabilities,omitempty"` +} + +type MetaDataNetworkInterface struct { + Name string `json:"name"` + IPAddresses []string `json:"ipAddresses,omitempty"` + MACAddress string `json:"macAddress"` + CarrierStatus string `json:"carrierStatus,omitempty"` +} + +type MetaDataLLDPInterface struct { + Name string `json:"name"` + Neighbors []MetaDataLLDPNeighbor `json:"neighbors,omitempty"` +} + +type MetaDataLLDPNeighbor struct { + ChassisID string `json:"chassisId,omitempty"` + PortID string `json:"portId,omitempty"` + PortDescription string `json:"portDescription,omitempty"` + SystemName string `json:"systemName,omitempty"` + SystemDescription string `json:"systemDescription,omitempty"` + MgmtIP string `json:"mgmtIp,omitempty"` + Capabilities []string `json:"capabilities,omitempty"` + VlanID string `json:"vlanId,omitempty"` +} + +type MetaDataBlockDevice struct { + Path string `json:"path,omitempty"` + Name string `json:"name,omitempty"` + Rotational bool `json:"rotational,omitempty"` + Removable bool `json:"removable,omitempty"` + ReadOnly bool `json:"readOnly,omitempty"` + Vendor string `json:"vendor,omitempty"` + Model string `json:"model,omitempty"` + Serial string `json:"serial,omitempty"` + WWID string `json:"wwid,omitempty"` + PhysicalBlockSize uint64 `json:"physicalBlockSize,omitempty"` + LogicalBlockSize uint64 `json:"logicalBlockSize,omitempty"` + HWSectorSize uint64 `json:"hWSectorSize,omitempty"` + SizeBytes uint64 `json:"sizeBytes,omitempty"` + NUMANodeID int `json:"numaNodeID,omitempty"` +} + +type MetaDataMemoryDevice struct { + SizeBytes int64 `json:"size,omitempty"` + DeviceSet string `json:"deviceSet,omitempty"` + DeviceLocator string `json:"deviceLocator,omitempty"` + BankLocator string `json:"bankLocator,omitempty"` + MemoryType string 
`json:"memoryType,omitempty"` + Speed string `json:"speed,omitempty"` + Vendor string `json:"vendor,omitempty"` + SerialNumber string `json:"serialNumber,omitempty"` + AssetTag string `json:"assetTag,omitempty"` + PartNumber string `json:"partNumber,omitempty"` + ConfiguredMemorySpeed string `json:"configuredMemorySpeed,omitempty"` + MinimumVoltage string `json:"minimumVoltage,omitempty"` + MaximumVoltage string `json:"maximumVoltage,omitempty"` + ConfiguredVoltage string `json:"configuredVoltage,omitempty"` +} + +type MetaDataNIC struct { + Name string `json:"name,omitempty"` + MAC string `json:"mac,omitempty"` + PCIAddress string `json:"pciAddress,omitempty"` + Speed string `json:"speed,omitempty"` + LinkModes []string `json:"linkModes,omitempty"` + SupportedPorts []string `json:"supportedPorts,omitempty"` + FirmwareVersion string `json:"firmwareVersion,omitempty"` +} + +type MetaDataPCIDevice struct { + Address string `json:"address,omitempty"` + Vendor string `json:"vendor,omitempty"` + VendorID string `json:"vendorID,omitempty"` + Product string `json:"product,omitempty"` + ProductID string `json:"productID,omitempty"` + NumaNodeID int `json:"numaNodeID,omitempty"` +} + +func init() { + SchemeBuilder.Register(&ServerMetadata{}, &ServerMetadataList{}) +} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 00af3bf89..3826e447f 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1532,6 +1532,221 @@ func (in *LLDPNeighbor) DeepCopy() *LLDPNeighbor { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataBIOSInformation) DeepCopyInto(out *MetaDataBIOSInformation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataBIOSInformation. 
+func (in *MetaDataBIOSInformation) DeepCopy() *MetaDataBIOSInformation { + if in == nil { + return nil + } + out := new(MetaDataBIOSInformation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataBlockDevice) DeepCopyInto(out *MetaDataBlockDevice) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataBlockDevice. +func (in *MetaDataBlockDevice) DeepCopy() *MetaDataBlockDevice { + if in == nil { + return nil + } + out := new(MetaDataBlockDevice) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataBoardInformation) DeepCopyInto(out *MetaDataBoardInformation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataBoardInformation. +func (in *MetaDataBoardInformation) DeepCopy() *MetaDataBoardInformation { + if in == nil { + return nil + } + out := new(MetaDataBoardInformation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataCPU) DeepCopyInto(out *MetaDataCPU) { + *out = *in + if in.Capabilities != nil { + in, out := &in.Capabilities, &out.Capabilities + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataCPU. +func (in *MetaDataCPU) DeepCopy() *MetaDataCPU { + if in == nil { + return nil + } + out := new(MetaDataCPU) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetaDataLLDPInterface) DeepCopyInto(out *MetaDataLLDPInterface) { + *out = *in + if in.Neighbors != nil { + in, out := &in.Neighbors, &out.Neighbors + *out = make([]MetaDataLLDPNeighbor, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataLLDPInterface. +func (in *MetaDataLLDPInterface) DeepCopy() *MetaDataLLDPInterface { + if in == nil { + return nil + } + out := new(MetaDataLLDPInterface) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataLLDPNeighbor) DeepCopyInto(out *MetaDataLLDPNeighbor) { + *out = *in + if in.Capabilities != nil { + in, out := &in.Capabilities, &out.Capabilities + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataLLDPNeighbor. +func (in *MetaDataLLDPNeighbor) DeepCopy() *MetaDataLLDPNeighbor { + if in == nil { + return nil + } + out := new(MetaDataLLDPNeighbor) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataMemoryDevice) DeepCopyInto(out *MetaDataMemoryDevice) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataMemoryDevice. +func (in *MetaDataMemoryDevice) DeepCopy() *MetaDataMemoryDevice { + if in == nil { + return nil + } + out := new(MetaDataMemoryDevice) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetaDataNIC) DeepCopyInto(out *MetaDataNIC) { + *out = *in + if in.LinkModes != nil { + in, out := &in.LinkModes, &out.LinkModes + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.SupportedPorts != nil { + in, out := &in.SupportedPorts, &out.SupportedPorts + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataNIC. +func (in *MetaDataNIC) DeepCopy() *MetaDataNIC { + if in == nil { + return nil + } + out := new(MetaDataNIC) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataNetworkInterface) DeepCopyInto(out *MetaDataNetworkInterface) { + *out = *in + if in.IPAddresses != nil { + in, out := &in.IPAddresses, &out.IPAddresses + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataNetworkInterface. +func (in *MetaDataNetworkInterface) DeepCopy() *MetaDataNetworkInterface { + if in == nil { + return nil + } + out := new(MetaDataNetworkInterface) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataPCIDevice) DeepCopyInto(out *MetaDataPCIDevice) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataPCIDevice. +func (in *MetaDataPCIDevice) DeepCopy() *MetaDataPCIDevice { + if in == nil { + return nil + } + out := new(MetaDataPCIDevice) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MetaDataServerInformation) DeepCopyInto(out *MetaDataServerInformation) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataServerInformation. +func (in *MetaDataServerInformation) DeepCopy() *MetaDataServerInformation { + if in == nil { + return nil + } + out := new(MetaDataServerInformation) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MetaDataSystemInfo) DeepCopyInto(out *MetaDataSystemInfo) { + *out = *in + out.BIOSInformation = in.BIOSInformation + out.SystemInformation = in.SystemInformation + out.BoardInformation = in.BoardInformation +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MetaDataSystemInfo. +func (in *MetaDataSystemInfo) DeepCopy() *MetaDataSystemInfo { + if in == nil { + return nil + } + out := new(MetaDataSystemInfo) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NamespacedKeySelector) DeepCopyInto(out *NamespacedKeySelector) { *out = *in @@ -2023,6 +2238,107 @@ func (in *ServerMaintenanceStatus) DeepCopy() *ServerMaintenanceStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServerMetadata) DeepCopyInto(out *ServerMetadata) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.SystemInfo = in.SystemInfo + if in.CPU != nil { + in, out := &in.CPU, &out.CPU + *out = make([]MetaDataCPU, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.NetworkInterfaces != nil { + in, out := &in.NetworkInterfaces, &out.NetworkInterfaces + *out = make([]MetaDataNetworkInterface, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.LLDP != nil { + in, out := &in.LLDP, &out.LLDP + *out = make([]MetaDataLLDPInterface, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Storage != nil { + in, out := &in.Storage, &out.Storage + *out = make([]MetaDataBlockDevice, len(*in)) + copy(*out, *in) + } + if in.Memory != nil { + in, out := &in.Memory, &out.Memory + *out = make([]MetaDataMemoryDevice, len(*in)) + copy(*out, *in) + } + if in.NICs != nil { + in, out := &in.NICs, &out.NICs + *out = make([]MetaDataNIC, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.PCIDevices != nil { + in, out := &in.PCIDevices, &out.PCIDevices + *out = make([]MetaDataPCIDevice, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerMetadata. +func (in *ServerMetadata) DeepCopy() *ServerMetadata { + if in == nil { + return nil + } + out := new(ServerMetadata) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ServerMetadata) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *ServerMetadataList) DeepCopyInto(out *ServerMetadataList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]ServerMetadata, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServerMetadataList. +func (in *ServerMetadataList) DeepCopy() *ServerMetadataList { + if in == nil { + return nil + } + out := new(ServerMetadataList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *ServerMetadataList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ServerSpec) DeepCopyInto(out *ServerSpec) { *out = *in diff --git a/config/crd/bases/metal.ironcore.dev_servermetadata.yaml b/config/crd/bases/metal.ironcore.dev_servermetadata.yaml new file mode 100644 index 000000000..6f641f850 --- /dev/null +++ b/config/crd/bases/metal.ironcore.dev_servermetadata.yaml @@ -0,0 +1,288 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: servermetadata.metal.ironcore.dev +spec: + group: metal.ironcore.dev + names: + kind: ServerMetadata + listKind: ServerMetadataList + plural: servermetadata + shortNames: + - smd + singular: servermetadata + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServerMetadata is a flat data object (no spec/status) that persists the full + probe agent discovery payload. 
Similar to how Endpoints or ConfigMap store + data directly at the root level. The relationship to its Server is + established by using the same name and an owner reference. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + cpu: + description: CPU is a list of CPUs discovered on the server. + items: + properties: + capabilities: + items: + type: string + type: array + id: + type: integer + model: + type: string + totalCores: + format: int32 + type: integer + totalHardwareThreads: + format: int32 + type: integer + vendor: + type: string + required: + - id + type: object + type: array + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + lldp: + description: LLDP contains LLDP neighbor information per interface. + items: + properties: + name: + type: string + neighbors: + items: + properties: + capabilities: + items: + type: string + type: array + chassisId: + type: string + mgmtIp: + type: string + portDescription: + type: string + portId: + type: string + systemDescription: + type: string + systemName: + type: string + vlanId: + type: string + type: object + type: array + required: + - name + type: object + type: array + memory: + description: Memory is a list of memory devices discovered on the server. 
+ items: + properties: + assetTag: + type: string + bankLocator: + type: string + configuredMemorySpeed: + type: string + configuredVoltage: + type: string + deviceLocator: + type: string + deviceSet: + type: string + maximumVoltage: + type: string + memoryType: + type: string + minimumVoltage: + type: string + partNumber: + type: string + serialNumber: + type: string + size: + format: int64 + type: integer + speed: + type: string + vendor: + type: string + type: object + type: array + metadata: + type: object + networkInterfaces: + description: NetworkInterfaces is a list of network interfaces discovered + on the server. + items: + properties: + carrierStatus: + type: string + ipAddresses: + items: + type: string + type: array + macAddress: + type: string + name: + type: string + required: + - macAddress + - name + type: object + type: array + nics: + description: NICs is a list of raw NIC details (PCI address, speed, firmware). + items: + properties: + firmwareVersion: + type: string + linkModes: + items: + type: string + type: array + mac: + type: string + name: + type: string + pciAddress: + type: string + speed: + type: string + supportedPorts: + items: + type: string + type: array + type: object + type: array + pciDevices: + description: PCIDevices is a list of PCI devices discovered on the server. + items: + properties: + address: + type: string + numaNodeID: + type: integer + product: + type: string + productID: + type: string + vendor: + type: string + vendorID: + type: string + type: object + type: array + storage: + description: Storage is a list of block devices discovered on the server. 
+ items: + properties: + hWSectorSize: + format: int64 + type: integer + logicalBlockSize: + format: int64 + type: integer + model: + type: string + name: + type: string + numaNodeID: + type: integer + path: + type: string + physicalBlockSize: + format: int64 + type: integer + readOnly: + type: boolean + removable: + type: boolean + rotational: + type: boolean + serial: + type: string + sizeBytes: + format: int64 + type: integer + vendor: + type: string + wwid: + type: string + type: object + type: array + systemInfo: + description: SystemInfo contains BIOS, system, and board information from + DMI/SMBIOS. + properties: + biosInformation: + properties: + date: + type: string + vendor: + type: string + version: + type: string + type: object + boardInformation: + properties: + assetTag: + type: string + manufacturer: + type: string + product: + type: string + serialNumber: + type: string + version: + type: string + type: object + systemInformation: + properties: + family: + type: string + manufacturer: + type: string + productName: + type: string + serialNumber: + type: string + skuNumber: + type: string + uuid: + type: string + version: + type: string + type: object + type: object + type: object + served: true + storage: true + subresources: {} diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7b33068c8..e0904a14d 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -56,6 +56,7 @@ rules: - serverclaims - serverconfigurations - servermaintenances + - servermetadata - servers verbs: - create diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index b760d9c0d..a23987cd5 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -4,6 +4,7 @@ resources: - metal_v1alpha1_bmcsecret.yaml - metal_v1alpha1_bmc.yaml - metal_v1alpha1_server.yaml +- metal_v1alpha1_servermetadata.yaml - metal_v1alpha1_serverbootconfiguration.yaml - metal_v1alpha1_serverclaim.yaml - 
metal_v1alpha1_servermaintenance.yaml diff --git a/config/samples/metal_v1alpha1_servermetadata.yaml b/config/samples/metal_v1alpha1_servermetadata.yaml new file mode 100644 index 000000000..cebe1e9fe --- /dev/null +++ b/config/samples/metal_v1alpha1_servermetadata.yaml @@ -0,0 +1,28 @@ +apiVersion: metal.ironcore.dev/v1alpha1 +kind: ServerMetadata +metadata: + labels: + app.kubernetes.io/name: servermetadata + app.kubernetes.io/instance: servermetadata-sample + app.kubernetes.io/part-of: metal-operator + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/created-by: metal-operator + name: endpoint-sample-system-0 +systemInfo: + biosInformation: + vendor: "AMI" + version: "1.0.0" + date: "01/01/2024" + systemInformation: + manufacturer: "Contoso" + productName: "3500" + serialNumber: "437XR1138R2" + boardInformation: + manufacturer: "Contoso" + product: "Board-3500" +networkInterfaces: + - name: eth0 + macAddress: "aa:bb:cc:dd:ee:ff" + ipAddresses: + - "192.168.1.100" + carrierStatus: "up" diff --git a/dist/chart/templates/crd/metal.ironcore.dev_servermetadata.yaml b/dist/chart/templates/crd/metal.ironcore.dev_servermetadata.yaml new file mode 100644 index 000000000..5a47b3eef --- /dev/null +++ b/dist/chart/templates/crd/metal.ironcore.dev_servermetadata.yaml @@ -0,0 +1,295 @@ +{{- if .Values.crd.enable }} +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + labels: + {{- include "chart.labels" . 
| nindent 4 }} + annotations: + {{- if .Values.crd.keep }} + "helm.sh/resource-policy": keep + {{- end }} + controller-gen.kubebuilder.io/version: v0.20.1 + name: servermetadata.metal.ironcore.dev +spec: + group: metal.ironcore.dev + names: + kind: ServerMetadata + listKind: ServerMetadataList + plural: servermetadata + shortNames: + - smd + singular: servermetadata + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: |- + ServerMetadata is a flat data object (no spec/status) that persists the full + probe agent discovery payload. Similar to how Endpoints or ConfigMap store + data directly at the root level. The relationship to its Server is + established by using the same name and an owner reference. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + cpu: + description: CPU is a list of CPUs discovered on the server. + items: + properties: + capabilities: + items: + type: string + type: array + id: + type: integer + model: + type: string + totalCores: + format: int32 + type: integer + totalHardwareThreads: + format: int32 + type: integer + vendor: + type: string + required: + - id + type: object + type: array + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + lldp: + description: LLDP contains LLDP neighbor information per interface. 
+ items: + properties: + name: + type: string + neighbors: + items: + properties: + capabilities: + items: + type: string + type: array + chassisId: + type: string + mgmtIp: + type: string + portDescription: + type: string + portId: + type: string + systemDescription: + type: string + systemName: + type: string + vlanId: + type: string + type: object + type: array + required: + - name + type: object + type: array + memory: + description: Memory is a list of memory devices discovered on the server. + items: + properties: + assetTag: + type: string + bankLocator: + type: string + configuredMemorySpeed: + type: string + configuredVoltage: + type: string + deviceLocator: + type: string + deviceSet: + type: string + maximumVoltage: + type: string + memoryType: + type: string + minimumVoltage: + type: string + partNumber: + type: string + serialNumber: + type: string + size: + format: int64 + type: integer + speed: + type: string + vendor: + type: string + type: object + type: array + metadata: + type: object + networkInterfaces: + description: NetworkInterfaces is a list of network interfaces discovered + on the server. + items: + properties: + carrierStatus: + type: string + ipAddresses: + items: + type: string + type: array + macAddress: + type: string + name: + type: string + required: + - macAddress + - name + type: object + type: array + nics: + description: NICs is a list of raw NIC details (PCI address, speed, firmware). + items: + properties: + firmwareVersion: + type: string + linkModes: + items: + type: string + type: array + mac: + type: string + name: + type: string + pciAddress: + type: string + speed: + type: string + supportedPorts: + items: + type: string + type: array + type: object + type: array + pciDevices: + description: PCIDevices is a list of PCI devices discovered on the server. 
+ items: + properties: + address: + type: string + numaNodeID: + type: integer + product: + type: string + productID: + type: string + vendor: + type: string + vendorID: + type: string + type: object + type: array + storage: + description: Storage is a list of block devices discovered on the server. + items: + properties: + hWSectorSize: + format: int64 + type: integer + logicalBlockSize: + format: int64 + type: integer + model: + type: string + name: + type: string + numaNodeID: + type: integer + path: + type: string + physicalBlockSize: + format: int64 + type: integer + readOnly: + type: boolean + removable: + type: boolean + rotational: + type: boolean + serial: + type: string + sizeBytes: + format: int64 + type: integer + vendor: + type: string + wwid: + type: string + type: object + type: array + systemInfo: + description: SystemInfo contains BIOS, system, and board information from + DMI/SMBIOS. + properties: + biosInformation: + properties: + date: + type: string + vendor: + type: string + version: + type: string + type: object + boardInformation: + properties: + assetTag: + type: string + manufacturer: + type: string + product: + type: string + serialNumber: + type: string + version: + type: string + type: object + systemInformation: + properties: + family: + type: string + manufacturer: + type: string + productName: + type: string + serialNumber: + type: string + skuNumber: + type: string + uuid: + type: string + version: + type: string + type: object + type: object + type: object + served: true + storage: true + subresources: {} +{{- end -}} diff --git a/dist/chart/templates/rbac/role.yaml b/dist/chart/templates/rbac/role.yaml index 96c75c9ae..a7796fa9e 100755 --- a/dist/chart/templates/rbac/role.yaml +++ b/dist/chart/templates/rbac/role.yaml @@ -59,6 +59,7 @@ rules: - serverclaims - serverconfigurations - servermaintenances + - servermetadata - servers verbs: - create diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 
2ce6164d9..c3a76c6ac 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -69,6 +69,7 @@ export default withMermaid({ { text: 'BMCVersion', link: '/concepts/bmcversion' }, { text: 'BMCVersionSet', link: '/concepts/bmcversionset' }, { text: 'Servers', link: '/concepts/servers' }, + { text: 'ServerMetadata', link: '/concepts/servermetadata' }, { text: 'ServerClaims', link: '/concepts/serverclaims' }, { text: 'ServerBootConfigurations', link: '/concepts/serverbootconfigurations' }, { text: 'ServerMaintenance', link: '/concepts/servermaintenance' }, diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md index 73a1906f9..4bdb7a2d3 100644 --- a/docs/api-reference/api.md +++ b/docs/api-reference/api.md @@ -27,6 +27,7 @@ Package v1alpha1 contains API Schema definitions for the metal v1alpha1 API grou - [ServerBootConfiguration](#serverbootconfiguration) - [ServerClaim](#serverclaim) - [ServerMaintenance](#servermaintenance) +- [ServerMetadata](#servermetadata) @@ -1147,6 +1148,265 @@ _Appears in:_ | `systemDescription` _string_ | SystemDescription is the system description of the LLDP neighbor. 
| | | +#### MetaDataBIOSInformation + + + + + + + +_Appears in:_ +- [MetaDataSystemInfo](#metadatasysteminfo) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `vendor` _string_ | | | | +| `version` _string_ | | | | +| `date` _string_ | | | | + + +#### MetaDataBlockDevice + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `path` _string_ | | | | +| `name` _string_ | | | | +| `rotational` _boolean_ | | | | +| `removable` _boolean_ | | | | +| `readOnly` _boolean_ | | | | +| `vendor` _string_ | | | | +| `model` _string_ | | | | +| `serial` _string_ | | | | +| `wwid` _string_ | | | | +| `physicalBlockSize` _integer_ | | | | +| `logicalBlockSize` _integer_ | | | | +| `hWSectorSize` _integer_ | | | | +| `sizeBytes` _integer_ | | | | +| `numaNodeID` _integer_ | | | | + + +#### MetaDataBoardInformation + + + + + + + +_Appears in:_ +- [MetaDataSystemInfo](#metadatasysteminfo) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `manufacturer` _string_ | | | | +| `product` _string_ | | | | +| `version` _string_ | | | | +| `serialNumber` _string_ | | | | +| `assetTag` _string_ | | | | + + +#### MetaDataCPU + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `id` _integer_ | | | | +| `totalCores` _integer_ | | | | +| `totalHardwareThreads` _integer_ | | | | +| `vendor` _string_ | | | | +| `model` _string_ | | | | +| `capabilities` _string array_ | | | | + + +#### MetaDataLLDPInterface + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | | | | +| `neighbors` _[MetaDataLLDPNeighbor](#metadatalldpneighbor) array_ | | | | + + +#### MetaDataLLDPNeighbor + + + + + + + +_Appears in:_ +- [MetaDataLLDPInterface](#metadatalldpinterface) 
+ +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `chassisId` _string_ | | | | +| `portId` _string_ | | | | +| `portDescription` _string_ | | | | +| `systemName` _string_ | | | | +| `systemDescription` _string_ | | | | +| `mgmtIp` _string_ | | | | +| `capabilities` _string array_ | | | | +| `vlanId` _string_ | | | | + + +#### MetaDataMemoryDevice + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `size` _integer_ | | | | +| `deviceSet` _string_ | | | | +| `deviceLocator` _string_ | | | | +| `bankLocator` _string_ | | | | +| `memoryType` _string_ | | | | +| `speed` _string_ | | | | +| `vendor` _string_ | | | | +| `serialNumber` _string_ | | | | +| `assetTag` _string_ | | | | +| `partNumber` _string_ | | | | +| `configuredMemorySpeed` _string_ | | | | +| `minimumVoltage` _string_ | | | | +| `maximumVoltage` _string_ | | | | +| `configuredVoltage` _string_ | | | | + + +#### MetaDataNIC + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | | | | +| `mac` _string_ | | | | +| `pciAddress` _string_ | | | | +| `speed` _string_ | | | | +| `linkModes` _string array_ | | | | +| `supportedPorts` _string array_ | | | | +| `firmwareVersion` _string_ | | | | + + +#### MetaDataNetworkInterface + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | | | | +| `ipAddresses` _string array_ | | | | +| `macAddress` _string_ | | | | +| `carrierStatus` _string_ | | | | + + +#### MetaDataPCIDevice + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `address` _string_ | | | | +| `vendor` _string_ | | | | +| `vendorID` _string_ | | | | +| `product` _string_ | | | 
| +| `productID` _string_ | | | | +| `numaNodeID` _integer_ | | | | + + +#### MetaDataServerInformation + + + + + + + +_Appears in:_ +- [MetaDataSystemInfo](#metadatasysteminfo) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `manufacturer` _string_ | | | | +| `productName` _string_ | | | | +| `version` _string_ | | | | +| `serialNumber` _string_ | | | | +| `uuid` _string_ | | | | +| `skuNumber` _string_ | | | | +| `family` _string_ | | | | + + +#### MetaDataSystemInfo + + + + + + + +_Appears in:_ +- [ServerMetadata](#servermetadata) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `biosInformation` _[MetaDataBIOSInformation](#metadatabiosinformation)_ | | | | +| `systemInformation` _[MetaDataServerInformation](#metadataserverinformation)_ | | | | +| `boardInformation` _[MetaDataBoardInformation](#metadataboardinformation)_ | | | | + + #### NamespacedKeySelector @@ -1602,6 +1862,34 @@ _Appears in:_ | `state` _[ServerMaintenanceState](#servermaintenancestate)_ | State specifies the current state of the server maintenance. | | | +#### ServerMetadata + + + +ServerMetadata is a flat data object (no spec/status) that persists the full +probe agent discovery payload. Similar to how Endpoints or ConfigMap store +data directly at the root level. The relationship to its Server is +established by using the same name and an owner reference. + + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `metal.ironcore.dev/v1alpha1` | | | +| `kind` _string_ | `ServerMetadata` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.35/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `systemInfo` _[MetaDataSystemInfo](#metadatasysteminfo)_ | SystemInfo contains BIOS, system, and board information from DMI/SMBIOS. 
| | | +| `cpu` _[MetaDataCPU](#metadatacpu) array_ | CPU is a list of CPUs discovered on the server. | | | +| `networkInterfaces` _[MetaDataNetworkInterface](#metadatanetworkinterface) array_ | NetworkInterfaces is a list of network interfaces discovered on the server. | | | +| `lldp` _[MetaDataLLDPInterface](#metadatalldpinterface) array_ | LLDP contains LLDP neighbor information per interface. | | | +| `storage` _[MetaDataBlockDevice](#metadatablockdevice) array_ | Storage is a list of block devices discovered on the server. | | | +| `memory` _[MetaDataMemoryDevice](#metadatamemorydevice) array_ | Memory is a list of memory devices discovered on the server. | | | +| `nics` _[MetaDataNIC](#metadatanic) array_ | NICs is a list of raw NIC details (PCI address, speed, firmware). | | | +| `pciDevices` _[MetaDataPCIDevice](#metadatapcidevice) array_ | PCIDevices is a list of PCI devices discovered on the server. | | | + + #### ServerPowerState _Underlying type:_ _string_ diff --git a/docs/architecture.md b/docs/architecture.md index 7c4a560cc..36d21c57e 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -23,6 +23,9 @@ flowchart LR ServerReconciler -- Manages state/Power --> Server ServerReconciler -- Uses --> metalprobe ServerReconciler -- Waits for --> ServerBootConfiguration + ServerReconciler -- Creates/Updates --> ServerMetadata + + ServerMetadata -- Owned by --> Server ServerMaintenanceReconciler -- Manages --> ServerMaintenance ServerMaintenanceReconciler -- Creates/Deletes --> ServerBootConfiguration @@ -62,7 +65,7 @@ flowchart LR classDef external fill:#f48fb1, stroke:#000, stroke-width:2px, color:#000; class EndpointReconciler,BMCReconciler,ServerReconciler,ServerClaimReconciler,ServerMaintenanceReconciler,BiosSettingsReconciler,BiosVersionReconciler,BMCSettingsReconciler,BMCVersionReconciler operator; - class Endpoint,BMC,BMCSecret,Server,ServerClaim,ServerBootConfiguration,ServerMaintenance,BMCSettings,BMCVersion,BIOSVersion,BIOSSettings crd; + 
class Endpoint,BMC,BMCSecret,Server,ServerMetadata,ServerClaim,ServerBootConfiguration,ServerMaintenance,BMCSettings,BMCVersion,BIOSVersion,BIOSSettings crd; class BootOperator external; ``` @@ -74,6 +77,7 @@ flowchart LR - [**BMC**](concepts/bmcs.md): Models Baseboard Management Controllers (BMCs), allowing interaction with server hardware. - [**BMCSecret**](concepts/bmcsecrets.md): Securely stores credentials required to access BMCs. - [**Server**](concepts/servers.md): Represents physical servers, managing their state, power, and configurations. +- [**ServerMetadata**](concepts/servermetadata.md): Persists server discovery data (network interfaces, CPUs, storage, etc.) to enable status restoration without rediscovery. - [**ServerClaim**](concepts/serverclaims.md): Allows users to reserve servers by specifying desired configurations and boot images. - [**ServerBootConfiguration**](concepts/serverbootconfigurations.md): Signals the need to prepare the boot environment for a server. - [**ServerMaintenance**](concepts/servermaintenance.md): Represents maintenance tasks for servers, such as BIOS updates or hardware repairs. @@ -91,7 +95,7 @@ flowchart LR - **BMCReconciler**: Manages `BMC` resources by connecting to BMC devices using credentials from `BMCSecret`. It retrieves hardware information, updates the BMC status, and detects managed servers, creating `Server` resources for them. -- **ServerReconciler**: Manages `Server` resources and their lifecycle states. During the **Discovery** phase, it interacts with BMCs and uses the **metalprobe** agent to collect in-band hardware information, updating the server's status. It handles power management, BIOS configurations, and transitions servers through various states (e.g., Initial, Discovery, Available, Reserved). +- **ServerReconciler**: Manages `Server` resources and their lifecycle states. 
During the **Discovery** phase, it interacts with BMCs and uses the **metalprobe** agent to collect in-band hardware information, storing it in a [`ServerMetadata`](concepts/servermetadata.md) resource and updating the server's status. It handles power management, BIOS configurations, and transitions servers through various states (e.g., Initial, Discovery, Available, Reserved). If a server's status is empty but a `ServerMetadata` exists, it restores the server state without requiring a full rediscovery. - **ServerClaimReconciler**: Handles `ServerClaim` resources, allowing users to reserve servers. Upon creation of a `ServerClaim`, it allocates an available server, transitions it to the **Reserved** state, and creates a `ServerBootConfiguration`. When the claim is deleted, it releases the server, transitioning it to the **Cleanup** state for sanitization. @@ -117,7 +121,8 @@ flowchart LR - The **metalprobe** agent runs on the servers, collecting detailed hardware information (e.g., network interfaces, storage devices) and reporting back to update the `Server` status. 3. **Server Availability**: - - Once discovery is complete, servers transition to the **Available** state, ready to be claimed. + - Once discovery is complete, a [`ServerMetadata`](concepts/servermetadata.md) resource is created to persist the discovery data. + - Servers transition to the **Available** state, ready to be claimed. 4. **Server Reservation and Boot Configuration**: - Users create `ServerClaim` resources to reserve servers, specifying desired OS images and ignition configurations. diff --git a/docs/concepts/index.md b/docs/concepts/index.md index 55311df4a..fd28c0c9f 100644 --- a/docs/concepts/index.md +++ b/docs/concepts/index.md @@ -7,6 +7,7 @@ their relationships. Each concept is linked to its respective documentation for - [**BMC**](/concepts/bmcs): Models Baseboard Management Controllers (BMCs), allowing interaction with server hardware. 
- [**BMCSecret**](/concepts/bmcsecrets): Securely stores credentials required to access BMCs. - [**Server**](/concepts/servers): Represents physical servers, managing their state, power, and configurations. +- [**ServerMetadata**](/concepts/servermetadata): Persists server discovery data (network interfaces, CPUs, storage, etc.) to enable status restoration without rediscovery. - [**ServerClaim**](/concepts/serverclaims): Allows users to reserve servers by specifying desired configurations and boot images. - [**ServerBootConfiguration**](/concepts/serverbootconfigurations): Signals the need to prepare the boot environment for a server. - [**ServerMaintenance**](/concepts/servermaintenance): Represents maintenance tasks for servers, such as BIOS updates or hardware repairs. diff --git a/docs/concepts/servermetadata.md b/docs/concepts/servermetadata.md new file mode 100644 index 000000000..ad46d4b79 --- /dev/null +++ b/docs/concepts/servermetadata.md @@ -0,0 +1,98 @@ +# ServerMetadata + +The `ServerMetadata` Custom Resource Definition (CRD) is a cluster-scoped resource that persists hardware discovery +data collected during the [Server](servers.md) Discovery phase. It stores information such as network interfaces, +LLDP neighbors, CPUs, storage devices, memory, NICs, and PCI devices. + +`ServerMetadata` is created automatically by the `ServerReconciler` when a server completes Discovery. It is owned +by its corresponding `Server` via an owner reference, so it is garbage-collected when the `Server` is deleted. + +## Purpose + +Server discovery data lives in the `Server` status subresource, which can be lost when a `Server` resource is +recreated or its status is reset. `ServerMetadata` solves this by storing discovery data at the resource root +level (not in a status subresource), providing a durable record of hardware information. + +When a `Server` has an empty status, the `ServerReconciler` checks for an existing `ServerMetadata` with the +same name. 
If found, the server's network interfaces are restored from the metadata and the server transitions +directly to `Available` (or `Reserved` if a `ServerClaimRef` is set), skipping the full Discovery cycle. + +## Example ServerMetadata Resource + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: ServerMetadata +metadata: + name: my-server + ownerReferences: + - apiVersion: metal.ironcore.dev/v1alpha1 + kind: Server + name: my-server + uid: <server-uid> +systemInfo: + biosInformation: + vendor: "AMI" + version: "1.0.0" + date: "01/01/2024" + systemInformation: + manufacturer: "Contoso" + productName: "3500" + serialNumber: "437XR1138R2" + boardInformation: + manufacturer: "Contoso" + product: "Board-3500" +networkInterfaces: + - name: eth0 + macAddress: "aa:bb:cc:dd:ee:ff" + ipAddresses: + - "192.168.1.100" + carrierStatus: "up" +cpu: + - id: 0 + model: "Intel Xeon E5-2680 v4" + totalCores: 14 + totalHardwareThreads: 28 +storage: + - name: "sda" + path: "/dev/sda" + sizeBytes: 960197124096 +``` + +## Lifecycle + +- **Created**: Automatically by the `ServerReconciler` when Discovery completes, before the server transitions + to `Available`. +- **Updated**: On each subsequent discovery, the `ServerMetadata` is updated with the latest hardware data. +- **Deleted**: Automatically via owner reference garbage collection when the parent `Server` is deleted, or + explicitly when a `rediscover` annotation is applied (see [Rediscover](#rediscover)). + +## Status Restoration + +When the `ServerReconciler` encounters a `Server` with an empty `Status.State`: + +1. It looks up a `ServerMetadata` resource with the same name as the server. +2. If found, it restores `Status.NetworkInterfaces` from the metadata. +3. If `Spec.ServerClaimRef` is set, the server transitions to `Reserved`; otherwise to `Available`. +4. 
The `ServerClaimReconciler` similarly detects bidirectional binding (`ServerClaim.Spec.ServerRef` ↔ + `Server.Spec.ServerClaimRef`) and restores the claim's phase to `Bound`. + +This allows servers and their claims to resume normal operation without rediscovery. + +## Rediscover + +To force a full rediscovery of a server's hardware, apply the `rediscover` operation annotation: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: Server +metadata: + name: my-server + annotations: + metal.ironcore.dev/operation: rediscover +``` + +When the `ServerReconciler` processes this annotation, it: + +1. Deletes the associated `ServerMetadata` resource. +2. Removes the annotation from the server. +3. Transitions the server back to the `Initial` state, triggering a full Discovery cycle. diff --git a/docs/concepts/servers.md b/docs/concepts/servers.md index de7699ee5..2e2e8e814 100644 --- a/docs/concepts/servers.md +++ b/docs/concepts/servers.md @@ -47,7 +47,8 @@ A server undergoes the following phases: - The `ServerReconciler` interacts with the BMC to retrieve hardware details. - An initial boot is performed using a predefined ignition configuration. - An agent called [`metalprobe`](https://github.com/ironcore-dev/metal-operator/tree/main/cmd/metalprobe) runs on the server to collect additional data (e.g., network interfaces, disks). - - The collected data is reported back to the `metal-operator` and added to the `ServerStatus`.` + - The collected data is reported back to the `metal-operator` and added to the `ServerStatus`. + - A [`ServerMetadata`](servermetadata.md) resource is created to persist the discovery data. 3. **Available**: The server has completed discovery and is ready for use. 
@@ -137,3 +138,30 @@ spec: name: SSH port: 22 ``` + +## Status Restoration + +If a `Server` resource has its status reset or is recreated without status data, the `ServerReconciler` can +restore the server's state from its [`ServerMetadata`](servermetadata.md) resource rather than requiring a +full Discovery cycle. + +When a `Server` has an empty `Status.State`: + +1. The `ServerReconciler` looks up a `ServerMetadata` with the same name. +2. If found, it restores network interface data and transitions the server to `Available` (or `Reserved` if + `Spec.ServerClaimRef` is set). +3. The `ServerClaimReconciler` detects bidirectional binding between `ServerClaim` and `Server` and restores + the claim's phase to `Bound`. + +This allows servers and claims to resume normal operation without rediscovery. + +## Rediscover Annotation + +To force a full rediscovery of a server's hardware, apply the `rediscover` operation annotation: + +```bash +kubectl annotate server my-server metal.ironcore.dev/operation=rediscover +``` + +This deletes the associated [`ServerMetadata`](servermetadata.md), removes the annotation, and transitions +the server back to the `Initial` state for a fresh Discovery cycle. 
diff --git a/internal/controller/biossettings_controller_test.go b/internal/controller/biossettings_controller_test.go index 3e0359d37..ec1a080fb 100644 --- a/internal/controller/biossettings_controller_test.go +++ b/internal/controller/biossettings_controller_test.go @@ -81,7 +81,7 @@ var _ = Describe("BIOSSettings Controller", func() { Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) Expect(k8sClient.Delete(ctx, server)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully patch its reference to referred server", func(ctx SpecContext) { @@ -1051,7 +1051,7 @@ var _ = Describe("BIOSSettings Controller with BMCRef BMC", func() { Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcObj)).To(Succeed()) Expect(k8sClient.Delete(ctx, server)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should request maintenance when changing power status of server, even if bios settings update does not need it", func(ctx SpecContext) { @@ -1383,7 +1383,7 @@ var _ = Describe("BIOSSettings Sequence Controller", func() { Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) Expect(k8sClient.Delete(ctx, server)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully apply sequence of settings", func(ctx SpecContext) { diff --git a/internal/controller/biossettingsset_controller_test.go b/internal/controller/biossettingsset_controller_test.go index 0b2afa1ea..a0f48b00a 100644 --- a/internal/controller/biossettingsset_controller_test.go +++ b/internal/controller/biossettingsset_controller_test.go @@ -126,7 +126,7 @@ var _ = Describe("BIOSSettingsSet Controller", func() { Expect(k8sClient.Delete(ctx, server03)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully reconcile the resource", func(ctx SpecContext) { diff --git a/internal/controller/biosversion_controller_test.go 
b/internal/controller/biosversion_controller_test.go index 567839589..624a16358 100644 --- a/internal/controller/biosversion_controller_test.go +++ b/internal/controller/biosversion_controller_test.go @@ -78,7 +78,7 @@ var _ = Describe("BIOSVersion Controller", func() { Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully mark completed if no BIOS version change", func(ctx SpecContext) { @@ -462,7 +462,7 @@ var _ = Describe("BIOSVersion Controller with BMCRef BMC", func() { Expect(k8sClient.Delete(ctx, bmcObj)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("should successfully Start and monitor Upgrade task to completion", func(ctx SpecContext) { // mocked at diff --git a/internal/controller/biosversionset_controller_test.go b/internal/controller/biosversionset_controller_test.go index 729a1d3f9..f7b0a97dc 100644 --- a/internal/controller/biosversionset_controller_test.go +++ b/internal/controller/biosversionset_controller_test.go @@ -130,7 +130,7 @@ var _ = Describe("BIOSVersionSet Controller", func() { Expect(k8sClient.Delete(ctx, server03)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully reconcile the resource", func(ctx SpecContext) { diff --git a/internal/controller/bmc_controller_test.go b/internal/controller/bmc_controller_test.go index 22aab6641..794e1d9a9 100644 --- a/internal/controller/bmc_controller_test.go +++ b/internal/controller/bmc_controller_test.go @@ -23,7 +23,7 @@ var _ = Describe("BMC Controller", func() { ns := SetupTest(nil) AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully reconcile the a BMC resource", func(ctx SpecContext) { @@ -343,7 +343,7 @@ var _ = Describe("BMC Controller", func() { var _ = 
Describe("BMC Validation", func() { AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should deny if the BMC has EndpointRef and InlineEndpoint spec fields", func(ctx SpecContext) { @@ -513,7 +513,7 @@ var _ = Describe("BMC Reset", func() { _ = SetupTest(nil) AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should reset the BMC", func(ctx SpecContext) { @@ -580,7 +580,7 @@ var _ = Describe("BMC Conditions", func() { _ = SetupTest(nil) AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should create ready conditions when there are bmc connection errors", func(ctx SpecContext) { diff --git a/internal/controller/bmcsettings_controller_test.go b/internal/controller/bmcsettings_controller_test.go index 9c5ff3212..e49cb1021 100644 --- a/internal/controller/bmcsettings_controller_test.go +++ b/internal/controller/bmcsettings_controller_test.go @@ -95,7 +95,7 @@ var _ = Describe("BMCSettings Controller", func() { })).To(Succeed()) Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully patch BMCSettings reference to referred BMC", func(ctx SpecContext) { diff --git a/internal/controller/bmcsettingsset_controller_test.go b/internal/controller/bmcsettingsset_controller_test.go index cea18fb65..abf2a73a2 100644 --- a/internal/controller/bmcsettingsset_controller_test.go +++ b/internal/controller/bmcsettingsset_controller_test.go @@ -137,7 +137,7 @@ var _ = Describe("BMCSettingsSet Controller", func() { Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, bmc02))).To(Succeed()) Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, bmcSecret))).To(Succeed()) By("Ensuring all resources are cleaned up") - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully reconcile when BMCSettingsSet was generated, labels match and bmcsettings were 
generated", func(ctx SpecContext) { diff --git a/internal/controller/bmcuser_controller_test.go b/internal/controller/bmcuser_controller_test.go index 2fa62750c..1b758feda 100644 --- a/internal/controller/bmcuser_controller_test.go +++ b/internal/controller/bmcuser_controller_test.go @@ -74,7 +74,7 @@ var _ = Describe("BMCUser Controller", func() { Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).Should(Succeed()) Expect(k8sClient.Delete(ctx, bmc)).Should(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should create a bmc user and secret", func(ctx SpecContext) { diff --git a/internal/controller/bmcversion_controller_test.go b/internal/controller/bmcversion_controller_test.go index 192facf15..193af2f83 100644 --- a/internal/controller/bmcversion_controller_test.go +++ b/internal/controller/bmcversion_controller_test.go @@ -92,7 +92,7 @@ var _ = Describe("BMCVersion Controller", func() { Expect(k8sClient.Delete(ctx, bmcObj)).To(Succeed()) Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully mark completed if no BMC version change", func(ctx SpecContext) { diff --git a/internal/controller/bmcversionset_controller_test.go b/internal/controller/bmcversionset_controller_test.go index a62be3e33..9059f05d4 100644 --- a/internal/controller/bmcversionset_controller_test.go +++ b/internal/controller/bmcversionset_controller_test.go @@ -192,7 +192,7 @@ var _ = Describe("BMCVersionSet Controller", func() { Eventually(Get(server03)).Should(Satisfy(apierrors.IsNotFound)) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully reconcile the resource", func(ctx SpecContext) { diff --git a/internal/controller/endpoint_controller_test.go b/internal/controller/endpoint_controller_test.go index 107426e19..768d716e9 100644 --- 
a/internal/controller/endpoint_controller_test.go +++ b/internal/controller/endpoint_controller_test.go @@ -19,7 +19,7 @@ var _ = Describe("Endpoints Controller", func() { _ = SetupTest(nil) AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully create a BMC secret and BMC object from endpoint", func(ctx SpecContext) { diff --git a/internal/controller/server_controller.go b/internal/controller/server_controller.go index 1562e58bc..09f4fdaae 100644 --- a/internal/controller/server_controller.go +++ b/internal/controller/server_controller.go @@ -106,6 +106,7 @@ type ServerReconciler struct { // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers/status,verbs=get;update;patch // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servers/finalizers,verbs=update +// +kubebuilder:rbac:groups=metal.ironcore.dev,resources=servermetadata,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=metal.ironcore.dev,resources=serverconfigurations,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups="batch",resources=jobs,verbs=get;list;watch;create;update;patch;delete @@ -216,6 +217,12 @@ func (r *ServerReconciler) reconcile(ctx context.Context, server *metalv1alpha1. 
// do late state initialization if server.Status.State == "" { + // Check if ServerMetadata exists — if so, reconstruct status instead of going to Initial + if restored, err := r.restoreServerStatusFromMetadata(ctx, server); err != nil { + return ctrl.Result{}, err + } else if restored { + return ctrl.Result{}, nil + } if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateInitial); err != nil || modified { return ctrl.Result{}, err } @@ -413,6 +420,11 @@ func (r *ServerReconciler) handleDiscoveryState(ctx context.Context, bmcClient b log.V(1).Info("Extracted Server details") + if err := r.ensureServerMetadata(ctx, server, serverDetails); err != nil { + return false, fmt.Errorf("failed to create ServerMetadata: %w", err) + } + log.V(1).Info("Ensured ServerMetadata") + log.V(1).Info("Setting Server state to available") if _, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil { return false, err @@ -958,6 +970,76 @@ func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, return true, nil } +func (r *ServerReconciler) ensureServerMetadata(ctx context.Context, server *metalv1alpha1.Server, serverDetails *registry.Server) error { + log := ctrl.LoggerFrom(ctx) + meta := &metalv1alpha1.ServerMetadata{} + meta.Name = server.Name + registryToServerMetadata(serverDetails, meta) + if err := controllerutil.SetControllerReference(server, meta, r.Scheme); err != nil { + return fmt.Errorf("failed to set owner reference on ServerMetadata: %w", err) + } + + existing := &metalv1alpha1.ServerMetadata{} + if err := r.Get(ctx, types.NamespacedName{Name: server.Name}, existing); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("failed to get ServerMetadata: %w", err) + } + if err := r.Create(ctx, meta); err != nil { + return fmt.Errorf("failed to create ServerMetadata: %w", err) + } + log.V(1).Info("Created ServerMetadata") + return nil + } + + // Update existing ServerMetadata + 
existingBase := existing.DeepCopy() + existing.SystemInfo = meta.SystemInfo + existing.CPU = meta.CPU + existing.NetworkInterfaces = meta.NetworkInterfaces + existing.LLDP = meta.LLDP + existing.Storage = meta.Storage + existing.Memory = meta.Memory + existing.NICs = meta.NICs + existing.PCIDevices = meta.PCIDevices + if err := r.Patch(ctx, existing, client.MergeFrom(existingBase)); err != nil { + return fmt.Errorf("failed to update ServerMetadata: %w", err) + } + log.V(1).Info("Updated ServerMetadata") + return nil +} + +func (r *ServerReconciler) restoreServerStatusFromMetadata(ctx context.Context, server *metalv1alpha1.Server) (bool, error) { + log := ctrl.LoggerFrom(ctx) + meta := &metalv1alpha1.ServerMetadata{} + if err := r.Get(ctx, types.NamespacedName{Name: server.Name}, meta); err != nil { + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("failed to get ServerMetadata: %w", err) + } + + log.V(1).Info("Found ServerMetadata, restoring Server status") + + // Restore network interfaces from ServerMetadata + serverBase := server.DeepCopy() + server.Status.NetworkInterfaces = metaDataToNetworkInterfaces(log, meta) + if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { + return false, fmt.Errorf("failed to patch server network interfaces from ServerMetadata: %w", err) + } + + // Determine the restored state: Reserved if there is a claim binding, otherwise Available + state := metalv1alpha1.ServerStateAvailable + if server.Spec.ServerClaimRef != nil { + state = metalv1alpha1.ServerStateReserved + } + if _, err := r.patchServerState(ctx, server, state); err != nil { + return false, err + } + + log.V(1).Info("Restored Server status from ServerMetadata", "State", state) + return true, nil +} + func (r *ServerReconciler) patchServerState(ctx context.Context, server *metalv1alpha1.Server, state metalv1alpha1.ServerState) (bool, error) { if server.Status.State == state { return false, nil @@ -1184,6 
+1266,33 @@ func (r *ServerReconciler) handleAnnotionOperations(ctx context.Context, bmcClie return false, nil } + // Handle rediscover operation: delete ServerMetadata and transition to Initial + if operation == metalv1alpha1.OperationAnnotationRediscover { + log.V(1).Info("Handling rediscover operation") + serverMetadata := &metalv1alpha1.ServerMetadata{} + if err := r.Get(ctx, types.NamespacedName{Name: server.Name}, serverMetadata); err == nil { + if err := r.Delete(ctx, serverMetadata); err != nil { + return false, fmt.Errorf("failed to delete ServerMetadata: %w", err) + } + log.V(1).Info("Deleted ServerMetadata for rediscovery") + } else if !apierrors.IsNotFound(err) { + return false, fmt.Errorf("failed to get ServerMetadata: %w", err) + } + + serverBase := server.DeepCopy() + delete(annotations, metalv1alpha1.OperationAnnotation) + server.SetAnnotations(annotations) + if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil { + return false, fmt.Errorf("failed to patch server annotations: %w", err) + } + + if _, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateInitial); err != nil { + return false, err + } + log.V(1).Info("Rediscover operation completed, server set to Initial") + return true, nil + } + if value, ok := metalv1alpha1.AnnotationToRedfishMapping[operation]; !ok { log.V(1).Info("Unsupported operation annotation", "Operation", operation, "SupportedOperations", metalv1alpha1.AnnotationToRedfishMapping) return false, nil diff --git a/internal/controller/server_controller_test.go b/internal/controller/server_controller_test.go index c050a9280..f323ac1ff 100644 --- a/internal/controller/server_controller_test.go +++ b/internal/controller/server_controller_test.go @@ -35,7 +35,7 @@ var _ = Describe("Server Controller", func() { ns := SetupTest(nil) AfterEach(func(ctx SpecContext) { - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should initialize a Server from Endpoint", func(ctx SpecContext) { @@ -1203,6 +1203,293 @@ 
passwd: Expect(ignitionStr).To(ContainSubstring("custom-probe:v1.0.0"), "Should include custom probe image") }) }) + + It("Should create ServerMetadata after discovery", func(ctx SpecContext) { + By("Creating a BMCSecret") + bmcSecret := &metalv1alpha1.BMCSecret{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-server-", + }, + Data: map[string][]byte{ + "username": []byte("foo"), + "password": []byte("bar"), + }, + } + Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed()) + + By("Creating a Server with inline BMC configuration") + server := &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "server-", + }, + Spec: metalv1alpha1.ServerSpec{ + SystemUUID: "38947555-7742-3448-3784-823347823834", + BMC: &metalv1alpha1.BMCAccess{ + Protocol: metalv1alpha1.Protocol{ + Name: metalv1alpha1.ProtocolRedfishLocal, + Port: MockServerPort, + }, + Address: MockServerIP, + BMCSecretRef: v1.LocalObjectReference{ + Name: bmcSecret.Name, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, server)).To(Succeed()) + + By("Ensuring the boot configuration has been created") + bootConfig := &metalv1alpha1.ServerBootConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns.Name, + Name: server.Name, + }, + } + Eventually(Get(bootConfig)).Should(Succeed()) + + By("Patching the boot configuration to a Ready state") + Eventually(UpdateStatus(bootConfig, func() { + bootConfig.Status.State = metalv1alpha1.ServerBootConfigurationStateReady + })).Should(Succeed()) + + By("Waiting for the server to reach Discovery state") + Eventually(Object(server)).Should(HaveField("Status.State", metalv1alpha1.ServerStateDiscovery)) + + By("Posting registry data via /register endpoint") + regPayload := registry.RegistrationPayload{ + SystemUUID: server.Spec.SystemUUID, + Data: registry.Server{ + Timestamp: &metav1.Time{Time: time.Now()}, + NetworkInterfaces: []registry.NetworkInterface{ + { + Name: "eth0", + IPAddresses: []string{"192.168.1.100"}, + MACAddress: 
"aa:bb:cc:dd:ee:ff", + }, + }, + }, + } + regDataBytes, err := json.Marshal(regPayload) + Expect(err).NotTo(HaveOccurred()) + resp, err := http.Post(registryURL+"/register", "application/json", bytes.NewReader(regDataBytes)) + Expect(err).NotTo(HaveOccurred()) + defer resp.Body.Close() //nolint:errcheck + Expect(resp.StatusCode).To(Equal(http.StatusCreated)) + + By("Ensuring that the server reaches available state") + Eventually(Object(server), 30*time.Second).Should(HaveField("Status.State", metalv1alpha1.ServerStateAvailable)) + + By("Ensuring that the ServerMetadata has been created") + serverMetadata := &metalv1alpha1.ServerMetadata{ + ObjectMeta: metav1.ObjectMeta{ + Name: server.Name, + }, + } + Eventually(Object(serverMetadata)).Should(SatisfyAll( + HaveField("OwnerReferences", Not(BeEmpty())), + HaveField("NetworkInterfaces", Not(BeEmpty())), + )) + + // cleanup + deleteRegistrySystemIfExists(server.Spec.SystemUUID) + Expect(k8sClient.Delete(ctx, server)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, bmcSecret)).Should(Succeed()) + }) + + It("Should restore Server status from ServerMetadata on empty status", func(ctx SpecContext) { + By("Creating a BMCSecret") + bmcSecret := &metalv1alpha1.BMCSecret{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-server-", + }, + Data: map[string][]byte{ + "username": []byte("foo"), + "password": []byte("bar"), + }, + } + Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed()) + + By("Creating a ServerMetadata first (simulating pre-existing metadata)") + serverMetadata := &metalv1alpha1.ServerMetadata{ + ObjectMeta: metav1.ObjectMeta{ + Name: "server-restore-avail", + }, + NetworkInterfaces: []metalv1alpha1.MetaDataNetworkInterface{ + { + Name: "eth0", + MACAddress: "aa:bb:cc:dd:ee:ff", + IPAddresses: []string{ + "192.168.1.100", + }, + }, + }, + } + Expect(k8sClient.Create(ctx, serverMetadata)).To(Succeed()) + + By("Creating a Server with the same name") + server := &metalv1alpha1.Server{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "server-restore-avail", + }, + Spec: metalv1alpha1.ServerSpec{ + SystemUUID: "38947555-7742-3448-3784-823347823834", + BMC: &metalv1alpha1.BMCAccess{ + Protocol: metalv1alpha1.Protocol{ + Name: metalv1alpha1.ProtocolRedfishLocal, + Port: MockServerPort, + }, + Address: MockServerIP, + BMCSecretRef: v1.LocalObjectReference{ + Name: bmcSecret.Name, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, server)).To(Succeed()) + + By("Ensuring that the Server status is restored to Available with network interfaces") + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", metalv1alpha1.ServerStateAvailable), + HaveField("Status.NetworkInterfaces", HaveLen(1)), + )) + + // cleanup + Expect(k8sClient.Delete(ctx, server)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, bmcSecret)).Should(Succeed()) + }) + + It("Should restore Server to Reserved state from ServerMetadata when ServerClaimRef is set", func(ctx SpecContext) { + By("Creating a BMCSecret") + bmcSecret := &metalv1alpha1.BMCSecret{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-server-", + }, + Data: map[string][]byte{ + "username": []byte("foo"), + "password": []byte("bar"), + }, + } + Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed()) + + By("Creating a ServerMetadata first (simulating pre-existing metadata)") + serverMetadata := &metalv1alpha1.ServerMetadata{ + ObjectMeta: metav1.ObjectMeta{ + Name: "server-restore-reserved", + }, + NetworkInterfaces: []metalv1alpha1.MetaDataNetworkInterface{ + { + Name: "eth0", + MACAddress: "aa:bb:cc:dd:ee:ff", + }, + }, + } + Expect(k8sClient.Create(ctx, serverMetadata)).To(Succeed()) + + By("Creating a Server with inline BMC configuration and a ServerClaimRef") + server := &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + Name: "server-restore-reserved", + }, + Spec: metalv1alpha1.ServerSpec{ + SystemUUID: "38947555-7742-3448-3784-823347823834", + BMC: &metalv1alpha1.BMCAccess{ + Protocol: 
metalv1alpha1.Protocol{ + Name: metalv1alpha1.ProtocolRedfishLocal, + Port: MockServerPort, + }, + Address: MockServerIP, + BMCSecretRef: v1.LocalObjectReference{ + Name: bmcSecret.Name, + }, + }, + ServerClaimRef: &metalv1alpha1.ImmutableObjectReference{ + Namespace: ns.Name, + Name: "some-claim", + }, + }, + } + Expect(k8sClient.Create(ctx, server)).To(Succeed()) + + By("Ensuring that the Server status is restored to Reserved") + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", metalv1alpha1.ServerStateReserved), + HaveField("Status.NetworkInterfaces", HaveLen(1)), + )) + + // cleanup + Expect(k8sClient.Delete(ctx, server)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, bmcSecret)).Should(Succeed()) + }) + + It("Should handle rediscover annotation by deleting ServerMetadata and setting Initial state", func(ctx SpecContext) { + By("Creating a BMCSecret") + bmcSecret := &metalv1alpha1.BMCSecret{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-server-", + }, + Data: map[string][]byte{ + "username": []byte("foo"), + "password": []byte("bar"), + }, + } + Expect(k8sClient.Create(ctx, bmcSecret)).To(Succeed()) + + By("Creating a ServerMetadata first (simulating existing metadata)") + serverMetadata := &metalv1alpha1.ServerMetadata{ + ObjectMeta: metav1.ObjectMeta{ + Name: "server-rediscover", + }, + NetworkInterfaces: []metalv1alpha1.MetaDataNetworkInterface{ + { + Name: "eth0", + MACAddress: "aa:bb:cc:dd:ee:ff", + }, + }, + } + Expect(k8sClient.Create(ctx, serverMetadata)).To(Succeed()) + + By("Creating a Server with inline BMC configuration") + server := &metalv1alpha1.Server{ + ObjectMeta: metav1.ObjectMeta{ + Name: "server-rediscover", + }, + Spec: metalv1alpha1.ServerSpec{ + SystemUUID: "38947555-7742-3448-3784-823347823834", + BMC: &metalv1alpha1.BMCAccess{ + Protocol: metalv1alpha1.Protocol{ + Name: metalv1alpha1.ProtocolRedfishLocal, + Port: MockServerPort, + }, + Address: MockServerIP, + BMCSecretRef: v1.LocalObjectReference{ + 
Name: bmcSecret.Name, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, server)).To(Succeed()) + + By("Ensuring the server reaches Available state via metadata restoration") + Eventually(Object(server)).Should(HaveField("Status.State", metalv1alpha1.ServerStateAvailable)) + + By("Adding the rediscover annotation") + Eventually(Update(server, func() { + metav1.SetMetaDataAnnotation(&server.ObjectMeta, metalv1alpha1.OperationAnnotation, metalv1alpha1.OperationAnnotationRediscover) + })).Should(Succeed()) + + By("Ensuring the annotation is removed and state is Initial") + Eventually(Object(server)).Should(SatisfyAll( + HaveField("Status.State", metalv1alpha1.ServerStateInitial), + HaveField("Annotations", Not(HaveKey(metalv1alpha1.OperationAnnotation))), + )) + + By("Ensuring the ServerMetadata is deleted") + Eventually(Get(serverMetadata)).Should(Satisfy(apierrors.IsNotFound)) + + // cleanup + Expect(k8sClient.Delete(ctx, server)).Should(Succeed()) + Expect(k8sClient.Delete(ctx, bmcSecret)).Should(Succeed()) + }) }) func deleteRegistrySystemIfExists(systemUUID string) { diff --git a/internal/controller/serverbootconfiguration_controller_test.go b/internal/controller/serverbootconfiguration_controller_test.go index e93e35ed2..0454a0b1d 100644 --- a/internal/controller/serverbootconfiguration_controller_test.go +++ b/internal/controller/serverbootconfiguration_controller_test.go @@ -35,7 +35,7 @@ var _ = Describe("ServerBootConfiguration Controller", func() { AfterEach(func(ctx SpecContext) { Expect(k8sClient.Delete(ctx, server)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully add the boot configuration ref to server", func(ctx SpecContext) { diff --git a/internal/controller/serverclaim_controller.go b/internal/controller/serverclaim_controller.go index 0117d61a9..b4a106038 100644 --- a/internal/controller/serverclaim_controller.go +++ b/internal/controller/serverclaim_controller.go @@ -139,8 +139,24 @@ func (r 
*ServerClaimReconciler) reconcile(ctx context.Context, claim *metalv1alp // do late state initialization if claim.Status.Phase == "" { - if modified, err := r.patchServerClaimPhase(ctx, claim, metalv1alpha1.PhaseUnbound); err != nil || modified { - return ctrl.Result{}, err + // Check bidirectional binding to restore Bound phase + if claim.Spec.ServerRef != nil { + server := &metalv1alpha1.Server{} + if err := r.Get(ctx, client.ObjectKey{Name: claim.Spec.ServerRef.Name}, server); err == nil { + if server.Spec.ServerClaimRef != nil && + server.Spec.ServerClaimRef.Name == claim.Name && + server.Spec.ServerClaimRef.Namespace == claim.Namespace { + log.V(1).Info("Detected bidirectional binding, restoring Bound phase") + if modified, err := r.patchServerClaimPhase(ctx, claim, metalv1alpha1.PhaseBound); err != nil || modified { + return ctrl.Result{}, err + } + } + } + } + if claim.Status.Phase == "" { + if modified, err := r.patchServerClaimPhase(ctx, claim, metalv1alpha1.PhaseUnbound); err != nil || modified { + return ctrl.Result{}, err + } } } diff --git a/internal/controller/serverclaim_controller_test.go b/internal/controller/serverclaim_controller_test.go index 9a8a79af5..f2cce96e7 100644 --- a/internal/controller/serverclaim_controller_test.go +++ b/internal/controller/serverclaim_controller_test.go @@ -69,7 +69,7 @@ var _ = Describe("ServerClaim Controller", func() { AfterEach(func(ctx SpecContext) { Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should successfully claim a server in available state", func(ctx SpecContext) { @@ -491,6 +491,41 @@ var _ = Describe("ServerClaim Controller", func() { By("Ensuring that the ServerClaim is deleted") Eventually(Get(claim)).Should(Satisfy(apierrors.IsNotFound)) }) + + It("Should restore Bound phase from bidirectional binding", func(ctx SpecContext) { + By("Setting up Server with a ServerClaimRef pointing to our 
claim") + Eventually(Update(server, func() { + server.Spec.ServerClaimRef = &metalv1alpha1.ImmutableObjectReference{ + Namespace: ns.Name, + Name: "test-bound-claim", + } + })).Should(Succeed()) + + By("Setting server to Reserved state") + Eventually(UpdateStatus(server, func() { + server.Status.State = metalv1alpha1.ServerStateReserved + })).Should(Succeed()) + + By("Creating a ServerClaim with ServerRef pointing back to the server (simulating post-move state with empty status)") + claim := &metalv1alpha1.ServerClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns.Name, + Name: "test-bound-claim", + }, + Spec: metalv1alpha1.ServerClaimSpec{ + Power: metalv1alpha1.PowerOn, + ServerRef: &v1.LocalObjectReference{Name: server.Name}, + Image: "foo:bar", + }, + } + Expect(k8sClient.Create(ctx, claim)).To(Succeed()) + + By("Ensuring the ServerClaim phase is restored to Bound") + Eventually(Object(claim)).Should(HaveField("Status.Phase", metalv1alpha1.PhaseBound)) + + // cleanup + Expect(k8sClient.Delete(ctx, claim)).To(Succeed()) + }) }) var _ = Describe("ServerClaim Validation", func() { @@ -536,7 +571,7 @@ var _ = Describe("ServerClaim Validation", func() { AfterEach(func(ctx SpecContext) { Expect(k8sClient.Delete(ctx, claim)).To(Succeed()) Expect(k8sClient.Delete(ctx, claimWithSelector)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should deny if the ServerRef changes", func() { @@ -674,7 +709,7 @@ var _ = Describe("Server Claiming", MustPassRepeatedly(5), func() { for _, server := range serverList.Items { Expect(k8sClient.Delete(ctx, &server)).To(Succeed()) } - EnsureCleanState() + EnsureCleanState(ctx) }) It("Binds four out of ten server for four best effort claims", func(ctx SpecContext) { diff --git a/internal/controller/servermaintenance_controller_test.go b/internal/controller/servermaintenance_controller_test.go index c44919ab2..71ab10bf5 100644 --- a/internal/controller/servermaintenance_controller_test.go +++ 
b/internal/controller/servermaintenance_controller_test.go @@ -59,7 +59,7 @@ var _ = Describe("ServerMaintenance Controller", func() { AfterEach(func(ctx SpecContext) { Expect(k8sClient.Delete(ctx, server)).To(Succeed()) Expect(k8sClient.Delete(ctx, bmcSecret)).To(Succeed()) - EnsureCleanState() + EnsureCleanState(ctx) }) It("Should force a Server into maintenance from Initial State", func(ctx SpecContext) { diff --git a/internal/controller/servermetadata_helpers.go b/internal/controller/servermetadata_helpers.go new file mode 100644 index 000000000..6af26812e --- /dev/null +++ b/internal/controller/servermetadata_helpers.go @@ -0,0 +1,202 @@ +// SPDX-FileCopyrightText: 2024 SAP SE or an SAP affiliate company and IronCore contributors +// SPDX-License-Identifier: Apache-2.0 + +package controller + +import ( + metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1" + "github.com/ironcore-dev/metal-operator/internal/api/registry" +) + +// registryToServerMetadata converts a registry.Server into root-level fields on a ServerMetadata object. 
+func registryToServerMetadata(regServer *registry.Server, meta *metalv1alpha1.ServerMetadata) { + // SystemInfo + meta.SystemInfo = metalv1alpha1.MetaDataSystemInfo{ + BIOSInformation: metalv1alpha1.MetaDataBIOSInformation{ + Vendor: regServer.SystemInfo.BIOSInformation.Vendor, + Version: regServer.SystemInfo.BIOSInformation.Version, + Date: regServer.SystemInfo.BIOSInformation.Date, + }, + SystemInformation: metalv1alpha1.MetaDataServerInformation{ + Manufacturer: regServer.SystemInfo.SystemInformation.Manufacturer, + ProductName: regServer.SystemInfo.SystemInformation.ProductName, + Version: regServer.SystemInfo.SystemInformation.Version, + SerialNumber: regServer.SystemInfo.SystemInformation.SerialNumber, + UUID: regServer.SystemInfo.SystemInformation.UUID, + SKUNumber: regServer.SystemInfo.SystemInformation.SKUNumber, + Family: regServer.SystemInfo.SystemInformation.Family, + }, + BoardInformation: metalv1alpha1.MetaDataBoardInformation{ + Manufacturer: regServer.SystemInfo.BoardInformation.Manufacturer, + Product: regServer.SystemInfo.BoardInformation.Product, + Version: regServer.SystemInfo.BoardInformation.Version, + SerialNumber: regServer.SystemInfo.BoardInformation.SerialNumber, + AssetTag: regServer.SystemInfo.BoardInformation.AssetTag, + }, + } + + // CPU + meta.CPU = make([]metalv1alpha1.MetaDataCPU, 0, len(regServer.CPU)) + for _, c := range regServer.CPU { + meta.CPU = append(meta.CPU, metalv1alpha1.MetaDataCPU{ + ID: c.ID, + TotalCores: c.TotalCores, + TotalHardwareThreads: c.TotalHardwareThreads, + Vendor: c.Vendor, + Model: c.Model, + Capabilities: c.Capabilities, + }) + } + + // NetworkInterfaces + meta.NetworkInterfaces = make([]metalv1alpha1.MetaDataNetworkInterface, 0, len(regServer.NetworkInterfaces)) + for _, ni := range regServer.NetworkInterfaces { + meta.NetworkInterfaces = append(meta.NetworkInterfaces, metalv1alpha1.MetaDataNetworkInterface{ + Name: ni.Name, + IPAddresses: ni.IPAddresses, + MACAddress: ni.MACAddress, + CarrierStatus: 
ni.CarrierStatus, + }) + } + + // LLDP + meta.LLDP = make([]metalv1alpha1.MetaDataLLDPInterface, 0, len(regServer.LLDP)) + for _, li := range regServer.LLDP { + iface := metalv1alpha1.MetaDataLLDPInterface{ + Name: li.Name, + } + for _, n := range li.Neighbors { + iface.Neighbors = append(iface.Neighbors, metalv1alpha1.MetaDataLLDPNeighbor{ + ChassisID: n.ChassisID, + PortID: n.PortID, + PortDescription: n.PortDescription, + SystemName: n.SystemName, + SystemDescription: n.SystemDescription, + MgmtIP: n.MgmtIP, + Capabilities: n.Capabilities, + VlanID: n.VlanID, + }) + } + meta.LLDP = append(meta.LLDP, iface) + } + + // Storage + meta.Storage = make([]metalv1alpha1.MetaDataBlockDevice, 0, len(regServer.Storage)) + for _, bd := range regServer.Storage { + meta.Storage = append(meta.Storage, metalv1alpha1.MetaDataBlockDevice{ + Path: bd.Path, + Name: bd.Name, + Rotational: bd.Rotational, + Removable: bd.Removable, + ReadOnly: bd.ReadOnly, + Vendor: bd.Vendor, + Model: bd.Model, + Serial: bd.Serial, + WWID: bd.WWID, + PhysicalBlockSize: bd.PhysicalBlockSize, + LogicalBlockSize: bd.LogicalBlockSize, + HWSectorSize: bd.HWSectorSize, + SizeBytes: bd.SizeBytes, + NUMANodeID: bd.NUMANodeID, + }) + } + + // Memory + meta.Memory = make([]metalv1alpha1.MetaDataMemoryDevice, 0, len(regServer.Memory)) + for _, m := range regServer.Memory { + meta.Memory = append(meta.Memory, metalv1alpha1.MetaDataMemoryDevice{ + SizeBytes: m.SizeBytes, + DeviceSet: m.DeviceSet, + DeviceLocator: m.DeviceLocator, + BankLocator: m.BankLocator, + MemoryType: m.MemoryType, + Speed: m.Speed, + Vendor: m.Vendor, + SerialNumber: m.SerialNumber, + AssetTag: m.AssetTag, + PartNumber: m.PartNumber, + ConfiguredMemorySpeed: m.ConfiguredMemorySpeed, + MinimumVoltage: m.MinimumVoltage, + MaximumVoltage: m.MaximumVoltage, + ConfiguredVoltage: m.ConfiguredVoltage, + }) + } + + // NICs + meta.NICs = make([]metalv1alpha1.MetaDataNIC, 0, len(regServer.NICs)) + for _, n := range regServer.NICs { + meta.NICs = 
append(meta.NICs, metalv1alpha1.MetaDataNIC{ + Name: n.Name, + MAC: n.MAC, + PCIAddress: n.PCIAddress, + Speed: n.Speed, + LinkModes: n.LinkModes, + SupportedPorts: n.SupportedPorts, + FirmwareVersion: n.FirmwareVersion, + }) + } + + // PCIDevices + meta.PCIDevices = make([]metalv1alpha1.MetaDataPCIDevice, 0, len(regServer.PCIDevices)) + for _, pd := range regServer.PCIDevices { + meta.PCIDevices = append(meta.PCIDevices, metalv1alpha1.MetaDataPCIDevice{ + Address: pd.Address, + Vendor: pd.Vendor, + VendorID: pd.VendorID, + Product: pd.Product, + ProductID: pd.ProductID, + NumaNodeID: pd.NumaNodeID, + }) + } +} + +// metaDataToNetworkInterfaces converts ServerMetadata network interface and LLDP data +// into the Server.Status.NetworkInterfaces format. +func metaDataToNetworkInterfaces(log interface { + Error(err error, msg string, keysAndValues ...any) +}, meta *metalv1alpha1.ServerMetadata) []metalv1alpha1.NetworkInterface { + nics := make([]metalv1alpha1.NetworkInterface, 0, len(meta.NetworkInterfaces)) + for _, ni := range meta.NetworkInterfaces { + nic := metalv1alpha1.NetworkInterface{ + Name: ni.Name, + MACAddress: ni.MACAddress, + CarrierStatus: ni.CarrierStatus, + } + + var allIPs []metalv1alpha1.IP + for _, ipAddr := range ni.IPAddresses { + if ipAddr != "" { + ip, err := metalv1alpha1.ParseIP(ipAddr) + if err != nil { + log.Error(err, "Invalid IP address in ServerMetadata, skipping", "interface", ni.Name, "ip", ipAddr) + continue + } + allIPs = append(allIPs, ip) + } + } + nic.IPs = allIPs + nics = append(nics, nic) + } + + // Merge LLDP neighbors into corresponding network interfaces + for _, lldpIface := range meta.LLDP { + for i := range nics { + if nics[i].Name == lldpIface.Name { + neighbors := make([]metalv1alpha1.LLDPNeighbor, 0, len(lldpIface.Neighbors)) + for _, neighbor := range lldpIface.Neighbors { + neighbors = append(neighbors, metalv1alpha1.LLDPNeighbor{ + MACAddress: neighbor.ChassisID, + PortID: neighbor.PortID, + PortDescription: 
neighbor.PortDescription, + SystemName: neighbor.SystemName, + SystemDescription: neighbor.SystemDescription, + }) + } + nics[i].Neighbors = neighbors + break + } + } + } + + return nics +} diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index e335bdc79..950e980f6 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -359,9 +359,13 @@ func SetupTest(redfishMockServers []netip.AddrPort) *corev1.Namespace { } // EnsureCleanState ensures that all ServerClaims and cluster scoped objects are removed from the API server. -func EnsureCleanState() { +func EnsureCleanState(ctx context.Context) { GinkgoHelper() + // Delete ServerMetadata explicitly since envtest has no garbage collector + // to cascade-delete them when the owning Server is deleted. + Expect(k8sClient.DeleteAllOf(ctx, &metalv1alpha1.ServerMetadata{})).To(Succeed()) + objectLists := []client.ObjectList{ &metalv1alpha1.EndpointList{}, &metalv1alpha1.BMCList{}, @@ -375,6 +379,7 @@ func EnsureCleanState() { &metalv1alpha1.BIOSSettingsSetList{}, &metalv1alpha1.BIOSSettingsList{}, &metalv1alpha1.ServerMaintenanceList{}, + &metalv1alpha1.ServerMetadataList{}, &metalv1alpha1.ServerList{}, }