diff --git a/designs/cpu-options-nested-virtualization.md b/designs/cpu-options-nested-virtualization.md new file mode 100644 index 000000000000..70acfc56e713 --- /dev/null +++ b/designs/cpu-options-nested-virtualization.md @@ -0,0 +1,99 @@ +# CPU Options and Nested Virtualization + +## Overview + +AWS announced nested virtualization support on virtual EC2 instances in February 2026, +enabling KVM-based workloads (container sandboxes, microVMs, development VMs) without +bare-metal instances. The feature is configured via the `CpuOptions.NestedVirtualization` +field in the EC2 `RunInstances` and `CreateLaunchTemplate` APIs. + +Karpenter needs to expose this capability on `EC2NodeClass` so users can request nodes +with nested virtualization enabled, and Karpenter needs to filter out instance types that +do not support the feature to avoid launch failures. + +## Goals + +- Expose `cpuOptions` on `EC2NodeClass.spec` with `coreCount`, `threadsPerCore`, and + `nestedVirtualization` fields. +- Pass `CpuOptions` through to the EC2 launch template. +- Filter instance types to only those reporting `nested-virtualization` in + `ProcessorInfo.SupportedFeatures` from `DescribeInstanceTypes`. +- Validate that `nestedVirtualization` is mutually exclusive with `coreCount` and + `threadsPerCore` (EC2 API constraint). +- Cache `UnsupportedOperation` fleet errors as unfulfillable capacity. + +## API Updates + +### EC2NodeClass Spec + +```yaml +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: nested-virt +spec: + cpuOptions: + nestedVirtualization: enabled + # ... 
other fields +``` + +### CPUOptions Struct + +```go +type CPUOptions struct { + CoreCount *int32 `json:"coreCount,omitempty"` + ThreadsPerCore *int32 `json:"threadsPerCore,omitempty"` + NestedVirtualization *string `json:"nestedVirtualization,omitempty"` +} +``` + +CEL validation enforces that `nestedVirtualization: enabled` cannot be combined with +`coreCount` or `threadsPerCore` (EC2 rejects the combination). + +### Instance Type Label + +A new well-known label `karpenter.k8s.aws/instance-nested-virtualization` is populated +from `ProcessorInfo.SupportedFeatures` during instance type resolution. Instance types +that report `nested-virtualization` in their supported features receive the label value +`"true"`. + +As of March 2026, only the `*8i*` families support this feature: c8i, c8i-flex, m8i, +m8i-flex, r8i, r8i-flex (54 instance types total). No ARM, Xen, or bare-metal instances +support it. + +## Launch Behavior + +### Instance Type Filtering + +When an `EC2NodeClass` sets `cpuOptions.nestedVirtualization: enabled`, a +`NestedVirtualizationFilter` in the instance filter chain rejects any instance type +lacking the `instance-nested-virtualization=true` label. This runs after the +`CompatibleAvailableFilter` and before capacity reservation filters. + +### Launch Template + +The `cpuOptions()` converter maps the `CPUOptions` struct to +`LaunchTemplateCpuOptionsRequest`. It returns `nil` when all fields are nil (avoiding an +empty `CpuOptions` block in the API call). The `NestedVirtualization` string is cast to +the SDK enum type `ec2types.NestedVirtualizationSpecification`. + +### Error Handling + +`UnsupportedOperation` is added to the `unfulfillableCapacityErrorCodes` set so that +launches against incompatible instance types (if they bypass the filter) are cached as +unavailable rather than retried indefinitely. 
+ +## Instance Type Compatibility + +The authoritative signal is `ProcessorInfo.SupportedFeatures` from `DescribeInstanceTypes`: + +```bash +aws ec2 describe-instance-types \ + --filters "Name=processor-info.supported-features,Values=nested-virtualization" \ + --query 'InstanceTypes[*].InstanceType' +``` + +This returns only the families that actually support the feature, avoiding heuristic-based +filtering (e.g., checking architecture + hypervisor) which would be both over-inclusive +(allowing older Intel families that don't support it) and fragile (breaking when AWS adds +support to new families). diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index b76671430ebc..ccacd59c7b32 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -143,6 +143,9 @@ type EC2NodeClassSpec struct { // https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html // +optional Context *string `json:"context,omitempty"` + // CPUOptions defines the CPU options for the instance. + // +optional + CPUOptions *CPUOptions `json:"cpuOptions,omitempty"` } // SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes. @@ -356,6 +359,26 @@ type MetadataOptions struct { HTTPTokens *string `json:"httpTokens,omitempty"` } +// CPUOptions contains parameters for specifying the CPU configuration for provisioned EC2 nodes. +// +kubebuilder:validation:XValidation:message="nestedVirtualization cannot be set alongside coreCount or threadsPerCore",rule="!has(self.nestedVirtualization) || self.nestedVirtualization == 'disabled' || (!has(self.coreCount) && !has(self.threadsPerCore))" +type CPUOptions struct { + // CoreCount specifies the number of CPU cores for the instance. + // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=128 + // +optional + CoreCount *int32 `json:"coreCount,omitempty"` + // ThreadsPerCore specifies the number of threads per core for the instance. 
+ // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=2 + // +optional + ThreadsPerCore *int32 `json:"threadsPerCore,omitempty"` + // NestedVirtualization enables or disables nested virtualization on the instance. + // This feature allows running virtual machines inside the EC2 instance. + // +kubebuilder:validation:Enum:={enabled,disabled} + // +optional + NestedVirtualization *string `json:"nestedVirtualization,omitempty"` +} + type BlockDeviceMapping struct { // The device name (for example, /dev/sdh or xvdh). // +optional @@ -535,6 +558,10 @@ func (in *EC2NodeClass) KubeletConfiguration() *KubeletConfiguration { return in.Spec.Kubelet } +func (in *EC2NodeClass) CPUOptions() *CPUOptions { + return in.Spec.CPUOptions +} + // AMIFamily returns the family for a NodePool based on the following items, in order of precdence: // - ec2nodeclass.spec.amiFamily // - ec2nodeclass.spec.amiSelectorTerms[].alias diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index dd6ee525fc3b..84823af40039 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -1288,4 +1288,93 @@ var _ = Describe("CEL/Validation", func() { Expect(env.Client.Update(ctx, nc)).To(Succeed()) }) }) + + Context("CPUOptions", func() { + When("valid CPU options are provided", func() { + It("should succeed with coreCount and threadsPerCore", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(4), + ThreadsPerCore: aws.Int32(1), + } + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should succeed with nestedVirtualization enabled on its own", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + NestedVirtualization: aws.String("enabled"), + } + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + }) + // The CPUOptions CEL rule rejects nestedVirtualization: enabled together with coreCount or threadsPerCore. + When("nestedVirtualization is enabled alongside coreCount or threadsPerCore", func() { + It("should fail when combined with CoreCount", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(4), + NestedVirtualization: aws.String("enabled"), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when combined with ThreadsPerCore", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + ThreadsPerCore: aws.Int32(1), + NestedVirtualization: aws.String("enabled"), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) + When("invalid CoreCount is provided", func() { + It("should fail when CoreCount is 0", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(0), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when CoreCount is negative", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(-1), +
} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when CoreCount exceeds maximum", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(129), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) + When("invalid ThreadsPerCore is provided", func() { + It("should fail when ThreadsPerCore is 0", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + ThreadsPerCore: aws.Int32(0), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when ThreadsPerCore is negative", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + ThreadsPerCore: aws.Int32(-1), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when ThreadsPerCore exceeds maximum", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + ThreadsPerCore: aws.Int32(3), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) + When("invalid NestedVirtualization is provided", func() { + It("should fail when NestedVirtualization has invalid value", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{ + NestedVirtualization: aws.String("invalid"), + } + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + }) + When("empty CPU options are provided", func() { + It("should succeed with nil CPU options", func() { + nc.Spec.CPUOptions = &v1.CPUOptions{} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + }) + }) }) diff --git a/pkg/apis/v1/labels.go b/pkg/apis/v1/labels.go index c2740def4df5..a592c9e6e9a7 100644 --- a/pkg/apis/v1/labels.go +++ b/pkg/apis/v1/labels.go @@ -55,6 +55,7 @@ func init() { LabelInstanceAcceleratorName, LabelInstanceAcceleratorManufacturer, LabelInstanceAcceleratorCount, + LabelInstanceNestedVirtualization, LabelTopologyZoneID, LabelInstanceTenancy, corev1.LabelWindowsBuild, @@ -161,6 +162,7 @@ var ( LabelInstanceAcceleratorName = apis.Group + "/instance-accelerator-name" LabelInstanceAcceleratorManufacturer = apis.Group + "/instance-accelerator-manufacturer" 
LabelInstanceAcceleratorCount = apis.Group + "/instance-accelerator-count" + LabelInstanceNestedVirtualization = apis.Group + "/instance-nested-virtualization" LabelNodeClass = apis.Group + "/ec2nodeclass" LabelInstanceTenancy = apis.Group + "/instance-tenancy" diff --git a/pkg/errors/errors.go b/pkg/errors/errors.go index 1f0ff0379cb9..a1bb16cd57f1 100644 --- a/pkg/errors/errors.go +++ b/pkg/errors/errors.go @@ -58,6 +58,7 @@ var ( "VcpuLimitExceeded", "UnfulfillableCapacity", "Unsupported", + "UnsupportedOperation", "InsufficientFreeAddressesInSubnet", "MaxFleetCountExceeded", reservationCapacityExceededErrorCode, diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index 36e5c0b35768..dad62d39141e 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -78,6 +78,7 @@ type LaunchTemplate struct { UserData bootstrap.Bootstrapper BlockDeviceMappings []*v1.BlockDeviceMapping MetadataOptions *v1.MetadataOptions + CPUOptions *v1.CPUOptions AMIID string InstanceTypes []*cloudprovider.InstanceType `hash:"ignore"` DetailedMonitoring bool @@ -310,6 +311,7 @@ func (r DefaultResolver) resolveLaunchTemplates( ), BlockDeviceMappings: nodeClass.Spec.BlockDeviceMappings, MetadataOptions: nodeClass.Spec.MetadataOptions, + CPUOptions: nodeClass.Spec.CPUOptions, DetailedMonitoring: aws.ToBool(nodeClass.Spec.DetailedMonitoring), AMIID: amiID, InstanceTypes: instanceTypes, diff --git a/pkg/providers/instance/filter/filter.go b/pkg/providers/instance/filter/filter.go index 397c4fb03f30..cb9550f446c2 100644 --- a/pkg/providers/instance/filter/filter.go +++ b/pkg/providers/instance/filter/filter.go @@ -429,3 +429,26 @@ func (f spotOfferingFilter) FilterReject(instanceTypes []*cloudprovider.Instance func (spotOfferingFilter) Name() string { return "spot-offering-filter" } + +// NestedVirtualizationFilter removes instance types that lack the EC2 nested-virtualization +// processor feature 
(ProcessorInfo.SupportedFeatures from DescribeInstanceTypes). +func NestedVirtualizationFilter(enabled bool) Filter { + return nestedVirtualizationFilter{enabled: enabled} +} + +type nestedVirtualizationFilter struct { + enabled bool +} + +func (f nestedVirtualizationFilter) FilterReject(instanceTypes []*cloudprovider.InstanceType) ([]*cloudprovider.InstanceType, []*cloudprovider.InstanceType) { + if !f.enabled { + return instanceTypes, nil + } + return lo.FilterReject(instanceTypes, func(it *cloudprovider.InstanceType, _ int) bool { + return lo.Contains(it.Requirements.Get(v1.LabelInstanceNestedVirtualization).Values(), "true") + }) +} + +func (nestedVirtualizationFilter) Name() string { + return "nested-virtualization-filter" +} diff --git a/pkg/providers/instance/filter/filter_test.go b/pkg/providers/instance/filter/filter_test.go index c4ff6b5afaa4..d1c56e1fb57f 100644 --- a/pkg/providers/instance/filter/filter_test.go +++ b/pkg/providers/instance/filter/filter_test.go @@ -678,6 +678,47 @@ var _ = Describe("InstanceFiltersTest", func() { } }) }) + Context("NestedVirtualizationFilter", func() { + It("should keep all types when disabled", func() { + types := []*cloudprovider.InstanceType{ + makeInstanceType("m7g.xlarge"), + } + kept, rejected := filter.NestedVirtualizationFilter(false).FilterReject(types) + Expect(kept).To(HaveLen(1)) + Expect(rejected).To(BeEmpty()) + }) + It("should keep types with nested-virt label", func() { + types := []*cloudprovider.InstanceType{ + makeInstanceType("m8i.2xlarge", + withRequirements(scheduling.NewRequirement(v1.LabelInstanceNestedVirtualization, corev1.NodeSelectorOpIn, "true")), + ), + } + kept, rejected := filter.NestedVirtualizationFilter(true).FilterReject(types) + Expect(kept).To(HaveLen(1)) + Expect(rejected).To(BeEmpty()) + }) + It("should reject types without nested-virt label", func() { + types := []*cloudprovider.InstanceType{ + makeInstanceType("m7g.xlarge"), + } + kept, rejected := 
filter.NestedVirtualizationFilter(true).FilterReject(types) + Expect(kept).To(BeEmpty()) + Expect(rejected).To(HaveLen(1)) + }) + It("should filter a mixed pool to only supported types", func() { + types := []*cloudprovider.InstanceType{ + makeInstanceType("m7g.xlarge"), + makeInstanceType("m8i.2xlarge", + withRequirements(scheduling.NewRequirement(v1.LabelInstanceNestedVirtualization, corev1.NodeSelectorOpIn, "true")), + ), + makeInstanceType("m4.xlarge"), + } + kept, rejected := filter.NestedVirtualizationFilter(true).FilterReject(types) + Expect(kept).To(HaveLen(1)) + Expect(kept[0].Name).To(Equal("m8i.2xlarge")) + Expect(rejected).To(HaveLen(2)) + }) + }) }) func expectInstanceTypes(instanceTypes []*cloudprovider.InstanceType, names ...string) { diff --git a/pkg/providers/instance/instance.go b/pkg/providers/instance/instance.go index 924cb7e5c021..eec4fc0b2f74 100644 --- a/pkg/providers/instance/instance.go +++ b/pkg/providers/instance/instance.go @@ -131,7 +131,7 @@ func NewDefaultProvider( } func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, tags map[string]string, instanceTypes []*cloudprovider.InstanceType) (*Instance, error) { - instanceTypes, err := p.filterInstanceTypes(ctx, instanceTypes, nodeClaim) + instanceTypes, err := p.filterInstanceTypes(ctx, instanceTypes, nodeClass, nodeClaim) if err != nil { return nil, err } @@ -270,11 +270,15 @@ func (p *DefaultProvider) CreateTags(ctx context.Context, id string, tags map[st return nil } -func (p *DefaultProvider) filterInstanceTypes(ctx context.Context, instanceTypes []*cloudprovider.InstanceType, nodeClaim *karpv1.NodeClaim) ([]*cloudprovider.InstanceType, error) { +func (p *DefaultProvider) filterInstanceTypes(ctx context.Context, instanceTypes []*cloudprovider.InstanceType, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim) ([]*cloudprovider.InstanceType, error) { rejectedInstanceTypes := map[string][]*cloudprovider.InstanceType{} reqs := 
scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...) + nestedVirtEnabled := nodeClass.Spec.CPUOptions != nil && + nodeClass.Spec.CPUOptions.NestedVirtualization != nil && + *nodeClass.Spec.CPUOptions.NestedVirtualization == "enabled" for _, filter := range []instancefilter.Filter{ instancefilter.CompatibleAvailableFilter(reqs, nodeClaim.Spec.Resources.Requests), + instancefilter.NestedVirtualizationFilter(nestedVirtEnabled), instancefilter.CapacityReservationTypeFilter(reqs), instancefilter.CapacityBlockFilter(reqs), instancefilter.ReservedOfferingFilter(reqs), diff --git a/pkg/providers/instancetype/types.go b/pkg/providers/instancetype/types.go index df23f760fbd8..fae19f718ecd 100644 --- a/pkg/providers/instancetype/types.go +++ b/pkg/providers/instancetype/types.go @@ -208,6 +208,7 @@ func computeRequirements( scheduling.NewRequirement(v1.LabelInstanceAcceleratorName, corev1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1.LabelInstanceAcceleratorManufacturer, corev1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist), + scheduling.NewRequirement(v1.LabelInstanceNestedVirtualization, corev1.NodeSelectorOpDoesNotExist), scheduling.NewRequirement(v1.LabelInstanceHypervisor, corev1.NodeSelectorOpIn, string(info.Hypervisor)), scheduling.NewRequirement(v1.LabelInstanceEncryptionInTransitSupported, corev1.NodeSelectorOpIn, fmt.Sprint(aws.ToBool(info.NetworkInfo.EncryptionInTransitSupported))), scheduling.NewRequirement(v1.LabelInstanceTenancy, corev1.NodeSelectorOpIn, string(ec2types.TenancyDefault), string(ec2types.TenancyDedicated)), @@ -291,6 +292,10 @@ func computeRequirements( if info.ProcessorInfo != nil { requirements.Get(v1.LabelInstanceCPUManufacturer).Insert(lowerKabobCase(aws.ToString(info.ProcessorInfo.Manufacturer))) } + // Nested virtualization support (from ProcessorInfo.SupportedFeatures) + if info.ProcessorInfo != nil && 
lo.Contains(info.ProcessorInfo.SupportedFeatures, "nested-virtualization") { + requirements.Get(v1.LabelInstanceNestedVirtualization).Insert("true") + } // CPU Sustained Clock Speed if info.ProcessorInfo != nil { // Convert from Ghz to Mhz and round to nearest whole number - converting from float64 to int to support Gt and Lt operators diff --git a/pkg/providers/launchtemplate/cpuoptions_test.go b/pkg/providers/launchtemplate/cpuoptions_test.go new file mode 100644 index 000000000000..3ac3e7724905 --- /dev/null +++ b/pkg/providers/launchtemplate/cpuoptions_test.go @@ -0,0 +1,75 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package launchtemplate + +import ( + "testing" + + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" +) + +func TestCpuOptions_Nil(t *testing.T) { + if cpuOptions(nil) != nil { + t.Fatal("expected nil for nil input") + } +} + +func TestCpuOptions_AllFieldsNil(t *testing.T) { + if cpuOptions(&v1.CPUOptions{}) != nil { + t.Fatal("expected nil for empty CPUOptions") + } +} + +func TestCpuOptions_NestedVirtualizationEnabled(t *testing.T) { + result := cpuOptions(&v1.CPUOptions{NestedVirtualization: lo.ToPtr("enabled")}) + if result == nil { + t.Fatal("expected non-nil result") + } + if result.NestedVirtualization != ec2types.NestedVirtualizationSpecification("enabled") { + t.Fatalf("expected 'enabled', got %q", result.NestedVirtualization) + } + if result.CoreCount != nil || result.ThreadsPerCore != nil { + t.Fatal("expected nil CoreCount and ThreadsPerCore") + } +} + +func TestCpuOptions_NestedVirtualizationDisabled(t *testing.T) { + result := cpuOptions(&v1.CPUOptions{NestedVirtualization: lo.ToPtr("disabled")}) + if result == nil { + t.Fatal("expected non-nil result") + } + if result.NestedVirtualization != ec2types.NestedVirtualizationSpecification("disabled") { + t.Fatalf("expected 'disabled', got %q", result.NestedVirtualization) + } +} + +func TestCpuOptions_CoreCountAndThreads(t *testing.T) { + result := cpuOptions(&v1.CPUOptions{CoreCount: lo.ToPtr(int32(4)), ThreadsPerCore: lo.ToPtr(int32(2))}) + if result == nil { + t.Fatal("expected non-nil result") + } + if *result.CoreCount != 4 { + t.Fatalf("expected CoreCount 4, got %d", *result.CoreCount) + } + if *result.ThreadsPerCore != 2 { + t.Fatalf("expected ThreadsPerCore 2, got %d", *result.ThreadsPerCore) + } + if result.NestedVirtualization != "" { + t.Fatalf("expected empty NestedVirtualization, got %q", result.NestedVirtualization) + } +} diff --git a/pkg/providers/launchtemplate/launchtemplate.go 
b/pkg/providers/launchtemplate/launchtemplate.go index 260dcb0e7f27..3311f7c1ec15 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -334,6 +334,23 @@ func volumeSize(quantity *resource.Quantity) *int32 { return lo.ToPtr(int32(math.Ceil(quantity.AsApproximateFloat64() / math.Pow(2, 30)))) } +func cpuOptions(cpuOptions *v1.CPUOptions) *ec2types.LaunchTemplateCpuOptionsRequest { + if cpuOptions == nil { + return nil + } + if cpuOptions.CoreCount == nil && cpuOptions.ThreadsPerCore == nil && cpuOptions.NestedVirtualization == nil { + return nil + } + opts := &ec2types.LaunchTemplateCpuOptionsRequest{ + CoreCount: cpuOptions.CoreCount, + ThreadsPerCore: cpuOptions.ThreadsPerCore, + } + if cpuOptions.NestedVirtualization != nil { + opts.NestedVirtualization = ec2types.NestedVirtualizationSpecification(*cpuOptions.NestedVirtualization) + } + return opts +} + // hydrateCache queries for existing Launch Templates created by Karpenter for the current cluster and adds to the LT cache. 
// Any error during hydration will result in a panic func (p *DefaultProvider) hydrateCache(ctx context.Context) { diff --git a/pkg/providers/launchtemplate/types.go b/pkg/providers/launchtemplate/types.go index 8efecc7640da..7439181ba5e3 100644 --- a/pkg/providers/launchtemplate/types.go +++ b/pkg/providers/launchtemplate/types.go @@ -107,13 +107,14 @@ func (b *CreateLaunchTemplateInputBuilder) Build(ctx context.Context) *ec2.Creat LaunchTemplateName: lo.ToPtr(LaunchTemplateName(b.options)), LaunchTemplateData: &ec2types.RequestLaunchTemplateData{ BlockDeviceMappings: blockDeviceMappings(b.options.BlockDeviceMappings), + CpuOptions: cpuOptions(b.options.CPUOptions), IamInstanceProfile: &ec2types.LaunchTemplateIamInstanceProfileSpecificationRequest{ Name: lo.ToPtr(b.options.InstanceProfile), }, Monitoring: &ec2types.LaunchTemplatesMonitoringRequest{ Enabled: lo.ToPtr(b.options.DetailedMonitoring), }, - // If the network interface is defined, the security groups are defined within it + // If the network interface is defined, the security groups are defined within it SecurityGroupIds: lo.Ternary(networkInterfaces != nil, nil, lo.Map(b.options.SecurityGroups, func(s v1.SecurityGroup, _ int) string { return s.ID })), UserData: lo.ToPtr(b.userData), ImageId: lo.ToPtr(b.options.AMIID), @@ -124,7 +125,7 @@ func (b *CreateLaunchTemplateInputBuilder) Build(ctx context.Context) *ec2.Creat //nolint: gosec HttpPutResponseHopLimit: lo.ToPtr(int32(lo.FromPtr(b.options.MetadataOptions.HTTPPutResponseHopLimit))), HttpTokens: ec2types.LaunchTemplateHttpTokensState(lo.FromPtr(b.options.MetadataOptions.HTTPTokens)), - // We statically set the InstanceMetadataTags to "disabled" for all new instances since + // We statically set the InstanceMetadataTags to "disabled" for all new instances since // account-wide defaults can override instance defaults on metadata settings // This can cause instance failure on accounts that default to instance tags since Karpenter // can't support instance tags
with its current tags (e.g. kubernetes.io/cluster/*, karpenter.k8s.aws/ec2nodeclass) diff --git a/test/suites/integration/launch_template_test.go b/test/suites/integration/launch_template_test.go index 65c518fee356..8cb0fefde897 100644 --- a/test/suites/integration/launch_template_test.go +++ b/test/suites/integration/launch_template_test.go @@ -29,7 +29,7 @@ import ( ) var _ = Describe("Launch Template Deletion", func() { - It("should remove the generated Launch Templates when deleting the NodeClass", func() { + It("should remove the generated Launch Templates when deleting the NodeClass", func() { pod := coretest.Pod() env.ExpectCreated(nodePool, nodeClass, pod) env.EventuallyExpectHealthy(pod) @@ -46,3 +46,44 @@ var _ = Describe("Launch Template Deletion", func() { }).WithPolling(5.0).Should(Succeed()) }) }) + +var _ = Describe("Launch Template CPU Options", func() { + It("should create launch template with CPU options", func() { + nodeClass.Spec.CPUOptions = &v1.CPUOptions{ + CoreCount: aws.Int32(2), + ThreadsPerCore: aws.Int32(1), + // nestedVirtualization is left unset: the EC2NodeClass CEL rule rejects it alongside coreCount/threadsPerCore + } + + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + // Verify the launch template was created with CPU options + Eventually(func(g Gomega) { + output, err := env.EC2API.DescribeLaunchTemplates(env.Context, &ec2.DescribeLaunchTemplatesInput{ + Filters: []ec2types.Filter{ + {Name: aws.String(fmt.Sprintf("tag:%s", v1.LabelNodeClass)), Values: []string{nodeClass.Name}}, + }, + }) + g.Expect(err).To(BeNil()) + g.Expect(output.LaunchTemplates).To(HaveLen(1)) + + // Get the launch template data to verify CPU options (LatestVersionNumber is *int64, so format it as a decimal string) + ltVersion := fmt.Sprint(aws.ToInt64(output.LaunchTemplates[0].LatestVersionNumber)) + ltOutput, err := env.EC2API.DescribeLaunchTemplateVersions(env.Context, &ec2.DescribeLaunchTemplateVersionsInput{ + LaunchTemplateId: output.LaunchTemplates[0].LaunchTemplateId, + Versions:
[]string{ltVersion}, + }) + g.Expect(err).To(BeNil()) + g.Expect(ltOutput.LaunchTemplateVersions).To(HaveLen(1)) + + ltData := ltOutput.LaunchTemplateVersions[0].LaunchTemplateData + g.Expect(ltData.CpuOptions).ToNot(BeNil()) + g.Expect(ltData.CpuOptions.CoreCount).To(Equal(aws.Int32(2))) + g.Expect(ltData.CpuOptions.ThreadsPerCore).To(Equal(aws.Int32(1))) + // Note: NestedVirtualization may not be supported in all AWS regions/instance types yet + }).WithPolling(5.0).Should(Succeed()) + }) +})