aws · windsornguyen · Feb 18, 2026 · Feb 25, 2026 · Mar 31, 2026 · jmdeal
@@ -0,0 +1,99 @@
+# CPU Options and Nested Virtualization
+
+## Overview
+
+AWS announced nested virtualization support on virtual EC2 instances in February 2026,
+enabling KVM-based workloads (container sandboxes, microVMs, development VMs) without
+bare-metal instances. The feature is configured via the `CpuOptions.NestedVirtualization`
+field in the EC2 `RunInstances` and `CreateLaunchTemplate` APIs.
+
+Karpenter needs to expose this capability on `EC2NodeClass` so users can request nodes
+with nested virtualization enabled, and Karpenter needs to filter out instance types that
+do not support the feature to avoid launch failures.
+
+## Goals
+
+- Expose `cpuOptions` on `EC2NodeClass.spec` with `coreCount`, `threadsPerCore`, and
+  `nestedVirtualization` fields.
+- Pass `CpuOptions` through to the EC2 launch template.
+- Filter instance types to only those reporting `nested-virtualization` in
+  `ProcessorInfo.SupportedFeatures` from `DescribeInstanceTypes`.
+- Validate that `nestedVirtualization: enabled` is mutually exclusive with `coreCount` and
+  `threadsPerCore` (EC2 API constraint). `nestedVirtualization: disabled` may still be
+  combined with either field.
+- Cache `UnsupportedOperation` fleet errors as unfulfillable capacity.
+
+## API Updates
+
+### EC2NodeClass Spec
+
+```yaml
+apiVersion: karpenter.k8s.aws/v1
+kind: EC2NodeClass
+metadata:
+  name: nested-virt
+spec:
+  cpuOptions:
+    nestedVirtualization: enabled
+  # ... other fields
+```
+
+### CPUOptions Struct
+
+```go
+type CPUOptions struct {
+    CoreCount            *int32  `json:"coreCount,omitempty"`
+    ThreadsPerCore       *int32  `json:"threadsPerCore,omitempty"`
+    NestedVirtualization *string `json:"nestedVirtualization,omitempty"`
+}
+```
+
+CEL validation enforces that `nestedVirtualization: enabled` cannot be combined with
+`coreCount` or `threadsPerCore` (EC2 rejects the combination).
+
+### Instance Type Label
+
+A new well-known label `karpenter.k8s.aws/instance-nested-virtualization` is populated
+from `ProcessorInfo.SupportedFeatures` during instance type resolution. Instance types
+that report `nested-virtualization` in their supported features receive the label value
+`"true"`. All other instance types do not carry the label at all (their requirement is
+`DoesNotExist`) rather than receiving a `"false"` value.
+
+As of March 2026, only the `*8i*` families support this feature: c8i, c8i-flex, m8i,
+m8i-flex, r8i, r8i-flex (54 instance types total). No ARM, Xen, or bare-metal instances
+support it.
+
+## Launch Behavior
+
+### Instance Type Filtering
+
+When an `EC2NodeClass` sets `cpuOptions.nestedVirtualization: enabled`, a
+`NestedVirtualizationFilter` in the instance filter chain rejects any instance type
+lacking the `instance-nested-virtualization=true` label. This runs after the
+`CompatibleAvailableFilter` and before capacity reservation filters.
+
+### Launch Template
+
+The `cpuOptions()` converter maps the `CPUOptions` struct to
+`LaunchTemplateCpuOptionsRequest`. It returns `nil` when all fields are nil (avoiding an
+empty `CpuOptions` block in the API call). The `NestedVirtualization` string is cast to
+the SDK enum type `ec2types.NestedVirtualizationSpecification`.
+
+### Error Handling
+
+`UnsupportedOperation` is added to the `unfulfillableCapacityErrorCodes` set so that
+launches against incompatible instance types (if they bypass the filter) are cached as
+unavailable rather than retried indefinitely.
+
+## Instance Type Compatibility
+
+The authoritative signal is `ProcessorInfo.SupportedFeatures` from `DescribeInstanceTypes`:
+
+```bash
+aws ec2 describe-instance-types \
+  --filters "Name=processor-info.supported-features,Values=nested-virtualization" \
+  --query 'InstanceTypes[*].InstanceType'
+```
+
+This returns only the families that actually support the feature, avoiding heuristic-based
+filtering (e.g., checking architecture + hypervisor) which would be both over-inclusive
+(allowing older Intel families that don't support it) and fragile (breaking when AWS adds
+support to new families).
@@ -143,6 +143,9 @@ type EC2NodeClassSpec struct {
 	// https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html
 	// +optional
 	Context *string `json:"context,omitempty"`
+	// CPUOptions defines the CPU options for the instance.
+	// +optional
+	CPUOptions *CPUOptions `json:"cpuOptions,omitempty"`
 }
 
 // SubnetSelectorTerm defines selection logic for a subnet used by Karpenter to launch nodes.
@@ -356,6 +359,26 @@ type MetadataOptions struct {
 	HTTPTokens *string `json:"httpTokens,omitempty"`
 }
 
+// CPUOptions contains parameters for specifying the CPU configuration for provisioned EC2 nodes.
+// Enabling nestedVirtualization is mutually exclusive with coreCount and threadsPerCore;
+// the validation rule below rejects that combination.
+// +kubebuilder:validation:XValidation:message="nestedVirtualization cannot be set alongside coreCount or threadsPerCore",rule="!has(self.nestedVirtualization) || self.nestedVirtualization == 'disabled' || (!has(self.coreCount) && !has(self.threadsPerCore))"
+type CPUOptions struct {
+	// CoreCount specifies the number of CPU cores for the instance.
+	// Must be between 1 and 128 (inclusive) when set.
+	// +kubebuilder:validation:Minimum:=1
+	// +kubebuilder:validation:Maximum:=128
+	// +optional
+	CoreCount *int32 `json:"coreCount,omitempty"`
+	// ThreadsPerCore specifies the number of threads per core for the instance.
+	// Must be 1 or 2 when set.
+	// +kubebuilder:validation:Minimum:=1
+	// +kubebuilder:validation:Maximum:=2
+	// +optional
+	ThreadsPerCore *int32 `json:"threadsPerCore,omitempty"`
+	// NestedVirtualization enables or disables nested virtualization on the instance.
+	// This feature allows running virtual machines inside the EC2 instance.
+	// Accepted values are "enabled" and "disabled". When set to "enabled", coreCount and
+	// threadsPerCore must not be set.
+	// +kubebuilder:validation:Enum:={enabled,disabled}
+	// +optional
+	NestedVirtualization *string `json:"nestedVirtualization,omitempty"`
+}
+
 type BlockDeviceMapping struct {
 	// The device name (for example, /dev/sdh or xvdh).
 	// +optional
@@ -535,6 +558,10 @@ func (in *EC2NodeClass) KubeletConfiguration() *KubeletConfiguration {
 	return in.Spec.Kubelet
 }
 
+// CPUOptions returns the CPU options set on the EC2NodeClass spec, or nil when the
+// spec does not specify any.
+func (in *EC2NodeClass) CPUOptions() *CPUOptions {
+	return in.Spec.CPUOptions
+}
+
 // AMIFamily returns the family for a NodePool based on the following items, in order of precdence:
 //   - ec2nodeclass.spec.amiFamily
 //   - ec2nodeclass.spec.amiSelectorTerms[].alias

@@ -1288,4 +1288,71 @@ var _ = Describe("CEL/Validation", func() {
 			Expect(env.Client.Update(ctx, nc)).To(Succeed())
 		})
 	})
+
+	// CPUOptions validation: field bounds, the nestedVirtualization enum, and the CEL rule
+	// that forbids enabling nestedVirtualization together with coreCount/threadsPerCore.
+	Context("CPUOptions", func() {
+		When("valid CPU options are provided", func() {
+			It("should succeed with coreCount and threadsPerCore", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount:      aws.Int32(4),
+					ThreadsPerCore: aws.Int32(1),
+				}
+				Expect(env.Client.Create(ctx, nc)).To(Succeed())
+			})
+			It("should succeed with only nestedVirtualization enabled", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					NestedVirtualization: aws.String("enabled"),
+				}
+				Expect(env.Client.Create(ctx, nc)).To(Succeed())
+			})
+			It("should succeed with nestedVirtualization disabled alongside coreCount and threadsPerCore", func() {
+				// The CEL rule only restricts the "enabled" value.
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount:            aws.Int32(4),
+					ThreadsPerCore:       aws.Int32(1),
+					NestedVirtualization: aws.String("disabled"),
+				}
+				Expect(env.Client.Create(ctx, nc)).To(Succeed())
+			})
+		})
+		When("nestedVirtualization is enabled alongside other CPU options", func() {
+			It("should fail when coreCount is also set", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount:            aws.Int32(4),
+					NestedVirtualization: aws.String("enabled"),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+			It("should fail when threadsPerCore is also set", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					ThreadsPerCore:       aws.Int32(1),
+					NestedVirtualization: aws.String("enabled"),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+		})
+		When("invalid CoreCount is provided", func() {
+			It("should fail when CoreCount is 0", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount: aws.Int32(0),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+			It("should fail when CoreCount is negative", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount: aws.Int32(-1),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+			It("should fail when CoreCount exceeds maximum", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					CoreCount: aws.Int32(129),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+		})
+		When("invalid ThreadsPerCore is provided", func() {
+			It("should fail when ThreadsPerCore is 0", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					ThreadsPerCore: aws.Int32(0),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+			It("should fail when ThreadsPerCore is negative", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					ThreadsPerCore: aws.Int32(-1),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+			It("should fail when ThreadsPerCore exceeds maximum", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					ThreadsPerCore: aws.Int32(3),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+		})
+		When("invalid NestedVirtualization is provided", func() {
+			It("should fail when NestedVirtualization has invalid value", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{
+					NestedVirtualization: aws.String("invalid"),
+				}
+				Expect(env.Client.Create(ctx, nc)).ToNot(Succeed())
+			})
+		})
+		When("empty CPU options are provided", func() {
+			It("should succeed with empty CPU options", func() {
+				nc.Spec.CPUOptions = &v1.CPUOptions{}
+				Expect(env.Client.Create(ctx, nc)).To(Succeed())
+			})
+		})
+	})
 })
@@ -55,6 +55,7 @@ func init() {
 		LabelInstanceAcceleratorName,
 		LabelInstanceAcceleratorManufacturer,
 		LabelInstanceAcceleratorCount,
+		LabelInstanceNestedVirtualization,
 		LabelTopologyZoneID,
 		LabelInstanceTenancy,
 		corev1.LabelWindowsBuild,
@@ -161,6 +162,7 @@ var (
 	LabelInstanceAcceleratorName              = apis.Group + "/instance-accelerator-name"
 	LabelInstanceAcceleratorManufacturer      = apis.Group + "/instance-accelerator-manufacturer"
 	LabelInstanceAcceleratorCount             = apis.Group + "/instance-accelerator-count"
+	LabelInstanceNestedVirtualization         = apis.Group + "/instance-nested-virtualization"
 	LabelNodeClass                            = apis.Group + "/ec2nodeclass"
 	LabelInstanceTenancy                      = apis.Group + "/instance-tenancy"
 

@@ -58,6 +58,7 @@ var (
 		"VcpuLimitExceeded",
 		"UnfulfillableCapacity",
 		"Unsupported",
+		"UnsupportedOperation",
 		"InsufficientFreeAddressesInSubnet",
 		"MaxFleetCountExceeded",
 		reservationCapacityExceededErrorCode,

@@ -78,6 +78,7 @@ type LaunchTemplate struct {
 	UserData                         bootstrap.Bootstrapper
 	BlockDeviceMappings              []*v1.BlockDeviceMapping
 	MetadataOptions                  *v1.MetadataOptions
+	CPUOptions                       *v1.CPUOptions
 	AMIID                            string
 	InstanceTypes                    []*cloudprovider.InstanceType `hash:"ignore"`
 	DetailedMonitoring               bool
@@ -310,6 +311,7 @@ func (r DefaultResolver) resolveLaunchTemplates(
 			),
 			BlockDeviceMappings:              nodeClass.Spec.BlockDeviceMappings,
 			MetadataOptions:                  nodeClass.Spec.MetadataOptions,
+			CPUOptions:                       nodeClass.Spec.CPUOptions,
 			DetailedMonitoring:               aws.ToBool(nodeClass.Spec.DetailedMonitoring),
 			AMIID:                            amiID,
 			InstanceTypes:                    instanceTypes,

@@ -429,3 +429,26 @@ func (f spotOfferingFilter) FilterReject(instanceTypes []*cloudprovider.Instance
 func (spotOfferingFilter) Name() string {
 	return "spot-offering-filter"
 }
+
+// nestedVirtualizationFilter drops instance types whose requirements do not list "true"
+// for v1.LabelInstanceNestedVirtualization. It is a pass-through unless enabled is set.
+type nestedVirtualizationFilter struct {
+	enabled bool
+}
+
+// NestedVirtualizationFilter builds a Filter that, when enabled is true, keeps only the
+// instance types labeled as supporting nested virtualization. When enabled is false the
+// returned filter keeps every instance type.
+func NestedVirtualizationFilter(enabled bool) Filter {
+	return nestedVirtualizationFilter{enabled: enabled}
+}
+
+// Name returns the name of this filter.
+func (nestedVirtualizationFilter) Name() string {
+	return "nested-virtualization-filter"
+}
+
+// FilterReject splits instanceTypes into (kept, rejected). A disabled filter returns the
+// input slice untouched with a nil rejected slice. An enabled filter keeps an instance
+// type only when the values of its nested-virtualization requirement contain "true".
+func (f nestedVirtualizationFilter) FilterReject(instanceTypes []*cloudprovider.InstanceType) ([]*cloudprovider.InstanceType, []*cloudprovider.InstanceType) {
+	if !f.enabled {
+		return instanceTypes, nil
+	}
+	supportsNestedVirt := func(candidate *cloudprovider.InstanceType, _ int) bool {
+		labelValues := candidate.Requirements.Get(v1.LabelInstanceNestedVirtualization).Values()
+		return lo.Contains(labelValues, "true")
+	}
+	return lo.FilterReject(instanceTypes, supportsNestedVirt)
+}
@@ -678,6 +678,47 @@ var _ = Describe("InstanceFiltersTest", func() {
 			}
 		})
 	})
+	Context("NestedVirtualizationFilter", func() {
+		// nestedVirtType builds an instance type whose requirements carry the
+		// nested-virtualization label with the value "true".
+		nestedVirtType := func(name string) *cloudprovider.InstanceType {
+			return makeInstanceType(name,
+				withRequirements(scheduling.NewRequirement(v1.LabelInstanceNestedVirtualization, corev1.NodeSelectorOpIn, "true")),
+			)
+		}
+
+		It("should keep all types when disabled", func() {
+			candidates := []*cloudprovider.InstanceType{makeInstanceType("m7g.xlarge")}
+			disabled := filter.NestedVirtualizationFilter(false)
+			kept, rejected := disabled.FilterReject(candidates)
+			Expect(kept).To(HaveLen(1))
+			Expect(rejected).To(BeEmpty())
+		})
+		It("should keep types with nested-virt label", func() {
+			candidates := []*cloudprovider.InstanceType{nestedVirtType("m8i.2xlarge")}
+			enabled := filter.NestedVirtualizationFilter(true)
+			kept, rejected := enabled.FilterReject(candidates)
+			Expect(kept).To(HaveLen(1))
+			Expect(rejected).To(BeEmpty())
+		})
+		It("should reject types without nested-virt label", func() {
+			candidates := []*cloudprovider.InstanceType{makeInstanceType("m7g.xlarge")}
+			enabled := filter.NestedVirtualizationFilter(true)
+			kept, rejected := enabled.FilterReject(candidates)
+			Expect(kept).To(BeEmpty())
+			Expect(rejected).To(HaveLen(1))
+		})
+		It("should filter a mixed pool to only supported types", func() {
+			// Only the middle entry carries the label; the other two must be rejected.
+			candidates := []*cloudprovider.InstanceType{
+				makeInstanceType("m7g.xlarge"),
+				nestedVirtType("m8i.2xlarge"),
+				makeInstanceType("m4.xlarge"),
+			}
+			enabled := filter.NestedVirtualizationFilter(true)
+			kept, rejected := enabled.FilterReject(candidates)
+			Expect(kept).To(HaveLen(1))
+			Expect(kept[0].Name).To(Equal("m8i.2xlarge"))
+			Expect(rejected).To(HaveLen(2))
+		})
+	})
 })
 
 func expectInstanceTypes(instanceTypes []*cloudprovider.InstanceType, names ...string) {

@@ -131,7 +131,7 @@ func NewDefaultProvider(
 }
 
 func (p *DefaultProvider) Create(ctx context.Context, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim, tags map[string]string, instanceTypes []*cloudprovider.InstanceType) (*Instance, error) {
-	instanceTypes, err := p.filterInstanceTypes(ctx, instanceTypes, nodeClaim)
+	instanceTypes, err := p.filterInstanceTypes(ctx, instanceTypes, nodeClass, nodeClaim)
 	if err != nil {
 		return nil, err
 	}
@@ -270,11 +270,15 @@ func (p *DefaultProvider) CreateTags(ctx context.Context, id string, tags map[st
 	return nil
 }
 
-func (p *DefaultProvider) filterInstanceTypes(ctx context.Context, instanceTypes []*cloudprovider.InstanceType, nodeClaim *karpv1.NodeClaim) ([]*cloudprovider.InstanceType, error) {
+func (p *DefaultProvider) filterInstanceTypes(ctx context.Context, instanceTypes []*cloudprovider.InstanceType, nodeClass *v1.EC2NodeClass, nodeClaim *karpv1.NodeClaim) ([]*cloudprovider.InstanceType, error) {
 	rejectedInstanceTypes := map[string][]*cloudprovider.InstanceType{}
 	reqs := scheduling.NewNodeSelectorRequirementsWithMinValues(nodeClaim.Spec.Requirements...)
+	nestedVirtEnabled := nodeClass.Spec.CPUOptions != nil &&
+		nodeClass.Spec.CPUOptions.NestedVirtualization != nil &&
+		*nodeClass.Spec.CPUOptions.NestedVirtualization == "enabled"
 	for _, filter := range []instancefilter.Filter{
 		instancefilter.CompatibleAvailableFilter(reqs, nodeClaim.Spec.Resources.Requests),
+		instancefilter.NestedVirtualizationFilter(nestedVirtEnabled),
 		instancefilter.CapacityReservationTypeFilter(reqs),
 		instancefilter.CapacityBlockFilter(reqs),
 		instancefilter.ReservedOfferingFilter(reqs),

@@ -208,6 +208,7 @@ func computeRequirements(
 		scheduling.NewRequirement(v1.LabelInstanceAcceleratorName, corev1.NodeSelectorOpDoesNotExist),
 		scheduling.NewRequirement(v1.LabelInstanceAcceleratorManufacturer, corev1.NodeSelectorOpDoesNotExist),
 		scheduling.NewRequirement(v1.LabelInstanceAcceleratorCount, corev1.NodeSelectorOpDoesNotExist),
+		scheduling.NewRequirement(v1.LabelInstanceNestedVirtualization, corev1.NodeSelectorOpDoesNotExist),
 		scheduling.NewRequirement(v1.LabelInstanceHypervisor, corev1.NodeSelectorOpIn, string(info.Hypervisor)),
 		scheduling.NewRequirement(v1.LabelInstanceEncryptionInTransitSupported, corev1.NodeSelectorOpIn, fmt.Sprint(aws.ToBool(info.NetworkInfo.EncryptionInTransitSupported))),
 		scheduling.NewRequirement(v1.LabelInstanceTenancy, corev1.NodeSelectorOpIn, string(ec2types.TenancyDefault), string(ec2types.TenancyDedicated)),
@@ -291,6 +292,10 @@ func computeRequirements(
 	if info.ProcessorInfo != nil {
 		requirements.Get(v1.LabelInstanceCPUManufacturer).Insert(lowerKabobCase(aws.ToString(info.ProcessorInfo.Manufacturer)))
 	}
+	// Nested virtualization support (from ProcessorInfo.SupportedFeatures)
+	if info.ProcessorInfo != nil && lo.Contains(info.ProcessorInfo.SupportedFeatures, "nested-virtualization") {
+		requirements.Get(v1.LabelInstanceNestedVirtualization).Insert("true")
+	}
 	// CPU Sustained Clock Speed
 	if info.ProcessorInfo != nil {
 		// Convert from Ghz to Mhz and round to nearest whole number - converting from float64 to int to support Gt and Lt operators