diff --git a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml index 0ee18288335f..ff34b452aa54 100644 --- a/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/charts/karpenter-crd/templates/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -516,6 +516,31 @@ spec: - optional type: string type: object + placementGroup: + description: |- + PlacementGroup configures the EC2 placement group that Karpenter should launch instances into. + The referenced placement group must already exist; Karpenter does not create or delete placement groups. + properties: + id: + description: ID is the placement group id in EC2. This must be used when launching into a shared placement group. + pattern: ^pg-[0-9a-z]+$ + type: string + name: + description: Name is the name of the placement group in EC2. + maxLength: 255 + type: string + partition: + description: |- + Partition is the partition number that instances should launch into. + Valid only for partition placement groups. + format: int32 + maximum: 7 + minimum: 1 + type: integer + type: object + x-kubernetes-validations: + - message: expected exactly one of ['name', 'id'] + rule: has(self.name) != has(self.id) role: description: |- Role is the AWS identity that nodes use. @@ -811,6 +836,41 @@ spec: instanceProfile: description: InstanceProfile contains the resolved instance profile for the role type: string + placementGroup: + description: PlacementGroup contains the current placement group that is available to this NodeClass under the placementGroup reference. + properties: + id: + description: ID of the placement group. + pattern: ^pg-[0-9a-z]+$ + type: string + name: + description: Name of the placement group. + type: string + partitionCount: + description: PartitionCount is the number of partitions configured on the placement group. 
+ format: int32 + type: integer + spreadLevel: + description: SpreadLevel determines how instances are spread when the placement group strategy is spread. + enum: + - host + - rack + type: string + state: + description: State of the placement group. + type: string + strategy: + description: Strategy of the placement group. + enum: + - cluster + - spread + - partition + type: string + required: + - id + - name + - strategy + type: object securityGroups: description: |- SecurityGroups contains the current security group values that are available to the diff --git a/cmd/controller/main.go b/cmd/controller/main.go index bec13d6e27c9..319e035cef50 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -76,6 +76,7 @@ func main() { cloudProvider, op.SubnetProvider, op.SecurityGroupProvider, + op.PlacementGroupProvider, op.InstanceProfileProvider, op.InstanceProvider, op.PricingProvider, diff --git a/designs/placement-groups.md b/designs/placement-groups.md new file mode 100644 index 000000000000..5a62319185fa --- /dev/null +++ b/designs/placement-groups.md @@ -0,0 +1,84 @@ +# Placement Group Support + +## Context + +Amazon EC2 placement groups let operators influence instance placement for low-latency (`cluster`), failure-domain isolation (`partition`), and small critical workloads (`spread`). The long-standing request in https://github.com/aws/karpenter-provider-aws/issues/3324 is to make these groups usable from `EC2NodeClass`. + +Karpenter already treats `EC2NodeClass` as launch configuration for existing AWS resources such as subnets, security groups, AMIs, and instance profiles. Placement groups fit best when modeled the same way. + +## Problem + +Users can launch Karpenter-managed nodes into subnets, security groups, and capacity reservations, but cannot direct those nodes into an existing placement group. 
This blocks workloads that already rely on EC2 placement-group semantics, for example: + +- tightly coupled clusters that need cluster placement-group networking +- replicated systems that want partition placement-group isolation +- small critical workloads that want spread placement-group separation + +The previously proposed design in #5389 focused on Karpenter creating placement groups. That approach adds a new EC2 resource lifecycle to reconcile and exposes strategy-specific creation APIs that users may rely on long term. + +## Options + +### Option 1: Karpenter creates and owns placement groups + +Pros: + +- users can describe strategy directly in `EC2NodeClass` +- Karpenter could validate strategy-specific configuration at reconciliation time + +Cons: + +- introduces new lifecycle ownership for EC2 resources outside the current launch path +- expands the stable API surface with strategy creation details such as `cluster`, `spread`, `partition`, partition count, and spread level +- complicates shared placement groups and future AWS-specific variants +- makes rollback and drift semantics harder because the placement group becomes a controller-managed dependency + +### Option 2: Karpenter references an existing placement group + +Pros: + +- matches how `EC2NodeClass` already models other AWS launch dependencies +- keeps the API small: identify the group and optionally pin a partition +- works for user-managed, shared, and externally tagged placement groups +- avoids inventing a placement-group controller lifecycle before demand is proven + +Cons: + +- users must provision the placement group out of band +- Karpenter cannot configure placement-group strategy on behalf of the user + +## Recommendation + +Add an optional `spec.placementGroup` field on `EC2NodeClass`: + +```yaml +spec: + placementGroup: + name: analytics-partition + partition: 2 +``` + +Behavior: + +- exactly one of `name` or `id` must be set to identify the existing placement group; the fields are mutually exclusive +- `id` supports 
shared placement groups, which require `GroupId` during launch +- `partition` is optional and only meaningful for partition placement groups +- Karpenter resolves the configured group into `status.placementGroup` +- launch templates include the placement-group reference so both `CreateFleet` and `RunInstances` honor it + +## Key Decisions + +- Karpenter does not create, tag, delete, or mutate placement groups in this design +- placement-group strategy remains an operator concern because it belongs to the EC2 placement-group resource, not the instance launch request +- partition selection is the only launch-time knob worth exposing initially because AWS applies it at instance launch and it is useful even when the placement group is created elsewhere + +## User Guidance + +- Use `name` for placement groups in the same account and `id` for shared placement groups +- Pair cluster placement groups with subnet or topology constraints that keep launches in a single Availability Zone +- Omit `partition` to let EC2 distribute instances across partitions, or set it when the workload needs explicit partition affinity + +## Future Work + +- richer status surfacing for placement-group strategy and readiness +- strategy-aware validation and scheduling hints +- a separate proposal for Karpenter-managed placement-group lifecycle if real demand justifies the larger API diff --git a/examples/v1/placement-group.yaml b/examples/v1/placement-group.yaml new file mode 100644 index 000000000000..cd478d0919d1 --- /dev/null +++ b/examples/v1/placement-group.yaml @@ -0,0 +1,38 @@ +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: placement-group +spec: + template: + spec: + requirements: + - key: topology.kubernetes.io/zone + operator: In + values: + - us-west-2a + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: placement-group +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: placement-group +spec: + amiFamily: AL2023 + role: 
"KarpenterNodeRole-${CLUSTER_NAME}" + subnetSelectorTerms: + - tags: + karpenter.sh/discovery: "${CLUSTER_NAME}" + securityGroupSelectorTerms: + - tags: + karpenter.sh/discovery: "${CLUSTER_NAME}" + amiSelectorTerms: + - alias: al2023@latest + placementGroup: + # Use `name` for placement groups in the same account. + # Use `id` instead when launching into a shared placement group. + name: analytics-partition + # Optional, only valid for partition placement groups. + partition: 2 diff --git a/kwok/main.go b/kwok/main.go index 5e26efcb78f6..9dd773c2dbdf 100644 --- a/kwok/main.go +++ b/kwok/main.go @@ -91,6 +91,7 @@ func main() { cloudProvider, op.SubnetProvider, op.SecurityGroupProvider, + op.PlacementGroupProvider, op.InstanceProfileProvider, op.InstanceProvider, op.PricingProvider, diff --git a/kwok/operator/operator.go b/kwok/operator/operator.go index b0b246d6ad8a..ae17062406a9 100644 --- a/kwok/operator/operator.go +++ b/kwok/operator/operator.go @@ -61,6 +61,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" ssmp "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" @@ -83,6 +84,7 @@ type Operator struct { RecreationCache *cache.Cache SubnetProvider subnet.Provider SecurityGroupProvider securitygroup.Provider + PlacementGroupProvider placementgroup.Provider InstanceProfileProvider instanceprofile.Provider AMIProvider amifamily.Provider AMIResolver amifamily.Resolver @@ -138,6 +140,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont cfg.Region, false, ) + placementGroupProvider := placementgroup.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, 
awscache.DefaultCleanupInterval)) versionProvider := version.NewDefaultProvider(operator.KubernetesInterface, eksapi) // Ensure we're able to hydrate the version before starting any reliant controllers. // Version updates are hydrated asynchronously after this, in the event of a failure @@ -205,6 +208,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont RecreationCache: recreationCache, SubnetProvider: subnetProvider, SecurityGroupProvider: securityGroupProvider, + PlacementGroupProvider: placementGroupProvider, InstanceProfileProvider: instanceProfileProvider, AMIProvider: amiProvider, AMIResolver: amiResolver, diff --git a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml index 19eb6f036fcb..2540bac473fa 100644 --- a/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml +++ b/pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml @@ -513,6 +513,31 @@ spec: - optional type: string type: object + placementGroup: + description: |- + PlacementGroup configures the EC2 placement group that Karpenter should launch instances into. + The referenced placement group must already exist; Karpenter does not create or delete placement groups. + properties: + id: + description: ID is the placement group id in EC2. This must be used when launching into a shared placement group. + pattern: ^pg-[0-9a-z]+$ + type: string + name: + description: Name is the name of the placement group in EC2. + maxLength: 255 + type: string + partition: + description: |- + Partition is the partition number that instances should launch into. + Valid only for partition placement groups. + format: int32 + maximum: 7 + minimum: 1 + type: integer + type: object + x-kubernetes-validations: + - message: expected exactly one of ['name', 'id'] + rule: has(self.name) != has(self.id) role: description: |- Role is the AWS identity that nodes use. 
@@ -808,6 +833,41 @@ spec: instanceProfile: description: InstanceProfile contains the resolved instance profile for the role type: string + placementGroup: + description: PlacementGroup contains the current placement group that is available to this NodeClass under the placementGroup reference. + properties: + id: + description: ID of the placement group. + pattern: ^pg-[0-9a-z]+$ + type: string + name: + description: Name of the placement group. + type: string + partitionCount: + description: PartitionCount is the number of partitions configured on the placement group. + format: int32 + type: integer + spreadLevel: + description: SpreadLevel determines how instances are spread when the placement group strategy is spread. + enum: + - host + - rack + type: string + state: + description: State of the placement group. + type: string + strategy: + description: Strategy of the placement group. + enum: + - cluster + - spread + - partition + type: string + required: + - id + - name + - strategy + type: object securityGroups: description: |- SecurityGroups contains the current security group values that are available to the diff --git a/pkg/apis/v1/ec2nodeclass.go b/pkg/apis/v1/ec2nodeclass.go index b76671430ebc..1fff6b5d543d 100644 --- a/pkg/apis/v1/ec2nodeclass.go +++ b/pkg/apis/v1/ec2nodeclass.go @@ -52,6 +52,11 @@ type EC2NodeClassSpec struct { // +kubebuilder:validation:MaxItems:=30 // +optional CapacityReservationSelectorTerms []CapacityReservationSelectorTerm `json:"capacityReservationSelectorTerms" hash:"ignore"` + // PlacementGroup configures the EC2 placement group that Karpenter should launch instances into. + // The referenced placement group must already exist; Karpenter does not create or delete placement groups. 
+ // +kubebuilder:validation:XValidation:message="expected exactly one of ['name', 'id']",rule="has(self.name) != has(self.id)" + // +optional + PlacementGroup *PlacementGroup `json:"placementGroup,omitempty" hash:"ignore"` // AssociatePublicIPAddress controls if public IP addresses are assigned to instances that are launched with the nodeclass. // +optional AssociatePublicIPAddress *bool `json:"associatePublicIPAddress,omitempty"` @@ -199,6 +204,26 @@ type CapacityReservationSelectorTerm struct { InstanceMatchCriteria string `json:"instanceMatchCriteria,omitempty"` } +// PlacementGroup defines placement-group membership for instances launched with this node class. +type PlacementGroup struct { + // Name is the name of the placement group in EC2. + // Mutually exclusive with ID. + // +kubebuilder:validation:MaxLength:=255 + // +optional + Name string `json:"name,omitempty"` + // ID is the placement group id in EC2. This must be used when launching into a shared placement group. + // Mutually exclusive with Name. + // +kubebuilder:validation:Pattern:="^pg-[0-9a-z]+$" + // +optional + ID string `json:"id,omitempty"` + // Partition is the partition number that instances should launch into. + // Valid only for partition placement groups. + // +kubebuilder:validation:Minimum:=1 + // +kubebuilder:validation:Maximum:=7 + // +optional + Partition *int32 `json:"partition,omitempty"` +} + // AMISelectorTerm defines selection logic for an ami used by Karpenter to launch nodes. // If multiple fields are used for selection, the requirements are ANDed. 
type AMISelectorTerm struct { diff --git a/pkg/apis/v1/ec2nodeclass_hash_test.go b/pkg/apis/v1/ec2nodeclass_hash_test.go index 87ad4de0b5af..196455587875 100644 --- a/pkg/apis/v1/ec2nodeclass_hash_test.go +++ b/pkg/apis/v1/ec2nodeclass_hash_test.go @@ -88,6 +88,9 @@ var _ = Describe("Hash", func() { Entry("DetailedMonitoring", "14187487647319890991", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), Entry("InstanceStorePolicy", "4160809219257698490", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), Entry("AssociatePublicIPAddress", "4469320567057431454", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), + Entry("PlacementGroup Name", "3719706974731311089", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{Name: "analytics-cluster"}}}), + Entry("PlacementGroup ID", "18122240702898781533", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{ID: "pg-0123456789abcdef0"}}}), + Entry("PlacementGroup Partition", "4265179377147301792", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{Name: "analytics-cluster", Partition: lo.ToPtr(int32(1))}}}), Entry("MetadataOptions HTTPEndpoint", "1277386558528601282", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), Entry("MetadataOptions HTTPProtocolIPv6", "14697047633165484196", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPProtocolIPv6: lo.ToPtr("enabled")}}}), Entry("MetadataOptions HTTPPutResponseHopLimit", "2086799014304536137", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPPutResponseHopLimit: lo.ToPtr(int64(10))}}}), @@ -138,6 +141,9 @@ var _ = Describe("Hash", func() { Entry("DetailedMonitoring", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{DetailedMonitoring: aws.Bool(true)}}), 
Entry("InstanceStorePolicy", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{InstanceStorePolicy: lo.ToPtr(v1.InstanceStorePolicyRAID0)}}), Entry("AssociatePublicIPAddress", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{AssociatePublicIPAddress: lo.ToPtr(true)}}), + Entry("PlacementGroup Name", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{Name: "analytics-cluster"}}}), + Entry("PlacementGroup ID", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{ID: "pg-0123456789abcdef0"}}}), + Entry("PlacementGroup Partition", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{PlacementGroup: &v1.PlacementGroup{Name: "analytics-cluster", Partition: lo.ToPtr(int32(1))}}}), Entry("MetadataOptions HTTPEndpoint", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPEndpoint: lo.ToPtr("enabled")}}}), Entry("MetadataOptions HTTPProtocolIPv6", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPProtocolIPv6: lo.ToPtr("enabled")}}}), Entry("MetadataOptions HTTPPutResponseHopLimit", v1.EC2NodeClass{Spec: v1.EC2NodeClassSpec{MetadataOptions: &v1.MetadataOptions{HTTPPutResponseHopLimit: lo.ToPtr(int64(10))}}}), diff --git a/pkg/apis/v1/ec2nodeclass_status.go b/pkg/apis/v1/ec2nodeclass_status.go index ed83655df4ee..2f075ae68f80 100644 --- a/pkg/apis/v1/ec2nodeclass_status.go +++ b/pkg/apis/v1/ec2nodeclass_status.go @@ -35,6 +35,7 @@ const ( ConditionTypeSubnetsReady = "SubnetsReady" ConditionTypeSecurityGroupsReady = "SecurityGroupsReady" ConditionTypeAMIsReady = "AMIsReady" + ConditionTypePlacementGroupReady = "PlacementGroupReady" ConditionTypeInstanceProfileReady = "InstanceProfileReady" ConditionTypeCapacityReservationsReady = "CapacityReservationsReady" ConditionTypeValidationSucceeded = "ValidationSucceeded" @@ -136,6 +137,31 @@ const ( CapacityReservationStateExpiring CapacityReservationState = "expiring" ) +// PlacementGroupStatus contains the resolved placement group configuration 
utilized for node launch. +type PlacementGroupStatus struct { + // ID of the placement group. + // +kubebuilder:validation:Pattern:="^pg-[0-9a-z]+$" + // +required + ID string `json:"id"` + // Name of the placement group. + // +required + Name string `json:"name"` + // Strategy of the placement group. + // +kubebuilder:validation:Enum:={cluster,spread,partition} + // +required + Strategy string `json:"strategy"` + // PartitionCount is the number of partitions configured on the placement group. + // +optional + PartitionCount *int32 `json:"partitionCount,omitempty"` + // SpreadLevel determines how instances are spread when the placement group strategy is spread. + // +kubebuilder:validation:Enum:={host,rack} + // +optional + SpreadLevel string `json:"spreadLevel,omitempty"` + // State of the placement group. + // +optional + State string `json:"state,omitempty"` +} + // EC2NodeClassStatus contains the resolved state of the EC2NodeClass type EC2NodeClassStatus struct { // Subnets contains the current subnet values that are available to the @@ -146,6 +172,9 @@ type EC2NodeClassStatus struct { // cluster under the SecurityGroups selectors. // +optional SecurityGroups []SecurityGroup `json:"securityGroups,omitempty"` + // PlacementGroup contains the current placement group that is available to this NodeClass under the placementGroup reference. + // +optional + PlacementGroup *PlacementGroupStatus `json:"placementGroup,omitempty"` // CapacityReservations contains the current capacity reservation values that are available to this NodeClass under the // CapacityReservation selectors. 
// +optional @@ -167,6 +196,7 @@ func (in *EC2NodeClass) StatusConditions() status.ConditionSet { ConditionTypeAMIsReady, ConditionTypeSubnetsReady, ConditionTypeSecurityGroupsReady, + ConditionTypePlacementGroupReady, ConditionTypeInstanceProfileReady, ConditionTypeValidationSucceeded, } @@ -192,6 +222,20 @@ func (in *EC2NodeClass) CapacityReservations() []CapacityReservation { return in.Status.CapacityReservations } +func PlacementGroupStatusFromEC2(pg *ec2types.PlacementGroup) *PlacementGroupStatus { + if pg == nil { + return nil + } + return &PlacementGroupStatus{ + ID: lo.FromPtr(pg.GroupId), + Name: lo.FromPtr(pg.GroupName), + Strategy: string(pg.Strategy), + PartitionCount: pg.PartitionCount, + SpreadLevel: string(pg.SpreadLevel), + State: string(pg.State), + } +} + type ZoneInfo struct { Zone string ZoneID string diff --git a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go index dd6ee525fc3b..1173a204dafd 100644 --- a/pkg/apis/v1/ec2nodeclass_validation_cel_test.go +++ b/pkg/apis/v1/ec2nodeclass_validation_cel_test.go @@ -208,6 +208,24 @@ var _ = Describe("CEL/Validation", func() { Expect(env.Client.Create(ctx, nc)).To(Not(Succeed())) }) }) + Context("PlacementGroup", func() { + It("should succeed with a placement group name", func() { + nc.Spec.PlacementGroup = &v1.PlacementGroup{Name: "analytics-cluster"} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should succeed with a placement group id", func() { + nc.Spec.PlacementGroup = &v1.PlacementGroup{ID: "pg-0123456789abcdef0"} + Expect(env.Client.Create(ctx, nc)).To(Succeed()) + }) + It("should fail when both name and id are set", func() { + nc.Spec.PlacementGroup = &v1.PlacementGroup{Name: "analytics-cluster", ID: "pg-0123456789abcdef0"} + Expect(env.Client.Create(ctx, nc)).ToNot(Succeed()) + }) + It("should fail when neither name nor id are set", func() { + nc.Spec.PlacementGroup = &v1.PlacementGroup{} + Expect(env.Client.Create(ctx, 
nc)).ToNot(Succeed()) + }) + }) Context("SubnetSelectorTerms", func() { It("should succeed with a valid subnet selector on tags", func() { nc.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{ diff --git a/pkg/apis/v1/zz_generated.deepcopy.go b/pkg/apis/v1/zz_generated.deepcopy.go index 6b283ed4ca03..2b3cb6e36db2 100644 --- a/pkg/apis/v1/zz_generated.deepcopy.go +++ b/pkg/apis/v1/zz_generated.deepcopy.go @@ -293,6 +293,11 @@ func (in *EC2NodeClassSpec) DeepCopyInto(out *EC2NodeClassSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.PlacementGroup != nil { + in, out := &in.PlacementGroup, &out.PlacementGroup + *out = new(PlacementGroup) + (*in).DeepCopyInto(*out) + } if in.AssociatePublicIPAddress != nil { in, out := &in.AssociatePublicIPAddress, &out.AssociatePublicIPAddress *out = new(bool) @@ -393,6 +398,11 @@ func (in *EC2NodeClassStatus) DeepCopyInto(out *EC2NodeClassStatus) { *out = make([]SecurityGroup, len(*in)) copy(*out, *in) } + if in.PlacementGroup != nil { + in, out := &in.PlacementGroup, &out.PlacementGroup + *out = new(PlacementGroupStatus) + (*in).DeepCopyInto(*out) + } if in.CapacityReservations != nil { in, out := &in.CapacityReservations, &out.CapacityReservations *out = make([]CapacityReservation, len(*in)) @@ -546,6 +556,46 @@ func (in *MetadataOptions) DeepCopy() *MetadataOptions { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlacementGroup) DeepCopyInto(out *PlacementGroup) { + *out = *in + if in.Partition != nil { + in, out := &in.Partition, &out.Partition + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementGroup. 
+func (in *PlacementGroup) DeepCopy() *PlacementGroup { + if in == nil { + return nil + } + out := new(PlacementGroup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlacementGroupStatus) DeepCopyInto(out *PlacementGroupStatus) { + *out = *in + if in.PartitionCount != nil { + in, out := &in.PartitionCount, &out.PartitionCount + *out = new(int32) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementGroupStatus. +func (in *PlacementGroupStatus) DeepCopy() *PlacementGroupStatus { + if in == nil { + return nil + } + out := new(PlacementGroupStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SecurityGroup) DeepCopyInto(out *SecurityGroup) { *out = *in diff --git a/pkg/aws/sdk.go b/pkg/aws/sdk.go index a594c9e53646..78eabd7e0b39 100644 --- a/pkg/aws/sdk.go +++ b/pkg/aws/sdk.go @@ -30,6 +30,7 @@ type EC2API interface { DescribeCapacityReservations(context.Context, *ec2.DescribeCapacityReservationsInput, ...func(*ec2.Options)) (*ec2.DescribeCapacityReservationsOutput, error) DescribeImages(context.Context, *ec2.DescribeImagesInput, ...func(*ec2.Options)) (*ec2.DescribeImagesOutput, error) DescribeLaunchTemplates(context.Context, *ec2.DescribeLaunchTemplatesInput, ...func(*ec2.Options)) (*ec2.DescribeLaunchTemplatesOutput, error) + DescribePlacementGroups(context.Context, *ec2.DescribePlacementGroupsInput, ...func(*ec2.Options)) (*ec2.DescribePlacementGroupsOutput, error) DescribeSubnets(context.Context, *ec2.DescribeSubnetsInput, ...func(*ec2.Options)) (*ec2.DescribeSubnetsOutput, error) DescribeSecurityGroups(context.Context, *ec2.DescribeSecurityGroupsInput, ...func(*ec2.Options)) (*ec2.DescribeSecurityGroupsOutput, error) DescribeInstanceTypes(context.Context, 
*ec2.DescribeInstanceTypesInput, ...func(*ec2.Options)) (*ec2.DescribeInstanceTypesOutput, error) diff --git a/pkg/cloudprovider/suite_test.go b/pkg/cloudprovider/suite_test.go index 4493086ab4fe..41ef7279ac96 100644 --- a/pkg/cloudprovider/suite_test.go +++ b/pkg/cloudprovider/suite_test.go @@ -1232,7 +1232,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: aws.Int32(100), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) ExpectApplied(ctx, env.Client, nodePool, nodeClass) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod(coretest.PodOptions{NodeSelector: map[string]string{corev1.LabelTopologyZone: "test-zone-1a"}}) @@ -1249,7 +1249,7 @@ var _ = Describe("CloudProvider", func() { {SubnetId: aws.String("test-subnet-2"), AvailabilityZone: aws.String("test-zone-1a"), AvailabilityZoneId: aws.String("tstz1-1a"), AvailableIpAddressCount: 
aws.Int32(11), Tags: []ec2types.Tag{{Key: aws.String("Name"), Value: aws.String("test-subnet-2")}}}, }}) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) nodeClass.Spec.Kubelet = &v1.KubeletConfiguration{ MaxPods: aws.Int32(1), } @@ -1298,7 +1298,7 @@ var _ = Describe("CloudProvider", func() { }) nodeClass.Spec.SubnetSelectorTerms = []v1.SubnetSelectorTerm{{Tags: map[string]string{"Name": "test-subnet-1"}}} ExpectApplied(ctx, env.Client, nodePool, nodeClass) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, 
awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) podSubnet1 := coretest.UnschedulablePod() ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, podSubnet1) diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index 16c1eeee5db1..33342e3c96d3 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -57,6 +57,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instance" "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" "github.com/aws/karpenter-provider-aws/pkg/providers/sqs" @@ -78,6 +79,7 @@ func NewControllers( cloudProvider cloudprovider.CloudProvider, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, + placementGroupProvider placementgroup.Provider, instanceProfileProvider instanceprofile.Provider, instanceProvider instance.Provider, pricingProvider pricing.Provider, @@ -90,7 +92,7 @@ func NewControllers( ) []controller.Controller { controllers := []controller.Controller{ nodeclasshash.NewController(kubeClient), - nodeclass.NewController(clk, kubeClient, cloudProvider, recorder, cfg.Region, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, instanceTypeProvider, launchTemplateProvider, capacityReservationProvider, ec2api, validationCache, recreationCache, amiResolver, options.FromContext(ctx).DisableDryRun), + nodeclass.NewController(clk, kubeClient, cloudProvider, recorder, cfg.Region, subnetProvider, 
securityGroupProvider, placementGroupProvider, amiProvider, instanceProfileProvider, instanceTypeProvider, launchTemplateProvider, capacityReservationProvider, ec2api, validationCache, recreationCache, amiResolver, options.FromContext(ctx).DisableDryRun), nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider), nodeclaimtagging.NewController(kubeClient, cloudProvider, instanceProvider), controllerspricing.NewController(pricingProvider), diff --git a/pkg/controllers/nodeclass/controller.go b/pkg/controllers/nodeclass/controller.go index 8609e683f207..9cb48824b8d6 100644 --- a/pkg/controllers/nodeclass/controller.go +++ b/pkg/controllers/nodeclass/controller.go @@ -57,6 +57,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) @@ -79,6 +80,7 @@ func NewController( region string, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, + placementGroupProvider placementgroup.Provider, amiProvider amifamily.Provider, instanceProfileProvider instanceprofile.Provider, instanceTypeProvider instancetype.Provider, @@ -103,6 +105,7 @@ func NewController( NewCapacityReservationReconciler(clk, capacityReservationProvider), NewSubnetReconciler(subnetProvider), NewSecurityGroupReconciler(securityGroupProvider), + NewPlacementGroupReconciler(placementGroupProvider), NewInstanceProfileReconciler(instanceProfileProvider, region, recreationCache), validation, }, diff --git a/pkg/controllers/nodeclass/placementgroup.go b/pkg/controllers/nodeclass/placementgroup.go new file mode 100644 index 000000000000..a1be10f8b8e0 --- /dev/null +++ b/pkg/controllers/nodeclass/placementgroup.go @@ -0,0 
+1,76 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeclass + +import ( + "context" + "fmt" + "time" + + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" +) + +type PlacementGroupReconciler struct { + provider placementgroup.Provider +} + +func NewPlacementGroupReconciler(provider placementgroup.Provider) *PlacementGroupReconciler { + return &PlacementGroupReconciler{ + provider: provider, + } +} + +func (p *PlacementGroupReconciler) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) { + if nodeClass.Spec.PlacementGroup == nil { + nodeClass.Status.PlacementGroup = nil + nodeClass.StatusConditions().SetTrue(v1.ConditionTypePlacementGroupReady) + return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil + } + + pg, err := p.provider.Get(ctx, nodeClass.Spec.PlacementGroup) + if err != nil { + return reconcile.Result{}, err + } + if pg == nil { + nodeClass.Status.PlacementGroup = nil + nodeClass.StatusConditions().SetFalse(v1.ConditionTypePlacementGroupReady, "PlacementGroupNotFound", "placementGroup did not match any PlacementGroup") + return reconcile.Result{RequeueAfter: time.Minute}, nil + } + if pg.State != ec2types.PlacementGroupStateAvailable { + nodeClass.Status.PlacementGroup = v1.PlacementGroupStatusFromEC2(pg) + 
nodeClass.StatusConditions().SetFalse(v1.ConditionTypePlacementGroupReady, "PlacementGroupNotAvailable", + fmt.Sprintf("placementGroup is in state %q, must be %q", pg.State, ec2types.PlacementGroupStateAvailable)) + return reconcile.Result{RequeueAfter: time.Minute}, nil + } + if nodeClass.Spec.PlacementGroup.Partition != nil && pg.Strategy != ec2types.PlacementStrategyPartition { + nodeClass.Status.PlacementGroup = v1.PlacementGroupStatusFromEC2(pg) + nodeClass.StatusConditions().SetFalse(v1.ConditionTypePlacementGroupReady, "PlacementGroupInvalid", "placementGroup.partition may only be set for partition placement groups") + return reconcile.Result{RequeueAfter: time.Minute}, nil + } + if nodeClass.Spec.PlacementGroup.Partition != nil && pg.PartitionCount != nil && *nodeClass.Spec.PlacementGroup.Partition > *pg.PartitionCount { + nodeClass.Status.PlacementGroup = v1.PlacementGroupStatusFromEC2(pg) + nodeClass.StatusConditions().SetFalse(v1.ConditionTypePlacementGroupReady, "PlacementGroupInvalid", + fmt.Sprintf("placementGroup.partition %d exceeds placement group partition count %d", *nodeClass.Spec.PlacementGroup.Partition, *pg.PartitionCount)) + return reconcile.Result{RequeueAfter: time.Minute}, nil + } + + nodeClass.Status.PlacementGroup = v1.PlacementGroupStatusFromEC2(pg) + nodeClass.StatusConditions().SetTrue(v1.ConditionTypePlacementGroupReady) + return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil +} diff --git a/pkg/controllers/nodeclass/placementgroup_test.go b/pkg/controllers/nodeclass/placementgroup_test.go new file mode 100644 index 000000000000..8a2becfb9221 --- /dev/null +++ b/pkg/controllers/nodeclass/placementgroup_test.go @@ -0,0 +1,155 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeclass_test + +import ( + "github.com/aws/aws-sdk-go-v2/aws" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/samber/lo" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + "github.com/aws/karpenter-provider-aws/pkg/test" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + . "sigs.k8s.io/karpenter/pkg/test/expectations" +) + +var _ = Describe("NodeClass Placement Group Status Controller", func() { + BeforeEach(func() { + nodeClass = test.EC2NodeClass(v1.EC2NodeClass{ + Spec: v1.EC2NodeClassSpec{ + PlacementGroup: &v1.PlacementGroup{Name: "analytics-cluster"}, + SubnetSelectorTerms: []v1.SubnetSelectorTerm{ + {Tags: map[string]string{"*": "*"}}, + }, + SecurityGroupSelectorTerms: []v1.SecurityGroupSelectorTerm{ + {Tags: map[string]string{"*": "*"}}, + }, + AMIFamily: lo.ToPtr(v1.AMIFamilyCustom), + AMISelectorTerms: []v1.AMISelectorTerm{ + {Tags: map[string]string{"*": "*"}}, + }, + }, + }) + awsEnv.EC2API.PlacementGroups.Store("analytics-cluster", ec2types.PlacementGroup{ + GroupId: aws.String("pg-0123456789abcdef0"), + GroupName: aws.String("analytics-cluster"), + State: ec2types.PlacementGroupStateAvailable, + Strategy: ec2types.PlacementStrategyCluster, + }) + awsEnv.EC2API.PlacementGroups.Store("partitioned-workload", ec2types.PlacementGroup{ + GroupId: aws.String("pg-0fedcba9876543210"), + GroupName: aws.String("partitioned-workload"), + State: ec2types.PlacementGroupStateAvailable, + Strategy: ec2types.PlacementStrategyPartition, + PartitionCount: lo.ToPtr(int32(3)), + }) + }) + + It("should resolve 
the configured placement group into status", func() { + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.PlacementGroup).To(Equal(&v1.PlacementGroupStatus{ + ID: "pg-0123456789abcdef0", + Name: "analytics-cluster", + Strategy: "cluster", + State: "available", + })) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsTrue()).To(BeTrue()) + }) + + It("should set the placement group condition true when placement groups are not configured", func() { + nodeClass.Spec.PlacementGroup = nil + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.PlacementGroup).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsTrue()).To(BeTrue()) + }) + + It("should set the placement group condition false when the placement group is missing", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "missing-placement-group"} + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.PlacementGroup).To(BeNil()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsFalse()).To(BeTrue()) + }) + + It("should set the placement group condition false when the placement group is not available", func() { + awsEnv.EC2API.PlacementGroups.Store("pending-group", ec2types.PlacementGroup{ + GroupId: aws.String("pg-pending123"), + GroupName: aws.String("pending-group"), + State: ec2types.PlacementGroupStatePending, + Strategy: ec2types.PlacementStrategyCluster, + }) + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "pending-group"} + ExpectApplied(ctx, env.Client, nodeClass) + 
ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).Reason).To(Equal("PlacementGroupNotAvailable")) + }) + + It("should set the placement group condition false when the placement group is deleting", func() { + awsEnv.EC2API.PlacementGroups.Store("deleting-group", ec2types.PlacementGroup{ + GroupId: aws.String("pg-deleting123"), + GroupName: aws.String("deleting-group"), + State: ec2types.PlacementGroupStateDeleting, + Strategy: ec2types.PlacementStrategyCluster, + }) + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "deleting-group"} + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).Reason).To(Equal("PlacementGroupNotAvailable")) + }) + + It("should reject partition overrides for non-partition placement groups", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "analytics-cluster", Partition: lo.ToPtr(int32(1))} + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).Reason).To(Equal("PlacementGroupInvalid")) + }) + + It("should reject partition number exceeding partition count", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "partitioned-workload", Partition: lo.ToPtr(int32(5))} + ExpectApplied(ctx, 
env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsFalse()).To(BeTrue()) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).Reason).To(Equal("PlacementGroupInvalid")) + }) + + It("should allow partition overrides for partition placement groups", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "partitioned-workload", Partition: lo.ToPtr(int32(1))} + ExpectApplied(ctx, env.Client, nodeClass) + ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) + nodeClass = ExpectExists(ctx, env.Client, nodeClass) + Expect(nodeClass.Status.PlacementGroup).To(Equal(&v1.PlacementGroupStatus{ + ID: "pg-0fedcba9876543210", + Name: "partitioned-workload", + Strategy: "partition", + PartitionCount: lo.ToPtr(int32(3)), + State: "available", + })) + Expect(nodeClass.StatusConditions().Get(v1.ConditionTypePlacementGroupReady).IsTrue()).To(BeTrue()) + }) +}) diff --git a/pkg/controllers/nodeclass/suite_test.go b/pkg/controllers/nodeclass/suite_test.go index 64ccfe25fae4..6931cc1e0a4c 100644 --- a/pkg/controllers/nodeclass/suite_test.go +++ b/pkg/controllers/nodeclass/suite_test.go @@ -95,6 +95,7 @@ var _ = BeforeEach(func() { fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, + awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, diff --git a/pkg/controllers/nodeclass/validation.go b/pkg/controllers/nodeclass/validation.go index 68dea8f8080f..b7681d5c3ab7 100644 --- a/pkg/controllers/nodeclass/validation.go +++ b/pkg/controllers/nodeclass/validation.go @@ -123,7 +123,7 @@ func (v *Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) nodeClass.StatusConditions().SetFalse( v1.ConditionTypeValidationSucceeded, ConditionReasonDependenciesNotReady, - "Awaiting AMI, 
Instance Profile, Security Group, and Subnet resolution", + "Awaiting AMI, Instance Profile, Placement Group, Security Group, and Subnet resolution", ) return reconcile.Result{RequeueAfter: requeueAfterTime}, nil } @@ -135,7 +135,7 @@ func (v *Validation) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) nodeClass.StatusConditions().SetUnknownWithReason( v1.ConditionTypeValidationSucceeded, ConditionReasonDependenciesNotReady, - "Awaiting AMI, Instance Profile, Security Group, and Subnet resolution", + "Awaiting AMI, Instance Profile, Placement Group, Security Group, and Subnet resolution", ) return reconcile.Result{RequeueAfter: requeueAfterTime}, nil } @@ -297,6 +297,7 @@ func (*Validation) requiredConditions() []string { return []string{ v1.ConditionTypeAMIsReady, v1.ConditionTypeInstanceProfileReady, + v1.ConditionTypePlacementGroupReady, v1.ConditionTypeSecurityGroupsReady, v1.ConditionTypeSubnetsReady, } @@ -306,6 +307,7 @@ func (*Validation) cacheKey(nodeClass *v1.EC2NodeClass, tags map[string]string) hash := lo.Must(hashstructure.Hash([]any{ nodeClass.Status.Subnets, nodeClass.Status.SecurityGroups, + nodeClass.Status.PlacementGroup, nodeClass.Status.AMIs, nodeClass.Status.InstanceProfile, nodeClass.Spec, diff --git a/pkg/controllers/nodeclass/validation_test.go b/pkg/controllers/nodeclass/validation_test.go index 5c81cf113cc8..367017f6c484 100644 --- a/pkg/controllers/nodeclass/validation_test.go +++ b/pkg/controllers/nodeclass/validation_test.go @@ -57,6 +57,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { for _, cond := range []string{ v1.ConditionTypeAMIsReady, v1.ConditionTypeInstanceProfileReady, + v1.ConditionTypePlacementGroupReady, v1.ConditionTypeSecurityGroupsReady, v1.ConditionTypeSubnetsReady, } { @@ -74,6 +75,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { }, Entry(v1.ConditionTypeAMIsReady, v1.ConditionTypeAMIsReady), Entry(v1.ConditionTypeInstanceProfileReady, 
v1.ConditionTypeInstanceProfileReady), + Entry(v1.ConditionTypePlacementGroupReady, v1.ConditionTypePlacementGroupReady), Entry(v1.ConditionTypeSecurityGroupsReady, v1.ConditionTypeSecurityGroupsReady), Entry(v1.ConditionTypeSubnetsReady, v1.ConditionTypeSubnetsReady), ) @@ -88,6 +90,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { }, Entry(v1.ConditionTypeAMIsReady, v1.ConditionTypeAMIsReady), Entry(v1.ConditionTypeInstanceProfileReady, v1.ConditionTypeInstanceProfileReady), + Entry(v1.ConditionTypePlacementGroupReady, v1.ConditionTypePlacementGroupReady), Entry(v1.ConditionTypeSecurityGroupsReady, v1.ConditionTypeSecurityGroupsReady), Entry(v1.ConditionTypeSubnetsReady, v1.ConditionTypeSubnetsReady), ) @@ -485,6 +488,7 @@ var _ = Describe("NodeClass Validation Status Controller", func() { fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, + awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, diff --git a/pkg/fake/ec2api.go b/pkg/fake/ec2api.go index 3f1e3b1b1d62..83bbb07e3878 100644 --- a/pkg/fake/ec2api.go +++ b/pkg/fake/ec2api.go @@ -69,6 +69,7 @@ type EC2Behavior struct { NextError AtomicError Subnets sync.Map + PlacementGroups sync.Map LaunchTemplates sync.Map launchTemplatesToCapacityReservations sync.Map // map[lt-name]cr-id } @@ -105,6 +106,10 @@ func (e *EC2API) Reset() { e.Subnets.Delete(k) return true }) + e.PlacementGroups.Range(func(k, v any) bool { + e.PlacementGroups.Delete(k) + return true + }) e.Instances.Range(func(k, v any) bool { e.Instances.Delete(k) return true @@ -453,6 +458,25 @@ func (e *EC2API) DescribeLaunchTemplates(_ context.Context, input *ec2.DescribeL return output, nil } +func (e *EC2API) DescribePlacementGroups(_ context.Context, input *ec2.DescribePlacementGroupsInput, _ ...func(*ec2.Options)) (*ec2.DescribePlacementGroupsOutput, error) { + if !e.NextError.IsNil() { + defer e.NextError.Reset() + return nil, e.NextError.Get() 
+ } + output := &ec2.DescribePlacementGroupsOutput{} + e.PlacementGroups.Range(func(_, value any) bool { + placementGroup := value.(ec2types.PlacementGroup) + switch { + case len(input.GroupIds) != 0 && lo.Contains(input.GroupIds, lo.FromPtr(placementGroup.GroupId)): + output.PlacementGroups = append(output.PlacementGroups, placementGroup) + case len(input.GroupNames) != 0 && lo.Contains(input.GroupNames, lo.FromPtr(placementGroup.GroupName)): + output.PlacementGroups = append(output.PlacementGroups, placementGroup) + } + return true + }) + return output, nil +} + func (e *EC2API) DeleteLaunchTemplate(_ context.Context, input *ec2.DeleteLaunchTemplateInput, _ ...func(*ec2.Options)) (*ec2.DeleteLaunchTemplateOutput, error) { if !e.NextError.IsNil() { defer e.NextError.Reset() diff --git a/pkg/fake/utils.go b/pkg/fake/utils.go index ff92c4a242a7..2b314f630553 100644 --- a/pkg/fake/utils.go +++ b/pkg/fake/utils.go @@ -68,6 +68,10 @@ func LaunchTemplateID() string { return fmt.Sprint(randomdata.Alphanumeric(17)) } +func PlacementGroupID() string { + return fmt.Sprintf("pg-%s", strings.ToLower(randomdata.Alphanumeric(17))) +} + func PrivateDNSName() string { return fmt.Sprintf("ip-192-168-%d-%d.%s.compute.internal", randomdata.Number(0, 256), randomdata.Number(0, 256), DefaultRegion) } diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 1ce87461f537..7d734bffafad 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -59,6 +59,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" ssmp "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" @@ -81,6 +82,7 @@ type Operator 
struct { RecreationCache *cache.Cache SubnetProvider subnet.Provider SecurityGroupProvider securitygroup.Provider + PlacementGroupProvider placementgroup.Provider InstanceProfileProvider instanceprofile.Provider AMIProvider amifamily.Provider AMIResolver amifamily.Resolver @@ -147,6 +149,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont cfg.Region, options.FromContext(ctx).IsolatedVPC, ) + placementGroupProvider := placementgroup.NewDefaultProvider(ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)) versionProvider := version.NewDefaultProvider(operator.KubernetesInterface, eksapi) // Ensure we're able to hydrate the version before starting any reliant controllers. // Version updates are hydrated asynchronously after this, in the event of a failure @@ -214,6 +217,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont RecreationCache: recreationCache, SubnetProvider: subnetProvider, SecurityGroupProvider: securityGroupProvider, + PlacementGroupProvider: placementGroupProvider, InstanceProfileProvider: instanceProfileProvider, AMIProvider: amiProvider, AMIResolver: amiResolver, diff --git a/pkg/providers/amifamily/resolver.go b/pkg/providers/amifamily/resolver.go index db34b5561adf..9b51db57c855 100644 --- a/pkg/providers/amifamily/resolver.go +++ b/pkg/providers/amifamily/resolver.go @@ -69,6 +69,8 @@ type Options struct { KubeDNSIP net.IP AssociatePublicIPAddress *bool IPPrefixCount *int32 + PlacementGroup *v1.PlacementGroupStatus + PlacementGroupPartition *int32 NodeClassName string } diff --git a/pkg/providers/launchtemplate/launchtemplate.go b/pkg/providers/launchtemplate/launchtemplate.go index 260dcb0e7f27..8ff85a0eb2b9 100644 --- a/pkg/providers/launchtemplate/launchtemplate.go +++ b/pkg/providers/launchtemplate/launchtemplate.go @@ -209,6 +209,8 @@ func (p *DefaultProvider) CreateAMIOptions(ctx context.Context, nodeClass *v1.EC KubeDNSIP: p.KubeDNSIP, 
AssociatePublicIPAddress: nodeClass.Spec.AssociatePublicIPAddress, IPPrefixCount: nodeClass.Spec.IPPrefixCount, + PlacementGroup: nodeClass.Status.PlacementGroup, + PlacementGroupPartition: placementGroupPartition(nodeClass), NodeClassName: nodeClass.Name, }, nil } @@ -451,6 +453,13 @@ func (p *DefaultProvider) ResolveClusterCIDR(ctx context.Context) error { return fmt.Errorf("no CIDR found in DescribeCluster response") } +func placementGroupPartition(nodeClass *v1.EC2NodeClass) *int32 { + if nodeClass.Spec.PlacementGroup != nil { + return nodeClass.Spec.PlacementGroup.Partition + } + return nil +} + // InjectDoNotSyncTaintsLabel adds a label for all non-custom AMI families. It is exported just for ease // of testing. // This label is to tell karpenter that it should *not* sync taints. This is to work around a race condition. diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 99dd1ce99b11..c6cdcab37a5e 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -2225,7 +2225,7 @@ eviction-max-pod-grace-period = 10 } ExpectApplied(ctx, env.Client, nodeClass, nodePool) - controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) + controller := nodeclass.NewController(awsEnv.Clock, env.Client, cloudProvider, recorder, fake.DefaultRegion, awsEnv.SubnetProvider, awsEnv.SecurityGroupProvider, awsEnv.PlacementGroupProvider, awsEnv.AMIProvider, awsEnv.InstanceProfileProvider, awsEnv.InstanceTypesProvider, awsEnv.LaunchTemplateProvider, awsEnv.CapacityReservationProvider, awsEnv.EC2API, 
awsEnv.ValidationCache, awsEnv.RecreationCache, awsEnv.AMIResolver, options.FromContext(ctx).DisableDryRun) ExpectObjectReconciled(ctx, env.Client, controller, nodeClass) pod := coretest.UnschedulablePod() @@ -2523,6 +2523,50 @@ eviction-max-pod-grace-period = 10 Entry("when dedicated specified", lo.ToPtr("dedicated"), ec2types.TenancyDedicated), ) }) + Context("PlacementGroup", func() { + It("should set placement group name on the launch template", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{Name: "analytics-cluster"} + nodeClass.Status.PlacementGroup = &v1.PlacementGroupStatus{ + Name: "analytics-cluster", + ID: "pg-0123456789abcdef0", + Strategy: "cluster", + State: "available", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, pod) + ExpectScheduled(ctx, env.Client, pod) + Expect(awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len()).To(BeNumerically(">=", 1)) + awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.ForEach(func(ltInput *ec2.CreateLaunchTemplateInput) { + Expect(lo.FromPtr(ltInput.LaunchTemplateData.Placement.GroupName)).To(Equal("analytics-cluster")) + Expect(lo.FromPtr(ltInput.LaunchTemplateData.Placement.GroupId)).To(Equal("pg-0123456789abcdef0")) + Expect(ltInput.LaunchTemplateData.Placement.PartitionNumber).To(BeNil()) + }) + }) + It("should set placement group id and partition on the launch template", func() { + nodeClass.Spec.PlacementGroup = &v1.PlacementGroup{ + ID: "pg-0123456789abcdef0", + Partition: lo.ToPtr(int32(1)), + } + nodeClass.Status.PlacementGroup = &v1.PlacementGroupStatus{ + Name: "partitioned-workload", + ID: "pg-0123456789abcdef0", + Strategy: "partition", + PartitionCount: lo.ToPtr(int32(3)), + State: "available", + } + ExpectApplied(ctx, env.Client, nodePool, nodeClass) + pod := coretest.UnschedulablePod() + ExpectProvisioned(ctx, env.Client, cluster, cloudProvider, prov, 
pod) + ExpectScheduled(ctx, env.Client, pod) + Expect(awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.Len()).To(BeNumerically(">=", 1)) + awsEnv.EC2API.CreateLaunchTemplateBehavior.CalledWithInput.ForEach(func(ltInput *ec2.CreateLaunchTemplateInput) { + Expect(lo.FromPtr(ltInput.LaunchTemplateData.Placement.GroupId)).To(Equal("pg-0123456789abcdef0")) + Expect(lo.FromPtr(ltInput.LaunchTemplateData.Placement.GroupName)).To(Equal("partitioned-workload")) + Expect(lo.FromPtr(ltInput.LaunchTemplateData.Placement.PartitionNumber)).To(Equal(int32(1))) + }) + }) + }) It("should generate a unique launch template per capacity reservation", func() { crs := []ec2types.CapacityReservation{ { diff --git a/pkg/providers/launchtemplate/types.go b/pkg/providers/launchtemplate/types.go index 5c1688b418b0..d9b5e2bd6061 100644 --- a/pkg/providers/launchtemplate/types.go +++ b/pkg/providers/launchtemplate/types.go @@ -167,5 +167,10 @@ func (b *CreateLaunchTemplateInputBuilder) Build(ctx context.Context) *ec2.Creat } } } + if b.options.PlacementGroup != nil { + lt.LaunchTemplateData.Placement.GroupName = lo.EmptyableToPtr(b.options.PlacementGroup.Name) + lt.LaunchTemplateData.Placement.GroupId = lo.EmptyableToPtr(b.options.PlacementGroup.ID) + lt.LaunchTemplateData.Placement.PartitionNumber = b.options.PlacementGroupPartition + } return lt } diff --git a/pkg/providers/placementgroup/provider.go b/pkg/providers/placementgroup/provider.go new file mode 100644 index 000000000000..60b281dde10f --- /dev/null +++ b/pkg/providers/placementgroup/provider.go @@ -0,0 +1,85 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package placementgroup + +import ( + "context" + "fmt" + "sync" + + "github.com/aws/aws-sdk-go-v2/service/ec2" + ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" + "github.com/patrickmn/go-cache" + + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" + sdk "github.com/aws/karpenter-provider-aws/pkg/aws" +) + +type Provider interface { + Get(context.Context, *v1.PlacementGroup) (*ec2types.PlacementGroup, error) +} + +type DefaultProvider struct { + sync.Mutex + ec2api sdk.EC2API + cache *cache.Cache +} + +func NewDefaultProvider(ec2api sdk.EC2API, cache *cache.Cache) *DefaultProvider { + return &DefaultProvider{ + ec2api: ec2api, + cache: cache, + } +} + +func (p *DefaultProvider) Get(ctx context.Context, placementGroup *v1.PlacementGroup) (*ec2types.PlacementGroup, error) { + if placementGroup == nil { + return nil, nil + } + + p.Lock() + defer p.Unlock() + + if cached, ok := p.cache.Get(cacheKey(placementGroup)); ok { + pg := cached.(ec2types.PlacementGroup) + return &pg, nil + } + + input := &ec2.DescribePlacementGroupsInput{} + if placementGroup.ID != "" { + input.GroupIds = []string{placementGroup.ID} + } else { + input.GroupNames = []string{placementGroup.Name} + } + out, err := p.ec2api.DescribePlacementGroups(ctx, input) + if err != nil { + return nil, fmt.Errorf("describing placement groups, %w", err) + } + if len(out.PlacementGroups) == 0 { + return nil, nil + } + if len(out.PlacementGroups) != 1 { + return nil, fmt.Errorf("expected one placement group, got %d", len(out.PlacementGroups)) + } + p.cache.SetDefault(cacheKey(placementGroup), 
out.PlacementGroups[0]) + return &out.PlacementGroups[0], nil +} + +func cacheKey(pg *v1.PlacementGroup) string { + if pg.ID != "" { + return "id:" + pg.ID + } + return "name:" + pg.Name +} diff --git a/pkg/test/environment.go b/pkg/test/environment.go index 5ff5f8bab1bd..1a867ff8ec46 100644 --- a/pkg/test/environment.go +++ b/pkg/test/environment.go @@ -37,6 +37,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile" "github.com/aws/karpenter-provider-aws/pkg/providers/instancetype" "github.com/aws/karpenter-provider-aws/pkg/providers/launchtemplate" + "github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup" "github.com/aws/karpenter-provider-aws/pkg/providers/pricing" "github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup" ssmp "github.com/aws/karpenter-provider-aws/pkg/providers/ssm" @@ -85,12 +86,14 @@ type Environment struct { DiscoveredCapacityCache *cache.Cache CapacityReservationCache *cache.Cache CapacityReservationAvailabilityCache *cache.Cache + PlacementGroupCache *cache.Cache ValidationCache *cache.Cache RecreationCache *cache.Cache ProtectedProfilesCache *cache.Cache // Providers CapacityReservationProvider *capacityreservation.DefaultProvider + PlacementGroupProvider *placementgroup.DefaultProvider InstanceTypesResolver *instancetype.DefaultResolver InstanceTypesProvider *instancetype.DefaultProvider InstanceProvider *instance.DefaultProvider @@ -135,6 +138,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment ssmCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) capacityReservationCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) capacityReservationAvailabilityCache := cache.New(24*time.Hour, awscache.DefaultCleanupInterval) + placementGroupCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) validationCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) recreationCache := 
cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval) fakePricingAPI := &fake.PricingAPI{} @@ -142,6 +146,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment // Providers pricingProvider := pricing.NewDefaultProvider(fakePricingAPI, ec2api, fake.DefaultRegion, false) + placementGroupProvider := placementgroup.NewDefaultProvider(ec2api, placementGroupCache) subnetProvider := subnet.NewDefaultProvider(ec2api, subnetCache, availableIPAdressCache, associatePublicIPAddressCache) securityGroupProvider := securitygroup.NewDefaultProvider(ec2api, securityGroupCache) versionProvider := version.NewDefaultProvider(env.KubernetesInterface, eksapi) @@ -218,11 +223,13 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment DiscoveredCapacityCache: discoveredCapacityCache, CapacityReservationCache: capacityReservationCache, CapacityReservationAvailabilityCache: capacityReservationAvailabilityCache, + PlacementGroupCache: placementGroupCache, ValidationCache: validationCache, RecreationCache: recreationCache, ProtectedProfilesCache: protectedProfilesCache, CapacityReservationProvider: capacityReservationProvider, + PlacementGroupProvider: placementGroupProvider, InstanceTypesResolver: instanceTypesResolver, InstanceTypesProvider: instanceTypesProvider, InstanceProvider: instanceProvider, @@ -262,6 +269,7 @@ func (env *Environment) Reset() { env.SSMCache.Flush() env.DiscoveredCapacityCache.Flush() env.CapacityReservationCache.Flush() + env.PlacementGroupCache.Flush() env.ValidationCache.Flush() env.RecreationCache.Flush() env.ProtectedProfilesCache.Flush() diff --git a/website/content/en/preview/concepts/nodeclasses.md b/website/content/en/preview/concepts/nodeclasses.md index 0e8d0886dd96..570bc34c2161 100644 --- a/website/content/en/preview/concepts/nodeclasses.md +++ b/website/content/en/preview/concepts/nodeclasses.md @@ -115,6 +115,14 @@ spec: - id: cr-123 - instanceMatchCriteria: open + # Optional, 
launches nodes into an existing EC2 placement group. + # Specify `name` for a placement group in the same account, or `id` + # for a shared placement group. `partition` is only valid for partition + # placement groups. + placementGroup: + name: analytics-partition + partition: 2 + # Optional, propagates tags to underlying EC2 resources tags: team: team-a @@ -211,6 +219,13 @@ status: reservationType: default state: active + # Resolved placement group + placementGroup: + id: pg-0fc13f6eb3example + name: analytics-partition + strategy: partition + partitionCount: 5 + # Generated instance profile name from "role" instanceProfile: "${CLUSTER_NAME}-0123456778901234567789" conditions: @@ -223,6 +238,9 @@ status: - lastTransitionTime: "2024-02-02T19:54:34Z" status: "True" type: SecurityGroupsReady + - lastTransitionTime: "2024-02-02T19:54:34Z" + status: "True" + type: PlacementGroupReady - lastTransitionTime: "2024-02-02T19:54:34Z" status: "True" type: AMIsReady @@ -960,6 +978,37 @@ spec: key: foo ``` +## spec.placementGroup + +Use `spec.placementGroup` to launch nodes into an existing EC2 placement group. + +Karpenter does not create or delete the placement group. The placement group must already exist in EC2, and Karpenter only references it during instance launch. + +```yaml +spec: + placementGroup: + name: analytics-partition +``` + +Use `name` for placement groups in the same account. Use `id` when launching into a [shared placement group](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html), because EC2 requires the placement-group ID for shared launches. + +`partition` is optional and only applies to partition placement groups: + +```yaml +spec: + placementGroup: + name: analytics-partition + partition: 2 +``` + +If `partition` is omitted, EC2 chooses the partition for each instance and tries to distribute instances evenly across the group's partitions. If it is set, Karpenter requests that specific partition for every instance it launches from this NodeClass. 
+ +Placement-group behavior still follows AWS limits and semantics: + +- Cluster placement groups are single-AZ, so combine them with subnet selection or NodePool zone requirements that keep launches in one Availability Zone. +- Spread and partition placement groups can span multiple Availability Zones, but their per-AZ limits still apply. +- Karpenter does not manage placement-group strategy, partition count, or spread level; those are properties of the placement group you create in EC2. + ## spec.tags Karpenter adds tags to all resources it creates, including EC2 Instances, EBS volumes, and Launch Templates. The default set of tags are listed below.