Skip to content

Commit fb48fbc

Browse files
committed
chore: invalidate NodeClass on unsupported AMI alias
1 parent 1de6d8c commit fb48fbc

File tree

10 files changed

+85
-7
lines changed

10 files changed

+85
-7
lines changed

pkg/controllers/nodeclass/ami.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ func NewAMIReconciler(provider amifamily.Provider) *AMI {
4747
func (a *AMI) Reconcile(ctx context.Context, nodeClass *v1.EC2NodeClass) (reconcile.Result, error) {
4848
amis, err := a.amiProvider.List(ctx, nodeClass)
4949
if err != nil {
50-
if amifamily.IsAl2DeprecationError(err) || amifamily.IsWS2025UnsupportedVersionError(err) {
50+
if amifamily.IsAl2DeprecationError(err) || amifamily.IsWS2025UnsupportedVersionError(err) || amifamily.IsAMINotFoundForAliasError(err) {
5151
nodeClass.StatusConditions().SetFalse(v1.ConditionTypeAMIsReady, "UnsupportedAlias", err.Error())
5252
return reconcile.Result{}, reconcile.TerminalError(fmt.Errorf("getting amis, %w", err))
5353
}

pkg/controllers/nodeclass/ami_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,36 @@ var _ = Describe("NodeClass AMI Status Controller", func() {
618618
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
619619
Expect(nodeClass.StatusConditions().IsTrue(v1.ConditionTypeAMIsReady)).To(BeFalse())
620620
})
621+
DescribeTable("should set UnsupportedAlias condition when no AMIs are found for alias", func(alias string) {
622+
awsEnv.SSMAPI.Parameters = map[string]string{"not-real": "not-real"}
623+
624+
nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: alias}}
625+
ExpectApplied(ctx, env.Client, nodeClass)
626+
_ = ExpectObjectReconcileFailed(ctx, env.Client, controller, nodeClass)
627+
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
628+
629+
Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).IsFalse()).To(BeTrue())
630+
Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).Reason).To(Equal("UnsupportedAlias"))
631+
Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).Message).To(ContainSubstring("failed to discover any AMIs for alias"))
632+
},
633+
Entry("AL2023 with invalid version", "al2023@v20240101"),
634+
Entry("Bottlerocket with invalid version", "bottlerocket@v20240101"),
635+
Entry("Windows with invalid version", "windows2022@latest"),
636+
)
637+
DescribeTable("should retry when SSM returns transient errors", func(alias string) {
638+
awsEnv.SSMAPI.WantErr = fmt.Errorf("RequestLimitExceeded: Rate exceeded")
639+
640+
nodeClass.Spec.AMISelectorTerms = []v1.AMISelectorTerm{{Alias: alias}}
641+
ExpectApplied(ctx, env.Client, nodeClass)
642+
_ = ExpectObjectReconcileFailed(ctx, env.Client, controller, nodeClass)
643+
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
644+
645+
Expect(nodeClass.StatusConditions().Get(v1.ConditionTypeAMIsReady).Reason).ToNot(Equal("UnsupportedAlias"))
646+
},
647+
Entry("AL2023 with invalid version", "al2023@v20240101"),
648+
Entry("Bottlerocket with invalid version", "bottlerocket@v20240101"),
649+
Entry("Windows with invalid version", "windows2022@latest"),
650+
)
621651
Context("NodeClass AMI Status", func() {
622652
BeforeEach(func() {
623653
// Set time using the injectable/fake clock to now

pkg/errors/errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ var (
4444
"QueueDoesNotExist",
4545
"NoSuchEntity",
4646
"ParameterNotFound",
47+
"ParameterVersionNotFound",
4748
)
4849
alreadyExistsErrorCodes = sets.New(
4950
"EntityAlreadyExists",

pkg/fake/ssmapi.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ import (
1919
"fmt"
2020

2121
"github.com/Pallinder/go-randomdata"
22-
"github.com/awslabs/operatorpkg/serrors"
2322
"github.com/samber/lo"
2423

2524
"github.com/aws/aws-sdk-go-v2/service/ssm"
2625
ssmtypes "github.com/aws/aws-sdk-go-v2/service/ssm/types"
26+
"github.com/aws/smithy-go"
2727

2828
sdk "github.com/aws/karpenter-provider-aws/pkg/aws"
2929
)
@@ -54,7 +54,10 @@ func (a SSMAPI) GetParameter(_ context.Context, input *ssm.GetParameterInput, _
5454
if len(a.Parameters) != 0 {
5555
value, ok := a.Parameters[parameter]
5656
if !ok {
57-
return &ssm.GetParameterOutput{}, serrors.Wrap(fmt.Errorf("parameter not found"), "parameter", lo.FromPtr(input.Name))
57+
return &ssm.GetParameterOutput{}, &smithy.GenericAPIError{
58+
Code: "ParameterNotFound",
59+
Message: fmt.Sprintf("parameter %s not found", parameter),
60+
}
5861
}
5962
return &ssm.GetParameterOutput{
6063
Parameter: &ssmtypes.Parameter{

pkg/providers/amifamily/al2.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"sigs.k8s.io/karpenter/pkg/scheduling"
2929

3030
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
31+
"github.com/aws/karpenter-provider-aws/pkg/errors"
3132

3233
"sigs.k8s.io/karpenter/pkg/cloudprovider"
3334

@@ -42,6 +43,7 @@ type AL2 struct {
4243

4344
func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) {
4445
ids := map[string][]Variant{}
46+
isAllNotFoundErrors := true
4547
for path, variants := range map[string][]Variant{
4648
fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2/%s/image_id", k8sVersion, lo.Ternary(
4749
amiVersion == v1.AliasVersionLatest,
@@ -64,12 +66,18 @@ func (a AL2) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k
6466
IsMutable: amiVersion == v1.AliasVersionLatest,
6567
})
6668
if err != nil {
69+
isAllNotFoundErrors = isAllNotFoundErrors && errors.IsNotFound(err)
6770
continue
6871
}
6972
ids[imageID] = variants
7073
}
7174
// Failed to discover any AMIs, we should short circuit AMI discovery
7275
if len(ids) == 0 {
76+
if isAllNotFoundErrors {
77+
return DescribeImageQuery{}, &AMINotFoundForAliasError{
78+
error: serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("al2@%s", amiVersion)),
79+
}
80+
}
7381
return DescribeImageQuery{}, serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("al2@%s", amiVersion))
7482
}
7583

pkg/providers/amifamily/al2023.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
2828

2929
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
30+
"github.com/aws/karpenter-provider-aws/pkg/errors"
3031
"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap"
3132
"github.com/aws/karpenter-provider-aws/pkg/providers/ssm"
3233
)
@@ -38,6 +39,7 @@ type AL2023 struct {
3839

3940
func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider, k8sVersion string, amiVersion string) (DescribeImageQuery, error) {
4041
ids := map[string]Variant{}
42+
isAllNotFoundErrors := true
4143
for arch, variants := range map[string][]Variant{
4244
"x86_64": {VariantStandard, VariantNvidia, VariantNeuron},
4345
"arm64": {VariantStandard, VariantNvidia},
@@ -49,13 +51,19 @@ func (a AL2023) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provider
4951
IsMutable: amiVersion == v1.AliasVersionLatest,
5052
})
5153
if err != nil {
54+
isAllNotFoundErrors = isAllNotFoundErrors && errors.IsNotFound(err)
5255
continue
5356
}
5457
ids[imageID] = variant
5558
}
5659
}
5760
// Failed to discover any AMIs, we should short circuit AMI discovery
5861
if len(ids) == 0 {
62+
if isAllNotFoundErrors {
63+
return DescribeImageQuery{}, &AMINotFoundForAliasError{
64+
error: serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("al2023@%s", amiVersion)),
65+
}
66+
}
5967
return DescribeImageQuery{}, serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("al2023@%s", amiVersion))
6068
}
6169

pkg/providers/amifamily/ami.go

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,16 +171,18 @@ func (p *DefaultProvider) DescribeImageQueries(ctx context.Context, nodeClass *v
171171

172172
//nolint:gocyclo
173173
func (p *DefaultProvider) amis(ctx context.Context, nodeClass *v1.EC2NodeClass) (AMIs, error) {
174-
queries, err := p.DescribeImageQueries(ctx, nodeClass)
175-
if err != nil {
176-
return nil, fmt.Errorf("getting AMI queries, %w", err)
177-
}
178174
hash := utils.GetNodeClassHash(nodeClass)
179175
if images, ok := p.cache.Get(hash); ok {
180176
// Ensure what's returned from this function is a deep-copy of AMIs so alterations
181177
// to the data don't affect the original
182178
return append(AMIs{}, images.(AMIs)...), nil
183179
}
180+
181+
queries, err := p.DescribeImageQueries(ctx, nodeClass)
182+
if err != nil {
183+
return nil, fmt.Errorf("getting AMI queries, %w", err)
184+
}
185+
184186
images := map[uint64]AMI{}
185187
for _, query := range queries {
186188
paginator := ec2.NewDescribeImagesPaginator(p.ec2api, query.DescribeImagesInput())

pkg/providers/amifamily/bottlerocket.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"github.com/samber/lo"
2424

2525
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
26+
"github.com/aws/karpenter-provider-aws/pkg/errors"
2627
"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily/bootstrap"
2728
"github.com/aws/karpenter-provider-aws/pkg/providers/ssm"
2829
"github.com/aws/karpenter-provider-aws/pkg/providers/version"
@@ -45,6 +46,7 @@ func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Pr
4546
// Bottlerocket AMIs versions are prefixed with a v on GitHub, but not in the SSM path. We should accept both.
4647
trimmedAMIVersion := strings.TrimLeft(amiVersion, "v")
4748
ids := map[string][]Variant{}
49+
isAllNotFoundErrors := true
4850
for path, variants := range map[string][]Variant{
4951
fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/x86_64/%s/image_id", k8sVersion, trimmedAMIVersion): {VariantStandard, VariantNeuron},
5052
fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s/arm64/%s/image_id", k8sVersion, trimmedAMIVersion): {VariantStandard},
@@ -56,12 +58,18 @@ func (b Bottlerocket) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Pr
5658
IsMutable: amiVersion == v1.AliasVersionLatest,
5759
})
5860
if err != nil {
61+
isAllNotFoundErrors = isAllNotFoundErrors && errors.IsNotFound(err)
5962
continue
6063
}
6164
ids[imageID] = variants
6265
}
6366
// Failed to discover any AMIs, we should short circuit AMI discovery
6467
if len(ids) == 0 {
68+
if isAllNotFoundErrors {
69+
return DescribeImageQuery{}, &AMINotFoundForAliasError{
70+
error: serrors.Wrap(fmt.Errorf(`failed to discover any AMIs for alias`), "alias", fmt.Sprintf("bottlerocket@%s", amiVersion)),
71+
}
72+
}
6573
return DescribeImageQuery{}, serrors.Wrap(fmt.Errorf(`failed to discover any AMIs for alias`), "alias", fmt.Sprintf("bottlerocket@%s", amiVersion))
6674
}
6775

pkg/providers/amifamily/types.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,15 @@ func IsWS2025UnsupportedVersionError(err error) bool {
153153
var ws2025Err *WS2025UnsupportedVersionError
154154
return errors.As(err, &ws2025Err)
155155
}
156+
157+
type AMINotFoundForAliasError struct {
158+
error
159+
}
160+
161+
func IsAMINotFoundForAliasError(err error) bool {
162+
if err == nil {
163+
return false
164+
}
165+
var notFoundErr *AMINotFoundForAliasError
166+
return errors.As(err, &notFoundErr)
167+
}

pkg/providers/amifamily/windows.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"sigs.k8s.io/karpenter/pkg/scheduling"
2323

2424
v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1"
25+
"github.com/aws/karpenter-provider-aws/pkg/errors"
2526

2627
"github.com/samber/lo"
2728
"k8s.io/apimachinery/pkg/api/resource"
@@ -53,6 +54,11 @@ func (w Windows) DescribeImageQuery(ctx context.Context, ssmProvider ssm.Provide
5354
IsMutable: true,
5455
})
5556
if err != nil {
57+
if errors.IsNotFound(err) {
58+
return DescribeImageQuery{}, &AMINotFoundForAliasError{
59+
error: serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("windows%s@%s", w.Version, amiVersion)),
60+
}
61+
}
5662
return DescribeImageQuery{}, serrors.Wrap(fmt.Errorf("failed to discover any AMIs for alias"), "alias", fmt.Sprintf("windows%s@%s", w.Version, amiVersion))
5763
}
5864
return DescribeImageQuery{

0 commit comments

Comments
 (0)