Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion kwok/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
// the previously resolved value will be used.
lo.Must0(versionProvider.UpdateVersion(ctx))
ssmProvider := ssmp.NewDefaultProvider(ssm.NewFromConfig(cfg), ssmCache)
amiProvider := amifamily.NewDefaultProvider(operator.Clock, versionProvider, ssmProvider, ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval))
amiProvider := amifamily.NewDefaultProvider(operator.Clock, versionProvider, ssmProvider, ec2api, cache.New(options.FromContext(ctx).AMICacheTTL, awscache.DefaultCleanupInterval))
placementGroupProvider := placementgroup.NewProvider(
ec2api,
cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval),
Expand Down
5 changes: 5 additions & 0 deletions pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ const (
// AWS APIs, which can have a serious impact on performance and scalability.
// DO NOT CHANGE THIS VALUE WITHOUT DUE CONSIDERATION
DefaultTTL = time.Minute
// AMICacheTTL is the default TTL for cached AMI discovery results. Operators
// can override this at runtime via the --ami-cache-ttl flag. Setting the cache
// TTL >= the requeue interval ensures scheduled reconciles are served from cache
// rather than re-querying the EC2 API on every reconcile.
AMICacheTTL = time.Minute
// PlacementGroupAvailabilityTTL is the TTL for resolved placement group data.
PlacementGroupAvailabilityTTL = 24 * time.Hour
// UnavailableOfferingsTTL is the time before offerings that were marked as unavailable
Expand Down
2 changes: 1 addition & 1 deletion pkg/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ func NewOperator(ctx context.Context, operator *operator.Operator) (context.Cont
// the previously resolved value will be used.
lo.Must0(versionProvider.UpdateVersion(ctx))
ssmProvider := ssmp.NewDefaultProvider(ssm.NewFromConfig(cfg), ssmCache)
amiProvider := amifamily.NewDefaultProvider(operator.Clock, versionProvider, ssmProvider, ec2api, cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval))
amiProvider := amifamily.NewDefaultProvider(operator.Clock, versionProvider, ssmProvider, ec2api, cache.New(options.FromContext(ctx).AMICacheTTL, awscache.DefaultCleanupInterval))
placementGroupProvider := placementgroup.NewProvider(
ec2api,
cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval),
Expand Down
3 changes: 3 additions & 0 deletions pkg/operator/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"flag"
"fmt"
"os"
"time"

coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/utils/env"
Expand All @@ -43,6 +44,7 @@ type Options struct {
InterruptionQueue string
ReservedENIs int
DisableDryRun bool
AMICacheTTL time.Duration
}

func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
Expand All @@ -55,6 +57,7 @@ func (o *Options) AddFlags(fs *coreoptions.FlagSet) {
fs.StringVar(&o.InterruptionQueue, "interruption-queue", env.WithDefaultString("INTERRUPTION_QUEUE", ""), "Interruption queue is the name of the SQS queue used for processing interruption events from EC2. Interruption handling is disabled if not specified. Enabling interruption handling may require additional permissions on the controller service account. Additional permissions are outlined in the docs.")
fs.IntVar(&o.ReservedENIs, "reserved-enis", env.WithDefaultInt("RESERVED_ENIS", 0), "Reserved ENIs are not included in the calculations for max-pods or kube-reserved. This is most often used in the VPC CNI custom networking setup https://docs.aws.amazon.com/eks/latest/userguide/cni-custom-network.html.")
fs.BoolVarWithEnv(&o.DisableDryRun, "disable-dry-run", "DISABLE_DRY_RUN", false, "If true, then disable dry run validation for EC2NodeClasses.")
fs.DurationVar(&o.AMICacheTTL, "ami-cache-ttl", env.WithDefaultDuration("AMI_CACHE_TTL", time.Minute), "TTL for cached AMI discovery results.")
}

func (o *Options) Parse(fs *coreoptions.FlagSet, args ...string) error {
Expand Down
9 changes: 9 additions & 0 deletions pkg/operator/options/options_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func (o *Options) Validate() error {
o.validateVMMemoryOverheadPercent(),
o.validateReservedENIs(),
o.validateRequiredFields(),
o.validateAMICacheTTL(),
)
}

Expand Down Expand Up @@ -64,3 +65,11 @@ func (o *Options) validateRequiredFields() error {
}
return nil
}

// validateAMICacheTTL checks the configured AMI cache TTL. It returns nil
// when AMICacheTTL is strictly greater than zero, and an error for a zero
// or negative duration.
func (o *Options) validateAMICacheTTL() error {
	// Any positive duration is accepted as-is.
	if o.AMICacheTTL > 0 {
		return nil
	}
	return fmt.Errorf("ami-cache-ttl must be positive")
}

12 changes: 11 additions & 1 deletion pkg/operator/options/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"flag"
"os"
"testing"
"time"

"github.com/samber/lo"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
Expand Down Expand Up @@ -63,7 +64,8 @@ var _ = Describe("Options", func() {
"--vm-memory-overhead-percent", "0.1",
"--interruption-queue", "env-cluster",
"--reserved-enis", "10",
"--disable-dry-run")
"--disable-dry-run",
"--ami-cache-ttl", "15m")
Expect(err).ToNot(HaveOccurred())
expectOptionsEqual(opts, test.Options(test.OptionsFields{
ClusterCABundle: lo.ToPtr("env-bundle"),
Expand All @@ -74,6 +76,7 @@ var _ = Describe("Options", func() {
InterruptionQueue: lo.ToPtr("env-cluster"),
ReservedENIs: lo.ToPtr(10),
DisableDryRun: lo.ToPtr(true),
AMICacheTTL: lo.ToPtr(15 * time.Minute),
}))
})
It("should correctly fallback to env vars when CLI flags aren't set", func() {
Expand All @@ -85,6 +88,7 @@ var _ = Describe("Options", func() {
os.Setenv("INTERRUPTION_QUEUE", "env-cluster")
os.Setenv("RESERVED_ENIS", "10")
os.Setenv("DISABLE_DRY_RUN", "false")
os.Setenv("AMI_CACHE_TTL", "15m")

// Add flags after we set the environment variables so that the parsing logic correctly refers
// to the new environment variable values
Expand All @@ -100,6 +104,7 @@ var _ = Describe("Options", func() {
InterruptionQueue: lo.ToPtr("env-cluster"),
ReservedENIs: lo.ToPtr(10),
DisableDryRun: lo.ToPtr(false),
AMICacheTTL: lo.ToPtr(15 * time.Minute),
}))
})

Expand All @@ -123,6 +128,10 @@ var _ = Describe("Options", func() {
err := opts.Parse(fs, "--cluster-name", "test-cluster", "--reserved-enis", "-1")
Expect(err).To(HaveOccurred())
})
It("should fail when ami-cache-ttl is zero", func() {
err := opts.Parse(fs, "--cluster-name", "test-cluster", "--ami-cache-ttl", "0")
Expect(err).To(HaveOccurred())
})
})
})

Expand All @@ -136,4 +145,5 @@ func expectOptionsEqual(optsA *options.Options, optsB *options.Options) {
Expect(optsA.InterruptionQueue).To(Equal(optsB.InterruptionQueue))
Expect(optsA.ReservedENIs).To(Equal(optsB.ReservedENIs))
Expect(optsA.DisableDryRun).To(Equal(optsB.DisableDryRun))
Expect(optsA.AMICacheTTL).To(Equal(optsB.AMICacheTTL))
}
2 changes: 1 addition & 1 deletion pkg/test/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ func NewEnvironment(ctx context.Context, env *coretest.Environment) *Environment
iamapi := fake.NewIAMAPI()

// cache
amiCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)
amiCache := cache.New(awscache.AMICacheTTL, awscache.DefaultCleanupInterval)
ec2Cache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)
instanceTypeCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)
instanceCache := cache.New(awscache.DefaultTTL, awscache.DefaultCleanupInterval)
Expand Down
3 changes: 3 additions & 0 deletions pkg/test/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package test

import (
"fmt"
"time"

"github.com/imdario/mergo"
"github.com/samber/lo"
Expand All @@ -33,6 +34,7 @@ type OptionsFields struct {
InterruptionQueue *string
ReservedENIs *int
DisableDryRun *bool
AMICacheTTL *time.Duration
}

func Options(overrides ...OptionsFields) *options.Options {
Expand All @@ -52,5 +54,6 @@ func Options(overrides ...OptionsFields) *options.Options {
InterruptionQueue: lo.FromPtrOr(opts.InterruptionQueue, ""),
ReservedENIs: lo.FromPtrOr(opts.ReservedENIs, 0),
DisableDryRun: lo.FromPtrOr(opts.DisableDryRun, false),
AMICacheTTL: lo.FromPtrOr(opts.AMICacheTTL, time.Minute),
}
}
1 change: 1 addition & 0 deletions website/content/en/preview/reference/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Karpenter surfaces environment variables and CLI parameters to allow you to conf

| Environment Variable | CLI Flag | Description |
|--|--|--|
| AMI_CACHE_TTL | \-\-ami-cache-ttl | TTL for cached AMI discovery results. (default = 1m0s)|
| BATCH_IDLE_DURATION | \-\-batch-idle-duration | The maximum amount of time with no new pending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately. (default = 1s)|
| BATCH_MAX_DURATION | \-\-batch-max-duration | The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one time which usually results in fewer but larger nodes. (default = 10s)|
| CLUSTER_CA_BUNDLE | \-\-cluster-ca-bundle | Cluster CA bundle for nodes to use for TLS connections with the API server. If not set, this is taken from the controller's TLS configuration.|
Expand Down