Skip to content

Commit dd6327a

Browse files
feat(gateway): Auto-create ServiceNetwork on Gateway creation
- Gateway reconciler auto-creates a VPC Lattice ServiceNetwork using the Gateway's .metadata.name when no external SN exists
- CreateOrUpdate is idempotent: reuses existing SN if found
- Sibling Gateway safety: when deleting a Gateway, skip SN deletion if another active Gateway with the same name exists in a different namespace
- Service association guard: block SN deletion if it still has active service associations, returning a clear error message
- Ownership tracked via ManagedBy tag; externally-created SNs are never deleted
- Documentation updated with auto-creation behavior, default settings, and external SN reuse for advanced configuration
- Unit tests for hasSiblingGateway (4 cases) and Delete with active service associations
- E2E integration tests for sibling deletion safety and service association delete guard
1 parent b1a117d commit dd6327a

File tree

7 files changed

+461
-19
lines changed

7 files changed

+461
-19
lines changed

docs/api-types/gateway.md

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,28 @@ Service networks are identified by Gateway name (without namespace) - for exampl
1111
will point to a VPC Lattice service network `my-gateway`. If multiple Gateways share the same name, all of them
1212
will point to the same service network.
1313

14-
VPC Lattice service networks must be managed separately, as it is a broader concept that can cover resources
15-
outside the Kubernetes cluster. To create and manage a service network, you can either:
16-
17-
- Specify `DEFAULT_SERVICE_NETWORK` configuration option on the controller. This will make the controller
18-
to create a service network with such name, and associate the cluster VPC to it for you. This is suitable
19-
for simple use cases with single service network.
20-
- Manage service networks outside the cluster, using AWS Console, CDK, CloudFormation, etc. This is recommended
21-
for more advanced use cases that cover multiple clusters and VPCs.
14+
### ServiceNetwork Lifecycle
15+
16+
When a Gateway is created, the controller automatically creates a VPC Lattice ServiceNetwork with the same name
17+
and associates it with the cluster VPC. When the Gateway is deleted, the controller deletes the ServiceNetwork
18+
if it was created by the controller (tracked via the `application-networking.k8s.aws/ManagedBy` tag).
19+
20+
Auto-created ServiceNetworks use **default VPC Lattice settings** — no auth policy, no sharing configuration,
21+
and no custom attributes. If you need to configure auth type, sharing, or other
22+
[ServiceNetwork attributes](https://docs.aws.amazon.com/vpc-lattice/latest/APIReference/API_CreateServiceNetwork.html),
23+
create the ServiceNetwork externally (via Console, CLI, CDK, CloudFormation, etc.) before creating the Gateway.
24+
The controller will detect the existing ServiceNetwork by name and reuse it without modifying or deleting it.
25+
26+
**Deletion behavior:**
27+
28+
- If multiple Gateways share the same name (and therefore the same ServiceNetwork), deleting one Gateway will **not** delete the
29+
ServiceNetwork as long as another Gateway with that name exists and is not itself being deleted.
30+
- If the ServiceNetwork has active service associations, the controller will not delete it and will report
31+
an error asking you to detach all services first.
32+
- Externally-created ServiceNetworks are never deleted by the controller.
33+
34+
In addition to auto-creation, you can also use the `DEFAULT_SERVICE_NETWORK` configuration option on the controller
35+
to create a default ServiceNetwork at startup.
2236

2337
Gateways with `amazon-vpc-lattice` GatewayClass do not create a single entrypoint to bind Listeners and Routes
2438
under them. Instead, each Route will have its own domain name assigned. To see an example of how domain names

pkg/controllers/gateway_controller.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ type gatewayReconciler struct {
6161
finalizerManager k8s.FinalizerManager
6262
eventRecorder record.EventRecorder
6363
cloud aws.Cloud
64+
snManager deploy.ServiceNetworkManager
6465
}
6566

6667
func RegisterGatewayController(
@@ -73,18 +74,20 @@ func RegisterGatewayController(
7374
scheme := mgr.GetScheme()
7475
evtRec := mgr.GetEventRecorderFor("gateway")
7576

77+
snManager := deploy.NewDefaultServiceNetworkManager(log, cloud)
78+
7679
r := &gatewayReconciler{
7780
log: log,
7881
client: mgrClient,
7982
scheme: scheme,
8083
finalizerManager: finalizerManager,
8184
eventRecorder: evtRec,
8285
cloud: cloud,
86+
snManager: snManager,
8387
}
8488

8589
if config.DefaultServiceNetwork != "" {
8690
// Attempt creation of default service network, move gracefully even if it fails.
87-
snManager := deploy.NewDefaultServiceNetworkManager(log, cloud)
8891
_, err := snManager.CreateOrUpdate(context.Background(), &model.ServiceNetwork{
8992
Spec: model.ServiceNetworkSpec{
9093
Name: config.DefaultServiceNetwork,
@@ -177,12 +180,40 @@ func (r *gatewayReconciler) reconcileDelete(ctx context.Context, gw *gwv1.Gatewa
177180
}
178181
}
179182

180-
err = r.finalizerManager.RemoveFinalizers(ctx, gw, gatewayFinalizer)
183+
hasSibling, err := r.hasSiblingGateway(ctx, gw)
181184
if err != nil {
182185
return err
183186
}
187+
if hasSibling {
188+
r.log.Infof(ctx, "Skipping ServiceNetwork deletion for %s, another Gateway with the same name exists", gw.Name)
189+
} else {
190+
if err := r.snManager.Delete(ctx, gw.Name); err != nil {
191+
return err
192+
}
193+
}
184194

185-
return nil
195+
return r.finalizerManager.RemoveFinalizers(ctx, gw, gatewayFinalizer)
196+
}
197+
198+
// hasSiblingGateway checks if another active Lattice-controlled Gateway with the same .Name exists.
199+
// Gateways that are themselves being deleted (DeletionTimestamp set) are not counted as siblings,
200+
// so that simultaneous deletion of all Gateways sharing an SN name still cleans up the SN.
201+
func (r *gatewayReconciler) hasSiblingGateway(ctx context.Context, gw *gwv1.Gateway) (bool, error) {
202+
gwList := &gwv1.GatewayList{}
203+
if err := r.client.List(ctx, gwList); err != nil {
204+
return false, fmt.Errorf("failed to list gateways: %w", err)
205+
}
206+
for i := range gwList.Items {
207+
other := &gwList.Items[i]
208+
if other.UID == gw.UID {
209+
continue
210+
}
211+
if other.Name == gw.Name && other.DeletionTimestamp.IsZero() &&
212+
k8s.IsControlledByLatticeGatewayController(ctx, r.client, other) {
213+
return true, nil
214+
}
215+
}
216+
return false, nil
186217
}
187218

188219
func (r *gatewayReconciler) reconcileUpsert(ctx context.Context, gw *gwv1.Gateway) error {
@@ -205,14 +236,12 @@ func (r *gatewayReconciler) reconcileUpsert(ctx context.Context, gw *gwv1.Gatewa
205236
return err
206237
}
207238

208-
snInfo, err := r.cloud.Lattice().FindServiceNetwork(ctx, gw.Name)
239+
snStatus, err := r.snManager.CreateOrUpdate(ctx, &model.ServiceNetwork{
240+
Spec: model.ServiceNetworkSpec{
241+
Name: gw.Name,
242+
},
243+
})
209244
if err != nil {
210-
if services.IsNotFoundError(err) {
211-
if err = r.updateGatewayProgrammedStatus(ctx, gw, gwv1.GatewayReasonPending, "VPC Lattice Service Network not found"); err != nil {
212-
return lattice_runtime.NewRetryError()
213-
}
214-
return nil
215-
}
216245
if errors.Is(err, services.ErrNameConflict) {
217246
if err = r.updateGatewayProgrammedStatus(ctx, gw, gwv1.GatewayReasonInvalid, "Found multiple VPC Lattice Service Networks matching Gateway name. Either ensure only one Service Network has a matching name, or use the Service Network's id as the Gateway name."); err != nil {
218247
return lattice_runtime.NewRetryError()
@@ -222,7 +251,7 @@ func (r *gatewayReconciler) reconcileUpsert(ctx context.Context, gw *gwv1.Gatewa
222251
return err
223252
}
224253

225-
err = r.updateGatewayProgrammedStatus(ctx, gw, gwv1.GatewayReasonProgrammed, fmt.Sprintf("aws-service-network-arn: %s", *snInfo.SvcNetwork.Arn))
254+
err = r.updateGatewayProgrammedStatus(ctx, gw, gwv1.GatewayReasonProgrammed, fmt.Sprintf("aws-service-network-arn: %s", snStatus.ServiceNetworkARN))
226255
if err != nil {
227256
return err
228257
}

pkg/controllers/gateway_controller_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,3 +701,79 @@ func TestUpdateGWListenerStatus_SupportedKinds(t *testing.T) {
701701
})
702702
}
703703
}
704+
705+
func TestHasSiblingGateway(t *testing.T) {
706+
scheme := runtime.NewScheme()
707+
clientgoscheme.AddToScheme(scheme)
708+
gwv1.Install(scheme)
709+
gwv1alpha2.Install(scheme)
710+
addOptionalCRDs(scheme)
711+
712+
latticeControllerName := gwv1.GatewayController("application-networking.k8s.aws/gateway-api-controller")
713+
gwClass := &gwv1.GatewayClass{
714+
ObjectMeta: metav1.ObjectMeta{Name: "amazon-vpc-lattice"},
715+
Spec: gwv1.GatewayClassSpec{ControllerName: latticeControllerName},
716+
}
717+
718+
makeGW := func(name, ns string, uid types.UID, deleting bool) *gwv1.Gateway {
719+
gw := &gwv1.Gateway{
720+
ObjectMeta: metav1.ObjectMeta{
721+
Name: name, Namespace: ns, UID: uid,
722+
},
723+
Spec: gwv1.GatewaySpec{GatewayClassName: "amazon-vpc-lattice"},
724+
}
725+
if deleting {
726+
now := metav1.Now()
727+
gw.DeletionTimestamp = &now
728+
gw.Finalizers = []string{"test"}
729+
}
730+
return gw
731+
}
732+
733+
tests := []struct {
734+
name string
735+
gw *gwv1.Gateway
736+
others []*gwv1.Gateway
737+
expected bool
738+
}{
739+
{
740+
name: "no sibling — only gateway with this name",
741+
gw: makeGW("my-network", "ns1", "uid-1", false),
742+
others: nil,
743+
expected: false,
744+
},
745+
{
746+
name: "has sibling — same name different namespace",
747+
gw: makeGW("my-network", "ns1", "uid-1", false),
748+
others: []*gwv1.Gateway{makeGW("my-network", "ns2", "uid-2", false)},
749+
expected: true,
750+
},
751+
{
752+
name: "sibling being deleted — not counted",
753+
gw: makeGW("my-network", "ns1", "uid-1", false),
754+
others: []*gwv1.Gateway{makeGW("my-network", "ns2", "uid-2", true)},
755+
expected: false,
756+
},
757+
{
758+
name: "different name — not a sibling",
759+
gw: makeGW("my-network", "ns1", "uid-1", false),
760+
others: []*gwv1.Gateway{makeGW("other-network", "ns2", "uid-2", false)},
761+
expected: false,
762+
},
763+
}
764+
765+
for _, tt := range tests {
766+
t.Run(tt.name, func(t *testing.T) {
767+
objs := []runtime.Object{gwClass, tt.gw}
768+
for _, o := range tt.others {
769+
objs = append(objs, o)
770+
}
771+
k8sClient := testclient.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(objs...).Build()
772+
773+
r := &gatewayReconciler{client: k8sClient}
774+
result, err := r.hasSiblingGateway(context.Background(), tt.gw)
775+
assert.NoError(t, err)
776+
assert.Equal(t, tt.expected, result)
777+
})
778+
}
779+
}

pkg/deploy/lattice/service_network_manager.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ type ServiceNetworkManager interface {
2727
DeleteVpcAssociation(ctx context.Context, snName string) error
2828

2929
CreateOrUpdate(ctx context.Context, serviceNetwork *model.ServiceNetwork) (model.ServiceNetworkStatus, error)
30+
Delete(ctx context.Context, snName string) error
3031
}
3132

3233
func NewDefaultServiceNetworkManager(log gwlog.Logger, cloud pkg_aws.Cloud) *defaultServiceNetworkManager {
@@ -172,6 +173,52 @@ func (m *defaultServiceNetworkManager) DeleteVpcAssociation(ctx context.Context,
172173
return nil
173174
}
174175

176+
func (m *defaultServiceNetworkManager) Delete(ctx context.Context, snName string) error {
177+
sn, err := m.cloud.Lattice().FindServiceNetwork(ctx, snName)
178+
if err != nil {
179+
if services.IsNotFoundError(err) {
180+
return nil
181+
}
182+
return err
183+
}
184+
185+
snArn := aws.StringValue(sn.SvcNetwork.Arn)
186+
owned, err := m.cloud.IsArnManaged(ctx, snArn)
187+
if err != nil {
188+
m.log.Warnf(ctx, "cannot check ownership of ServiceNetwork %s: %s, skipping deletion", snName, err)
189+
return nil
190+
}
191+
if !owned {
192+
m.log.Infof(ctx, "ServiceNetwork %s not owned by controller, skipping deletion", snName)
193+
return nil
194+
}
195+
196+
assocs, err := m.cloud.Lattice().ListServiceNetworkServiceAssociationsAsList(ctx,
197+
&vpclattice.ListServiceNetworkServiceAssociationsInput{
198+
ServiceNetworkIdentifier: sn.SvcNetwork.Id,
199+
})
200+
if err != nil {
201+
return fmt.Errorf("failed to list service associations for ServiceNetwork %s: %w", snName, err)
202+
}
203+
if len(assocs) > 0 {
204+
return fmt.Errorf("cannot delete ServiceNetwork %s: %d service association(s) still active, "+
205+
"detach all services before deleting the Gateway", snName, len(assocs))
206+
}
207+
208+
if err := m.DeleteVpcAssociation(ctx, snName); err != nil {
209+
return err
210+
}
211+
212+
_, err = m.cloud.Lattice().DeleteServiceNetworkWithContext(ctx, &vpclattice.DeleteServiceNetworkInput{
213+
ServiceNetworkIdentifier: sn.SvcNetwork.Id,
214+
})
215+
if err != nil {
216+
return err
217+
}
218+
m.log.Infof(ctx, "Deleted ServiceNetwork %s", snName)
219+
return nil
220+
}
221+
175222
func (m *defaultServiceNetworkManager) getActiveVpcAssociation(ctx context.Context, serviceNetworkId string) (*vpclattice.ServiceNetworkVpcAssociationSummary, error) {
176223
vpcLatticeSess := m.cloud.Lattice()
177224
associationStatusInput := vpclattice.ListServiceNetworkVpcAssociationsInput{

pkg/deploy/lattice/service_network_manager_mock.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)