From ad7aaecc87907dc66f9083c635ec27e23eca43ca Mon Sep 17 00:00:00 2001 From: Jakub Koterba Date: Wed, 8 Apr 2026 10:02:21 +0200 Subject: [PATCH] sync logic rewrite + tests and constants --- api/v4/postgrescluster_types.go | 3 +- .../postgrescluster_controller_test.go | 2 +- .../postgresdatabase_controller_test.go | 225 +++++-- pkg/postgresql/cluster/core/cluster.go | 556 +++++++++++++++--- .../cluster/core/cluster_unit_test.go | 135 +++++ pkg/postgresql/cluster/core/types.go | 50 +- .../cluster/core/types/constants/state.go | 41 ++ pkg/postgresql/database/core/database.go | 133 +++-- .../database/core/database_unit_test.go | 163 ++++- pkg/postgresql/database/core/types.go | 3 +- 10 files changed, 1094 insertions(+), 217 deletions(-) create mode 100644 pkg/postgresql/cluster/core/types/constants/state.go diff --git a/api/v4/postgrescluster_types.go b/api/v4/postgrescluster_types.go index 3e3dd0da7..5adc91f13 100644 --- a/api/v4/postgrescluster_types.go +++ b/api/v4/postgrescluster_types.go @@ -36,8 +36,7 @@ type ManagedRole struct { // Exists controls whether the role should be present (true) or absent (false) in PostgreSQL. // +kubebuilder:default=true - // +optional - Exists bool `json:"exists,omitempty"` + Exists bool `json:"exists"` } // PostgresClusterSpec defines the desired state of PostgresCluster. diff --git a/internal/controller/postgrescluster_controller_test.go b/internal/controller/postgrescluster_controller_test.go index ea4d66f64..5687ae1f8 100644 --- a/internal/controller/postgrescluster_controller_test.go +++ b/internal/controller/postgrescluster_controller_test.go @@ -51,7 +51,7 @@ import ( * PC-09 ignores no-op updates */ -var _ = Describe("PostgresCluster Controller", func() { +var _ = Describe("PostgresCluster Controller", Label("postgres"), func() { const ( postgresVersion = "15.10" diff --git a/internal/controller/postgresdatabase_controller_test.go b/internal/controller/postgresdatabase_controller_test.go index a1f5ed9ba..31f591573 100644 --- a/internal/controller/postgresdatabase_controller_test.go +++ b/internal/controller/postgresdatabase_controller_test.go @@ -40,6 +40,44 @@ import ( const postgresDatabaseFinalizer = "postgresdatabases.enterprise.splunk.com/finalizer" +// condition types +const ( + condClusterReady = "ClusterReady" + condSecretsReady = "SecretsReady" + condConfigMapsReady = "ConfigMapsReady" + condRolesReady = "RolesReady" + condDatabasesReady = "DatabasesReady" + condPrivilegesReady = "PrivilegesReady" +) + +// condition reasons +const ( + reasonClusterNotFound = "ClusterNotFound" + reasonClusterAvailable = "ClusterAvailable" + reasonSecretsCreated = "SecretsCreated" + reasonConfigMapsCreated = "ConfigMapsCreated" + reasonUsersAvailable = "UsersAvailable" + reasonDatabasesAvailable = "DatabasesAvailable" + reasonRoleConflict = "RoleConflict" +) + +// phases +const ( + phasePending = "Pending" + phaseReady = "Ready" + phaseFailed = "Failed" +) + +// annotations +const retainedFromAnnotation = "enterprise.splunk.com/retained-from" + +// database names used across tests +const ( + dbAppdb = "appdb" + dbKeepdb = "payments" + dbDropdb = "analytics" +) + func reconcilePostgresDatabase(ctx context.Context, nn types.NamespacedName) (ctrl.Result, error) { reconciler := &PostgresDatabaseReconciler{ Client: k8sClient, @@ -57,12 +95,20 @@ func managedRoleNames(roles []enterprisev4.ManagedRole) []string { return names } -func adminRoleNameForTest(dbName string) string { - return dbName + "_admin" -} +func adminRoleNameForTest(dbName string) string { return 
dbName + "_admin" } +func rwRoleNameForTest(dbName string) string { return dbName + "_rw" } -func rwRoleNameForTest(dbName string) string { - return dbName + "_rw" +func adminSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-admin", resourceName, dbName) +} +func rwSecretNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-rw", resourceName, dbName) +} +func configMapNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s-config", resourceName, dbName) +} +func cnpgDatabaseNameForTest(resourceName, dbName string) string { + return fmt.Sprintf("%s-%s", resourceName, dbName) } func ownedByPostgresDatabase(postgresDB *enterprisev4.PostgresDatabase) []metav1.OwnerReference { @@ -208,17 +254,17 @@ func seedExistingDatabaseStatus(ctx context.Context, current *enterprisev4.Postg func expectProvisionedArtifacts(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) { adminSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-admin", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: adminSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, adminSecret)).To(Succeed()) Expect(adminSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(adminSecret, owner)).To(BeTrue()) rwSecret := &corev1.Secret{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-rw", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: rwSecretNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, rwSecret)).To(Succeed()) Expect(rwSecret.Data).To(HaveKey("password")) Expect(metav1.IsControlledBy(rwSecret, owner)).To(BeTrue()) configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("rw-host", "tenant-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("ro-host", "tenant-ro."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("admin-user", adminRoleNameForTest(scenario.dbName))) @@ -234,7 +280,7 @@ func expectManagedRolesPatched(ctx context.Context, scenario readyClusterScenari func expectCNPGDatabaseCreated(ctx context.Context, scenario readyClusterScenario, owner *enterprisev4.PostgresDatabase) *cnpgv1.Database { cnpgDatabase := &cnpgv1.Database{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, cnpgDatabase)).To(Succeed()) Expect(cnpgDatabase.Spec.Name).To(Equal(scenario.dbName)) Expect(cnpgDatabase.Spec.Owner).To(Equal(adminRoleNameForTest(scenario.dbName))) Expect(cnpgDatabase.Spec.ClusterRef.Name).To(Equal(scenario.cnpgClusterName)) @@ -250,18 +296,18 
@@ func markCNPGDatabaseApplied(ctx context.Context, cnpgDatabase *cnpgv1.Database) func expectPoolerConfigMap(ctx context.Context, scenario readyClusterScenario) { configMap := &corev1.ConfigMap{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: fmt.Sprintf("%s-%s-config", scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest(scenario.resourceName, scenario.dbName), Namespace: scenario.namespace}, configMap)).To(Succeed()) Expect(configMap.Data).To(HaveKeyWithValue("pooler-rw-host", scenario.cnpgClusterName+"-pooler-rw."+scenario.namespace+".svc.cluster.local")) Expect(configMap.Data).To(HaveKeyWithValue("pooler-ro-host", scenario.cnpgClusterName+"-pooler-ro."+scenario.namespace+".svc.cluster.local")) } func seedMissingClusterScenario(ctx context.Context, namespace, resourceName string, finalizers ...string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, finalizers...) + createPostgresDatabaseResource(ctx, namespace, resourceName, "absent-cluster", []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, finalizers...) return types.NamespacedName{Name: resourceName, Namespace: namespace} } func seedConflictScenario(ctx context.Context, namespace, resourceName, clusterName string) types.NamespacedName { - createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: "appdb"}}, postgresDatabaseFinalizer) + createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{{Name: dbAppdb}}, postgresDatabaseFinalizer) postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) markPostgresClusterReady(ctx, postgresCluster, "unused-cnpg", namespace, false) return types.NamespacedName{Name: resourceName, Namespace: namespace} @@ -272,7 +318,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl for _, dbName := range dbNames { Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-admin", resourceName, dbName), + Name: adminSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -280,7 +326,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-rw", resourceName, dbName), + Name: rwSecretNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -288,7 +334,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-config", resourceName, dbName), + Name: configMapNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -296,7 +342,7 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl Expect(k8sClient.Create(ctx, &cnpgv1.Database{ ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", resourceName, dbName), + Name: cnpgDatabaseNameForTest(resourceName, dbName), Namespace: namespace, OwnerReferences: ownerReferences, }, @@ -309,9 +355,18 @@ func seedOwnedDatabaseArtifacts(ctx context.Context, namespace, resourceName, cl } } +func expectManagedRoleExists(cluster 
*enterprisev4.PostgresCluster, roleName string, exists bool) { + rolesByName := make(map[string]enterprisev4.ManagedRole, len(cluster.Spec.ManagedRoles)) + for _, r := range cluster.Spec.ManagedRoles { + rolesByName[r.Name] = r + } + Expect(rolesByName).To(HaveKey(roleName)) + Expect(rolesByName[roleName].Exists).To(Equal(exists), "role %s: expected Exists=%v", roleName, exists) +} + func expectRetainedArtifact(ctx context.Context, name, namespace, resourceName string, obj client.Object) { Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name, Namespace: namespace}, obj)).To(Succeed()) - Expect(obj.GetAnnotations()).To(HaveKeyWithValue("enterprise.splunk.com/retained-from", resourceName)) + Expect(obj.GetAnnotations()).To(HaveKeyWithValue(retainedFromAnnotation, resourceName)) Expect(obj.GetOwnerReferences()).To(BeEmpty()) } @@ -333,7 +388,7 @@ func expectStatusCondition(current *enterprisev4.PostgresDatabase, conditionType } func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, expectedDatabase enterprisev4.DatabaseInfo) { - expectStatusPhase(current, "Ready") + expectStatusPhase(current, phaseReady) Expect(current.Status.ObservedGeneration).NotTo(BeNil()) Expect(*current.Status.ObservedGeneration).To(Equal(generation)) Expect(current.Status.Databases).To(HaveLen(1)) @@ -344,7 +399,7 @@ func expectReadyStatus(current *enterprisev4.PostgresDatabase, generation int64, Expect(current.Status.Databases[0].ConfigMapRef).NotTo(BeNil()) } -var _ = Describe("PostgresDatabase Controller", func() { +var _ = Describe("PostgresDatabase Controller", Label("postgres"), func() { var ( ctx context.Context namespace string @@ -384,9 +439,9 @@ var _ = Describe("PostgresDatabase Controller", func() { expectReconcileResult(result, err, 30*time.Second) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Pending") - expectStatusCondition(current, "ClusterReady", metav1.ConditionFalse, "ClusterNotFound") - clusterReady := meta.FindStatusCondition(current.Status.Conditions, "ClusterReady") + expectStatusPhase(current, phasePending) + expectStatusCondition(current, condClusterReady, metav1.ConditionFalse, reasonClusterNotFound) + clusterReady := meta.FindStatusCondition(current.Status.Conditions, condClusterReady) Expect(clusterReady.ObservedGeneration).To(Equal(current.Generation)) }) }) @@ -395,7 +450,7 @@ var _ = Describe("PostgresDatabase Controller", func() { When("the referenced PostgresCluster is ready", func() { Context("and live grants are not invoked", func() { It("reconciles secrets, configmaps, roles, and CNPG databases", func() { - scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "ready-cluster", "tenant-cluster", "tenant-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, false) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -419,18 +474,18 @@ var _ = Describe("PostgresDatabase Controller", func() { current = fetchPostgresDatabase(ctx, scenario.requestName) expectReadyStatus(current, current.Generation, enterprisev4.DatabaseInfo{Name: scenario.dbName, Ready: true}) - expectStatusCondition(current, "ClusterReady", metav1.ConditionTrue, "ClusterAvailable") - expectStatusCondition(current, "SecretsReady", metav1.ConditionTrue, "SecretsCreated") - expectStatusCondition(current, "ConfigMapsReady", metav1.ConditionTrue, "ConfigMapsCreated") - expectStatusCondition(current, "RolesReady", metav1.ConditionTrue, 
"UsersAvailable") - expectStatusCondition(current, "DatabasesReady", metav1.ConditionTrue, "DatabasesAvailable") - Expect(meta.FindStatusCondition(current.Status.Conditions, "PrivilegesReady")).To(BeNil()) + expectStatusCondition(current, condClusterReady, metav1.ConditionTrue, reasonClusterAvailable) + expectStatusCondition(current, condSecretsReady, metav1.ConditionTrue, reasonSecretsCreated) + expectStatusCondition(current, condConfigMapsReady, metav1.ConditionTrue, reasonConfigMapsCreated) + expectStatusCondition(current, condRolesReady, metav1.ConditionTrue, reasonUsersAvailable) + expectStatusCondition(current, condDatabasesReady, metav1.ConditionTrue, reasonDatabasesAvailable) + Expect(meta.FindStatusCondition(current.Status.Conditions, condPrivilegesReady)).To(BeNil()) }) }) Context("and connection pooling is enabled", func() { It("adds pooler endpoints to the generated ConfigMap", func() { - scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", "appdb") + scenario := newReadyClusterScenario(namespace, "pooler-cluster", "pooler-postgres", "pooler-cnpg", dbAppdb) seedReadyClusterScenario(ctx, scenario, true) result, err := reconcilePostgresDatabase(ctx, scenario.requestName) @@ -462,8 +517,8 @@ var _ = Describe("PostgresDatabase Controller", func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "appdb_admin", "exists": true}, - {"name": "appdb_rw", "exists": true}, + {"name": adminRoleNameForTest(dbAppdb), "exists": true}, + {"name": rwRoleNameForTest(dbAppdb), "exists": true}, }, }, }, @@ -476,23 +531,79 @@ var _ = Describe("PostgresDatabase Controller", func() { Expect(result).To(Equal(ctrl.Result{})) current := fetchPostgresDatabase(ctx, requestName) - expectStatusPhase(current, "Failed") - expectStatusCondition(current, "RolesReady", metav1.ConditionFalse, "RoleConflict") + expectStatusPhase(current, phaseFailed) + expectStatusCondition(current, condRolesReady, metav1.ConditionFalse, reasonRoleConflict) - rolesReady := meta.FindStatusCondition(current.Status.Conditions, "RolesReady") - Expect(rolesReady.Message).To(ContainSubstring("appdb_admin")) + rolesReady := meta.FindStatusCondition(current.Status.Conditions, condRolesReady) + Expect(rolesReady.Message).To(ContainSubstring(adminRoleNameForTest(dbAppdb))) Expect(rolesReady.Message).To(ContainSubstring("postgresdatabase-legacy")) configMap := &corev1.ConfigMap{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb-config", Namespace: namespace}, configMap) + err = k8sClient.Get(ctx, types.NamespacedName{Name: configMapNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, configMap) Expect(apierrors.IsNotFound(err)).To(BeTrue()) cnpgDatabase := &cnpgv1.Database{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: "conflict-cluster-appdb", Namespace: namespace}, cnpgDatabase) + err = k8sClient.Get(ctx, types.NamespacedName{Name: cnpgDatabaseNameForTest("conflict-cluster", dbAppdb), Namespace: namespace}, cnpgDatabase) Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) }) + When("a database is removed from spec.databases while the CR stays alive", func() { + It("marks the removed database roles as absent in postgres cluster and keeps the retained roles present", func() { + resourceName := "live-db-removal" + clusterName := "live-db-removal-postgres" + cnpgClusterName := "live-db-removal-cnpg" + requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} + + postgresDB := 
createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ + {Name: dbKeepdb}, + {Name: dbDropdb}, + }, postgresDatabaseFinalizer) + Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) + + postgresCluster := createPostgresClusterResource(ctx, namespace, clusterName) + markPostgresClusterReady(ctx, postgresCluster, cnpgClusterName, namespace, false) + cnpgCluster := createCNPGClusterResource(ctx, namespace, cnpgClusterName) + markCNPGClusterReady(ctx, cnpgCluster, []string{ + adminRoleNameForTest(dbKeepdb), rwRoleNameForTest(dbKeepdb), + adminRoleNameForTest(dbDropdb), rwRoleNameForTest(dbDropdb), + }, "tenant-rw", "tenant-ro") + + initialRolesPatch := &unstructured.Unstructured{ + Object: map[string]any{ + "apiVersion": enterprisev4.GroupVersion.String(), + "kind": "PostgresCluster", + "metadata": map[string]any{"name": clusterName, "namespace": namespace}, + "spec": map[string]any{ + "managedRoles": []map[string]any{ + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, + }, + }, + }, + } + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) + + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) + + postgresDB.Spec.Databases = []enterprisev4.DatabaseDefinition{{Name: dbKeepdb}} + Expect(k8sClient.Update(ctx, postgresDB)).To(Succeed()) + + result, err := reconcilePostgresDatabase(ctx, requestName) + expectReconcileResult(result, err, 15*time.Second) + + updatedCluster := &enterprisev4.PostgresCluster{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) + + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) + }) + }) + When("the PostgresDatabase is being deleted", func() { Context("with retained and deleted databases", func() { It("orphans retained resources, removes deleted resources, and patches managed roles", func() { @@ -501,8 +612,8 @@ var _ = Describe("PostgresDatabase Controller", func() { requestName := types.NamespacedName{Name: resourceName, Namespace: namespace} postgresDB := createPostgresDatabaseResource(ctx, namespace, resourceName, clusterName, []enterprisev4.DatabaseDefinition{ - {Name: "keepdb", DeletionPolicy: "Retain"}, - {Name: "dropdb"}, + {Name: dbKeepdb, DeletionPolicy: "Retain"}, + {Name: dbDropdb}, }, postgresDatabaseFinalizer) Expect(k8sClient.Get(ctx, requestName, postgresDB)).To(Succeed()) @@ -518,36 +629,40 @@ var _ = Describe("PostgresDatabase Controller", func() { }, "spec": map[string]any{ "managedRoles": []map[string]any{ - {"name": "keepdb_admin", 
"exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-admin", "key": "password"}}, - {"name": "keepdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-keepdb-rw", "key": "password"}}, - {"name": "dropdb_admin", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-admin", "key": "password"}}, - {"name": "dropdb_rw", "exists": true, "passwordSecretRef": map[string]any{"name": "delete-cluster-dropdb-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbKeepdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbKeepdb + "-rw", "key": "password"}}, + {"name": adminRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-admin", "key": "password"}}, + {"name": rwRoleNameForTest(dbDropdb), "exists": true, "passwordSecretRef": map[string]any{"name": resourceName + "-" + dbDropdb + "-rw", "key": "password"}}, }, }, }, } - Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-delete-cluster"))).To(Succeed()) + Expect(k8sClient.Patch(ctx, initialRolesPatch, client.Apply, client.FieldOwner("postgresdatabase-"+resourceName))).To(Succeed()) - seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, "keepdb", "dropdb") + seedOwnedDatabaseArtifacts(ctx, namespace, resourceName, clusterName, postgresDB, dbKeepdb, dbDropdb) Expect(k8sClient.Delete(ctx, postgresDB)).To(Succeed()) result, err := reconcilePostgresDatabase(ctx, requestName) expectEmptyReconcileResult(result, err) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-config", namespace, resourceName, &corev1.ConfigMap{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-admin", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb-rw", namespace, resourceName, &corev1.Secret{}) - expectRetainedArtifact(ctx, "delete-cluster-keepdb", namespace, resourceName, &cnpgv1.Database{}) + expectRetainedArtifact(ctx, configMapNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.ConfigMap{}) + expectRetainedArtifact(ctx, adminSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, rwSecretNameForTest(resourceName, dbKeepdb), namespace, resourceName, &corev1.Secret{}) + expectRetainedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbKeepdb), namespace, resourceName, &cnpgv1.Database{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-config", namespace, &corev1.ConfigMap{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-admin", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb-rw", namespace, &corev1.Secret{}) - expectDeletedArtifact(ctx, "delete-cluster-dropdb", namespace, &cnpgv1.Database{}) + expectDeletedArtifact(ctx, configMapNameForTest(resourceName, dbDropdb), namespace, &corev1.ConfigMap{}) + expectDeletedArtifact(ctx, adminSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, rwSecretNameForTest(resourceName, dbDropdb), namespace, &corev1.Secret{}) + expectDeletedArtifact(ctx, cnpgDatabaseNameForTest(resourceName, dbDropdb), namespace, &cnpgv1.Database{}) updatedCluster := &enterprisev4.PostgresCluster{} 
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: clusterName, Namespace: namespace}, updatedCluster)).To(Succeed()) - Expect(managedRoleNames(updatedCluster.Spec.ManagedRoles)).To(ConsistOf("keepdb_admin", "keepdb_rw")) + + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbKeepdb), true) + expectManagedRoleExists(updatedCluster, adminRoleNameForTest(dbDropdb), false) + expectManagedRoleExists(updatedCluster, rwRoleNameForTest(dbDropdb), false) current := &enterprisev4.PostgresDatabase{} err = k8sClient.Get(ctx, requestName, current) diff --git a/pkg/postgresql/cluster/core/cluster.go b/pkg/postgresql/cluster/core/cluster.go index 3334011c6..81a928595 100644 --- a/pkg/postgresql/cluster/core/cluster.go +++ b/pkg/postgresql/cluster/core/cluster.go @@ -24,6 +24,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" password "github.com/sethvargo/go-password/password" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -285,6 +286,7 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. // Reconcile Connection Pooler. poolerEnabled = mergedConfig.Spec.ConnectionPoolerEnabled != nil && *mergedConfig.Spec.ConnectionPoolerEnabled + poolerConfigPresent := mergedConfig.CNPG != nil && mergedConfig.CNPG.ConnectionPooler != nil rwPoolerExists, err := poolerExists(ctx, c, postgresCluster, readWriteEndpoint) if err != nil { @@ -445,45 +447,475 @@ func PostgresClusterService(ctx context.Context, rc *ReconcileContext, req ctrl. } } - // Final status sync. - var oldPhase string - if postgresCluster.Status.Phase != nil { - oldPhase = *postgresCluster.Status.Phase + // Aggregate component readiness from iterative health checks. 
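+	// Each check contributes one State bit; the bits are OR-ed together and the
+	// cluster is considered Ready only once the accumulated mask covers every
+	// bit in pgcConstants.ComponentsReady.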
+	state := pgcConstants.EmptyState
+	conditions := []clusterReadynessCheck{
+		newProvisionerHealthCheck(postgresCluster, cnpgCluster),
+		newPoolerHealthCheck(c, postgresCluster, poolerEnabled, poolerConfigPresent),
+		newConfigMapHealthCheck(c, postgresCluster),
+		newSecretHealthCheck(c, postgresCluster),
 	}
-	if err := syncStatus(ctx, c, postgresCluster, cnpgCluster); err != nil {
-		logger.Error(err, "Failed to sync status")
-		if apierrors.IsConflict(err) {
-			logger.Info("Conflict during status update, will requeue")
-			return ctrl.Result{Requeue: true}, nil
+
+	for _, check := range conditions {
+		componentHealth, err := check.Condition(ctx)
+		if err != nil {
+			if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil {
+				if apierrors.IsConflict(statusErr) {
+					return ctrl.Result{Requeue: true}, nil
+				}
+				return ctrl.Result{}, statusErr
+			}
+			logger.Error(err, "Component health check reported issues",
+				"component", check,
+				"requeueAfter", componentHealth.Result.RequeueAfter,
+				"reason", componentHealth.Reason)
+			return componentHealth.Result, err
+		}
+
+		if isPendingState(componentHealth.State) {
+			if statusErr := updateStatus(componentHealth.Condition, metav1.ConditionFalse, componentHealth.Reason, componentHealth.Message, componentHealth.Phase); statusErr != nil {
+				if apierrors.IsConflict(statusErr) {
+					return ctrl.Result{Requeue: true}, nil
+				}
+				return ctrl.Result{}, statusErr
+			}
+			return componentHealth.Result, nil
 		}
-		return ctrl.Result{}, fmt.Errorf("failed to sync status: %w", err)
+		state |= componentHealth.State
 	}
-	var newPhase string
-	if postgresCluster.Status.Phase != nil {
-		newPhase = *postgresCluster.Status.Phase
+
+	if state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady {
+		logger.Info("Reconciliation complete")
+		if err := updateStatus(clusterReady, metav1.ConditionTrue, reasonCNPGClusterHealthy, msgAllComponentsReady, readyClusterPhase); err != nil {
+			if apierrors.IsConflict(err) {
+				return ctrl.Result{Requeue: true}, nil
+			}
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{}, nil
 	}
-	rc.emitClusterPhaseTransition(postgresCluster, oldPhase, newPhase)
-	if cnpgCluster.Status.Phase == cnpgv1.PhaseHealthy {
-		rwPooler := &cnpgv1.Pooler{}
-		rwErr := c.Get(ctx, types.NamespacedName{
-			Name:      poolerResourceName(postgresCluster.Name, readWriteEndpoint),
-			Namespace: postgresCluster.Namespace,
-		}, rwPooler)
-		roPooler := &cnpgv1.Pooler{}
-		roErr := c.Get(ctx, types.NamespacedName{
-			Name:      poolerResourceName(postgresCluster.Name, readOnlyEndpoint),
-			Namespace: postgresCluster.Namespace,
-		}, roPooler)
-		if rwErr == nil && roErr == nil && arePoolersReady(rwPooler, roPooler) {
-			logger.Info("Poolers ready, syncing status")
-			poolerOldConditions := make([]metav1.Condition, len(postgresCluster.Status.Conditions))
-			copy(poolerOldConditions, postgresCluster.Status.Conditions)
-			_ = syncPoolerStatus(ctx, c, postgresCluster)
-			rc.emitPoolerReadyTransition(postgresCluster, poolerOldConditions)
+	return ctrl.Result{RequeueAfter: retryDelay}, nil
+}
+
+// StateInformationDto reports the outcome of a single readiness check; free to
+// relocate to a dedicated package alongside the p&a work.
+type StateInformationDto struct {
+	State     pgcConstants.State
+	Condition conditionTypes
+	Reason    conditionReasons
+	Message   string
+	Phase     reconcileClusterPhases
+	Result    ctrl.Result
+}
+
+// clusterReadynessCheck is a single, extractable unit of readiness-probing work.
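+// Implementations signal "not ready yet" through a pending State plus a Result
+// with RequeueAfter set, and reserve the error return for failures the
+// reconciler should surface.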
+type clusterReadynessCheck interface { + Condition(ctx context.Context) (StateInformationDto, error) +} + +type provisionerHealthCheck struct { + cluster *enterprisev4.PostgresCluster + cnpgCluster *cnpgv1.Cluster +} + +func newProvisionerHealthCheck(cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) *provisionerHealthCheck { + return &provisionerHealthCheck{cluster: cluster, cnpgCluster: cnpgCluster} +} + +func (c *provisionerHealthCheck) Condition(_ context.Context) (StateInformationDto, error) { + if c.cnpgCluster != nil { + c.cluster.Status.ProvisionerRef = &corev1.ObjectReference{ + APIVersion: "postgresql.cnpg.io/v1", + Kind: "Cluster", + Namespace: c.cnpgCluster.Namespace, + Name: c.cnpgCluster.Name, + UID: c.cnpgCluster.UID, } } - logger.Info("Reconciliation complete") - return ctrl.Result{}, nil + + info := StateInformationDto{Condition: clusterReady} + + if c.cnpgCluster == nil { + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } + + switch c.cnpgCluster.Status.Phase { + case cnpgv1.PhaseHealthy: + info.State = pgcConstants.ProvisionerReady + info.Reason = reasonCNPGClusterHealthy + info.Message = msgProvisionerHealthy + info.Phase = readyClusterPhase + return info, nil + case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGProvisioning, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseSwitchover: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGSwitchover + info.Message = msgCNPGSwitchover + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseFailOver: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGFailingOver + info.Message = msgCNPGFailingOver + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGRestarting + info.Message = fmt.Sprintf(msgFmtCNPGRestarting, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGUpgrading + info.Message = fmt.Sprintf(msgFmtCNPGUpgrading, c.cnpgCluster.Status.Phase) + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseApplyingConfiguration: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGApplyingConfig + info.Message = msgCNPGApplyingConfiguration + info.Phase = configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseReplicaClusterPromotion: + info.State = pgcConstants.ProvisionerConfiguring + info.Reason = reasonCNPGPromoting + info.Message = msgCNPGPromoting + info.Phase = 
configuringClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + case cnpgv1.PhaseWaitingForUser: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGWaitingForUser + info.Message = msgCNPGWaitingForUser + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner requires user action") + case cnpgv1.PhaseUnrecoverable: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGUnrecoverable + info.Message = msgCNPGUnrecoverable + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner unrecoverable") + case cnpgv1.PhaseCannotCreateClusterObjects: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGProvisioningFailed + info.Message = msgCNPGCannotCreateObjects + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner cannot create cluster objects") + case cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGPluginError + info.Message = fmt.Sprintf(msgFmtCNPGPluginError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner plugin error") + case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: + info.State = pgcConstants.ProvisionerFailed + info.Reason = reasonCNPGImageError + info.Message = fmt.Sprintf(msgFmtCNPGImageError, c.cnpgCluster.Status.Phase) + info.Phase = failedClusterPhase + return info, fmt.Errorf("provisioner image error") + case "": + info.State = pgcConstants.ProvisionerPending + info.Reason = reasonCNPGProvisioning + info.Message = msgCNPGPendingCreation + info.Phase = pendingClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + default: + info.State = pgcConstants.ProvisionerProvisioning + info.Reason = reasonCNPGProvisioning + info.Message = fmt.Sprintf(msgFmtCNPGClusterPhase, c.cnpgCluster.Status.Phase) + info.Phase = provisioningClusterPhase + info.Result = ctrl.Result{RequeueAfter: retryDelay} + return info, nil + } +} + +type poolerHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster + poolerEnabled bool + poolerConfigPresent bool +} + +func newPoolerHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster, poolerEnabled bool, poolerConfigPresent bool) *poolerHealthCheck { + return &poolerHealthCheck{ + client: c, + cluster: cluster, + poolerEnabled: poolerEnabled, + poolerConfigPresent: poolerConfigPresent, + } +} + +func (p *poolerHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if !p.poolerEnabled { + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolerDisabled, + Phase: readyClusterPhase, + }, nil + } + if !p.poolerConfigPresent { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerConfigMissing, + Message: msgPoolerConfigMissing, + Phase: failedClusterPhase, + }, fmt.Errorf("pooler config missing") + } + + // TODO: Port material. 
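+	// The poolerExists calls below repeat checks already performed in
+	// PostgresClusterService; they are duplicated so this check remains
+	// self-contained when extracted.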
+ rwExists, err := poolerExists(ctx, p.client, p.cluster, readWriteEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RW pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + roExists, err := poolerExists(ctx, p.client, p.cluster, readOnlyEndpoint) + if err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerFailed, + Condition: poolerReady, + Reason: reasonPoolerReconciliationFailed, + Message: fmt.Sprintf("Failed to check RO pooler existence: %v", err), + Phase: failedClusterPhase, + }, err + } + if !rwExists || !roExists { + return StateInformationDto{ + State: pgcConstants.PoolerProvisioning, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersProvisioning, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + rwPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readWriteEndpoint), + Namespace: p.cluster.Namespace, + }, rwPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitRWPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + roPooler := &cnpgv1.Pooler{} + if err := p.client.Get(ctx, types.NamespacedName{ + Name: poolerResourceName(p.cluster.Name, readOnlyEndpoint), + Namespace: p.cluster.Namespace, + }, roPooler); err != nil { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgWaitROPoolerObject, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + if !arePoolersReady(rwPooler, roPooler) { + return StateInformationDto{ + State: pgcConstants.PoolerPending, + Condition: poolerReady, + Reason: reasonPoolerCreating, + Message: msgPoolersNotReady, + Phase: pendingClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + return StateInformationDto{ + State: pgcConstants.PoolerReady, + Condition: poolerReady, + Reason: reasonAllInstancesReady, + Message: msgPoolersReady, + Phase: readyClusterPhase, + }, nil +} + +type configMapHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newConfigMapHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *configMapHealthCheck { + return &configMapHealthCheck{client: c, cluster: cluster} +} + +func (c *configMapHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if c.cluster.Status.Resources == nil || c.cluster.Status.Resources.ConfigMapRef == nil { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + cm := &corev1.ConfigMap{} + key := types.NamespacedName{Name: c.cluster.Status.Resources.ConfigMapRef.Name, Namespace: c.cluster.Namespace} + if err := c.client.Get(ctx, key, cm); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.ConfigMapProvisioning, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: msgConfigMapNotFoundYet, + Phase: 
provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf("Failed to fetch ConfigMap: %v", err), + Phase: failedClusterPhase, + }, err + } + + requiredKeys := []string{ + configKeyClusterRWEndpoint, + configKeyClusterROEndpoint, + configKeyDefaultClusterPort, + configKeySuperUserSecretRef, + } + for _, requiredKey := range requiredKeys { + if _, ok := cm.Data[requiredKey]; !ok { + return StateInformationDto{ + State: pgcConstants.ConfigMapFailed, + Condition: clusterReady, + Reason: reasonConfigMapFailed, + Message: fmt.Sprintf(msgFmtConfigMapMissingRequiredKey, requiredKey), + Phase: failedClusterPhase, + }, fmt.Errorf("configmap missing key %s", requiredKey) + } + } + + return StateInformationDto{ + State: pgcConstants.ConfigMapReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgAccessConfigMapReady, + Phase: readyClusterPhase, + }, nil +} + +type secretHealthCheck struct { + client client.Client + cluster *enterprisev4.PostgresCluster +} + +func newSecretHealthCheck(c client.Client, cluster *enterprisev4.PostgresCluster) *secretHealthCheck { + return &secretHealthCheck{client: c, cluster: cluster} +} + +func (s *secretHealthCheck) Condition(ctx context.Context) (StateInformationDto, error) { + if s.cluster.Status.Resources == nil || s.cluster.Status.Resources.SuperUserSecretRef == nil { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretRefNotPublished, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + + secret := &corev1.Secret{} + key := types.NamespacedName{Name: s.cluster.Status.Resources.SuperUserSecretRef.Name, Namespace: s.cluster.Namespace} + if err := s.client.Get(ctx, key, secret); err != nil { + if apierrors.IsNotFound(err) { + return StateInformationDto{ + State: pgcConstants.SecretProvisioning, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: msgSecretNotFoundYet, + Phase: provisioningClusterPhase, + Result: ctrl.Result{RequeueAfter: retryDelay}, + }, nil + } + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonUserSecretFailed, + Message: fmt.Sprintf("Failed to fetch superuser secret: %v", err), + Phase: failedClusterPhase, + }, err + } + + refKey := s.cluster.Status.Resources.SuperUserSecretRef.Key + if refKey == "" { + refKey = secretKeyPassword + } + if _, ok := secret.Data[refKey]; !ok { + return StateInformationDto{ + State: pgcConstants.SecretFailed, + Condition: clusterReady, + Reason: reasonSuperUserSecretFailed, + Message: fmt.Sprintf(msgFmtSecretMissingKey, refKey), + Phase: failedClusterPhase, + }, fmt.Errorf("secret missing key %s", refKey) + } + + return StateInformationDto{ + State: pgcConstants.SecretReady, + Condition: clusterReady, + Reason: reasonClusterBuildSucceeded, + Message: msgSuperuserSecretReady, + Phase: readyClusterPhase, + }, nil +} + +func isPendingState(state pgcConstants.State) bool { + switch state { + case pgcConstants.PoolerPending, + pgcConstants.PoolerProvisioning, + pgcConstants.PoolerConfiguring, + pgcConstants.ProvisionerPending, + pgcConstants.ProvisionerProvisioning, + pgcConstants.ProvisionerConfiguring, + pgcConstants.ConfigMapPending, + pgcConstants.ConfigMapProvisioning, + 
pgcConstants.ConfigMapConfiguring, + pgcConstants.SecretPending, + pgcConstants.SecretProvisioning, + pgcConstants.SecretConfiguring: + return true + default: + return false + } } // getMergedConfig overlays PostgresCluster spec on top of the class defaults. @@ -606,7 +1038,6 @@ func reconcileManagedRoles(ctx context.Context, c client.Client, cluster *enterp Name: role.Name, Ensure: cnpgv1.EnsureAbsent, } - // Exists bool replaces the old Ensure string enum ("present"/"absent"). if role.Exists { r.Ensure = cnpgv1.EnsurePresent r.Login = true @@ -782,63 +1213,6 @@ func syncPoolerStatus(ctx context.Context, c client.Client, cluster *enterprisev readyClusterPhase) } -// syncStatus maps CNPG Cluster state to PostgresCluster status. -func syncStatus(ctx context.Context, c client.Client, cluster *enterprisev4.PostgresCluster, cnpgCluster *cnpgv1.Cluster) error { - cluster.Status.ProvisionerRef = &corev1.ObjectReference{ - APIVersion: "postgresql.cnpg.io/v1", - Kind: "Cluster", - Namespace: cnpgCluster.Namespace, - Name: cnpgCluster.Name, - UID: cnpgCluster.UID, - } - - var phase reconcileClusterPhases - var condStatus metav1.ConditionStatus - var reason conditionReasons - var message string - - switch cnpgCluster.Status.Phase { - case cnpgv1.PhaseHealthy: - phase, condStatus, reason, message = readyClusterPhase, metav1.ConditionTrue, reasonCNPGClusterHealthy, "Cluster is up and running" - case cnpgv1.PhaseFirstPrimary, cnpgv1.PhaseCreatingReplica, cnpgv1.PhaseWaitingForInstancesToBeActive: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster provisioning: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseSwitchover: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGSwitchover, "Cluster changing primary node" - case cnpgv1.PhaseFailOver: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGFailingOver, "Pod missing, need to change primary" - case cnpgv1.PhaseInplacePrimaryRestart, cnpgv1.PhaseInplaceDeletePrimaryRestart: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGRestarting - message = fmt.Sprintf("CNPG cluster restarting: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseUpgrade, cnpgv1.PhaseMajorUpgrade, cnpgv1.PhaseUpgradeDelayed, cnpgv1.PhaseOnlineUpgrading: - phase, condStatus, reason = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGUpgrading - message = fmt.Sprintf("CNPG cluster upgrading: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseApplyingConfiguration: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGApplyingConfig, "Configuration change is being applied" - case cnpgv1.PhaseReplicaClusterPromotion: - phase, condStatus, reason, message = configuringClusterPhase, metav1.ConditionFalse, reasonCNPGPromoting, "Replica is being promoted to primary" - case cnpgv1.PhaseWaitingForUser: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGWaitingForUser, "Action from the user is required" - case cnpgv1.PhaseUnrecoverable: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGUnrecoverable, "Cluster failed, needs manual intervention" - case cnpgv1.PhaseCannotCreateClusterObjects: - phase, condStatus, reason, message = failedClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioningFailed, "Cluster resources cannot be created" - case 
cnpgv1.PhaseUnknownPlugin, cnpgv1.PhaseFailurePlugin: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGPluginError - message = fmt.Sprintf("CNPG plugin error: %s", cnpgCluster.Status.Phase) - case cnpgv1.PhaseImageCatalogError, cnpgv1.PhaseArchitectureBinaryMissing: - phase, condStatus, reason = failedClusterPhase, metav1.ConditionFalse, reasonCNPGImageError - message = fmt.Sprintf("CNPG image error: %s", cnpgCluster.Status.Phase) - case "": - phase, condStatus, reason, message = pendingClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning, "CNPG cluster is pending creation" - default: - phase, condStatus, reason = provisioningClusterPhase, metav1.ConditionFalse, reasonCNPGProvisioning - message = fmt.Sprintf("CNPG cluster phase: %s", cnpgCluster.Status.Phase) - } - - return setStatus(ctx, c, cluster, clusterReady, condStatus, reason, message, phase) -} - // setStatus sets the phase, condition and persists the status. // It skips the API write when the resulting status is identical to the current // state, avoiding unnecessary etcd churn and ResourceVersion bumps on stable clusters. diff --git a/pkg/postgresql/cluster/core/cluster_unit_test.go b/pkg/postgresql/cluster/core/cluster_unit_test.go index e2466f54b..57ff04daa 100644 --- a/pkg/postgresql/cluster/core/cluster_unit_test.go +++ b/pkg/postgresql/cluster/core/cluster_unit_test.go @@ -6,6 +6,7 @@ import ( cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" + pgcConstants "github.com/splunk/splunk-operator/pkg/postgresql/cluster/core/types/constants" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" @@ -13,11 +14,23 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/utils/ptr" client "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) +type configMapNotFoundClient struct { + client.Client +} + +func (c configMapNotFoundClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + if _, ok := obj.(*corev1.ConfigMap); ok { + return apierrors.NewNotFound(schema.GroupResource{Resource: "configmaps"}, key.Name) + } + return c.Client.Get(ctx, key, obj, opts...) 
+} + func TestPoolerResourceName(t *testing.T) { tests := []struct { name string @@ -1136,3 +1149,125 @@ func TestCreateOrUpdateConnectionPoolers(t *testing.T) { assert.Equal(t, int32(1), *ro.Spec.Instances) }) } + +func TestComponentStateTriggerConditions(t *testing.T) { + t.Parallel() + + ctx := t.Context() + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + + exampleCm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-config", + Namespace: "default", + }, + Data: map[string]string{ + "CLUSTER_RW_ENDPOINT": "pg1-rw.default", + "CLUSTER_RO_ENDPOINT": "pg1-ro.default", + "DEFAULT_CLUSTER_PORT": "5432", + "SUPER_USER_SECRET_REF": "pg1-secret", + }, + } + examplePgCluster := &enterprisev4.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1", + Namespace: "default", + }, + Status: enterprisev4.PostgresClusterStatus{ + Resources: &enterprisev4.PostgresClusterResources{ + ConfigMapRef: &corev1.LocalObjectReference{Name: "pg1-config"}, + SuperUserSecretRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "pg1-secret"}, + Key: "password", + }, + }, + }, + } + exampleSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pg1-secret", + Namespace: "default", + }, + Data: map[string][]byte{ + "password": []byte("s3cr3t"), + }, + } + + // TODO: as soon as coupling is addressed, remove this monster of a test. + combinations := []struct { + name string + componentChecks []clusterReadynessCheck + requeue []bool + expectedResult bool + message string + }{ + { + name: "Provisioner ready, pooler pending, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster.DeepCopy(), &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, true, false), + }, + requeue: []bool{false, false}, + expectedResult: false, + message: "Provisioner is ready but pooler is pending, don't fire", + }, + { + name: "Provisioner ready, pooler ready, configMap failed, sync not successful", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + configMapNotFoundClient{ + Client: fake.NewClientBuilder(). + WithScheme(scheme). + Build(), + }, + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, true}, + expectedResult: false, + message: "Provisioner and pooler ready are not enough when ConfigMap check returns NotFound/pending", + }, + { + name: "Sync successful, all components ready.", + componentChecks: []clusterReadynessCheck{ + newProvisionerHealthCheck(examplePgCluster, &cnpgv1.Cluster{Status: cnpgv1.ClusterStatus{Phase: cnpgv1.PhaseHealthy}}), + newPoolerHealthCheck(nil, nil, false, false), + newConfigMapHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleCm). + Build(), + examplePgCluster.DeepCopy(), + ), + newSecretHealthCheck( + fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(exampleSecret). 
+ Build(), + examplePgCluster.DeepCopy(), + ), + }, + requeue: []bool{false, false, false, false}, + expectedResult: true, + message: "", + }, + } + + for _, tt := range combinations { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + state := pgcConstants.EmptyState + for i, check := range tt.componentChecks { + info, _ := check.Condition(ctx) + state |= info.State + assert.Equal(t, tt.requeue[i], info.Result.RequeueAfter > 0) + } + assert.Equal(t, tt.expectedResult, state&pgcConstants.ComponentsReady == pgcConstants.ComponentsReady, + tt.message) + }) + } +} diff --git a/pkg/postgresql/cluster/core/types.go b/pkg/postgresql/cluster/core/types.go index 042a5ae82..99ea5bcb1 100644 --- a/pkg/postgresql/cluster/core/types.go +++ b/pkg/postgresql/cluster/core/types.go @@ -42,6 +42,7 @@ type MergedConfig struct { type reconcileClusterPhases string type conditionTypes string type conditionReasons string +type statusMessage = string type objectKind string const ( @@ -50,9 +51,17 @@ const ( readOnlyEndpoint string = "ro" readWriteEndpoint string = "rw" - defaultDatabaseName string = "postgres" - superUsername string = "postgres" - defaultPort string = "5432" + defaultDatabaseName string = "postgres" + superUsername string = "postgres" + defaultPort string = "5432" + configKeyClusterRWEndpoint string = "CLUSTER_RW_ENDPOINT" + configKeyClusterROEndpoint string = "CLUSTER_RO_ENDPOINT" + configKeyClusterREndpoint string = "CLUSTER_R_ENDPOINT" + configKeyDefaultClusterPort string = "DEFAULT_CLUSTER_PORT" + configKeySuperUserName string = "SUPER_USER_NAME" + configKeySuperUserSecretRef string = "SUPER_USER_SECRET_REF" + configKeyPoolerRWEndpoint string = "CLUSTER_POOLER_RW_ENDPOINT" + configKeyPoolerROEndpoint string = "CLUSTER_POOLER_RO_ENDPOINT" secretKeyPassword string = "password" defaultSecretSuffix string = "-secret" @@ -111,4 +120,39 @@ const ( reasonCNPGProvisioningFailed conditionReasons = "CNPGProvisioningFailed" reasonCNPGPluginError conditionReasons = "CNPGPluginError" reasonCNPGImageError conditionReasons = "CNPGImageError" + + // status messages — provisioner health check + msgProvisionerHealthy statusMessage = "Provisioner cluster is healthy" + msgCNPGPendingCreation statusMessage = "CNPG cluster is pending creation" + msgFmtCNPGProvisioning statusMessage = "CNPG cluster provisioning: %s" + msgCNPGSwitchover statusMessage = "Cluster changing primary node" + msgCNPGFailingOver statusMessage = "Pod missing, need to change primary" + msgFmtCNPGRestarting statusMessage = "CNPG cluster restarting: %s" + msgFmtCNPGUpgrading statusMessage = "CNPG cluster upgrading: %s" + msgCNPGApplyingConfiguration statusMessage = "Configuration change is being applied" + msgCNPGPromoting statusMessage = "Replica is being promoted to primary" + msgCNPGWaitingForUser statusMessage = "Action from the user is required" + msgCNPGUnrecoverable statusMessage = "Cluster failed, needs manual intervention" + msgCNPGCannotCreateObjects statusMessage = "Cluster resources cannot be created" + msgFmtCNPGPluginError statusMessage = "CNPG plugin error: %s" + msgFmtCNPGImageError statusMessage = "CNPG image error: %s" + msgFmtCNPGClusterPhase statusMessage = "CNPG cluster phase: %s" + + // status messages — aggregate and component readiness checks + msgAllComponentsReady statusMessage = "All components are ready" + msgPoolerDisabled statusMessage = "Connection pooler disabled" + msgPoolerConfigMissing statusMessage = "Connection pooler enabled but configuration is missing" + msgPoolersProvisioning statusMessage = 
"Connection poolers are being provisioned" + msgWaitRWPoolerObject statusMessage = "Waiting for RW pooler object" + msgWaitROPoolerObject statusMessage = "Waiting for RO pooler object" + msgPoolersNotReady statusMessage = "Connection poolers are not ready yet" + msgPoolersReady statusMessage = "Connection poolers are ready" + msgConfigMapRefNotPublished statusMessage = "ConfigMap reference not published yet" + msgConfigMapNotFoundYet statusMessage = "ConfigMap not found yet" + msgFmtConfigMapMissingRequiredKey statusMessage = "ConfigMap missing required key %q" + msgAccessConfigMapReady statusMessage = "Access ConfigMap is ready" + msgSecretRefNotPublished statusMessage = "Superuser secret reference not published yet" + msgSecretNotFoundYet statusMessage = "Superuser secret not found yet" + msgFmtSecretMissingKey statusMessage = "Superuser secret missing key %q" + msgSuperuserSecretReady statusMessage = "Superuser secret is ready" ) diff --git a/pkg/postgresql/cluster/core/types/constants/state.go b/pkg/postgresql/cluster/core/types/constants/state.go new file mode 100644 index 000000000..bf19698c6 --- /dev/null +++ b/pkg/postgresql/cluster/core/types/constants/state.go @@ -0,0 +1,41 @@ +package pgcConstants + +type State uint64 + +const ( + EmptyState State = 0 + PoolerReady State = 1 << iota + PoolerPending + PoolerProvisioning + PoolerConfiguring + PoolerFailed + + ProvisionerReady + ProvisionerPending + ProvisionerProvisioning + ProvisionerConfiguring + ProvisionerFailed + + ConfigMapReady + ConfigMapPending + ConfigMapProvisioning + ConfigMapConfiguring + ConfigMapFailed + + SecretReady + SecretPending + SecretProvisioning + SecretConfiguring + SecretFailed + + ClusterReady + ClusterPending + ClusterProvisioning + ClusterConfiguring + ClusterFailed +) + +const ( + ComponentsReady = PoolerReady | ProvisionerReady | SecretReady | ConfigMapReady + OwnershipReady +) diff --git a/pkg/postgresql/database/core/database.go b/pkg/postgresql/database/core/database.go index f84a35fd9..3a88bac80 100644 --- a/pkg/postgresql/database/core/database.go +++ b/pkg/postgresql/database/core/database.go @@ -172,31 +172,29 @@ func PostgresDatabaseService( } // Phase: RoleProvisioning - desiredUsers := getDesiredUsers(postgresDB) - actualRoles := getUsersInClusterSpec(cluster) - var missing []string - for _, role := range desiredUsers { - if !slices.Contains(actualRoles, role) { - missing = append(missing, role) - } - } - - if len(missing) > 0 { - logger.Info("CNPG Cluster patch started, missing roles detected", "missing", missing) - if err := patchManagedRoles(ctx, c, postgresDB, cluster); err != nil { + fieldManager := fieldManagerName(postgresDB.Name) + desired := buildDesiredRoles(postgresDB.Name, postgresDB.Spec.Databases) + rolesToAdd := findAddedRoleNames(cluster, desired) + rolesToRemove := absentRolesByName(findRemovedRoleNames(cluster, fieldManager, desired)) + allRoles := append(desired, rolesToRemove...) 
+
+	if len(rolesToAdd) > 0 || len(rolesToRemove) > 0 {
+		logger.Info("CNPG Cluster patch started, role drift detected", "toAdd", len(rolesToAdd), "toRemove", len(rolesToRemove))
+		if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil {
 			logger.Error(err, "Failed to patch users in CNPG Cluster")
 			rc.emitWarning(postgresDB, EventManagedRolesPatchFailed, fmt.Sprintf("Failed to patch managed roles: %v", err))
 			return ctrl.Result{}, err
 		}
-		rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles, waiting for %d roles to reconcile", len(desiredUsers)))
+		rc.emitNormal(postgresDB, EventRoleReconciliationStarted, fmt.Sprintf("Patched managed roles: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)))
 		if err := updateStatus(rolesReady, metav1.ConditionFalse, reasonWaitingForCNPG,
-			fmt.Sprintf("Waiting for %d roles to be reconciled", len(desiredUsers)), provisioningDBPhase); err != nil {
+			fmt.Sprintf("Waiting for roles to be reconciled: %d to add, %d to remove", len(rolesToAdd), len(rolesToRemove)), provisioningDBPhase); err != nil {
 			return ctrl.Result{}, err
 		}
 		return ctrl.Result{RequeueAfter: retryDelay}, nil
 	}
 
-	notReadyRoles, err := verifyRolesReady(ctx, desiredUsers, cnpgCluster)
+	roleNames := getDesiredUsers(postgresDB)
+	notReadyRoles, err := verifyRolesReady(ctx, roleNames, cnpgCluster)
 	if err != nil {
 		rc.emitWarning(postgresDB, EventRoleFailed, fmt.Sprintf("Role reconciliation failed: %v", err))
 		if statusErr := updateStatus(rolesReady, metav1.ConditionFalse, reasonUsersCreationFailed,
@@ -212,9 +210,9 @@
 		}
 		return ctrl.Result{RequeueAfter: retryDelay}, nil
 	}
-	rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(desiredUsers)))
+	rc.emitOnConditionTransition(postgresDB, postgresDB.Status.Conditions, rolesReady, EventRolesReady, fmt.Sprintf("All %d roles reconciled", len(roleNames)))
 	if err := updateStatus(rolesReady, metav1.ConditionTrue, reasonUsersAvailable,
-		fmt.Sprintf("All %d users in PostgreSQL", len(desiredUsers)), provisioningDBPhase); err != nil {
+		fmt.Sprintf("All %d roles present in PostgreSQL", len(roleNames)), provisioningDBPhase); err != nil {
 		return ctrl.Result{}, err
 	}
 
@@ -366,6 +364,9 @@ func getUsersInClusterSpec(cluster *enterprisev4.PostgresCluster) []string {
 	return users
 }
 
+// Role-conflict detection: a role desired here but already owned in the cluster
+// spec by a different field manager is reported as a conflict rather than adopted.
+ func getRoleConflicts(postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) []string { myManager := fieldManagerName(postgresDB.Name) desired := make(map[string]struct{}, len(postgresDB.Spec.Databases)*2) @@ -413,18 +414,16 @@ func parseRoleNames(raw []byte) []string { return names } -func patchManagedRoles(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster) error { +func patchManagedRoles(ctx context.Context, c client.Client, fieldManager string, cluster *enterprisev4.PostgresCluster, roles []enterprisev4.ManagedRole) error { logger := log.FromContext(ctx) - allRoles := buildManagedRoles(postgresDB.Name, postgresDB.Spec.Databases) - rolePatch, err := buildManagedRolesPatch(cluster, allRoles, c.Scheme()) + rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme()) if err != nil { - return fmt.Errorf("building managed roles patch for PostgresDatabase %s: %w", postgresDB.Name, err) + return fmt.Errorf("building managed roles patch: %w", err) } - fieldManager := fieldManagerName(postgresDB.Name) if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManager)); err != nil { - return fmt.Errorf("patching managed roles for PostgresDatabase %s: %w", postgresDB.Name, err) + return fmt.Errorf("patching managed roles: %w", err) } - logger.Info("Users added to PostgresCluster via SSA", "roleCount", len(allRoles)) + logger.Info("Managed roles patched", "count", len(roles)) return nil } @@ -580,7 +579,15 @@ func cleanupManagedRoles(ctx context.Context, c client.Client, postgresDB *enter logger.Info("PostgresCluster already deleted, skipping role cleanup") return nil } - return patchManagedRolesOnDeletion(ctx, c, postgresDB, cluster, plan.retained) + fieldManager := fieldManagerName(postgresDB.Name) + retainedRoles := buildDesiredRoles(postgresDB.Name, plan.retained) + rolesToRemove := buildRolesToRemove(plan.deleted) + allRoles := append(retainedRoles, rolesToRemove...) + if err := patchManagedRoles(ctx, c, fieldManager, cluster, allRoles); err != nil { + return err + } + logger.Info("Managed roles patched on deletion", "retained", len(retainedRoles), "removed", len(rolesToRemove)) + return nil } func orphanCNPGDatabases(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, databases []enterprisev4.DatabaseDefinition) error { @@ -716,7 +723,67 @@ func deleteSecrets(ctx context.Context, c client.Client, postgresDB *enterprisev return nil } -func buildManagedRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { +// buildRolesToRemove produces Exists:false entries for the given databases so CNPG drops their roles. +func buildRolesToRemove(databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole { + roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2) + for _, dbSpec := range databases { + roles = append(roles, + enterprisev4.ManagedRole{Name: adminRoleName(dbSpec.Name), Exists: false}, + enterprisev4.ManagedRole{Name: rwRoleName(dbSpec.Name), Exists: false}, + ) + } + return roles +} + +// absentRolesByName produces Exists:false entries from a list of raw role names. +// Used by the normal reconcile path where names come from SSA field manager parsing. 
+func absentRolesByName(names []string) []enterprisev4.ManagedRole {
+	roles := make([]enterprisev4.ManagedRole, 0, len(names))
+	for _, name := range names {
+		roles = append(roles, enterprisev4.ManagedRole{Name: name, Exists: false})
+	}
+	return roles
+}
+
+// findAddedRoleNames returns role names from the desired list that are missing
+// from the cluster spec or currently marked absent.
+func findAddedRoleNames(cluster *enterprisev4.PostgresCluster, desired []enterprisev4.ManagedRole) []string {
+	current := make(map[string]bool, len(cluster.Spec.ManagedRoles))
+	for _, r := range cluster.Spec.ManagedRoles {
+		current[r.Name] = r.Exists
+	}
+	var toAdd []string
+	for _, r := range desired {
+		exists, found := current[r.Name]
+		if !found || !exists {
+			toAdd = append(toAdd, r.Name)
+		}
+	}
+	return toAdd
+}
+
+// findRemovedRoleNames returns role names currently owned by this field manager
+// in the cluster spec that are absent from the desired list.
+func findRemovedRoleNames(cluster *enterprisev4.PostgresCluster, manager string, desired []enterprisev4.ManagedRole) []string {
+	desiredSet := make(map[string]struct{}, len(desired))
+	for _, r := range desired {
+		desiredSet[r.Name] = struct{}{}
+	}
+	owners := managedRoleOwners(cluster.ManagedFields)
+	var toRemove []string
+	for name, owner := range owners {
+		if owner == manager {
+			if _, ok := desiredSet[name]; !ok {
+				toRemove = append(toRemove, name)
+			}
+		}
+	}
+	return toRemove
+}
+
+// buildDesiredRoles builds the full set of roles that should be present for the given databases.
+// This is the input to findAddedRoleNames and findRemovedRoleNames.
+func buildDesiredRoles(postgresDBName string, databases []enterprisev4.DatabaseDefinition) []enterprisev4.ManagedRole {
 	roles := make([]enterprisev4.ManagedRole, 0, len(databases)*2)
 	for _, dbSpec := range databases {
 		roles = append(roles,
@@ -752,20 +819,6 @@
 	}, nil
 }
 
-func patchManagedRolesOnDeletion(ctx context.Context, c client.Client, postgresDB *enterprisev4.PostgresDatabase, cluster *enterprisev4.PostgresCluster, retained []enterprisev4.DatabaseDefinition) error {
-	logger := log.FromContext(ctx)
-	roles := buildManagedRoles(postgresDB.Name, retained)
-	rolePatch, err := buildManagedRolesPatch(cluster, roles, c.Scheme())
-	if err != nil {
-		return fmt.Errorf("building managed roles patch: %w", err)
-	}
-	if err := c.Patch(ctx, rolePatch, client.Apply, client.FieldOwner(fieldManagerName(postgresDB.Name))); err != nil {
-		return fmt.Errorf("patching managed roles on deletion: %w", err)
-	}
-	logger.Info("Managed roles patched on deletion", "retainedRoles", len(roles))
-	return nil
-}
-
 func stripOwnerReference(obj metav1.Object, ownerUID types.UID) {
 	refs := obj.GetOwnerReferences()
 	filtered := make([]metav1.OwnerReference, 0, len(refs))
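The helpers added above split role drift into an add set (desired but missing or marked absent) and a remove set (owned by this field manager but no longer desired); removals are applied as `Exists: false` entries rather than simply omitted, so CNPG actually drops the roles. A self-contained sketch of that flow, using a hypothetical stand-in `ManagedRole` struct instead of the `enterprisev4` type and a plain name list in place of parsed managed fields:

```go
package main

import "fmt"

// ManagedRole is a hypothetical stand-in for enterprisev4.ManagedRole,
// declared locally so the sketch runs on its own.
type ManagedRole struct {
	Name   string
	Exists bool
}

// diffRoles mirrors the reconcile-path split: names in desired that are
// missing (or marked absent) in current go into toAdd; names this field
// manager owns that are no longer desired go into toRemove.
func diffRoles(current []ManagedRole, owned []string, desired []ManagedRole) (toAdd, toRemove []string) {
	currentExists := make(map[string]bool, len(current))
	for _, r := range current {
		currentExists[r.Name] = r.Exists
	}
	for _, r := range desired {
		if exists, ok := currentExists[r.Name]; !ok || !exists {
			toAdd = append(toAdd, r.Name)
		}
	}
	desiredSet := make(map[string]struct{}, len(desired))
	for _, r := range desired {
		desiredSet[r.Name] = struct{}{}
	}
	for _, name := range owned {
		if _, ok := desiredSet[name]; !ok {
			toRemove = append(toRemove, name)
		}
	}
	return toAdd, toRemove
}

func main() {
	current := []ManagedRole{{Name: "payments_admin", Exists: true}, {Name: "analytics_rw", Exists: true}}
	owned := []string{"payments_admin", "analytics_rw"}
	desired := []ManagedRole{{Name: "payments_admin", Exists: true}, {Name: "payments_rw", Exists: true}}

	toAdd, toRemove := diffRoles(current, owned, desired)
	fmt.Println(toAdd)    // [payments_rw]
	fmt.Println(toRemove) // [analytics_rw]
}
```

The patch then applies `desired` plus the `Exists: false` entries in a single server-side apply, so the field manager keeps ownership of a role through its removal instead of silently abandoning it.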
diff --git a/pkg/postgresql/database/core/database_unit_test.go b/pkg/postgresql/database/core/database_unit_test.go
index 8d4da6c52..0e8bee12b 100644
--- a/pkg/postgresql/database/core/database_unit_test.go
+++ b/pkg/postgresql/database/core/database_unit_test.go
@@ -306,7 +306,7 @@ func TestVerifyRolesReady(t *testing.T) {
 				},
 			},
 		},
-		wantErr: "user main_db_rw reconciliation failed: [reserved role]",
+		wantErr: "reconciling user main_db_rw: [reserved role]",
 	},
 	{
 		name: "returns missing roles that are not reconciled yet",
@@ -1283,7 +1283,7 @@ func TestBuildManagedRoles(t *testing.T) {
 		},
 	}
 
-	got := buildManagedRoles("primary", databases)
+	got := buildDesiredRoles("primary", databases)
 
 	assert.Equal(t, want, got)
 }
@@ -1300,7 +1300,7 @@ func TestBuildManagedRolesPatch(t *testing.T) {
 			Namespace: "dbs",
 		},
 	}
-	roles := buildManagedRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}})
+	roles := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}})
 
 	c := testClient(t, scheme, cluster)
 	got, err := buildManagedRolesPatch(cluster, roles, c.Scheme())
@@ -1312,37 +1312,151 @@
 	assert.Equal(t, map[string]any{"managedRoles": roles}, got.Object["spec"])
 }
 
-func TestPatchManagedRolesOnDeletion(t *testing.T) {
-	scheme := testScheme(t)
-	postgresDB := &enterprisev4.PostgresDatabase{
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "primary",
-			Namespace: "dbs",
+func TestFindAddedRoleNames(t *testing.T) {
+	desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}, {Name: "api"}})
+
+	tests := []struct {
+		name    string
+		current []enterprisev4.ManagedRole
+		want    []string
+	}{
+		{
+			name:    "all missing from cluster",
+			current: nil,
+			want:    []string{"payments_admin", "payments_rw", "api_admin", "api_rw"},
+		},
+		{
+			name: "some already present",
+			current: []enterprisev4.ManagedRole{
+				{Name: "payments_admin", Exists: true},
+				{Name: "payments_rw", Exists: true},
+			},
+			want: []string{"api_admin", "api_rw"},
+		},
+		{
+			name: "role present but marked absent, should be re-added",
+			current: []enterprisev4.ManagedRole{
+				{Name: "payments_admin", Exists: false},
+				{Name: "payments_rw", Exists: true},
+			},
+			want: []string{"payments_admin", "api_admin", "api_rw"},
+		},
+		{
+			name: "all already present",
+			current: []enterprisev4.ManagedRole{
+				{Name: "payments_admin", Exists: true},
+				{Name: "payments_rw", Exists: true},
+				{Name: "api_admin", Exists: true},
+				{Name: "api_rw", Exists: true},
+			},
+			want: nil,
 		},
 	}
-	cluster := &enterprisev4.PostgresCluster{
-		TypeMeta: metav1.TypeMeta{
-			APIVersion: enterprisev4.GroupVersion.String(),
-			Kind:       "PostgresCluster",
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			cluster := &enterprisev4.PostgresCluster{
+				Spec: enterprisev4.PostgresClusterSpec{ManagedRoles: tc.current},
+			}
+			got := findAddedRoleNames(cluster, desired)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
+type roleFieldOwner struct {
+	manager string
+	roles   []string
+}
+
+func TestFindRemovedRoleNames(t *testing.T) {
+	manager := "splunk-operator-primary"
+	desired := buildDesiredRoles("primary", []enterprisev4.DatabaseDefinition{{Name: "payments"}})
+
+	tests := []struct {
+		name        string
+		fieldOwners []roleFieldOwner
+		want        []string
+	}{
+		{
+			name:        "no roles owned by any manager",
+			fieldOwners: nil,
+			want:        nil,
 		},
-		ObjectMeta: metav1.ObjectMeta{
-			Name:      "primary",
-			Namespace: "dbs",
+		{
+			name:        "owned roles still in desired, nothing to remove",
+			fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw"}}},
+			want:        nil,
+		},
+		{
+			name:        "owned role no longer in desired, should be removed",
+			fieldOwners: []roleFieldOwner{{manager: manager, roles: []string{"payments_admin", "payments_rw", "api_admin", "api_rw"}}},
+			want:        []string{"api_admin", "api_rw"},
+		},
+		{
+			name:        "role owned by different manager, ignored",
+			fieldOwners: []roleFieldOwner{{manager: "other-manager", roles: []string{"api_admin", "api_rw"}}},
+			want:        nil,
+		},
 	}
-	retained := []enterprisev4.DatabaseDefinition{{Name: "payments"}}
-	want := buildManagedRoles(postgresDB.Name, retained)
-	c := testClient(t, scheme, cluster)
-	err := patchManagedRolesOnDeletion(context.Background(), c, postgresDB, cluster, retained)
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			var managedFields []metav1.ManagedFieldsEntry
+			for _, fo := range tc.fieldOwners {
+				keys := make([]string, len(fo.roles))
+				for i, r := range fo.roles {
+					keys[i] = `k:{"name":"` + r + `"}`
+				}
+				managedFields = append(managedFields, metav1.ManagedFieldsEntry{
+					Manager:   fo.manager,
+					FieldsV1:  &metav1.FieldsV1{Raw: managedRolesFieldsRaw(t, keys...)},
+					Operation: metav1.ManagedFieldsOperationApply,
+				})
+			}
+			cluster := &enterprisev4.PostgresCluster{
+				ObjectMeta: metav1.ObjectMeta{ManagedFields: managedFields},
+			}
+			got := findRemovedRoleNames(cluster, manager, desired)
+			assert.ElementsMatch(t, tc.want, got)
+		})
+	}
+}
+
-	require.NoError(t, err)
+func TestBuildRolesToRemove(t *testing.T) {
+	tests := []struct {
+		name    string
+		deleted []enterprisev4.DatabaseDefinition
+		want    []enterprisev4.ManagedRole
+	}{
+		{
+			name:    "nothing to remove",
+			deleted: nil,
+			want:    []enterprisev4.ManagedRole{},
+		},
+		{
+			name:    "single database removed",
+			deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}},
+			want:    []enterprisev4.ManagedRole{{Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false}},
+		},
+		{
+			name:    "multiple databases removed",
+			deleted: []enterprisev4.DatabaseDefinition{{Name: "api"}, {Name: "payments"}},
+			want: []enterprisev4.ManagedRole{
+				{Name: "api_admin", Exists: false}, {Name: "api_rw", Exists: false},
+				{Name: "payments_admin", Exists: false}, {Name: "payments_rw", Exists: false},
+			},
+		},
+	}
-	got := &enterprisev4.PostgresCluster{}
-	require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cluster.Name, Namespace: cluster.Namespace}, got))
-	assert.Equal(t, want, got.Spec.ManagedRoles)
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			assert.Equal(t, tc.want, buildRolesToRemove(tc.deleted))
+		})
+	}
 }
+
 func TestStripOwnerReference(t *testing.T) {
 	obj := &corev1.ConfigMap{
 		ObjectMeta: metav1.ObjectMeta{
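The ownership checks exercised above rest on server-side apply's managed-fields bookkeeping: `managedRolesFieldsRaw` builds a `FieldsV1` payload whose keys follow the `k:{"name":...}` convention, and helpers like `parseRoleNames` (only partially visible in this patch) recover role names from those keys. A rough standalone sketch of that key-parsing step against a hand-written payload, approximating rather than reproducing the patch's implementation:

```go
package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// Trimmed FieldsV1 payload of the shape a field manager leaves behind after
// an SSA apply to spec.managedRoles; the key convention matches what the
// tests build via managedRolesFieldsRaw.
const fieldsV1 = `{
	"f:spec": {
		"f:managedRoles": {
			"k:{\"name\":\"payments_admin\"}": {},
			"k:{\"name\":\"payments_rw\"}": {}
		}
	}
}`

func main() {
	var root map[string]map[string]map[string]struct{}
	if err := json.Unmarshal([]byte(fieldsV1), &root); err != nil {
		panic(err)
	}
	// Each map key under f:managedRoles embeds the role name as JSON: strip
	// the "k:" prefix and decode the remainder.
	for key := range root["f:spec"]["f:managedRoles"] {
		var kv struct {
			Name string `json:"name"`
		}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(key, "k:")), &kv); err != nil {
			continue
		}
		fmt.Println(kv.Name) // payments_admin, payments_rw (map order varies)
	}
}
```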
diff --git a/pkg/postgresql/database/core/types.go b/pkg/postgresql/database/core/types.go
index bf07fd19f..fb57dee91 100644
--- a/pkg/postgresql/database/core/types.go
+++ b/pkg/postgresql/database/core/types.go
@@ -31,7 +31,8 @@ const (
 	readWriteEndpoint string = "rw"
 
 	deletionPolicyRetain string = "Retain"
-
+	deletionPolicyDelete string = "Delete"
+
 	postgresDatabaseFinalizerName string = "postgresdatabases.enterprise.splunk.com/finalizer"
 	annotationRetainedFrom        string = "enterprise.splunk.com/retained-from"
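Circling back to the `state.go` constants added earlier in this patch: the cluster unit test ORs each component check's `info.State` into an aggregate and then applies a mask test against `ComponentsReady`. A minimal sketch of the pattern with a few flags re-declared locally so it runs standalone (the local names and bit order are illustrative, not the package's exact layout):

```go
package main

import "fmt"

// Local stand-ins for a few pgcConstants flags; values follow the same
// 1 << iota scheme, so each flag occupies its own bit.
type State uint64

const (
	EmptyState  State = 0
	PoolerReady State = 1 << iota
	ProvisionerReady
	ConfigMapReady
	SecretReady
)

const ComponentsReady = PoolerReady | ProvisionerReady | ConfigMapReady | SecretReady

func main() {
	// Each component check ORs its bit into the aggregate, as the cluster
	// unit test does with info.State.
	state := EmptyState
	for _, componentState := range []State{PoolerReady, ProvisionerReady, SecretReady} {
		state |= componentState
	}
	fmt.Println(state&ComponentsReady == ComponentsReady) // false: ConfigMapReady bit missing

	state |= ConfigMapReady
	fmt.Println(state&ComponentsReady == ComponentsReady) // true: all component bits set
}
```

The mask test only answers "are all required bits set"; the individual Pending/Provisioning/Failed bits remain available for more detailed status reporting.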