From ddc3b68429c950fd9427a23264a757c4519ba168 Mon Sep 17 00:00:00 2001 From: Reynier Ortiz Vega Date: Wed, 15 Apr 2026 13:37:23 -0400 Subject: [PATCH 1/5] Add policy_stopped workload status and enforce policy gate on restart MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces WorkloadStatusPolicyStopped as a first-class workload status to support enterprise policy enforcement. When non-registry servers are blocked by policy, workloads can be set to this status so the UI and CLI can surface the reason clearly, rather than showing an opaque "error". - Add WorkloadStatusPolicyStopped constant to pkg/container/runtime/types.go - Update enums tag in pkg/core/workload.go to include policy_stopped - Map policy_stopped to BackendUnhealthy in mapWorkloadStatusToVMCPHealth - Call EagerCheckCreateServer in restartSingleWorkload so the policy gate blocks restart of workloads that violate the active policy - Add 🚫 indicator for policy_stopped in CLI list and status output, following the existing ⚠️ pattern for unauthenticated workloads - Regenerate swagger docs to include the new status enum value Part of stacklok/stacklok-enterprise-platform#406 Co-Authored-By: Claude Sonnet 4.6 --- cmd/thv/app/list.go | 7 +++++-- cmd/thv/app/status.go | 5 ++++- docs/server/docs.go | 5 ++++- docs/server/swagger.json | 5 ++++- docs/server/swagger.yaml | 3 +++ pkg/container/runtime/types.go | 4 ++++ pkg/core/workload.go | 2 +- pkg/workloads/manager.go | 8 ++++++++ 8 files changed, 33 insertions(+), 6 deletions(-) diff --git a/cmd/thv/app/list.go b/cmd/thv/app/list.go index 759063e2ba..f6cb35f5a0 100644 --- a/cmd/thv/app/list.go +++ b/cmd/thv/app/list.go @@ -165,10 +165,13 @@ func printTextOutput(workloadList []core.Workload) { // Print workload information for _, c := range workloadList { - // Highlight unauthenticated workloads with a warning indicator + // Highlight unauthenticated and policy-stopped workloads with indicators status := string(c.Status) - if c.Status == rt.WorkloadStatusUnauthenticated { + switch c.Status { + case rt.WorkloadStatusUnauthenticated: status = "⚠️ " + status + case rt.WorkloadStatusPolicyStopped: + status = "🚫 " + status } // Print workload information diff --git a/cmd/thv/app/status.go b/cmd/thv/app/status.go index d10465f531..1f2b1b5b9d 100644 --- a/cmd/thv/app/status.go +++ b/cmd/thv/app/status.go @@ -101,8 +101,11 @@ func printStatusJSONOutput(workload core.Workload) error { func printStatusTextOutput(workload core.Workload) { w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) status := string(workload.Status) - if workload.Status == runtime.WorkloadStatusUnauthenticated { + switch workload.Status { + case runtime.WorkloadStatusUnauthenticated: status = "⚠️ " + status + case runtime.WorkloadStatusPolicyStopped: + status = "🚫 " + status } // Print workload information in key-value format diff --git a/docs/server/docs.go b/docs/server/docs.go index 5ea52bbaec..bbe57cce92 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -913,6 +913,7 @@ const docTemplate = `{ "removing", "unknown", "unauthenticated", + "policy_stopped", "running", "stopped", "error", @@ -922,6 +923,7 @@ const docTemplate = `{ "removing", "unknown", "unauthenticated", + "policy_stopped", "running", "stopped", "error", @@ -942,7 +944,8 @@ const docTemplate = `{ "WorkloadStatusUnhealthy", "WorkloadStatusRemoving", "WorkloadStatusUnknown", - "WorkloadStatusUnauthenticated" + "WorkloadStatusUnauthenticated", + "WorkloadStatusPolicyStopped" ] }, "github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig": { diff --git a/docs/server/swagger.json b/docs/server/swagger.json index 8bb3ca0cbb..a0c91bafe7 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -906,6 +906,7 @@ "removing", "unknown", "unauthenticated", + "policy_stopped", "running", "stopped", "error", @@ -915,6 +916,7 @@ "removing", "unknown", "unauthenticated", + "policy_stopped", "running", "stopped", "error", @@ -935,7 +937,8 @@ "WorkloadStatusUnhealthy", "WorkloadStatusRemoving", "WorkloadStatusUnknown", - "WorkloadStatusUnauthenticated" + "WorkloadStatusUnauthenticated", + "WorkloadStatusPolicyStopped" ] }, "github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig": { diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index 8822cc6d2f..a0d054e111 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -876,6 +876,7 @@ components: - removing - unknown - unauthenticated + - policy_stopped - running - stopped - error @@ -885,6 +886,7 @@ components: - removing - unknown - unauthenticated + - policy_stopped - running - stopped - error @@ -905,6 +907,7 @@ components: - WorkloadStatusRemoving - WorkloadStatusUnknown - WorkloadStatusUnauthenticated + - WorkloadStatusPolicyStopped github_com_stacklok_toolhive_pkg_container_templates.RuntimeConfig: description: |- RuntimeConfig allows overriding the default runtime configuration diff --git a/pkg/container/runtime/types.go b/pkg/container/runtime/types.go index 13299b033c..212d92a99f 100644 --- a/pkg/container/runtime/types.go +++ b/pkg/container/runtime/types.go @@ -44,6 +44,10 @@ const ( // WorkloadStatusUnauthenticated indicates that the workload is running but // cannot authenticate with the remote MCP server (e.g., expired refresh token). WorkloadStatusUnauthenticated WorkloadStatus = "unauthenticated" + // WorkloadStatusPolicyStopped indicates that the workload was stopped by + // enterprise policy enforcement. The StatusContext field carries the + // human-readable reason. + WorkloadStatusPolicyStopped WorkloadStatus = "policy_stopped" ) // ContainerInfo represents information about a container diff --git a/pkg/core/workload.go b/pkg/core/workload.go index 1583f74645..06199504dd 100644 --- a/pkg/core/workload.go +++ b/pkg/core/workload.go @@ -33,7 +33,7 @@ type Workload struct { ProxyMode string `json:"proxy_mode,omitempty"` // Status is the current status of the workload. //nolint:lll // enums tag needed for swagger generation with --parseDependencyLevel - Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated"` + Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated,policy_stopped"` // StatusContext provides additional context about the workload's status. // The exact meaning is determined by the status and the underlying runtime. StatusContext string `json:"status_context,omitempty"` diff --git a/pkg/workloads/manager.go b/pkg/workloads/manager.go index 249c396468..f97bddb7e1 100644 --- a/pkg/workloads/manager.go +++ b/pkg/workloads/manager.go @@ -243,6 +243,8 @@ func mapWorkloadStatusToVMCPHealth(status rt.WorkloadStatus) vmcp.BackendHealthS return vmcp.BackendUnknown case rt.WorkloadStatusUnauthenticated: return vmcp.BackendUnauthenticated + case rt.WorkloadStatusPolicyStopped: + return vmcp.BackendUnhealthy default: return vmcp.BackendUnknown } @@ -1086,6 +1088,12 @@ func (d *DefaultManager) restartSingleWorkload(ctx context.Context, name string, return d.restartContainerWorkload(ctx, name, foreground) } + // Check policy gates before restarting — the loaded RunConfig carries the same + // fields (RegistryAPIURL, RegistryURL, RemoteURL) that the gate evaluates on create. + if err := runner.EagerCheckCreateServer(ctx, runConfig); err != nil { + return fmt.Errorf("server restart blocked by policy: %w", err) + } + // Check if this is a remote workload if runConfig.RemoteURL != "" { return d.restartRemoteWorkload(ctx, name, runConfig, foreground) From dd375dfb39f0e56d23d3b1f1ad16df3001a4e1b8 Mon Sep 17 00:00:00 2001 From: Reynier Ortiz Vega Date: Wed, 15 Apr 2026 14:02:45 -0400 Subject: [PATCH 2/5] Remove enterprise reference from WorkloadStatusPolicyStopped comment Co-Authored-By: Claude Sonnet 4.6 --- pkg/container/runtime/types.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/container/runtime/types.go b/pkg/container/runtime/types.go index 212d92a99f..04838e73a4 100644 --- a/pkg/container/runtime/types.go +++ b/pkg/container/runtime/types.go @@ -45,8 +45,7 @@ const ( // cannot authenticate with the remote MCP server (e.g., expired refresh token). WorkloadStatusUnauthenticated WorkloadStatus = "unauthenticated" // WorkloadStatusPolicyStopped indicates that the workload was stopped by - // enterprise policy enforcement. The StatusContext field carries the - // human-readable reason. + // policy enforcement. The StatusContext field carries the human-readable reason. WorkloadStatusPolicyStopped WorkloadStatus = "policy_stopped" ) From a441181913338474e178bd5d2346646a3f614fbd Mon Sep 17 00:00:00 2001 From: Reynier Ortiz Vega Date: Wed, 15 Apr 2026 15:00:00 -0400 Subject: [PATCH 3/5] Fix exhaustive lint errors in status switch statements Add all WorkloadStatus cases to the switches in list and status commands to satisfy the exhaustive linter. Co-Authored-By: Claude Sonnet 4.6 --- cmd/thv/app/list.go | 4 ++++ cmd/thv/app/status.go | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/cmd/thv/app/list.go b/cmd/thv/app/list.go index f6cb35f5a0..82d9acdd07 100644 --- a/cmd/thv/app/list.go +++ b/cmd/thv/app/list.go @@ -172,6 +172,10 @@ func printTextOutput(workloadList []core.Workload) { status = "⚠️ " + status case rt.WorkloadStatusPolicyStopped: status = "🚫 " + status + case rt.WorkloadStatusRunning, rt.WorkloadStatusStopped, rt.WorkloadStatusError, + rt.WorkloadStatusStarting, rt.WorkloadStatusStopping, rt.WorkloadStatusUnhealthy, + rt.WorkloadStatusRemoving, rt.WorkloadStatusUnknown: + // no indicator for other statuses } // Print workload information diff --git a/cmd/thv/app/status.go b/cmd/thv/app/status.go index 1f2b1b5b9d..2b171fb79c 100644 --- a/cmd/thv/app/status.go +++ b/cmd/thv/app/status.go @@ -106,6 +106,10 @@ func printStatusTextOutput(workload core.Workload) { status = "⚠️ " + status case runtime.WorkloadStatusPolicyStopped: status = "🚫 " + status + case runtime.WorkloadStatusRunning, runtime.WorkloadStatusStopped, runtime.WorkloadStatusError, + runtime.WorkloadStatusStarting, runtime.WorkloadStatusStopping, runtime.WorkloadStatusUnhealthy, + runtime.WorkloadStatusRemoving, runtime.WorkloadStatusUnknown: + // no indicator for other statuses } // Print workload information in key-value format From 374eeec256af6843008ef64744353a4b5755f5bc Mon Sep 17 00:00:00 2001 From: Reynier Ortiz Vega Date: Wed, 15 Apr 2026 15:57:57 -0400 Subject: [PATCH 4/5] Address PR review comments: fix policy gate bypass and missing enum - Add policy_stopped to workloadStatusResponse enums tag so the swagger spec for the status endpoint includes the new value - Add EagerCheckCreateServer check in maybeSetupContainerWorkload after loadRunnerFromState to cover the path where the outer LoadState call fails on a partial name but succeeds after label-based name resolution - Regenerate swagger docs Co-Authored-By: Claude Sonnet 4.6 --- docs/server/docs.go | 3 ++- docs/server/swagger.json | 3 ++- docs/server/swagger.yaml | 1 + pkg/api/v1/workload_types.go | 2 +- pkg/workloads/manager.go | 7 +++++++ 5 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/server/docs.go b/docs/server/docs.go index bbe57cce92..75273d7d23 100644 --- a/docs/server/docs.go +++ b/docs/server/docs.go @@ -932,7 +932,8 @@ const docTemplate = `{ "unhealthy", "removing", "unknown", - "unauthenticated" + "unauthenticated", + "policy_stopped" ], "type": "string", "x-enum-varnames": [ diff --git a/docs/server/swagger.json b/docs/server/swagger.json index a0c91bafe7..1128c2d206 100644 --- a/docs/server/swagger.json +++ b/docs/server/swagger.json @@ -925,7 +925,8 @@ "unhealthy", "removing", "unknown", - "unauthenticated" + "unauthenticated", + "policy_stopped" ], "type": "string", "x-enum-varnames": [ diff --git a/docs/server/swagger.yaml b/docs/server/swagger.yaml index a0d054e111..529d76e610 100644 --- a/docs/server/swagger.yaml +++ b/docs/server/swagger.yaml @@ -896,6 +896,7 @@ components: - removing - unknown - unauthenticated + - policy_stopped type: string x-enum-varnames: - WorkloadStatusRunning diff --git a/pkg/api/v1/workload_types.go b/pkg/api/v1/workload_types.go index a7044486c3..a87fa44b72 100644 --- a/pkg/api/v1/workload_types.go +++ b/pkg/api/v1/workload_types.go @@ -32,7 +32,7 @@ type workloadListResponse struct { type workloadStatusResponse struct { // Current status of the workload //nolint:lll // enums tag needed for swagger generation with --parseDependencyLevel - Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated"` + Status runtime.WorkloadStatus `json:"status" enums:"running,stopped,error,starting,stopping,unhealthy,removing,unknown,unauthenticated,policy_stopped"` } // updateRequest represents the request to update an existing workload diff --git a/pkg/workloads/manager.go b/pkg/workloads/manager.go index f97bddb7e1..fb410f9df8 100644 --- a/pkg/workloads/manager.go +++ b/pkg/workloads/manager.go @@ -1303,6 +1303,13 @@ func (d *DefaultManager) maybeSetupContainerWorkload(ctx context.Context, name s return "", nil, fmt.Errorf("failed to load state for %s: %w", workloadName, err) } + // Check policy gates before restarting. This covers the case where the caller + // could not load state via the original name but we resolved the canonical name + // from container labels above, so the check must happen here. + if err := runner.EagerCheckCreateServer(ctx, mcpRunner.Config); err != nil { + return "", nil, fmt.Errorf("server restart blocked by policy: %w", err) + } + // Set workload status to starting - use the workload name for status operations if err := d.statuses.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusStarting, ""); err != nil { slog.Warn("Failed to set workload status to starting", "workload", workloadName, "error", err) From a13995ebf44ee9f83d162eb1ba49ceb801e6eedd Mon Sep 17 00:00:00 2001 From: Reynier Ortiz Vega Date: Wed, 15 Apr 2026 17:53:30 -0400 Subject: [PATCH 5/5] Set policy_stopped status on blocked restart and deduplicate status indicator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Call SetWorkloadStatus with WorkloadStatusPolicyStopped in both policy gate failure paths in restartSingleWorkload and maybeSetupContainerWorkload, so a blocked restart transitions the workload into the new status instead of leaving it as stopped - Extract the status indicator switch (⚠️ / 🚫) from list.go and status.go into a single workloadStatusIndicator helper in common.go Co-Authored-By: Claude Sonnet 4.6 --- cmd/thv/app/common.go | 18 ++++++++++++++++++ cmd/thv/app/list.go | 13 +------------ cmd/thv/app/status.go | 13 +------------ pkg/workloads/manager.go | 6 ++++++ 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/cmd/thv/app/common.go b/cmd/thv/app/common.go index 53766aaf13..2cd23028c1 100644 --- a/cmd/thv/app/common.go +++ b/cmd/thv/app/common.go @@ -11,6 +11,7 @@ import ( groupval "github.com/stacklok/toolhive-core/validation/group" "github.com/stacklok/toolhive/pkg/config" + "github.com/stacklok/toolhive/pkg/container/runtime" "github.com/stacklok/toolhive/pkg/secrets" "github.com/stacklok/toolhive/pkg/workloads" ) @@ -155,6 +156,23 @@ func completeLogsArgs(cmd *cobra.Command, args []string, _ string) ([]string, co return completions, cobra.ShellCompDirectiveNoFileComp } +// workloadStatusIndicator returns the status string with a visual indicator prepended +// for statuses that warrant user attention (unauthenticated, policy_stopped). +// All other statuses are returned as plain strings. +func workloadStatusIndicator(status runtime.WorkloadStatus) string { + switch status { + case runtime.WorkloadStatusUnauthenticated: + return "⚠️ " + string(status) + case runtime.WorkloadStatusPolicyStopped: + return "🚫 " + string(status) + case runtime.WorkloadStatusRunning, runtime.WorkloadStatusStopped, runtime.WorkloadStatusError, + runtime.WorkloadStatusStarting, runtime.WorkloadStatusStopping, runtime.WorkloadStatusUnhealthy, + runtime.WorkloadStatusRemoving, runtime.WorkloadStatusUnknown: + return string(status) + } + return string(status) +} + // AddGroupFlag adds a --group flag to the provided command for filtering by group. // If withShorthand is true, adds the -g shorthand as well. func AddGroupFlag(cmd *cobra.Command, groupVar *string, withShorthand bool) { diff --git a/cmd/thv/app/list.go b/cmd/thv/app/list.go index 82d9acdd07..a3943bbd60 100644 --- a/cmd/thv/app/list.go +++ b/cmd/thv/app/list.go @@ -12,7 +12,6 @@ import ( "github.com/spf13/cobra" - rt "github.com/stacklok/toolhive/pkg/container/runtime" "github.com/stacklok/toolhive/pkg/core" "github.com/stacklok/toolhive/pkg/workloads" ) @@ -166,17 +165,7 @@ func printTextOutput(workloadList []core.Workload) { // Print workload information for _, c := range workloadList { // Highlight unauthenticated and policy-stopped workloads with indicators - status := string(c.Status) - switch c.Status { - case rt.WorkloadStatusUnauthenticated: - status = "⚠️ " + status - case rt.WorkloadStatusPolicyStopped: - status = "🚫 " + status - case rt.WorkloadStatusRunning, rt.WorkloadStatusStopped, rt.WorkloadStatusError, - rt.WorkloadStatusStarting, rt.WorkloadStatusStopping, rt.WorkloadStatusUnhealthy, - rt.WorkloadStatusRemoving, rt.WorkloadStatusUnknown: - // no indicator for other statuses - } + status := workloadStatusIndicator(c.Status) // Print workload information if _, err := fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%d\t%s\t%s\n", diff --git a/cmd/thv/app/status.go b/cmd/thv/app/status.go index 2b171fb79c..08d6171ed9 100644 --- a/cmd/thv/app/status.go +++ b/cmd/thv/app/status.go @@ -13,7 +13,6 @@ import ( "github.com/spf13/cobra" - "github.com/stacklok/toolhive/pkg/container/runtime" "github.com/stacklok/toolhive/pkg/core" "github.com/stacklok/toolhive/pkg/workloads" ) @@ -100,17 +99,7 @@ func printStatusJSONOutput(workload core.Workload) error { func printStatusTextOutput(workload core.Workload) { w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) - status := string(workload.Status) - switch workload.Status { - case runtime.WorkloadStatusUnauthenticated: - status = "⚠️ " + status - case runtime.WorkloadStatusPolicyStopped: - status = "🚫 " + status - case runtime.WorkloadStatusRunning, runtime.WorkloadStatusStopped, runtime.WorkloadStatusError, - runtime.WorkloadStatusStarting, runtime.WorkloadStatusStopping, runtime.WorkloadStatusUnhealthy, - runtime.WorkloadStatusRemoving, runtime.WorkloadStatusUnknown: - // no indicator for other statuses - } + status := workloadStatusIndicator(workload.Status) // Print workload information in key-value format _, _ = fmt.Fprintf(w, "Name:\t%s\n", workload.Name) diff --git a/pkg/workloads/manager.go b/pkg/workloads/manager.go index fb410f9df8..7b0187bac7 100644 --- a/pkg/workloads/manager.go +++ b/pkg/workloads/manager.go @@ -1091,6 +1091,9 @@ func (d *DefaultManager) restartSingleWorkload(ctx context.Context, name string, // Check policy gates before restarting — the loaded RunConfig carries the same // fields (RegistryAPIURL, RegistryURL, RemoteURL) that the gate evaluates on create. if err := runner.EagerCheckCreateServer(ctx, runConfig); err != nil { + if statusErr := d.statuses.SetWorkloadStatus(ctx, name, rt.WorkloadStatusPolicyStopped, err.Error()); statusErr != nil { + slog.Warn("Failed to set workload status to policy_stopped", "workload", name, "error", statusErr) + } return fmt.Errorf("server restart blocked by policy: %w", err) } @@ -1307,6 +1310,9 @@ func (d *DefaultManager) maybeSetupContainerWorkload(ctx context.Context, name s // could not load state via the original name but we resolved the canonical name // from container labels above, so the check must happen here. if err := runner.EagerCheckCreateServer(ctx, mcpRunner.Config); err != nil { + if statusErr := d.statuses.SetWorkloadStatus(ctx, workloadName, rt.WorkloadStatusPolicyStopped, err.Error()); statusErr != nil { + slog.Warn("Failed to set workload status to policy_stopped", "workload", workloadName, "error", statusErr) + } return "", nil, fmt.Errorf("server restart blocked by policy: %w", err) }