Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions cmd/gc/adoption_barrier.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,13 @@ func runAdoptionBarrier(
// base template name (e.g., "city-worker-3" -> "worker").
cfgAgent, isConfigAgent := agentBySession[sessionName]
isPoolInstance := false
staleSingletonSuffix := false
if !isConfigAgent {
if base := resolvePoolBase(sessionName, store, cityName, st, agentByQN); base != nil {
if base := resolveCanonicalSingletonSuffixBase(sessionName, store, cityName, st, agentByQN); base != nil {
cfgAgent = base
isConfigAgent = true
staleSingletonSuffix = true
} else if base := resolvePoolBase(sessionName, store, cityName, st, agentByQN); base != nil {
cfgAgent = base
isConfigAgent = true
isPoolInstance = true
Expand Down Expand Up @@ -189,15 +194,19 @@ func runAdoptionBarrier(
// instance expansion, to avoid false positives on direct session
// names that end in numbers.
slot := parsePoolSlot(sessionName)
if slot > 0 && isConfigAgent && cfgAgent.SupportsInstanceExpansion() {
switch {
case slot > 0 && staleSingletonSuffix:
fmt.Fprintf(stderr, "adoption barrier: adopting stale singleton suffix session %s as canonical agent %s without pool_slot metadata\n", //nolint:errcheck
sessionName, cfgAgent.QualifiedName())
case slot > 0 && isConfigAgent && cfgAgent.SupportsInstanceExpansion():
detail.PoolSlot = slot
meta["pool_slot"] = strconv.Itoa(slot)
if maxSess := cfgAgent.EffectiveMaxActiveSessions(); maxSess != nil && *maxSess >= 0 && slot > *maxSess {
detail.OutOfBounds = true
fmt.Fprintf(stderr, "adoption barrier: %s pool slot %d exceeds max %d (adopt-then-drain)\n", //nolint:errcheck
sessionName, slot, *maxSess)
}
} else if slot > 0 && !isConfigAgent {
case slot > 0 && !isConfigAgent:
// Defensive log (ga-fiw): a session ending in "-N" did not match
// any configured agent — either by exact session name or by pool
// base resolution. This is the orphan shape that produced the
Expand Down Expand Up @@ -305,6 +314,25 @@ func resolvePoolBase(sessionName string, store beads.Store, cityName, sessionTem
return nil
}

// resolveCanonicalSingletonSuffixBase maps a session name that carries a
// numeric "-N" suffix back to a configured agent whose own session name
// equals sessionName with that suffix removed.
//
// Only agents for which UsesCanonicalSingletonPoolIdentity() reports true are
// candidates; each candidate's session name is obtained through
// lookupSessionNameOrLegacy using store, cityName and sessionTemplate.
//
// It returns nil when parsePoolSlot finds no slot suffix (returns 0) or when
// no candidate's session name matches the stripped base.
func resolveCanonicalSingletonSuffixBase(sessionName string, store beads.Store, cityName, sessionTemplate string, agentByQN map[string]*config.Agent) *config.Agent {
	n := parsePoolSlot(sessionName)
	if n == 0 {
		return nil
	}

	// Re-render the slot as "-N" to learn how many trailing bytes to drop,
	// leaving the candidate base session name.
	trimLen := len(fmt.Sprintf("-%d", n))
	want := sessionName[:len(sessionName)-trimLen]

	for _, agent := range agentByQN {
		// The short-circuit keeps the session-name lookup from running for
		// agents that are not canonical singletons.
		if agent.UsesCanonicalSingletonPoolIdentity() &&
			lookupSessionNameOrLegacy(store, cityName, agent.QualifiedName(), sessionTemplate) == want {
			return agent
		}
	}
	return nil
}

// parsePoolSlot extracts the numeric pool slot from a session name suffix.
// Returns 0 if no slot suffix is found.
func parsePoolSlot(sessionName string) int {
Expand Down
28 changes: 16 additions & 12 deletions cmd/gc/adoption_barrier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -730,19 +730,15 @@ func TestAdoptionBarrier_SingletonWithNumericSuffix(t *testing.T) {
}
}

// TestAdoptionBarrier_OrphanDashNSessionLogsWarning verifies the ga-fiw
// defensive log: when a running session ends in "-N" but no configured pool
// agent claims it (because the matching base agent has max_active_sessions=1
// and SupportsInstanceExpansion()=false), the barrier still adopts the
// session but emits a stderr warning so the leak is traceable.
func TestAdoptionBarrier_OrphanDashNSessionLogsWarning(t *testing.T) {
func TestAdoptionBarrier_StaleDashNSingletonAdoptsCanonicalIdentity(t *testing.T) {
store := beads.NewMemStore()
// "refinery-1" looks like a pool instance but the base "refinery" agent
// has max_active_sessions=1, so resolvePoolBase rejects the suffix.
// has max_active_sessions=1, so it should be treated as stale singleton
// state rather than a live pool slot.
sp := &fakeAdoptionProvider{running: []string{"refinery-1"}}
cfg := &config.City{
Agents: []config.Agent{
{Name: "refinery", MaxActiveSessions: intPtr(1)},
{Name: "refinery", MaxActiveSessions: intPtr(1), ScaleCheck: "printf 1"},
},
}
var stderr bytes.Buffer
Expand All @@ -751,14 +747,22 @@ func TestAdoptionBarrier_OrphanDashNSessionLogsWarning(t *testing.T) {
if result.Adopted != 1 {
t.Errorf("Adopted = %d, want 1", result.Adopted)
}
if !bytes.Contains(stderr.Bytes(), []byte("refinery-1 ends in -1")) {
t.Errorf("stderr missing orphan -N warning; got: %s", stderr.String())
if !bytes.Contains(stderr.Bytes(), []byte("adopting stale singleton suffix session refinery-1")) {
t.Errorf("stderr missing stale singleton adoption warning; got: %s", stderr.String())
}
// Verify no pool_slot metadata (we explicitly decline to stamp it).
beadList, _ := store.ListByLabel(sessionBeadLabel, 0)
for _, b := range beadList {
if b.Metadata["agent_name"] != "refinery" {
t.Errorf("stale singleton agent_name = %q, want canonical refinery", b.Metadata["agent_name"])
}
if !containsString(b.Labels, "agent:refinery") {
t.Errorf("stale singleton labels = %v, want canonical agent label", b.Labels)
}
if containsString(b.Labels, "agent:refinery-1") {
t.Errorf("stale singleton labels = %v, must not include phantom pool identity", b.Labels)
}
if b.Metadata["pool_slot"] != "" {
t.Errorf("orphan -N session should not have pool_slot metadata, got %q", b.Metadata["pool_slot"])
t.Errorf("stale singleton session should not have pool_slot metadata, got %q", b.Metadata["pool_slot"])
}
}
}
Expand Down
Loading
Loading