From 90d47427ff9a2297ddff588d9bea252e6ee7c9fa Mon Sep 17 00:00:00 2001
From: Kyle Isom
Date: Thu, 11 Jun 2026 13:07:41 -0700
Subject: [PATCH] agent/recover: only treat running containers as up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recover built its up-set from every listed container, so a
stopped/exited container or a dead unikernel VM (whose state dir is
still listed) counted as already-running and was skipped — the exact
reason dead VMs stayed in drift instead of being recovered. Filter the
set to State==running.

Co-Authored-By: Claude Opus 4.8
---
 internal/agent/recover.go | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/internal/agent/recover.go b/internal/agent/recover.go
index f7d08d8..d87fafa 100644
--- a/internal/agent/recover.go
+++ b/internal/agent/recover.go
@@ -21,15 +21,20 @@ func (a *Agent) Recover(ctx context.Context) error {
 		return fmt.Errorf("list services: %w", err)
 	}
 
-	// Get the list of currently running containers from podman.
-	running, err := a.listAllContainers(ctx)
+	// Get the list of containers across runtimes. Only those actually in the
+	// "running" state count as up — a stopped/exited container or a dead
+	// unikernel VM (whose state dir still exists, so it is listed) must be
+	// recovered, not skipped.
+	listed, err := a.listAllContainers(ctx)
 	if err != nil {
 		a.Logger.Warn("cannot list containers, assuming none running", "err", err)
-		running = nil
+		listed = nil
 	}
 	runningSet := make(map[string]bool)
-	for _, c := range running {
-		runningSet[c.Name] = true
+	for _, c := range listed {
+		if c.State == "running" {
+			runningSet[c.Name] = true
+		}
 	}
 
 	var recovered, skipped, already int