agent/recover: only treat running containers as up
Recover built its up-set from every listed container, so a stopped/exited container or a dead unikernel VM (whose state dir still exists, so it is still listed) counted as already running and was skipped — the exact reason dead VMs stayed in drift instead of being recovered. Filter the set to containers whose State == "running".

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -21,15 +21,20 @@ func (a *Agent) Recover(ctx context.Context) error {
|
|||||||
return fmt.Errorf("list services: %w", err)
|
return fmt.Errorf("list services: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the list of currently running containers from podman.
|
// Get the list of containers across runtimes. Only those actually in the
|
||||||
running, err := a.listAllContainers(ctx)
|
// "running" state count as up — a stopped/exited container or a dead
|
||||||
|
// unikernel VM (whose state dir still exists, so it is listed) must be
|
||||||
|
// recovered, not skipped.
|
||||||
|
listed, err := a.listAllContainers(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
a.Logger.Warn("cannot list containers, assuming none running", "err", err)
|
a.Logger.Warn("cannot list containers, assuming none running", "err", err)
|
||||||
running = nil
|
listed = nil
|
||||||
}
|
}
|
||||||
runningSet := make(map[string]bool)
|
runningSet := make(map[string]bool)
|
||||||
for _, c := range running {
|
for _, c := range listed {
|
||||||
runningSet[c.Name] = true
|
if c.State == "running" {
|
||||||
|
runningSet[c.Name] = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var recovered, skipped, already int
|
var recovered, skipped, already int
|
||||||
|
|||||||
Reference in New Issue
Block a user