Add mcp-agent recover command
Recreates containers from the agent's SQLite registry when podman's database is lost (UID change, podman reset, reboot). For each service with desired_state="running" that doesn't have a running container: - Removes any stale container with the same name - Recreates the container from the stored spec (image, ports, volumes, cmd, network, user, restart policy) - Allocates route ports and injects PORT env vars - Re-registers mc-proxy routes - Provisions TLS certs for L7 routes Does NOT pull images — assumes local cache. Root cause action item from the 2026-04-03 UID incident. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
68
cmd/mcp-agent/recover.go
Normal file
68
cmd/mcp-agent/recover.go
Normal file
@@ -0,0 +1,68 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
|
||||
"git.wntrmute.dev/mc/mcp/internal/agent"
|
||||
"git.wntrmute.dev/mc/mcp/internal/config"
|
||||
"git.wntrmute.dev/mc/mcp/internal/registry"
|
||||
"git.wntrmute.dev/mc/mcp/internal/runtime"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
func recoverCmd() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "recover",
|
||||
Short: "Recreate containers from the agent registry",
|
||||
Long: `Recover recreates containers from the agent's SQLite registry for all
|
||||
services whose desired state is "running" but which don't have a running
|
||||
container in podman.
|
||||
|
||||
This is the recovery path after a podman database loss (e.g., after a
|
||||
UID change, podman reset, or reboot that cleared container state).
|
||||
|
||||
Images must be cached locally — recover does not pull from MCR.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
cfg, err := config.LoadAgentConfig(cfgPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("load config: %w", err)
|
||||
}
|
||||
|
||||
logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: slog.LevelInfo,
|
||||
}))
|
||||
|
||||
db, err := registry.Open(cfg.Database.Path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("open registry: %w", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
proxy, err := agent.NewProxyRouter(cfg.MCProxy.Socket, cfg.MCProxy.CertDir, logger)
|
||||
if err != nil {
|
||||
logger.Warn("mc-proxy not available, routes will not be registered", "err", err)
|
||||
}
|
||||
|
||||
certs, err := agent.NewCertProvisioner(cfg.Metacrypt, cfg.MCProxy.CertDir, logger)
|
||||
if err != nil {
|
||||
logger.Warn("cert provisioner not available", "err", err)
|
||||
}
|
||||
|
||||
a := &agent.Agent{
|
||||
Config: cfg,
|
||||
DB: db,
|
||||
Runtime: &runtime.Podman{},
|
||||
Logger: logger,
|
||||
PortAlloc: agent.NewPortAllocator(),
|
||||
Proxy: proxy,
|
||||
Certs: certs,
|
||||
Version: version,
|
||||
}
|
||||
|
||||
return a.Recover(context.Background())
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user