diff --git a/internal/agent/agent.go b/internal/agent/agent.go index c016548..4331c09 100644 --- a/internal/agent/agent.go +++ b/internal/agent/agent.go @@ -78,7 +78,7 @@ func Run(cfg *config.AgentConfig, version string) error { uk = qemu } - mon := monitor.New(db, rt, cfg.Monitor, cfg.Agent.NodeName, logger) + mon := monitor.New(db, mergedLister{primary: rt, extra: uk, logger: logger}, cfg.Monitor, cfg.Agent.NodeName, logger) proxy, err := NewProxyRouter(cfg.MCProxy.Socket, cfg.MCProxy.CertDir, logger) if err != nil { diff --git a/internal/agent/names.go b/internal/agent/names.go index 33355ae..9d0287a 100644 --- a/internal/agent/names.go +++ b/internal/agent/names.go @@ -1,34 +1,14 @@ package agent -import "strings" +import "git.wntrmute.dev/mc/mcp/internal/naming" -// ContainerNameFor returns the expected container name for a service and -// component. For single-component services where the component name equals -// the service name, the container name is just the service name (e.g., -// "mc-proxy" not "mc-proxy-mc-proxy"). +// ContainerNameFor is a convenience alias for naming.ContainerNameFor (the +// canonical convention, shared with the monitor). func ContainerNameFor(service, component string) string { - if service == component { - return service - } - return service + "-" + component + return naming.ContainerNameFor(service, component) } -// SplitContainerName splits a container name into service and component parts. -// It checks known service names first to handle names like "mc-proxy" where a -// naive split on "-" would produce the wrong result. If no known service -// matches, it falls back to splitting on the first "-". +// SplitContainerName is a convenience alias for naming.SplitContainerName. func SplitContainerName(name string, knownServices map[string]bool) (service, component string) { - if knownServices[name] { - return name, name - } - for svc := range knownServices { - prefix := svc + "-" - if strings.HasPrefix(name, prefix) && len(name) > len(prefix) { - return svc, name[len(prefix):] - } - } - if i := strings.Index(name, "-"); i >= 0 { - return name[:i], name[i+1:] - } - return name, name + return naming.SplitContainerName(name, knownServices) } diff --git a/internal/agent/runtime.go b/internal/agent/runtime.go index 88a1515..1738235 100644 --- a/internal/agent/runtime.go +++ b/internal/agent/runtime.go @@ -2,6 +2,7 @@ package agent import ( "context" + "log/slog" "os" "os/exec" "path/filepath" @@ -54,21 +55,38 @@ func (a *Agent) runtimeFor(rt string) runtime.Runtime { return a.Runtime } -// listAllContainers returns the observed state across every configured -// runtime (containers + unikernel VMs) so reconciliation, status, and drift -// detection see the whole picture. -func (a *Agent) listAllContainers(ctx context.Context) ([]runtime.ContainerInfo, error) { - infos, err := a.Runtime.List(ctx) +// mergedLister lists observed containers across the container runtime and +// (optionally) the unikernel runtime, so reconciliation, status, drift +// detection, and the monitor see VMs and containers uniformly. It satisfies +// the small lister interface the monitor consumes. +type mergedLister struct { + primary runtime.Runtime + extra runtime.Runtime // unikernel runtime; may be nil + logger *slog.Logger +} + +// List returns containers from the primary runtime plus, when configured, +// unikernel VMs. A failure to list VMs is logged but not fatal — the +// container view still reconciles. +func (m mergedLister) List(ctx context.Context) ([]runtime.ContainerInfo, error) { + infos, err := m.primary.List(ctx) if err != nil { return nil, err } - if a.Unikernel != nil { - vms, vmErr := a.Unikernel.List(ctx) + if m.extra != nil { + vms, vmErr := m.extra.List(ctx) if vmErr == nil { infos = append(infos, vms...) - } else if a.Logger != nil { - a.Logger.Warn("list unikernel VMs failed", "err", vmErr) + } else if m.logger != nil { + m.logger.Warn("list unikernel VMs failed", "err", vmErr) } } return infos, nil } + +// listAllContainers returns the observed state across every configured +// runtime (containers + unikernel VMs) so reconciliation, status, and drift +// detection see the whole picture. +func (a *Agent) listAllContainers(ctx context.Context) ([]runtime.ContainerInfo, error) { + return mergedLister{primary: a.Runtime, extra: a.Unikernel, logger: a.Logger}.List(ctx) +} diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go index 1841513..af34986 100644 --- a/internal/monitor/monitor.go +++ b/internal/monitor/monitor.go @@ -8,15 +8,23 @@ import ( "time" "git.wntrmute.dev/mc/mcp/internal/config" + "git.wntrmute.dev/mc/mcp/internal/naming" "git.wntrmute.dev/mc/mcp/internal/registry" "git.wntrmute.dev/mc/mcp/internal/runtime" ) +// ContainerLister reports the observed containers on the node. A full +// runtime.Runtime satisfies it; the agent passes a lister that merges the +// container and unikernel runtimes so the monitor sees VMs too. +type ContainerLister interface { + List(ctx context.Context) ([]runtime.ContainerInfo, error) +} + // Monitor watches container states and compares them to the registry, // recording events and firing alerts on drift or flapping. type Monitor struct { db *sql.DB - runtime runtime.Runtime + lister ContainerLister cfg config.MonitorConfig logger *slog.Logger alerter *Alerter @@ -26,11 +34,12 @@ type Monitor struct { prevState map[string]string // key: "service/component", value: observed state } -// New creates a Monitor with the given dependencies. -func New(db *sql.DB, rt runtime.Runtime, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor { +// New creates a Monitor with the given dependencies. lister reports observed +// containers (and unikernel VMs, via a merged lister). +func New(db *sql.DB, lister ContainerLister, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor { return &Monitor{ db: db, - runtime: rt, + lister: lister, cfg: cfg, logger: logger, alerter: NewAlerter(cfg, nodeName, db, logger), @@ -82,7 +91,7 @@ func (m *Monitor) tick() { ctx := context.Background() // Get the current runtime state of all containers. - containers, err := m.runtime.List(ctx) + containers, err := m.lister.List(ctx) if err != nil { m.logger.Error("monitor: list containers", "error", err) return @@ -113,7 +122,7 @@ func (m *Monitor) tick() { for _, comp := range components { key := comp.Service + "/" + comp.Name seen[key] = struct{}{} - containerName := comp.Service + "-" + comp.Name + containerName := naming.ContainerNameFor(comp.Service, comp.Name) observed := "unknown" if state, ok := runtimeState[containerName]; ok { diff --git a/internal/monitor/monitor_test.go b/internal/monitor/monitor_test.go index 310220c..3a65379 100644 --- a/internal/monitor/monitor_test.go +++ b/internal/monitor/monitor_test.go @@ -227,6 +227,45 @@ func TestMonitorTickStateChange(t *testing.T) { } } +// TestMonitorTickCollapsedName guards the naming convention: when a +// component's name equals its service, the container/VM name collapses to just +// the service (e.g. "uktest", not "uktest-uktest"). The monitor must use +// naming.ContainerNameFor to look it up, or it would report a running +// component as "unknown" and raise false drift. +func TestMonitorTickCollapsedName(t *testing.T) { + db := openTestDB(t) + logger := testLogger() + cfg := testMonitorConfig() + + if err := registry.CreateService(db, "uktest", true, ""); err != nil { + t.Fatalf("create service: %v", err) + } + if err := registry.CreateComponent(db, ®istry.Component{ + Name: "uktest", Service: "uktest", Image: "img:v1", + Restart: "unless-stopped", DesiredState: "running", ObservedState: "unknown", + }); err != nil { + t.Fatalf("create component: %v", err) + } + + // The runtime reports the collapsed name, as podman/QEMU actually do. + rt := &fakeRuntime{ + containers: []runtime.ContainerInfo{ + {Name: "uktest", State: "running"}, + }, + } + + m := New(db, rt, cfg, "test-node", logger) + m.tick() + + comp, err := registry.GetComponent(db, "uktest", "uktest") + if err != nil { + t.Fatalf("get component: %v", err) + } + if comp.ObservedState != "running" { + t.Fatalf("observed state: got %q, want %q (collapsed name not resolved)", comp.ObservedState, "running") + } +} + func TestMonitorStartStop(t *testing.T) { db := openTestDB(t) logger := testLogger() diff --git a/internal/naming/naming.go b/internal/naming/naming.go new file mode 100644 index 0000000..754f225 --- /dev/null +++ b/internal/naming/naming.go @@ -0,0 +1,37 @@ +// Package naming centralizes the container/VM naming convention shared by the +// agent (which creates and reconciles containers) and the monitor (which looks +// them up by name). Keeping it in one place prevents the two from disagreeing. +package naming + +import "strings" + +// ContainerNameFor returns the expected container (or unikernel VM) name for a +// service and component. For single-component services where the component +// name equals the service name, the name is just the service name (e.g., +// "mc-proxy" not "mc-proxy-mc-proxy"). +func ContainerNameFor(service, component string) string { + if service == component { + return service + } + return service + "-" + component +} + +// SplitContainerName splits a container name into service and component parts. +// It checks known service names first to handle names like "mc-proxy" where a +// naive split on "-" would produce the wrong result. If no known service +// matches, it falls back to splitting on the first "-". +func SplitContainerName(name string, knownServices map[string]bool) (service, component string) { + if knownServices[name] { + return name, name + } + for svc := range knownServices { + prefix := svc + "-" + if strings.HasPrefix(name, prefix) && len(name) > len(prefix) { + return svc, name[len(prefix):] + } + } + if i := strings.Index(name, "-"); i >= 0 { + return name[:i], name[i+1:] + } + return name, name +}