monitor: see unikernel VMs + use canonical container naming

Two drift-reporting bugs:
1. The monitor listed only the podman runtime, so unikernel VMs always
   showed observed=unknown (false drift). It now takes a ContainerLister
   and the agent passes a merged lister (containers + VMs), mirroring
   listAllContainers.
2. The monitor computed the lookup name as service+"-"+component, which
   is wrong when component==service: the real name collapses to just
   the service (e.g. "uktest" or "mc-proxy"), not "uktest-uktest". It
   now uses the canonical naming.ContainerNameFor — extracted to a
   shared package so the agent and monitor can't disagree.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Kyle Isom
2026-06-11 12:48:31 -07:00
parent 84dd897bcd
commit 4a55972455
6 changed files with 125 additions and 42 deletions

View File

@@ -8,15 +8,23 @@ import (
"time"
"git.wntrmute.dev/mc/mcp/internal/config"
"git.wntrmute.dev/mc/mcp/internal/naming"
"git.wntrmute.dev/mc/mcp/internal/registry"
"git.wntrmute.dev/mc/mcp/internal/runtime"
)
// ContainerLister reports the observed containers on the node. A full
// runtime.Runtime satisfies it; the agent passes a lister that merges the
// container and unikernel runtimes so the monitor sees VMs too.
//
// It is declared here, next to the Monitor that consumes it, and is kept to
// the single method the monitor calls.
type ContainerLister interface {
// List returns the containers currently observed on the node, or an error
// if they could not be listed.
List(ctx context.Context) ([]runtime.ContainerInfo, error)
}
// Monitor watches container states and compares them to the registry,
// recording events and firing alerts on drift or flapping.
type Monitor struct {
db *sql.DB
runtime runtime.Runtime
lister ContainerLister
cfg config.MonitorConfig
logger *slog.Logger
alerter *Alerter
@@ -26,11 +34,12 @@ type Monitor struct {
prevState map[string]string // key: "service/component", value: observed state
}
// New creates a Monitor with the given dependencies.
func New(db *sql.DB, rt runtime.Runtime, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor {
// New creates a Monitor with the given dependencies. lister reports observed
// containers (and unikernel VMs, via a merged lister).
func New(db *sql.DB, lister ContainerLister, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor {
return &Monitor{
db: db,
runtime: rt,
lister: lister,
cfg: cfg,
logger: logger,
alerter: NewAlerter(cfg, nodeName, db, logger),
@@ -82,7 +91,7 @@ func (m *Monitor) tick() {
ctx := context.Background()
// Get the current runtime state of all containers.
containers, err := m.runtime.List(ctx)
containers, err := m.lister.List(ctx)
if err != nil {
m.logger.Error("monitor: list containers", "error", err)
return
@@ -113,7 +122,7 @@ func (m *Monitor) tick() {
for _, comp := range components {
key := comp.Service + "/" + comp.Name
seen[key] = struct{}{}
containerName := comp.Service + "-" + comp.Name
containerName := naming.ContainerNameFor(comp.Service, comp.Name)
observed := "unknown"
if state, ok := runtimeState[containerName]; ok {

View File

@@ -227,6 +227,45 @@ func TestMonitorTickStateChange(t *testing.T) {
}
}
// TestMonitorTickCollapsedName pins the lookup-name convention for a component
// whose name is identical to its service. In that case the container/VM name
// is just the service ("uktest"), never the doubled form ("uktest-uktest").
// If the monitor stopped resolving names through naming.ContainerNameFor, the
// running component would be recorded as "unknown" and false drift would be
// raised; this test fails in that situation.
func TestMonitorTickCollapsedName(t *testing.T) {
	// Service and component deliberately share one name.
	const name = "uktest"

	db := openTestDB(t)
	logger := testLogger()
	cfg := testMonitorConfig()

	// Register the service and its same-named component, desired running but
	// not yet observed.
	svcErr := registry.CreateService(db, name, true, "")
	if svcErr != nil {
		t.Fatalf("create service: %v", svcErr)
	}
	component := &registry.Component{
		Name:          name,
		Service:       name,
		Image:         "img:v1",
		Restart:       "unless-stopped",
		DesiredState:  "running",
		ObservedState: "unknown",
	}
	compErr := registry.CreateComponent(db, component)
	if compErr != nil {
		t.Fatalf("create component: %v", compErr)
	}

	// The fake runtime lists the workload under the collapsed name, which is
	// what podman/QEMU actually report.
	observed := []runtime.ContainerInfo{
		{Name: name, State: "running"},
	}
	lister := &fakeRuntime{containers: observed}

	// Run a single monitoring pass.
	New(db, lister, cfg, "test-node", logger).tick()

	// The registry must now reflect the state reported by the runtime.
	got, getErr := registry.GetComponent(db, name, name)
	if getErr != nil {
		t.Fatalf("get component: %v", getErr)
	}
	if got.ObservedState == "running" {
		return
	}
	t.Fatalf("observed state: got %q, want %q (collapsed name not resolved)", got.ObservedState, "running")
}
func TestMonitorStartStop(t *testing.T) {
db := openTestDB(t)
logger := testLogger()