monitor: see unikernel VMs + use canonical container naming
Two drift-reporting bugs:

1. The monitor listed only the podman runtime, so unikernel VMs always showed observed=unknown (false drift). It now takes a ContainerLister, and the agent passes a merged lister (containers + VMs), mirroring listAllContainers.

2. The monitor computed the lookup name as service+"-"+component, which is wrong when component==service (the canonical name collapses to just the service, e.g. "uktest"/"mc-proxy"). It now uses the canonical naming.ContainerNameFor — extracted to a shared package so the agent and monitor can't disagree.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -8,15 +8,23 @@ import (
|
||||
"time"
|
||||
|
||||
"git.wntrmute.dev/mc/mcp/internal/config"
|
||||
"git.wntrmute.dev/mc/mcp/internal/naming"
|
||||
"git.wntrmute.dev/mc/mcp/internal/registry"
|
||||
"git.wntrmute.dev/mc/mcp/internal/runtime"
|
||||
)
|
||||
|
||||
// ContainerLister reports the observed containers on the node. A full
|
||||
// runtime.Runtime satisfies it; the agent passes a lister that merges the
|
||||
// container and unikernel runtimes so the monitor sees VMs too.
|
||||
type ContainerLister interface {
|
||||
// List returns the observed containers, or an error if listing fails.
List(ctx context.Context) ([]runtime.ContainerInfo, error)
|
||||
}
|
||||
|
||||
// Monitor watches container states and compares them to the registry,
|
||||
// recording events and firing alerts on drift or flapping.
|
||||
type Monitor struct {
|
||||
db *sql.DB
|
||||
runtime runtime.Runtime
|
||||
lister ContainerLister
|
||||
cfg config.MonitorConfig
|
||||
logger *slog.Logger
|
||||
alerter *Alerter
|
||||
@@ -26,11 +34,12 @@ type Monitor struct {
|
||||
prevState map[string]string // key: "service/component", value: observed state
|
||||
}
|
||||
|
||||
// New creates a Monitor with the given dependencies.
|
||||
func New(db *sql.DB, rt runtime.Runtime, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor {
|
||||
// New creates a Monitor with the given dependencies. lister reports observed
|
||||
// containers (and unikernel VMs, via a merged lister).
|
||||
func New(db *sql.DB, lister ContainerLister, cfg config.MonitorConfig, nodeName string, logger *slog.Logger) *Monitor {
|
||||
return &Monitor{
|
||||
db: db,
|
||||
runtime: rt,
|
||||
lister: lister,
|
||||
cfg: cfg,
|
||||
logger: logger,
|
||||
alerter: NewAlerter(cfg, nodeName, db, logger),
|
||||
@@ -82,7 +91,7 @@ func (m *Monitor) tick() {
|
||||
ctx := context.Background()
|
||||
|
||||
// Get the current runtime state of all containers.
|
||||
containers, err := m.runtime.List(ctx)
|
||||
containers, err := m.lister.List(ctx)
|
||||
if err != nil {
|
||||
m.logger.Error("monitor: list containers", "error", err)
|
||||
return
|
||||
@@ -113,7 +122,7 @@ func (m *Monitor) tick() {
|
||||
for _, comp := range components {
|
||||
key := comp.Service + "/" + comp.Name
|
||||
seen[key] = struct{}{}
|
||||
containerName := comp.Service + "-" + comp.Name
|
||||
containerName := naming.ContainerNameFor(comp.Service, comp.Name)
|
||||
|
||||
observed := "unknown"
|
||||
if state, ok := runtimeState[containerName]; ok {
|
||||
|
||||
Reference in New Issue
Block a user