Extends MCP to own the full build-push-deploy lifecycle. When deploying, the CLI checks whether each component's image tag exists in the registry and builds/pushes automatically if missing and build config is present.

- Add Build, Push, ImageExists to runtime.Runtime interface (podman impl)
- Add mcp build <service>[/<image>] command
- Add [build] section to CLI config (workspace path)
- Add path and [build.images] to service definitions
- Wire auto-build into mcp deploy before agent RPC
- Update ARCHITECTURE.md with runtime interface and deploy auto-build docs

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
285 lines
8.3 KiB
Go
285 lines
8.3 KiB
Go
package monitor
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"git.wntrmute.dev/kyle/mcp/internal/config"
|
|
"git.wntrmute.dev/kyle/mcp/internal/registry"
|
|
"git.wntrmute.dev/kyle/mcp/internal/runtime"
|
|
)
|
|
|
|
func openTestDB(t *testing.T) *sql.DB {
|
|
t.Helper()
|
|
db, err := registry.Open(filepath.Join(t.TempDir(), "test.db"))
|
|
if err != nil {
|
|
t.Fatalf("open db: %v", err)
|
|
}
|
|
t.Cleanup(func() { _ = db.Close() })
|
|
return db
|
|
}
|
|
|
|
// testLogger returns a text-format logger that writes to stderr and only
// emits records at error level or above, which keeps test output quiet.
func testLogger() *slog.Logger {
	opts := &slog.HandlerOptions{Level: slog.LevelError}
	return slog.New(slog.NewTextHandler(os.Stderr, opts))
}
|
|
|
|
func testMonitorConfig() config.MonitorConfig {
|
|
return config.MonitorConfig{
|
|
Interval: config.Duration{Duration: 1 * time.Second},
|
|
Cooldown: config.Duration{Duration: 1 * time.Minute},
|
|
FlapThreshold: 3,
|
|
FlapWindow: config.Duration{Duration: 10 * time.Minute},
|
|
Retention: config.Duration{Duration: 24 * time.Hour},
|
|
}
|
|
}
|
|
|
|
// fakeRuntime implements runtime.Runtime for testing.
|
|
type fakeRuntime struct {
|
|
containers []runtime.ContainerInfo
|
|
}
|
|
|
|
func (f *fakeRuntime) Pull(_ context.Context, _ string) error { return nil }
|
|
func (f *fakeRuntime) Run(_ context.Context, _ runtime.ContainerSpec) error { return nil }
|
|
func (f *fakeRuntime) Stop(_ context.Context, _ string) error { return nil }
|
|
func (f *fakeRuntime) Remove(_ context.Context, _ string) error { return nil }
|
|
func (f *fakeRuntime) Build(_ context.Context, _, _, _ string) error { return nil }
|
|
func (f *fakeRuntime) Push(_ context.Context, _ string) error { return nil }
|
|
|
|
func (f *fakeRuntime) ImageExists(_ context.Context, _ string) (bool, error) { return true, nil }
|
|
|
|
func (f *fakeRuntime) Inspect(_ context.Context, _ string) (runtime.ContainerInfo, error) {
|
|
return runtime.ContainerInfo{}, nil
|
|
}
|
|
|
|
func (f *fakeRuntime) List(_ context.Context) ([]runtime.ContainerInfo, error) {
|
|
return f.containers, nil
|
|
}
|
|
|
|
func TestAlerterDriftDetection(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
|
|
al := NewAlerter(cfg, "test-node", db, logger)
|
|
|
|
// Set up a service and component so CountEvents works.
|
|
if err := registry.CreateService(db, "metacrypt", true); err != nil {
|
|
t.Fatalf("create service: %v", err)
|
|
}
|
|
if err := registry.CreateComponent(db, ®istry.Component{
|
|
Name: "api", Service: "metacrypt", Image: "img:v1",
|
|
Restart: "unless-stopped", DesiredState: "running", ObservedState: "running",
|
|
}); err != nil {
|
|
t.Fatalf("create component: %v", err)
|
|
}
|
|
|
|
// Desired is "running" but observed is "exited" -- drift should fire.
|
|
al.Evaluate("metacrypt", "api", "running", "exited", "running")
|
|
|
|
// Verify alert was recorded (lastAlert should be set).
|
|
key := "metacrypt/api"
|
|
if _, ok := al.lastAlert[key]; !ok {
|
|
t.Fatal("expected drift alert to be recorded in lastAlert")
|
|
}
|
|
}
|
|
|
|
func TestAlerterIgnoreState(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
|
|
al := NewAlerter(cfg, "test-node", db, logger)
|
|
|
|
// Components with desired_state "ignore" should not trigger alerts.
|
|
al.Evaluate("metacrypt", "api", "ignore", "exited", "running")
|
|
|
|
key := "metacrypt/api"
|
|
if _, ok := al.lastAlert[key]; ok {
|
|
t.Fatal("expected no alert for ignored component")
|
|
}
|
|
}
|
|
|
|
func TestAlerterCooldownSuppression(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
cfg.Cooldown.Duration = 1 * time.Hour // long cooldown
|
|
|
|
al := NewAlerter(cfg, "test-node", db, logger)
|
|
|
|
if err := registry.CreateService(db, "metacrypt", true); err != nil {
|
|
t.Fatalf("create service: %v", err)
|
|
}
|
|
if err := registry.CreateComponent(db, ®istry.Component{
|
|
Name: "api", Service: "metacrypt", Image: "img:v1",
|
|
Restart: "unless-stopped", DesiredState: "running", ObservedState: "running",
|
|
}); err != nil {
|
|
t.Fatalf("create component: %v", err)
|
|
}
|
|
|
|
// First call should fire.
|
|
al.Evaluate("metacrypt", "api", "running", "exited", "running")
|
|
key := "metacrypt/api"
|
|
first, ok := al.lastAlert[key]
|
|
if !ok {
|
|
t.Fatal("expected first alert to fire")
|
|
}
|
|
|
|
// Second call should be suppressed (within cooldown).
|
|
al.Evaluate("metacrypt", "api", "running", "exited", "exited")
|
|
second := al.lastAlert[key]
|
|
if !second.Equal(first) {
|
|
t.Fatal("expected second alert to be suppressed by cooldown")
|
|
}
|
|
}
|
|
|
|
func TestAlerterFlapDetection(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
cfg.FlapThreshold = 2
|
|
cfg.FlapWindow.Duration = 10 * time.Minute
|
|
cfg.Cooldown.Duration = 0 // disable cooldown for this test
|
|
|
|
al := NewAlerter(cfg, "test-node", db, logger)
|
|
|
|
if err := registry.CreateService(db, "metacrypt", true); err != nil {
|
|
t.Fatalf("create service: %v", err)
|
|
}
|
|
if err := registry.CreateComponent(db, ®istry.Component{
|
|
Name: "api", Service: "metacrypt", Image: "img:v1",
|
|
Restart: "unless-stopped", DesiredState: "running", ObservedState: "unknown",
|
|
}); err != nil {
|
|
t.Fatalf("create component: %v", err)
|
|
}
|
|
|
|
// Insert enough events to exceed the flap threshold.
|
|
for i := 0; i < 3; i++ {
|
|
if err := registry.InsertEvent(db, "metacrypt", "api", "running", "exited"); err != nil {
|
|
t.Fatalf("insert event %d: %v", i, err)
|
|
}
|
|
}
|
|
|
|
// Evaluate with a state transition -- should detect flapping.
|
|
al.Evaluate("metacrypt", "api", "running", "exited", "running")
|
|
|
|
key := "metacrypt/api"
|
|
if _, ok := al.lastAlert[key]; !ok {
|
|
t.Fatal("expected flap alert to fire")
|
|
}
|
|
}
|
|
|
|
func TestMonitorTickStateChange(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
|
|
if err := registry.CreateService(db, "metacrypt", true); err != nil {
|
|
t.Fatalf("create service: %v", err)
|
|
}
|
|
if err := registry.CreateComponent(db, ®istry.Component{
|
|
Name: "api", Service: "metacrypt", Image: "img:v1",
|
|
Restart: "unless-stopped", DesiredState: "running", ObservedState: "unknown",
|
|
}); err != nil {
|
|
t.Fatalf("create component: %v", err)
|
|
}
|
|
|
|
rt := &fakeRuntime{
|
|
containers: []runtime.ContainerInfo{
|
|
{Name: "metacrypt-api", State: "running"},
|
|
},
|
|
}
|
|
|
|
m := New(db, rt, cfg, "test-node", logger)
|
|
|
|
// Run a single tick.
|
|
m.tick()
|
|
|
|
// Verify observed state was updated in the registry.
|
|
comp, err := registry.GetComponent(db, "metacrypt", "api")
|
|
if err != nil {
|
|
t.Fatalf("get component: %v", err)
|
|
}
|
|
if comp.ObservedState != "running" {
|
|
t.Fatalf("observed state: got %q, want %q", comp.ObservedState, "running")
|
|
}
|
|
|
|
// Verify an event was recorded (unknown -> running).
|
|
events, err := registry.QueryEvents(db, "metacrypt", "api", time.Now().Add(-1*time.Hour), 0)
|
|
if err != nil {
|
|
t.Fatalf("query events: %v", err)
|
|
}
|
|
if len(events) != 1 {
|
|
t.Fatalf("events: got %d, want 1", len(events))
|
|
}
|
|
if events[0].PrevState != "unknown" || events[0].NewState != "running" {
|
|
t.Fatalf("event: got %q->%q, want unknown->running", events[0].PrevState, events[0].NewState)
|
|
}
|
|
|
|
// Verify prevState map was updated.
|
|
if m.prevState["metacrypt/api"] != "running" {
|
|
t.Fatalf("prevState: got %q, want %q", m.prevState["metacrypt/api"], "running")
|
|
}
|
|
}
|
|
|
|
func TestMonitorStartStop(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
cfg.Interval.Duration = 50 * time.Millisecond
|
|
|
|
rt := &fakeRuntime{}
|
|
m := New(db, rt, cfg, "test-node", logger)
|
|
|
|
m.Start()
|
|
|
|
// Give it a moment to tick at least once.
|
|
time.Sleep(150 * time.Millisecond)
|
|
|
|
m.Stop()
|
|
|
|
// If Stop returns, the goroutine exited cleanly.
|
|
}
|
|
|
|
func TestMonitorNoChangeNoEvent(t *testing.T) {
|
|
db := openTestDB(t)
|
|
logger := testLogger()
|
|
cfg := testMonitorConfig()
|
|
|
|
if err := registry.CreateService(db, "metacrypt", true); err != nil {
|
|
t.Fatalf("create service: %v", err)
|
|
}
|
|
if err := registry.CreateComponent(db, ®istry.Component{
|
|
Name: "api", Service: "metacrypt", Image: "img:v1",
|
|
Restart: "unless-stopped", DesiredState: "running", ObservedState: "running",
|
|
}); err != nil {
|
|
t.Fatalf("create component: %v", err)
|
|
}
|
|
|
|
rt := &fakeRuntime{
|
|
containers: []runtime.ContainerInfo{
|
|
{Name: "metacrypt-api", State: "running"},
|
|
},
|
|
}
|
|
|
|
m := New(db, rt, cfg, "test-node", logger)
|
|
// Seed prevState so that observed == prev (no change).
|
|
m.prevState["metacrypt/api"] = "running"
|
|
|
|
m.tick()
|
|
|
|
// No events should be recorded when state is unchanged.
|
|
events, err := registry.QueryEvents(db, "metacrypt", "api", time.Now().Add(-1*time.Hour), 0)
|
|
if err != nil {
|
|
t.Fatalf("query events: %v", err)
|
|
}
|
|
if len(events) != 0 {
|
|
t.Fatalf("events: got %d, want 0 (no state change)", len(events))
|
|
}
|
|
}
|