Implement mcp purge command for registry cleanup

Add PurgeComponent RPC to the agent service that removes stale registry
entries for components that are both gone (observed state is removed,
unknown, or exited) and unwanted (not in any current service definition).
Refuses to purge components with running or stopped containers. When all
components of a service are purged, the service row is deleted too.
Supports --dry-run to preview without modifying the database.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-26 22:30:45 -07:00
parent 1afbf5e1f6
commit 1e58dcce27
8 changed files with 1001 additions and 36 deletions

155
internal/agent/purge.go Normal file
View File

@@ -0,0 +1,155 @@
package agent
import (
"context"
"fmt"
mcpv1 "git.wntrmute.dev/kyle/mcp/gen/mcp/v1"
"git.wntrmute.dev/kyle/mcp/internal/registry"
)
// PurgeComponent removes stale registry entries for components that are both
// gone (observed state is removed/unknown/exited) and unwanted (not in any
// current service definition). It never touches running containers.
func (a *Agent) PurgeComponent(ctx context.Context, req *mcpv1.PurgeRequest) (*mcpv1.PurgeResponse, error) {
a.Logger.Info("PurgeComponent",
"service", req.GetService(),
"component", req.GetComponent(),
"dry_run", req.GetDryRun(),
)
// Build a set of defined service/component pairs for quick lookup.
defined := make(map[string]bool, len(req.GetDefinedComponents()))
for _, dc := range req.GetDefinedComponents() {
defined[dc] = true
}
// Determine which services to examine.
var services []registry.Service
if req.GetService() != "" {
svc, err := registry.GetService(a.DB, req.GetService())
if err != nil {
return nil, fmt.Errorf("get service %q: %w", req.GetService(), err)
}
services = []registry.Service{*svc}
} else {
var err error
services, err = registry.ListServices(a.DB)
if err != nil {
return nil, fmt.Errorf("list services: %w", err)
}
}
var results []*mcpv1.PurgeResult
for _, svc := range services {
components, err := registry.ListComponents(a.DB, svc.Name)
if err != nil {
return nil, fmt.Errorf("list components for %q: %w", svc.Name, err)
}
// If a specific component was requested, filter to just that one.
if req.GetComponent() != "" {
var filtered []registry.Component
for _, c := range components {
if c.Name == req.GetComponent() {
filtered = append(filtered, c)
}
}
components = filtered
}
for _, comp := range components {
result := a.evaluatePurge(svc.Name, &comp, defined, req.GetDryRun())
results = append(results, result)
}
// If all components of this service were purged (not dry-run),
// check if the service should be cleaned up too.
if !req.GetDryRun() {
remaining, err := registry.ListComponents(a.DB, svc.Name)
if err != nil {
a.Logger.Warn("failed to check remaining components", "service", svc.Name, "err", err)
continue
}
if len(remaining) == 0 {
if err := registry.DeleteService(a.DB, svc.Name); err != nil {
a.Logger.Warn("failed to delete empty service", "service", svc.Name, "err", err)
} else {
a.Logger.Info("purged empty service", "service", svc.Name)
}
}
}
}
return &mcpv1.PurgeResponse{Results: results}, nil
}
// purgeableStates are observed states that indicate a component's container
// is gone and the registry entry can be safely removed.
var purgeableStates = map[string]bool{
"removed": true,
"unknown": true,
"exited": true,
}
// evaluatePurge checks whether a single component is eligible for purge and,
// if not in dry-run mode, deletes it.
func (a *Agent) evaluatePurge(service string, comp *registry.Component, defined map[string]bool, dryRun bool) *mcpv1.PurgeResult {
key := service + "/" + comp.Name
// Safety: refuse to purge components with a live container.
if !purgeableStates[comp.ObservedState] {
return &mcpv1.PurgeResult{
Service: service,
Component: comp.Name,
Purged: false,
Reason: fmt.Sprintf("observed=%s, container still exists", comp.ObservedState),
}
}
// Don't purge components that are still in service definitions.
if defined[key] {
return &mcpv1.PurgeResult{
Service: service,
Component: comp.Name,
Purged: false,
Reason: "still in service definitions",
}
}
reason := fmt.Sprintf("observed=%s, not in service definitions", comp.ObservedState)
if dryRun {
return &mcpv1.PurgeResult{
Service: service,
Component: comp.Name,
Purged: true,
Reason: reason,
}
}
// Delete events first (events table has no FK to components).
if err := registry.DeleteComponentEvents(a.DB, service, comp.Name); err != nil {
a.Logger.Warn("failed to delete events during purge", "service", service, "component", comp.Name, "err", err)
}
// Delete the component (CASCADE handles ports, volumes, cmd).
if err := registry.DeleteComponent(a.DB, service, comp.Name); err != nil {
return &mcpv1.PurgeResult{
Service: service,
Component: comp.Name,
Purged: false,
Reason: fmt.Sprintf("delete failed: %v", err),
}
}
a.Logger.Info("purged component", "service", service, "component", comp.Name, "reason", reason)
return &mcpv1.PurgeResult{
Service: service,
Component: comp.Name,
Purged: true,
Reason: reason,
}
}

View File

@@ -0,0 +1,405 @@
package agent
import (
"context"
"testing"
mcpv1 "git.wntrmute.dev/kyle/mcp/gen/mcp/v1"
"git.wntrmute.dev/kyle/mcp/internal/registry"
)
func TestPurgeComponentRemoved(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
// Set up a service with a stale component.
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "coredns",
Service: "mcns",
Image: "coredns:latest",
DesiredState: "running",
ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
// Insert an event for this component.
if err := registry.InsertEvent(a.DB, "mcns", "coredns", "running", "removed"); err != nil {
t.Fatalf("insert event: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
DefinedComponents: []string{"mcns/mcns"},
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 {
t.Fatalf("expected 1 result, got %d", len(resp.Results))
}
r := resp.Results[0]
if !r.Purged {
t.Fatalf("expected purged=true, got reason: %s", r.Reason)
}
if r.Service != "mcns" || r.Component != "coredns" {
t.Fatalf("unexpected result: %s/%s", r.Service, r.Component)
}
// Verify component was deleted.
_, err = registry.GetComponent(a.DB, "mcns", "coredns")
if err == nil {
t.Fatal("component should have been deleted")
}
// Service should also be deleted since it has no remaining components.
_, err = registry.GetService(a.DB, "mcns")
if err == nil {
t.Fatal("service should have been deleted (no remaining components)")
}
}
func TestPurgeRefusesRunning(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcr", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "api",
Service: "mcr",
Image: "mcr:latest",
DesiredState: "running",
ObservedState: "running",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
Service: "mcr",
Component: "api",
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 {
t.Fatalf("expected 1 result, got %d", len(resp.Results))
}
if resp.Results[0].Purged {
t.Fatal("should not purge a running component")
}
// Verify component still exists.
_, err = registry.GetComponent(a.DB, "mcr", "api")
if err != nil {
t.Fatalf("component should still exist: %v", err)
}
}
func TestPurgeRefusesStopped(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcr", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "api",
Service: "mcr",
Image: "mcr:latest",
DesiredState: "stopped",
ObservedState: "stopped",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
Service: "mcr",
Component: "api",
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if resp.Results[0].Purged {
t.Fatal("should not purge a stopped component")
}
}
func TestPurgeSkipsDefinedComponent(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "mcns",
Service: "mcns",
Image: "mcns:latest",
DesiredState: "running",
ObservedState: "exited",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
DefinedComponents: []string{"mcns/mcns"},
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 {
t.Fatalf("expected 1 result, got %d", len(resp.Results))
}
if resp.Results[0].Purged {
t.Fatal("should not purge a component that is still in service definitions")
}
if resp.Results[0].Reason != "still in service definitions" {
t.Fatalf("unexpected reason: %s", resp.Results[0].Reason)
}
}
func TestPurgeDryRun(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "coredns",
Service: "mcns",
Image: "coredns:latest",
DesiredState: "running",
ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
DryRun: true,
DefinedComponents: []string{"mcns/mcns"},
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 {
t.Fatalf("expected 1 result, got %d", len(resp.Results))
}
if !resp.Results[0].Purged {
t.Fatal("dry run should report purged=true for eligible components")
}
// Verify component was NOT deleted (dry run).
_, err = registry.GetComponent(a.DB, "mcns", "coredns")
if err != nil {
t.Fatalf("component should still exist after dry run: %v", err)
}
}
func TestPurgeServiceFilter(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
// Create two services.
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "coredns", Service: "mcns", Image: "coredns:latest",
DesiredState: "running", ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
if err := registry.CreateService(a.DB, "mcr", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "old", Service: "mcr", Image: "old:latest",
DesiredState: "running", ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
// Purge only mcns.
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{
Service: "mcns",
})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 {
t.Fatalf("expected 1 result, got %d", len(resp.Results))
}
if resp.Results[0].Service != "mcns" {
t.Fatalf("expected mcns, got %s", resp.Results[0].Service)
}
// mcr/old should still exist.
_, err = registry.GetComponent(a.DB, "mcr", "old")
if err != nil {
t.Fatalf("mcr/old should still exist: %v", err)
}
}
func TestPurgeServiceDeletedWhenEmpty(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "coredns", Service: "mcns", Image: "coredns:latest",
DesiredState: "running", ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "old-thing", Service: "mcns", Image: "old:latest",
DesiredState: "stopped", ObservedState: "unknown",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
// Both components should be purged.
if len(resp.Results) != 2 {
t.Fatalf("expected 2 results, got %d", len(resp.Results))
}
for _, r := range resp.Results {
if !r.Purged {
t.Fatalf("expected purged=true for %s/%s: %s", r.Service, r.Component, r.Reason)
}
}
// Service should be deleted.
_, err = registry.GetService(a.DB, "mcns")
if err == nil {
t.Fatal("service should have been deleted")
}
}
func TestPurgeServiceKeptWhenComponentsRemain(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "mcns", true); err != nil {
t.Fatalf("create service: %v", err)
}
// Stale component (will be purged).
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "coredns", Service: "mcns", Image: "coredns:latest",
DesiredState: "running", ObservedState: "removed",
}); err != nil {
t.Fatalf("create component: %v", err)
}
// Live component (will not be purged).
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "mcns", Service: "mcns", Image: "mcns:latest",
DesiredState: "running", ObservedState: "running",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 2 {
t.Fatalf("expected 2 results, got %d", len(resp.Results))
}
// coredns should be purged, mcns should not.
purged := 0
for _, r := range resp.Results {
if r.Purged {
purged++
if r.Component != "coredns" {
t.Fatalf("expected coredns to be purged, got %s", r.Component)
}
}
}
if purged != 1 {
t.Fatalf("expected 1 purged, got %d", purged)
}
// Service should still exist.
_, err = registry.GetService(a.DB, "mcns")
if err != nil {
t.Fatalf("service should still exist: %v", err)
}
}
func TestPurgeExitedState(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "test", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "old", Service: "test", Image: "old:latest",
DesiredState: "stopped", ObservedState: "exited",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 || !resp.Results[0].Purged {
t.Fatalf("exited component should be purgeable")
}
}
func TestPurgeUnknownState(t *testing.T) {
rt := &fakeRuntime{}
a := newTestAgent(t, rt)
ctx := context.Background()
if err := registry.CreateService(a.DB, "test", true); err != nil {
t.Fatalf("create service: %v", err)
}
if err := registry.CreateComponent(a.DB, &registry.Component{
Name: "ghost", Service: "test", Image: "ghost:latest",
DesiredState: "running", ObservedState: "unknown",
}); err != nil {
t.Fatalf("create component: %v", err)
}
resp, err := a.PurgeComponent(ctx, &mcpv1.PurgeRequest{})
if err != nil {
t.Fatalf("PurgeComponent: %v", err)
}
if len(resp.Results) != 1 || !resp.Results[0].Purged {
t.Fatalf("unknown component should be purgeable")
}
}