Files
mcp/internal/agent/status.go
Kyle Isom 8f913ddf9b P2.2-P2.9, P3.2-P3.10, P4.1-P4.3: Complete Phases 2, 3, and 4
11 work units built in parallel and merged:

Agent handlers (Phase 2):
- P2.2 Deploy: pull images, stop/remove/run containers, update registry
- P2.3 Lifecycle: stop/start/restart with desired_state tracking
- P2.4 Status: list (registry), live check (runtime), get status (drift+events)
- P2.5 Sync: receive desired state, reconcile unmanaged containers
- P2.6 File transfer: push/pull scoped to /srv/<service>/, path validation
- P2.7 Adopt: match <service>-* containers, derive component names
- P2.8 Monitor: continuous watch loop, drift/flap alerting, event pruning
- P2.9 Snapshot: VACUUM INTO database backup command

CLI commands (Phase 3):
- P3.2 Login, P3.3 Deploy, P3.4 Stop/Start/Restart
- P3.5 List/Ps/Status, P3.6 Sync, P3.7 Adopt
- P3.8 Service show/edit/export, P3.9 Push/Pull, P3.10 Node list/add/remove

Deployment artifacts (Phase 4):
- Systemd units (agent service + backup timer)
- Example configs (CLI + agent)
- Install script (idempotent)

All packages: build, vet, lint (0 issues), test (all pass).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-26 12:21:18 -07:00

220 lines
5.9 KiB
Go

package agent
import (
"context"
"fmt"
"strings"
"time"
mcpv1 "git.wntrmute.dev/kyle/mcp/gen/mcp/v1"
"git.wntrmute.dev/kyle/mcp/internal/registry"
"git.wntrmute.dev/kyle/mcp/internal/runtime"
"google.golang.org/protobuf/types/known/timestamppb"
)
// buildServiceInfo assembles the proto ServiceInfo for a single registry
// service. The service's Name and Active flag are copied across, and every
// registry component is translated, in the order given, into a ComponentInfo
// carrying its name, image, desired state, observed state, and version.
//
// The Components field is left nil when components is empty.
func buildServiceInfo(svc registry.Service, components []registry.Component) *mcpv1.ServiceInfo {
	var converted []*mcpv1.ComponentInfo
	for i := range components {
		comp := &components[i]
		converted = append(converted, &mcpv1.ComponentInfo{
			Name:          comp.Name,
			Image:         comp.Image,
			DesiredState:  comp.DesiredState,
			ObservedState: comp.ObservedState,
			Version:       comp.Version,
		})
	}

	return &mcpv1.ServiceInfo{
		Name:       svc.Name,
		Active:     svc.Active,
		Components: converted,
	}
}
// ListServices reports every service recorded in the registry together with
// its components. Only the registry database is consulted; the container
// runtime is not queried, so the observed states in the response are the
// values held by the registry.
//
// An error from either registry lookup aborts the call and is returned
// wrapped; no partial response is produced.
func (a *Agent) ListServices(ctx context.Context, req *mcpv1.ListServicesRequest) (*mcpv1.ListServicesResponse, error) {
	a.Logger.Debug("ListServices called")

	svcs, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	var infos []*mcpv1.ServiceInfo
	for _, s := range svcs {
		comps, listErr := registry.ListComponents(a.DB, s.Name)
		if listErr != nil {
			return nil, fmt.Errorf("list components for %q: %w", s.Name, listErr)
		}
		infos = append(infos, buildServiceInfo(s, comps))
	}

	return &mcpv1.ListServicesResponse{Services: infos}, nil
}
// liveCheckServices combines the container runtime's view with the registry's
// view and returns one ServiceInfo per registry service, followed by one
// ServiceInfo per runtime container that no registry component accounts for.
// It is the shared implementation behind LiveCheck and GetServiceStatus.
//
// For registry components the expected container name is
// "<service>-<component>". When the runtime reports a container with that
// name, its state becomes the component's observed state; otherwise the
// observed state is "removed".
//
// Containers left unclaimed are reported as inactive services whose single
// component has desired state "ignore"; the service and component names are
// derived from the container name via splitContainerName.
//
// The result is built in memory; this function itself issues only list calls
// against the runtime and the registry.
func (a *Agent) liveCheckServices(ctx context.Context) ([]*mcpv1.ServiceInfo, error) {
	running, err := a.Runtime.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("runtime list: %w", err)
	}

	// Index the runtime containers by name so each component lookup is a
	// single map access. A later duplicate name overwrites an earlier one.
	byName := make(map[string]runtime.ContainerInfo, len(running))
	for i := range running {
		byName[running[i].Name] = running[i]
	}

	// claimed records container names that belong to a registry component.
	claimed := make(map[string]struct{})

	svcs, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	var out []*mcpv1.ServiceInfo
	for _, s := range svcs {
		comps, listErr := registry.ListComponents(a.DB, s.Name)
		if listErr != nil {
			return nil, fmt.Errorf("list components for %q: %w", s.Name, listErr)
		}

		entry := &mcpv1.ServiceInfo{
			Name:   s.Name,
			Active: s.Active,
		}
		for _, comp := range comps {
			key := s.Name + "-" + comp.Name

			// Default to "removed"; override when the runtime knows the container.
			observed := "removed"
			if live, found := byName[key]; found {
				observed = live.State
				claimed[key] = struct{}{}
			}

			entry.Components = append(entry.Components, &mcpv1.ComponentInfo{
				Name:          comp.Name,
				Image:         comp.Image,
				DesiredState:  comp.DesiredState,
				ObservedState: observed,
				Version:       comp.Version,
			})
		}
		out = append(out, entry)
	}

	// Anything the runtime reported that the registry did not claim is
	// surfaced as its own inactive, ignored service entry.
	for _, c := range running {
		if _, ok := claimed[c.Name]; ok {
			continue
		}
		svcName, compName := splitContainerName(c.Name)
		unmanaged := &mcpv1.ComponentInfo{
			Name:          compName,
			Image:         c.Image,
			DesiredState:  "ignore",
			ObservedState: c.State,
			Version:       c.Version,
		}
		out = append(out, &mcpv1.ServiceInfo{
			Name:       svcName,
			Active:     false,
			Components: []*mcpv1.ComponentInfo{unmanaged},
		})
	}

	return out, nil
}
// LiveCheck runs liveCheckServices, which compares the container runtime's
// current containers against the registry, and returns the resulting state
// for all services. A failure is returned wrapped with "live check".
func (a *Agent) LiveCheck(ctx context.Context, req *mcpv1.LiveCheckRequest) (*mcpv1.LiveCheckResponse, error) {
	a.Logger.Debug("LiveCheck called")

	resp := &mcpv1.LiveCheckResponse{}
	var err error
	if resp.Services, err = a.liveCheckServices(ctx); err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}
	return resp, nil
}
// GetServiceStatus runs a live check, reports components whose desired and
// observed states disagree, and attaches events from the past hour.
//
// When the request names a service, the service list (and therefore the drift
// list) is restricted to entries with exactly that name, and the same name is
// passed to the event query as its service filter. An empty name means no
// filtering on the service list.
//
// Components whose desired state is "ignore" are never reported as drift.
func (a *Agent) GetServiceStatus(ctx context.Context, req *mcpv1.GetServiceStatusRequest) (*mcpv1.GetServiceStatusResponse, error) {
	name := req.GetName()
	a.Logger.Debug("GetServiceStatus called", "service", name)

	infos, err := a.liveCheckServices(ctx)
	if err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}

	// Narrow to the requested service, if one was given.
	if name != "" {
		var only []*mcpv1.ServiceInfo
		for _, info := range infos {
			if info.Name != name {
				continue
			}
			only = append(only, info)
		}
		infos = only
	}

	// Collect every non-ignored component whose states differ.
	var drifted []*mcpv1.DriftInfo
	for _, info := range infos {
		for _, c := range info.Components {
			if c.DesiredState == "ignore" || c.DesiredState == c.ObservedState {
				continue
			}
			drifted = append(drifted, &mcpv1.DriftInfo{
				Service:       info.Name,
				Component:     c.Name,
				DesiredState:  c.DesiredState,
				ObservedState: c.ObservedState,
			})
		}
	}

	// Events newer than one hour ago. The empty string and 50 are presumably
	// the component filter and the result limit; confirm against
	// registry.QueryEvents.
	cutoff := time.Now().Add(-time.Hour)
	rows, err := registry.QueryEvents(a.DB, name, "", cutoff, 50)
	if err != nil {
		return nil, fmt.Errorf("query events: %w", err)
	}

	var recent []*mcpv1.EventInfo
	for _, row := range rows {
		recent = append(recent, &mcpv1.EventInfo{
			Service:   row.Service,
			Component: row.Component,
			PrevState: row.PrevState,
			NewState:  row.NewState,
			Timestamp: timestamppb.New(row.Timestamp),
		})
	}

	resp := &mcpv1.GetServiceStatusResponse{
		Services:     infos,
		Drift:        drifted,
		RecentEvents: recent,
	}
	return resp, nil
}
// splitContainerName breaks a container name such as "metacrypt-api" into its
// service and component parts at the first hyphen: everything before it is the
// service, everything after it is the component (so "a-b-c" yields "a" and
// "b-c"). A name containing no hyphen is returned as both the service and the
// component.
func splitContainerName(name string) (service, component string) {
	before, after, found := strings.Cut(name, "-")
	if !found {
		return name, name
	}
	return before, after
}