Files
mcp/internal/agent/status.go
Kyle Isom 7383b370f0 Fix mcp ps showing registry version instead of runtime, error on unknown component
mcp ps now uses the actual container image and version from the runtime
instead of the registry, which could be stale after a failed deploy.

Deploy now returns an error when the component filter matches nothing
instead of silently succeeding with zero results.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 19:13:02 -07:00

221 lines
5.8 KiB
Go

package agent
import (
"context"
"fmt"
"time"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/registry"
"git.wntrmute.dev/mc/mcp/internal/runtime"
"google.golang.org/protobuf/types/known/timestamppb"
)
// buildServiceInfo assembles the proto ServiceInfo for svc from registry data
// alone. The service's name and active flag are copied over, and each entry
// in components becomes one ComponentInfo carrying the name, image, desired
// state, observed state and version exactly as the registry recorded them.
// When components is empty the Components field of the result is left nil.
func buildServiceInfo(svc registry.Service, components []registry.Component) *mcpv1.ServiceInfo {
	out := &mcpv1.ServiceInfo{Name: svc.Name, Active: svc.Active}
	for i := range components {
		comp := &components[i]
		entry := &mcpv1.ComponentInfo{
			Name:          comp.Name,
			Image:         comp.Image,
			Version:       comp.Version,
			DesiredState:  comp.DesiredState,
			ObservedState: comp.ObservedState,
		}
		out.Components = append(out.Components, entry)
	}
	return out
}
// ListServices reports every service in the registry together with its
// components. Only the registry is consulted; the container runtime is not
// queried, so the states returned are whatever the registry last recorded.
//
// ctx and req are accepted to satisfy the RPC signature and are not otherwise
// used. An error is returned if listing the services, or the components of
// any single service, fails; no partial response is produced in that case.
func (a *Agent) ListServices(ctx context.Context, req *mcpv1.ListServicesRequest) (*mcpv1.ListServicesResponse, error) {
	a.Logger.Debug("ListServices called")

	svcs, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	var infos []*mcpv1.ServiceInfo
	for _, s := range svcs {
		comps, listErr := registry.ListComponents(a.DB, s.Name)
		if listErr != nil {
			return nil, fmt.Errorf("list components for %q: %w", s.Name, listErr)
		}
		infos = append(infos, buildServiceInfo(s, comps))
	}

	return &mcpv1.ListServicesResponse{Services: infos}, nil
}
// liveCheckServices lists the containers reported by the runtime and merges
// them with the services and components recorded in the registry. It is the
// shared implementation behind LiveCheck and GetServiceStatus.
//
// For every registered component the result starts from the registry's name,
// image, desired state and version. If a container with the name produced by
// ContainerNameFor exists in the runtime, its state becomes the observed
// state, its non-empty version and image override the registry values (the
// registry can be stale, e.g. after a failed deploy), and its start time is
// attached when set. A component with no matching container is reported with
// observed state "removed".
//
// Every runtime container that matched no registered component is appended as
// its own inactive ServiceInfo with a single component whose desired state is
// "ignore"; its service and component names come from SplitContainerName.
//
// The registry is only read here via ListServices and ListComponents. An
// error is returned if the runtime listing or either registry query fails.
func (a *Agent) liveCheckServices(ctx context.Context) ([]*mcpv1.ServiceInfo, error) {
	containers, err := a.Runtime.List(ctx)
	if err != nil {
		return nil, fmt.Errorf("runtime list: %w", err)
	}

	// Index runtime containers by name for O(1) lookup per component.
	runtimeByName := make(map[string]runtime.ContainerInfo, len(containers))
	for _, c := range containers {
		runtimeByName[c.Name] = c
	}

	services, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	// matched records which runtime containers were claimed by a registered
	// component, so the leftovers can be reported as unmanaged below.
	matched := make(map[string]bool, len(containers))
	knownServices := make(map[string]bool, len(services))

	var result []*mcpv1.ServiceInfo
	for _, svc := range services {
		knownServices[svc.Name] = true

		components, err := registry.ListComponents(a.DB, svc.Name)
		if err != nil {
			return nil, fmt.Errorf("list components for %q: %w", svc.Name, err)
		}

		info := &mcpv1.ServiceInfo{
			Name:   svc.Name,
			Active: svc.Active,
		}
		for _, comp := range components {
			containerName := ContainerNameFor(svc.Name, comp.Name)
			ci := &mcpv1.ComponentInfo{
				Name:         comp.Name,
				Image:        comp.Image,
				DesiredState: comp.DesiredState,
				Version:      comp.Version,
			}

			rc, ok := runtimeByName[containerName]
			if !ok {
				ci.ObservedState = "removed"
				info.Components = append(info.Components, ci)
				continue
			}

			ci.ObservedState = rc.State
			// Prefer what is actually running over the registry's record,
			// but keep the registry value when the runtime reports nothing.
			if rc.Version != "" {
				ci.Version = rc.Version
			}
			if rc.Image != "" {
				ci.Image = rc.Image
			}
			if !rc.Started.IsZero() {
				ci.Started = timestamppb.New(rc.Started)
			}
			matched[containerName] = true
			info.Components = append(info.Components, ci)
		}
		result = append(result, info)
	}

	// Report containers the registry does not know about.
	for _, c := range containers {
		if matched[c.Name] {
			continue
		}
		svcName, compName := SplitContainerName(c.Name, knownServices)
		ci := &mcpv1.ComponentInfo{
			Name:          compName,
			Image:         c.Image,
			DesiredState:  "ignore",
			ObservedState: c.State,
			Version:       c.Version,
		}
		// Carry the start time for unmanaged containers too, matching what
		// is reported for registered components above.
		if !c.Started.IsZero() {
			ci.Started = timestamppb.New(c.Started)
		}
		result = append(result, &mcpv1.ServiceInfo{
			Name:       svcName,
			Active:     false,
			Components: []*mcpv1.ComponentInfo{ci},
		})
	}

	return result, nil
}
// LiveCheck returns the state of all services as produced by
// liveCheckServices, which combines the container runtime's view with the
// registry's. req is accepted to satisfy the RPC signature and carries no
// inputs used here. Any failure from the underlying check is wrapped and
// returned with a nil response.
func (a *Agent) LiveCheck(ctx context.Context, req *mcpv1.LiveCheckRequest) (*mcpv1.LiveCheckResponse, error) {
	a.Logger.Debug("LiveCheck called")

	infos, err := a.liveCheckServices(ctx)
	if err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}

	resp := &mcpv1.LiveCheckResponse{}
	resp.Services = infos
	return resp, nil
}
// GetServiceStatus runs a live check and reports, alongside the resulting
// service list, which components have drifted and which events were recorded
// recently.
//
// When the request names a service, only entries with exactly that name are
// kept, and the same name is passed as the service filter of the event query;
// an empty name means all services. A component counts as drifted when its
// desired state differs from its observed state, except that components whose
// desired state is "ignore" are never reported. Events are limited to the
// last hour and to at most 50 entries.
//
// An error is returned if the live check or the event query fails.
func (a *Agent) GetServiceStatus(ctx context.Context, req *mcpv1.GetServiceStatusRequest) (*mcpv1.GetServiceStatusResponse, error) {
	// Look-back window and row cap for the recent-events query.
	const (
		eventWindow = time.Hour
		eventLimit  = 50
	)

	name := req.GetName()
	a.Logger.Debug("GetServiceStatus called", "service", name)

	services, err := a.liveCheckServices(ctx)
	if err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}

	// Narrow to the requested service, if one was given.
	if name != "" {
		var only []*mcpv1.ServiceInfo
		for _, s := range services {
			if s.Name == name {
				only = append(only, s)
			}
		}
		services = only
	}

	var drift []*mcpv1.DriftInfo
	for _, s := range services {
		for _, c := range s.Components {
			// Ignored components never drift; matching states are healthy.
			if c.DesiredState == "ignore" || c.DesiredState == c.ObservedState {
				continue
			}
			drift = append(drift, &mcpv1.DriftInfo{
				Service:       s.Name,
				Component:     c.Name,
				DesiredState:  c.DesiredState,
				ObservedState: c.ObservedState,
			})
		}
	}

	cutoff := time.Now().Add(-eventWindow)
	events, err := registry.QueryEvents(a.DB, name, "", cutoff, eventLimit)
	if err != nil {
		return nil, fmt.Errorf("query events: %w", err)
	}

	var recent []*mcpv1.EventInfo
	for _, ev := range events {
		recent = append(recent, &mcpv1.EventInfo{
			Service:   ev.Service,
			Component: ev.Component,
			PrevState: ev.PrevState,
			NewState:  ev.NewState,
			Timestamp: timestamppb.New(ev.Timestamp),
		})
	}

	resp := &mcpv1.GetServiceStatusResponse{
		Services:     services,
		Drift:        drift,
		RecentEvents: recent,
	}
	return resp, nil
}