Files
mcp/internal/agent/status.go
Kyle Isom d56f224359 Add unikernel runtime: run services as Nanos VMs under QEMU/KVM
Implements the hypervisor design's Phase 1: a second runtime.Runtime
backend (QEMU) that runs each service component as a Nanos unikernel VM
instead of a podman container, selected per-component via a new
runtime = "unikernel" service-def field.

- internal/runtime/qemu.go: QEMURuntime. Pull extracts the ELF from the
  OCI image; Run does `ops build` + boots qemu-system-x86_64 with KVM,
  user-mode net port-forwards, QMP control socket and serial console log;
  Stop/Remove/Inspect/List/Logs map onto VM lifecycle + state dir.
- proto/registry/servicedef: add runtime, memory_mb, vcpus fields
  (registry migration 5).
- agent: holds both runtimes; runtimeFor() selects per component;
  listAllContainers() merges containers + VMs so drift/status see both.
  Unikernel runtime auto-enables on nodes with /dev/kvm + ops.

Validated end-to-end on straylight: a test service deploys via
`mcp deploy --direct`, boots as a Nanos unikernel, serves HTTP through
the agent port-forward, and reports running via `mcp status`/`mcp logs`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 00:54:49 -07:00

223 lines
5.9 KiB
Go

package agent
import (
"context"
"fmt"
"time"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/registry"
"git.wntrmute.dev/mc/mcp/internal/runtime"
"google.golang.org/protobuf/types/known/timestamppb"
)
// buildServiceInfo assembles the proto ServiceInfo for svc. Each registry
// component is copied field-for-field into a ComponentInfo; the observed state
// is whatever the registry record holds, with no runtime lookup performed here.
func buildServiceInfo(svc registry.Service, components []registry.Component) *mcpv1.ServiceInfo {
	out := &mcpv1.ServiceInfo{}
	out.Name = svc.Name
	out.Active = svc.Active
	out.Comment = svc.Comment

	for i := range components {
		comp := &components[i]
		entry := &mcpv1.ComponentInfo{
			Name:          comp.Name,
			Image:         comp.Image,
			DesiredState:  comp.DesiredState,
			ObservedState: comp.ObservedState,
			Version:       comp.Version,
		}
		out.Components = append(out.Components, entry)
	}

	return out
}
// ListServices reports every service recorded in the registry together with
// its components. Only the registry database is consulted; the container
// runtime is never queried, so observed states are the stored ones.
//
// An error from either registry lookup aborts the call and is returned wrapped.
func (a *Agent) ListServices(ctx context.Context, req *mcpv1.ListServicesRequest) (*mcpv1.ListServicesResponse, error) {
	a.Logger.Debug("ListServices called")

	svcs, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	var infos []*mcpv1.ServiceInfo
	for _, s := range svcs {
		comps, listErr := registry.ListComponents(a.DB, s.Name)
		if listErr != nil {
			return nil, fmt.Errorf("list components for %q: %w", s.Name, listErr)
		}
		infos = append(infos, buildServiceInfo(s, comps))
	}

	return &mcpv1.ListServicesResponse{Services: infos}, nil
}
// liveCheckServices lists what the runtime(s) report via listAllContainers and
// reconciles that against the registry. It is the shared implementation behind
// LiveCheck and GetServiceStatus.
//
// The result contains:
//   - one ServiceInfo per registry service, whose components carry the
//     runtime's state (and, when the runtime reports them, its version, image
//     and start time). A component with no runtime entry is reported with
//     observed state "removed".
//   - one inactive ServiceInfo per runtime entry that matched no registry
//     component, with desired state "ignore" so callers can skip it when
//     computing drift.
//
// Nothing is written back to the registry here. Any error from the runtime
// listing or the registry lookups is returned wrapped.
func (a *Agent) liveCheckServices(ctx context.Context) ([]*mcpv1.ServiceInfo, error) {
	containers, err := a.listAllContainers(ctx)
	if err != nil {
		return nil, fmt.Errorf("runtime list: %w", err)
	}

	// Index runtime entries by name so each component lookup is O(1).
	runtimeByName := make(map[string]runtime.ContainerInfo, len(containers))
	for _, c := range containers {
		runtimeByName[c.Name] = c
	}

	services, err := registry.ListServices(a.DB)
	if err != nil {
		return nil, fmt.Errorf("list services: %w", err)
	}

	// matched records which runtime entries belong to a registry component;
	// whatever is left over afterwards is reported as unmanaged.
	matched := make(map[string]bool, len(containers))
	knownServices := make(map[string]bool, len(services))

	var result []*mcpv1.ServiceInfo
	for _, svc := range services {
		knownServices[svc.Name] = true

		components, err := registry.ListComponents(a.DB, svc.Name)
		if err != nil {
			return nil, fmt.Errorf("list components for %q: %w", svc.Name, err)
		}

		info := &mcpv1.ServiceInfo{
			Name:    svc.Name,
			Active:  svc.Active,
			Comment: svc.Comment,
		}

		for _, comp := range components {
			containerName := ContainerNameFor(svc.Name, comp.Name)

			// Start from the registry's view; runtime values override it below.
			ci := &mcpv1.ComponentInfo{
				Name:         comp.Name,
				Image:        comp.Image,
				DesiredState: comp.DesiredState,
				Version:      comp.Version,
			}

			rc, ok := runtimeByName[containerName]
			if !ok {
				ci.ObservedState = "removed"
				info.Components = append(info.Components, ci)
				continue
			}

			ci.ObservedState = rc.State
			// Only override registry values when the runtime actually
			// reported something; empty strings keep the registry value.
			if rc.Version != "" {
				ci.Version = rc.Version
			}
			if rc.Image != "" {
				ci.Image = rc.Image
			}
			if !rc.Started.IsZero() {
				ci.Started = timestamppb.New(rc.Started)
			}
			matched[containerName] = true
			info.Components = append(info.Components, ci)
		}

		result = append(result, info)
	}

	// Report runtime entries that no registry component claimed.
	for _, c := range containers {
		if matched[c.Name] {
			continue
		}

		// NOTE(review): SplitContainerName presumably uses the known service
		// names to decide where the service/component boundary falls — confirm.
		svcName, compName := SplitContainerName(c.Name, knownServices)

		ci := &mcpv1.ComponentInfo{
			Name:          compName,
			Image:         c.Image,
			DesiredState:  "ignore",
			ObservedState: c.State,
			Version:       c.Version,
		}
		// Carry the start time for unmanaged entries too, matching what is
		// reported for registry-managed components.
		if !c.Started.IsZero() {
			ci.Started = timestamppb.New(c.Started)
		}

		result = append(result, &mcpv1.ServiceInfo{
			Name:       svcName,
			Active:     false,
			Components: []*mcpv1.ComponentInfo{ci},
		})
	}

	return result, nil
}
// LiveCheck runs the shared live check (runtime listing reconciled against the
// registry) and returns the resulting state for every service. A failure of
// the live check is returned wrapped.
func (a *Agent) LiveCheck(ctx context.Context, req *mcpv1.LiveCheckRequest) (*mcpv1.LiveCheckResponse, error) {
	a.Logger.Debug("LiveCheck called")

	live, err := a.liveCheckServices(ctx)
	if err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}

	resp := &mcpv1.LiveCheckResponse{}
	resp.Services = live
	return resp, nil
}
// GetServiceStatus runs a live check, derives drift from it, and attaches
// recent registry events.
//
// When the request names a service, only that service's entries are kept and
// the same name is passed to the event query as its service filter. Drift is
// reported for every component whose desired state differs from its observed
// state, except components whose desired state is "ignore". Events are queried
// with a cutoff of one hour before now and 50 as the final argument
// (presumably a row limit — confirm against registry.QueryEvents).
func (a *Agent) GetServiceStatus(ctx context.Context, req *mcpv1.GetServiceStatusRequest) (*mcpv1.GetServiceStatusResponse, error) {
	name := req.GetName()
	a.Logger.Debug("GetServiceStatus called", "service", name)

	all, err := a.liveCheckServices(ctx)
	if err != nil {
		return nil, fmt.Errorf("live check: %w", err)
	}

	// Narrow to the requested service, if one was given.
	services := all
	if name != "" {
		services = nil
		for _, candidate := range all {
			if candidate.Name == name {
				services = append(services, candidate)
			}
		}
	}

	var drift []*mcpv1.DriftInfo
	for _, svc := range services {
		for _, comp := range svc.Components {
			// Ignored components and components already in their desired
			// state contribute no drift.
			if comp.DesiredState == "ignore" || comp.DesiredState == comp.ObservedState {
				continue
			}
			drift = append(drift, &mcpv1.DriftInfo{
				Service:       svc.Name,
				Component:     comp.Name,
				DesiredState:  comp.DesiredState,
				ObservedState: comp.ObservedState,
			})
		}
	}

	cutoff := time.Now().Add(-1 * time.Hour)
	events, err := registry.QueryEvents(a.DB, name, "", cutoff, 50)
	if err != nil {
		return nil, fmt.Errorf("query events: %w", err)
	}

	var recent []*mcpv1.EventInfo
	for _, ev := range events {
		item := &mcpv1.EventInfo{
			Service:   ev.Service,
			Component: ev.Component,
			PrevState: ev.PrevState,
			NewState:  ev.NewState,
			Timestamp: timestamppb.New(ev.Timestamp),
		}
		recent = append(recent, item)
	}

	resp := &mcpv1.GetServiceStatusResponse{
		Services:     services,
		Drift:        drift,
		RecentEvents: recent,
	}
	return resp, nil
}