Files
mcp/internal/agent/lifecycle.go
Kyle Isom d56f224359 Add unikernel runtime: run services as Nanos VMs under QEMU/KVM
Implements the hypervisor design's Phase 1: a second runtime.Runtime
backend (QEMU) that runs each service component as a Nanos unikernel VM
instead of a podman container, selected per-component via a new
runtime = "unikernel" service-def field.

- internal/runtime/qemu.go: QEMURuntime. Pull extracts the ELF from the
  OCI image; Run does `ops build` + boots qemu-system-x86_64 with KVM,
  user-mode net port-forwards, QMP control socket and serial console log;
  Stop/Remove/Inspect/List/Logs map onto VM lifecycle + state dir.
- proto/registry/servicedef: add runtime, memory_mb, vcpus fields
  (registry migration 5).
- agent: holds both runtimes; runtimeFor() selects per component;
  listAllContainers() merges containers + VMs so drift/status see both.
  Unikernel runtime auto-enables on nodes with /dev/kvm + ops.

Validated end-to-end on straylight: a test service deploys via
`mcp deploy --direct`, boots as a Nanos unikernel, serves HTTP through
the agent port-forward, and reports running via `mcp status`/`mcp logs`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 00:54:49 -07:00

211 lines
6.9 KiB
Go

package agent
import (
"context"
"database/sql"
"fmt"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/registry"
"git.wntrmute.dev/mc/mcp/internal/runtime"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// StopService stops all components of a service, or a single component if specified.
func (a *Agent) StopService(ctx context.Context, req *mcpv1.StopServiceRequest) (*mcpv1.StopServiceResponse, error) {
a.Logger.Info("StopService", "service", req.GetName(), "component", req.GetComponent())
if req.GetName() == "" {
return nil, status.Error(codes.InvalidArgument, "service name is required")
}
components, err := registry.ListComponents(a.DB, req.GetName())
if err != nil {
return nil, status.Errorf(codes.Internal, "list components: %v", err)
}
if target := req.GetComponent(); target != "" {
components, err = filterComponents(components, req.GetName(), target)
if err != nil {
return nil, err
}
}
var results []*mcpv1.ComponentResult
for _, c := range components {
containerName := ContainerNameFor(req.GetName(), c.Name)
r := &mcpv1.ComponentResult{Name: c.Name, Success: true}
// Remove routes from mc-proxy before stopping the container.
if len(c.Routes) > 0 && a.Proxy != nil {
if err := a.Proxy.RemoveRoutes(ctx, req.GetName(), c.Routes); err != nil {
a.Logger.Warn("failed to remove routes", "service", req.GetName(), "component", c.Name, "err", err)
}
}
// Remove DNS record when stopping the service.
if len(c.Routes) > 0 && a.DNS != nil {
if err := a.DNS.RemoveRecord(ctx, req.GetName()); err != nil {
a.Logger.Warn("failed to remove DNS record", "service", req.GetName(), "err", err)
}
}
if err := a.runtimeFor(c.Runtime).Stop(ctx, containerName); err != nil {
a.Logger.Info("stop container (ignored)", "container", containerName, "error", err)
}
if err := registry.UpdateComponentState(a.DB, req.GetName(), c.Name, "stopped", "stopped"); err != nil {
r.Success = false
r.Error = fmt.Sprintf("update state: %v", err)
}
results = append(results, r)
}
return &mcpv1.StopServiceResponse{Results: results}, nil
}
// StartService starts all components of a service, or a single component if specified.
// If a container already exists but is stopped, it is removed first so a fresh one can be created.
func (a *Agent) StartService(ctx context.Context, req *mcpv1.StartServiceRequest) (*mcpv1.StartServiceResponse, error) {
a.Logger.Info("StartService", "service", req.GetName(), "component", req.GetComponent())
if req.GetName() == "" {
return nil, status.Error(codes.InvalidArgument, "service name is required")
}
components, err := registry.ListComponents(a.DB, req.GetName())
if err != nil {
return nil, status.Errorf(codes.Internal, "list components: %v", err)
}
if target := req.GetComponent(); target != "" {
components, err = filterComponents(components, req.GetName(), target)
if err != nil {
return nil, err
}
}
var results []*mcpv1.ComponentResult
for _, c := range components {
r := startComponent(ctx, a, req.GetName(), &c)
results = append(results, r)
}
return &mcpv1.StartServiceResponse{Results: results}, nil
}
// RestartService restarts all components of a service, or a single component if specified,
// by stopping, removing, and re-creating each container. The desired_state is not changed.
func (a *Agent) RestartService(ctx context.Context, req *mcpv1.RestartServiceRequest) (*mcpv1.RestartServiceResponse, error) {
a.Logger.Info("RestartService", "service", req.GetName(), "component", req.GetComponent())
if req.GetName() == "" {
return nil, status.Error(codes.InvalidArgument, "service name is required")
}
components, err := registry.ListComponents(a.DB, req.GetName())
if err != nil {
return nil, status.Errorf(codes.Internal, "list components: %v", err)
}
if target := req.GetComponent(); target != "" {
components, err = filterComponents(components, req.GetName(), target)
if err != nil {
return nil, err
}
}
var results []*mcpv1.ComponentResult
for _, c := range components {
r := restartComponent(ctx, a, req.GetName(), &c)
results = append(results, r)
}
return &mcpv1.RestartServiceResponse{Results: results}, nil
}
// startComponent removes any existing container and runs a fresh one from
// the registry spec, then updates state to running.
func startComponent(ctx context.Context, a *Agent, service string, c *registry.Component) *mcpv1.ComponentResult {
containerName := ContainerNameFor(service, c.Name)
r := &mcpv1.ComponentResult{Name: c.Name, Success: true}
rt := a.runtimeFor(c.Runtime)
// Remove any pre-existing container; ignore errors for non-existent ones.
_ = rt.Stop(ctx, containerName)
_ = rt.Remove(ctx, containerName)
spec := componentToSpec(service, c)
if err := rt.Run(ctx, spec); err != nil {
r.Success = false
r.Error = fmt.Sprintf("run container: %v", err)
return r
}
if err := registry.UpdateComponentState(a.DB, service, c.Name, "running", "running"); err != nil {
r.Success = false
r.Error = fmt.Sprintf("update state: %v", err)
}
return r
}
// restartComponent stops, removes, and re-creates a container without
// changing the desired_state in the registry.
func restartComponent(ctx context.Context, a *Agent, service string, c *registry.Component) *mcpv1.ComponentResult {
containerName := ContainerNameFor(service, c.Name)
r := &mcpv1.ComponentResult{Name: c.Name, Success: true}
rt := a.runtimeFor(c.Runtime)
_ = rt.Stop(ctx, containerName)
_ = rt.Remove(ctx, containerName)
spec := componentToSpec(service, c)
if err := rt.Run(ctx, spec); err != nil {
r.Success = false
r.Error = fmt.Sprintf("run container: %v", err)
_ = registry.UpdateComponentState(a.DB, service, c.Name, "", "stopped")
return r
}
if err := registry.UpdateComponentState(a.DB, service, c.Name, "", "running"); err != nil {
r.Success = false
r.Error = fmt.Sprintf("update state: %v", err)
}
return r
}
// componentToSpec builds a runtime.ContainerSpec from a registry Component.
func componentToSpec(service string, c *registry.Component) runtime.ContainerSpec {
return runtime.ContainerSpec{
Name: ContainerNameFor(service, c.Name),
Image: c.Image,
Network: c.Network,
User: c.UserSpec,
Restart: c.Restart,
Ports: c.Ports,
Volumes: c.Volumes,
Cmd: c.Cmd,
MemoryMB: c.MemoryMB,
VCPUs: c.VCPUs,
}
}
// filterComponents returns only the component matching target, or an error if not found.
func filterComponents(components []registry.Component, service, target string) ([]registry.Component, error) {
for _, c := range components {
if c.Name == target {
return []registry.Component{c}, nil
}
}
return nil, status.Errorf(codes.NotFound, "component %q not found in service %q", target, service)
}
// componentExists checks whether a component already exists in the registry.
func componentExists(db *sql.DB, service, name string) bool {
_, err := registry.GetComponent(db, service, name)
return err == nil
}