Files
mcp/internal/agent/edge_rpc.go
Kyle Isom d56f224359 Add unikernel runtime: run services as Nanos VMs under QEMU/KVM
Implements the hypervisor design's Phase 1: a second runtime.Runtime
backend (QEMU) that runs each service component as a Nanos unikernel VM
instead of a podman container, selected per-component via a new
runtime = "unikernel" service-def field.

- internal/runtime/qemu.go: QEMURuntime. Pull extracts the ELF from the
  OCI image; Run does `ops build` + boots qemu-system-x86_64 with KVM,
  user-mode net port-forwards, QMP control socket and serial console log;
  Stop/Remove/Inspect/List/Logs map onto VM lifecycle + state dir.
- proto/registry/servicedef: add runtime, memory_mb, vcpus fields
  (registry migration 5).
- agent: holds both runtimes; runtimeFor() selects per component;
  listAllContainers() merges containers + VMs so drift/status see both.
  Unikernel runtime auto-enables on nodes with /dev/kvm + ops.

Validated end-to-end on straylight: a test service deploys via
`mcp deploy --direct`, boots as a Nanos unikernel, serves HTTP through
the agent port-forward, and reports running via `mcp status`/`mcp logs`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-11 00:54:49 -07:00

197 lines
6.6 KiB
Go

package agent
import (
"context"
"crypto/x509"
"encoding/pem"
"fmt"
"net"
"os"
"time"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
mcproxy "git.wntrmute.dev/mc/mc-proxy/client/mcproxy"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/registry"
)
// SetupEdgeRoute provisions a TLS cert and registers an mc-proxy route for a
// public hostname. Called by the master on edge nodes.
func (a *Agent) SetupEdgeRoute(ctx context.Context, req *mcpv1.SetupEdgeRouteRequest) (*mcpv1.SetupEdgeRouteResponse, error) {
a.Logger.Info("SetupEdgeRoute", "hostname", req.GetHostname(),
"backend_hostname", req.GetBackendHostname(), "backend_port", req.GetBackendPort())
// Validate required fields.
if req.GetHostname() == "" {
return nil, status.Error(codes.InvalidArgument, "hostname is required")
}
if req.GetBackendHostname() == "" {
return nil, status.Error(codes.InvalidArgument, "backend_hostname is required")
}
if req.GetBackendPort() == 0 {
return nil, status.Error(codes.InvalidArgument, "backend_port is required")
}
if !req.GetBackendTls() {
return nil, status.Error(codes.InvalidArgument, "backend_tls must be true")
}
if a.Proxy == nil {
return nil, status.Error(codes.FailedPrecondition, "mc-proxy not configured")
}
// Resolve the backend hostname to a Tailnet IP.
ips, err := net.LookupHost(req.GetBackendHostname())
if err != nil || len(ips) == 0 {
return nil, status.Errorf(codes.InvalidArgument, "cannot resolve backend_hostname %q: %v", req.GetBackendHostname(), err)
}
backendIP := ips[0]
// Validate the resolved IP is a Tailnet address (100.64.0.0/10).
ip := net.ParseIP(backendIP)
if ip == nil {
return nil, status.Errorf(codes.InvalidArgument, "resolved IP %q is not valid", backendIP)
}
_, tailnet, _ := net.ParseCIDR("100.64.0.0/10")
if !tailnet.Contains(ip) {
return nil, status.Errorf(codes.InvalidArgument, "resolved IP %s is not a Tailnet address", backendIP)
}
backend := fmt.Sprintf("%s:%d", backendIP, req.GetBackendPort())
// Provision TLS cert for the public hostname if cert provisioner is available.
certPath := ""
keyPath := ""
if a.Certs != nil {
if err := a.Certs.EnsureCert(ctx, req.GetHostname(), []string{req.GetHostname()}); err != nil {
return nil, status.Errorf(codes.Internal, "provision cert for %s: %v", req.GetHostname(), err)
}
certPath = a.Proxy.CertPath(req.GetHostname())
keyPath = a.Proxy.KeyPath(req.GetHostname())
} else {
// No cert provisioner — check if certs already exist on disk.
certPath = a.Proxy.CertPath(req.GetHostname())
keyPath = a.Proxy.KeyPath(req.GetHostname())
if _, err := os.Stat(certPath); err != nil {
return nil, status.Errorf(codes.FailedPrecondition, "no cert provisioner and cert not found at %s", certPath)
}
}
// Register the L7 route in mc-proxy.
route := mcproxy.Route{
Hostname: req.GetHostname(),
Backend: backend,
Mode: "l7",
TLSCert: certPath,
TLSKey: keyPath,
BackendTLS: true,
}
if err := a.Proxy.AddRoute(ctx, ":443", route); err != nil {
return nil, status.Errorf(codes.Internal, "add mc-proxy route: %v", err)
}
// Persist the edge route in the registry.
if err := registry.CreateEdgeRoute(a.DB, req.GetHostname(), req.GetBackendHostname(), int(req.GetBackendPort()), certPath, keyPath); err != nil {
a.Logger.Warn("failed to persist edge route", "hostname", req.GetHostname(), "err", err)
}
a.Logger.Info("edge route established",
"hostname", req.GetHostname(), "backend", backend, "cert", certPath)
return &mcpv1.SetupEdgeRouteResponse{}, nil
}
// RemoveEdgeRoute removes an mc-proxy route and cleans up the TLS cert for a
// public hostname. Called by the master on edge nodes.
func (a *Agent) RemoveEdgeRoute(ctx context.Context, req *mcpv1.RemoveEdgeRouteRequest) (*mcpv1.RemoveEdgeRouteResponse, error) {
a.Logger.Info("RemoveEdgeRoute", "hostname", req.GetHostname())
if req.GetHostname() == "" {
return nil, status.Error(codes.InvalidArgument, "hostname is required")
}
if a.Proxy == nil {
return nil, status.Error(codes.FailedPrecondition, "mc-proxy not configured")
}
// Remove the mc-proxy route.
if err := a.Proxy.RemoveRoute(ctx, ":443", req.GetHostname()); err != nil {
a.Logger.Warn("remove mc-proxy route", "hostname", req.GetHostname(), "err", err)
// Continue — clean up cert and registry even if route removal fails.
}
// Remove the TLS cert.
if a.Certs != nil {
if err := a.Certs.RemoveCert(req.GetHostname()); err != nil {
a.Logger.Warn("remove cert", "hostname", req.GetHostname(), "err", err)
}
}
// Remove from registry.
if err := registry.DeleteEdgeRoute(a.DB, req.GetHostname()); err != nil {
a.Logger.Warn("delete edge route from registry", "hostname", req.GetHostname(), "err", err)
}
a.Logger.Info("edge route removed", "hostname", req.GetHostname())
return &mcpv1.RemoveEdgeRouteResponse{}, nil
}
// ListEdgeRoutes returns all edge routes managed by this agent.
func (a *Agent) ListEdgeRoutes(_ context.Context, _ *mcpv1.ListEdgeRoutesRequest) (*mcpv1.ListEdgeRoutesResponse, error) {
a.Logger.Debug("ListEdgeRoutes called")
routes, err := registry.ListEdgeRoutes(a.DB)
if err != nil {
return nil, status.Errorf(codes.Internal, "list edge routes: %v", err)
}
resp := &mcpv1.ListEdgeRoutesResponse{}
for _, r := range routes {
er := &mcpv1.EdgeRoute{
Hostname: r.Hostname,
BackendHostname: r.BackendHostname,
BackendPort: int32(r.BackendPort), //nolint:gosec // port is a small positive integer
}
// Read cert metadata if available.
if r.TLSCert != "" {
if certData, readErr := os.ReadFile(r.TLSCert); readErr == nil { //nolint:gosec // path from registry, not user input
if block, _ := pem.Decode(certData); block != nil {
if cert, parseErr := x509.ParseCertificate(block.Bytes); parseErr == nil {
er.CertSerial = cert.SerialNumber.String()
er.CertExpires = cert.NotAfter.UTC().Format(time.RFC3339)
}
}
}
}
resp.Routes = append(resp.Routes, er)
}
return resp, nil
}
// HealthCheck returns the agent's health status. Called by the master when
// heartbeats are missed.
func (a *Agent) HealthCheck(_ context.Context, _ *mcpv1.HealthCheckRequest) (*mcpv1.HealthCheckResponse, error) {
a.Logger.Debug("HealthCheck called")
st := "healthy"
containers := int32(0)
// Count running containers if the runtime is available.
if a.Runtime != nil {
if list, err := a.listAllContainers(context.Background()); err == nil {
containers = int32(len(list)) //nolint:gosec // container count is small
} else {
st = "degraded"
}
}
return &mcpv1.HealthCheckResponse{
Status: st,
Containers: containers,
}, nil
}