Add unikernel runtime: run services as Nanos VMs under QEMU/KVM

Implements the hypervisor design's Phase 1: a second runtime.Runtime
backend (QEMU) that runs each service component as a Nanos unikernel VM
instead of a podman container, selected per-component via a new
runtime = "unikernel" service-def field.

- internal/runtime/qemu.go: QEMURuntime. Pull extracts the ELF from the
  OCI image; Run performs `ops build`, then boots qemu-system-x86_64 with
  KVM, user-mode networking port forwards, a QMP control socket, and a
  serial console log; Stop/Remove/Inspect/List/Logs map onto the VM
  lifecycle and state directory.
- proto/registry/servicedef: add runtime, memory_mb, vcpus fields
  (registry migration 5).
- agent: holds both runtimes; runtimeFor() selects one per component;
  listAllContainers() merges containers and VMs so drift and status see
  both. The unikernel runtime auto-enables on nodes that have both
  /dev/kvm and ops.

Validated end-to-end on straylight: a test service deploys via
`mcp deploy --direct`, boots as a Nanos unikernel, serves HTTP through
the agent port-forward, and reports running via `mcp status`/`mcp logs`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Kyle Isom
2026-06-11 00:54:49 -07:00
parent 3b08caaa0a
commit d56f224359
30 changed files with 949 additions and 152 deletions

View File

@@ -60,15 +60,20 @@ func (a *Agent) syncService(_ context.Context, spec *mcpv1.ServiceSpec) (*mcpv1.
existing, err := registry.GetService(a.DB, spec.GetName())
if err != nil {
// Service does not exist; create it.
if err := registry.CreateService(a.DB, spec.GetName(), spec.GetActive()); err != nil {
if err := registry.CreateService(a.DB, spec.GetName(), spec.GetActive(), spec.GetComment()); err != nil {
return nil, nil, status.Errorf(codes.Internal, "create service %q: %v", spec.GetName(), err)
}
changes = append(changes, "created service")
} else if existing.Active != spec.GetActive() {
if err := registry.UpdateServiceActive(a.DB, spec.GetName(), spec.GetActive()); err != nil {
} else if existing.Active != spec.GetActive() || existing.Comment != spec.GetComment() {
if err := registry.UpdateServiceActive(a.DB, spec.GetName(), spec.GetActive(), spec.GetComment()); err != nil {
return nil, nil, status.Errorf(codes.Internal, "update service %q: %v", spec.GetName(), err)
}
changes = append(changes, fmt.Sprintf("active: %v -> %v", existing.Active, spec.GetActive()))
if existing.Active != spec.GetActive() {
changes = append(changes, fmt.Sprintf("active: %v -> %v", existing.Active, spec.GetActive()))
}
if existing.Comment != spec.GetComment() {
changes = append(changes, fmt.Sprintf("comment: %q", spec.GetComment()))
}
}
// Create or update each component.
@@ -107,7 +112,7 @@ func (a *Agent) syncService(_ context.Context, spec *mcpv1.ServiceSpec) (*mcpv1.
// reconcileUntracked lists all containers from the runtime and adds any that
// are not already tracked in the registry with desired_state "ignore".
func (a *Agent) reconcileUntracked(ctx context.Context, known map[string]bool) error {
containers, err := a.Runtime.List(ctx)
containers, err := a.listAllContainers(ctx)
if err != nil {
return fmt.Errorf("list containers: %w", err)
}
@@ -127,7 +132,7 @@ func (a *Agent) reconcileUntracked(ctx context.Context, known map[string]bool) e
}
if _, err := registry.GetService(a.DB, service); err != nil {
if err := registry.CreateService(a.DB, service, true); err != nil {
if err := registry.CreateService(a.DB, service, true, ""); err != nil {
a.Logger.Info("reconcile: create service failed", "service", service, "error", err)
continue
}