Add unikernel runtime: run services as Nanos VMs under QEMU/KVM

Implements the hypervisor design's Phase 1: a second runtime.Runtime
backend (QEMU) that runs each service component as a Nanos unikernel VM
instead of a podman container, selected per-component via a new
runtime = "unikernel" service-def field.

- internal/runtime/qemu.go: QEMURuntime. Pull extracts the ELF from the
  OCI image; Run does `ops build` + boots qemu-system-x86_64 with KVM,
  user-mode net port-forwards, QMP control socket and serial console log;
  Stop/Remove/Inspect/List/Logs map onto VM lifecycle + state dir.
- proto/registry/servicedef: add runtime, memory_mb, vcpus fields
  (registry migration 5).
- agent: holds both runtimes; runtimeFor() selects per component;
  listAllContainers() merges containers + VMs so drift/status see both.
  Unikernel runtime auto-enables on nodes with /dev/kvm + ops.

Validated end-to-end on straylight: a test service deploys via
`mcp deploy --direct`, boots as a Nanos unikernel, serves HTTP through
the agent port-forward, and reports running via `mcp status`/`mcp logs`.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Kyle Isom
2026-06-11 00:54:49 -07:00
parent 3b08caaa0a
commit d56f224359
30 changed files with 949 additions and 152 deletions

View File

@@ -22,7 +22,7 @@ func (a *Agent) Deploy(ctx context.Context, req *mcpv1.DeployRequest) (*mcpv1.De
serviceName := spec.GetName()
a.Logger.Info("deploying", "service", serviceName)
if err := ensureService(a.DB, serviceName, spec.GetActive()); err != nil {
if err := ensureService(a.DB, serviceName, spec.GetActive(), spec.GetComment()); err != nil {
return nil, fmt.Errorf("deploy: ensure service %q: %w", serviceName, err)
}
@@ -94,8 +94,20 @@ func (a *Agent) deployComponent(ctx context.Context, serviceName string, cs *mcp
Volumes: cs.GetVolumes(),
Cmd: cs.GetCmd(),
Routes: regRoutes,
Runtime: cs.GetRuntime(),
MemoryMB: int(cs.GetMemoryMb()),
VCPUs: int(cs.GetVcpus()),
}
// Select the runtime backend (container vs unikernel) for this component.
if cs.GetRuntime() == "unikernel" && a.Unikernel == nil {
return &mcpv1.ComponentResult{
Name: compName,
Error: "service requests unikernel runtime but this node has no KVM/ops support",
}
}
rt := a.runtimeFor(cs.GetRuntime())
if err := ensureComponent(a.DB, regComp); err != nil {
return &mcpv1.ComponentResult{
Name: compName,
@@ -103,27 +115,29 @@ func (a *Agent) deployComponent(ctx context.Context, serviceName string, cs *mcp
}
}
if err := a.Runtime.Pull(ctx, cs.GetImage()); err != nil {
if err := rt.Pull(ctx, cs.GetImage()); err != nil {
return &mcpv1.ComponentResult{
Name: compName,
Error: fmt.Sprintf("pull image: %v", err),
}
}
_ = a.Runtime.Stop(ctx, containerName) // may not exist yet
_ = a.Runtime.Remove(ctx, containerName) // may not exist yet
_ = rt.Stop(ctx, containerName) // may not exist yet
_ = rt.Remove(ctx, containerName) // may not exist yet
// Build the container spec. If the component has routes, use route-based
// port allocation and env injection. Otherwise, fall back to legacy ports.
runSpec := runtime.ContainerSpec{
Name: containerName,
Image: cs.GetImage(),
Network: cs.GetNetwork(),
User: cs.GetUser(),
Restart: cs.GetRestart(),
Volumes: cs.GetVolumes(),
Cmd: cs.GetCmd(),
Env: cs.GetEnv(),
Name: containerName,
Image: cs.GetImage(),
Network: cs.GetNetwork(),
User: cs.GetUser(),
Restart: cs.GetRestart(),
Volumes: cs.GetVolumes(),
Cmd: cs.GetCmd(),
Env: cs.GetEnv(),
MemoryMB: int(cs.GetMemoryMb()),
VCPUs: int(cs.GetVcpus()),
}
if len(regRoutes) > 0 && a.PortAlloc != nil {
@@ -142,7 +156,7 @@ func (a *Agent) deployComponent(ctx context.Context, serviceName string, cs *mcp
runSpec.Ports = cs.GetPorts()
}
if err := a.Runtime.Run(ctx, runSpec); err != nil {
if err := rt.Run(ctx, runSpec); err != nil {
_ = registry.UpdateComponentState(a.DB, serviceName, compName, "", "removed")
return &mcpv1.ComponentResult{
Name: compName,
@@ -219,16 +233,16 @@ func (a *Agent) allocateRoutePorts(service, component string, routes []registry.
}
// ensureService creates the service if it does not exist, or updates its
// active flag if it does.
func ensureService(db *sql.DB, name string, active bool) error {
// active flag and comment if it does.
func ensureService(db *sql.DB, name string, active bool, comment string) error {
_, err := registry.GetService(db, name)
if errors.Is(err, sql.ErrNoRows) {
return registry.CreateService(db, name, active)
return registry.CreateService(db, name, active, comment)
}
if err != nil {
return err
}
return registry.UpdateServiceActive(db, name, active)
return registry.UpdateServiceActive(db, name, active, comment)
}
// hasL7Routes reports whether any route uses L7 (TLS-terminating) mode.