unikernel: bake volume config into images + per-service user-mode net
Enables migrating real services (config/cert dirs, stateless) to unikernels. Volume host dirs are copied into a per-VM staging tree mirroring guest paths; the ops config goes in the staging root with the top-level dirs in Dirs, so ops bakes them at the right absolute paths. (Staging is required — an absolute /srv MapDirs source makes ops descend into the agent's podman overlay storage and fail.) A component may set network = "user" to use QEMU user-mode NAT instead of the isolated bridge (Phase-1 networking for first migrations, before a gateway proxy). Verified: mcat (the MCIAS policy tester) deployed as a Nanos unikernel via 'mcp deploy', booting with its baked /srv/mcat config+certs, serving HTTPS verified against the platform CA, configured against MCIAS. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -204,6 +204,7 @@ func (q *QEMU) Pull(ctx context.Context, image string) error {
|
||||
// opsConfig is the configuration document handed to `ops build`: Run marshals
// it as indented JSON and passes the resulting ops.json path via -c. Fields
// tagged omitempty are left out of the JSON when they are empty.
type opsConfig struct {
|
||||
// Args is omitted from the JSON when empty. Presumably the argument list for
// the unikernel's program — TODO confirm against the code that fills it in.
Args []string `json:"Args,omitempty"`
|
||||
// Env is allocated by Run only when spec.Env is non-empty and is filled from
// the spec's entries.
Env map[string]string `json:"Env,omitempty"`
|
||||
// Dirs receives the distinct first path components of the volume guest
// paths, i.e. the top-level directories of the per-VM staging tree.
Dirs []string `json:"Dirs,omitempty"` // dirs (relative to the config file) baked into the image
|
||||
// RunConfig carries the VM run settings: the CPU count, plus IP address,
// netmask and gateway when the VM is attached to the bridge. It has no
// omitempty tag, so it is always emitted.
RunConfig opsRunConfig `json:"RunConfig"`
|
||||
}
|
||||
|
||||
@@ -263,9 +264,15 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
cpus = 1
|
||||
}
|
||||
|
||||
// A component can opt out of the isolated bridge with network = "user"
|
||||
// (e.g. a stateless service that needs NAT egress to its dependencies
|
||||
// before the gateway proxy exists). Otherwise unikernels use the bridge
|
||||
// when one is configured.
|
||||
useBridge := q.bridgeMode() && spec.Network != "user"
|
||||
|
||||
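// When this VM is attached to the isolated bridge (bridge mode is configured
// and the component has not opted into network = "user"), allocate a static
// IP on the bridge for it.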
|
||||
var vmIP string
|
||||
if q.bridgeMode() {
|
||||
if useBridge {
|
||||
ip, allocErr := q.allocateIP(spec.Name)
|
||||
if allocErr != nil {
|
||||
return fmt.Errorf("allocate VM IP: %w", allocErr)
|
||||
@@ -282,11 +289,52 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
CPUs: cpus,
|
||||
},
|
||||
}
|
||||
if q.bridgeMode() {
|
||||
if useBridge {
|
||||
cfg.RunConfig.IPAddress = vmIP
|
||||
cfg.RunConfig.NetMask = "255.255.255.0"
|
||||
cfg.RunConfig.Gateway = q.Gateway
|
||||
}
|
||||
// Bake each volume's host directory into the image (read-only). Unikernels
|
||||
// have no writable host mount yet, so this suits config/cert directories;
|
||||
// stateful services need a different storage story (9p/virtio-blk).
|
||||
//
|
||||
// Volume contents are copied into a per-VM staging tree that mirrors the
|
||||
// guest paths (e.g. /srv/mcat -> <bake>/srv/mcat). We then put the ops
|
||||
// config in the staging root and list the top-level dirs in `Dirs`: `ops`
|
||||
// resolves Dirs relative to the config file and bakes the tree at the
|
||||
// matching absolute path. (Staging is required: an absolute /srv source
|
||||
// would make `ops` descend into the agent's podman overlay storage and
|
||||
// fail.)
|
||||
bakeDir := ""
|
||||
if len(spec.Volumes) > 0 {
|
||||
bakeDir = filepath.Join(q.vmDir(spec.Name), "bake")
|
||||
_ = os.RemoveAll(bakeDir)
|
||||
topLevel := map[string]bool{}
|
||||
for _, v := range spec.Volumes {
|
||||
parts := strings.SplitN(v, ":", 2)
|
||||
host := parts[0]
|
||||
guest := host
|
||||
if len(parts) == 2 {
|
||||
guest = parts[1]
|
||||
}
|
||||
rel := strings.TrimPrefix(guest, "/")
|
||||
stage := filepath.Join(bakeDir, rel)
|
||||
if err := os.MkdirAll(filepath.Dir(stage), 0o750); err != nil {
|
||||
return fmt.Errorf("stage volume dir: %w", err)
|
||||
}
|
||||
if err := os.CopyFS(stage, os.DirFS(host)); err != nil {
|
||||
return fmt.Errorf("stage volume %q: %w", host, err)
|
||||
}
|
||||
if i := strings.Index(rel, "/"); i >= 0 {
|
||||
topLevel[rel[:i]] = true
|
||||
} else {
|
||||
topLevel[rel] = true
|
||||
}
|
||||
}
|
||||
for d := range topLevel {
|
||||
cfg.Dirs = append(cfg.Dirs, d)
|
||||
}
|
||||
}
|
||||
if len(spec.Env) > 0 {
|
||||
cfg.Env = map[string]string{}
|
||||
for _, e := range spec.Env {
|
||||
@@ -295,7 +343,13 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
// Place the ops config in the staging root when baking volumes, so its
|
||||
// relative Dirs resolve to the staged tree; otherwise keep it in the VM
|
||||
// state dir.
|
||||
cfgPath := filepath.Join(q.vmDir(spec.Name), "ops.json")
|
||||
if bakeDir != "" {
|
||||
cfgPath = filepath.Join(bakeDir, "ops.json")
|
||||
}
|
||||
cfgBytes, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshal ops config: %w", err)
|
||||
@@ -311,6 +365,9 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
}
|
||||
build := exec.CommandContext(ctx, q.opsPath(), "build", bin, "-c", cfgPath, "-i", spec.Name) //nolint:gosec
|
||||
build.Env = q.opsEnv()
|
||||
if bakeDir != "" {
|
||||
build.Dir = bakeDir
|
||||
}
|
||||
if out, err := build.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("ops build %q: %w: %s", spec.Name, err, out)
|
||||
}
|
||||
@@ -339,7 +396,7 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
// reaching only mc-proxy on the gateway. This makes mediation
|
||||
// mandatory rather than cooperative.
|
||||
var netDevice, netBackend string
|
||||
if q.bridgeMode() {
|
||||
if useBridge {
|
||||
tap, tapErr := q.createTAP(spec.Name)
|
||||
if tapErr != nil {
|
||||
return fmt.Errorf("create TAP: %w", tapErr)
|
||||
@@ -368,17 +425,6 @@ func (q *QEMU) Run(ctx context.Context, spec ContainerSpec) error {
|
||||
"-device", netDevice,
|
||||
"-netdev", netBackend,
|
||||
}
|
||||
// 9p passthrough for host /srv/<service> volumes (best-effort; Nanos must
|
||||
// support the 9p client for the guest to mount it).
|
||||
for i, v := range spec.Volumes {
|
||||
parts := strings.SplitN(v, ":", 2)
|
||||
host := parts[0]
|
||||
tag := fmt.Sprintf("srv%d", i)
|
||||
args = append(args,
|
||||
"-virtfs", fmt.Sprintf("local,path=%s,mount_tag=%s,security_model=none,id=%s", host, tag, tag),
|
||||
)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, q.qemuPath(), args...) //nolint:gosec
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("qemu launch %q: %w: %s", spec.Name, err, out)
|
||||
|
||||
Reference in New Issue
Block a user