Phase C: Automated TLS cert provisioning for L7 routes

Add CertProvisioner that requests TLS certificates from Metacrypt's CA
API during deploy. When a service has L7 routes, the agent checks for
an existing cert, re-issues if missing or within 30 days of expiry,
and writes chain+key to mc-proxy's cert directory before registering
routes.

- Add MetacryptConfig to agent config (server_url, ca_cert, mount,
  issuer, token_path) with defaults and env overrides
- Add CertProvisioner (internal/agent/certs.go): REST client for
  Metacrypt IssueCert, atomic file writes, cert expiry checking
- Wire into Agent struct and deploy flow (before route registration)
- Add hasL7Routes/l7Hostnames helpers in deploy.go
- Fix pre-existing lint issues: unreachable code in portalloc.go,
  gofmt in servicedef.go, gosec suppressions, golangci v2 config
- Update vendored mc-proxy to fix protobuf init panic
- 10 new tests, make all passes with 0 issues

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-27 13:31:11 -07:00
parent 572d2fb196
commit c7e1232f98
13 changed files with 832 additions and 26 deletions

View File

@@ -33,6 +33,7 @@ type Agent struct {
Logger *slog.Logger
PortAlloc *PortAllocator
Proxy *ProxyRouter
Certs *CertProvisioner
}
// Run starts the agent: opens the database, sets up the gRPC server with
@@ -57,6 +58,11 @@ func Run(cfg *config.AgentConfig) error {
return fmt.Errorf("connect to mc-proxy: %w", err)
}
certs, err := NewCertProvisioner(cfg.Metacrypt, cfg.MCProxy.CertDir, logger)
if err != nil {
return fmt.Errorf("create cert provisioner: %w", err)
}
a := &Agent{
Config: cfg,
DB: db,
@@ -65,6 +71,7 @@ func Run(cfg *config.AgentConfig) error {
Logger: logger,
PortAlloc: NewPortAllocator(),
Proxy: proxy,
Certs: certs,
}
tlsCert, err := tls.LoadX509KeyPair(cfg.Server.TLSCert, cfg.Server.TLSKey)

244
internal/agent/certs.go Normal file
View File

@@ -0,0 +1,244 @@
package agent
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"encoding/pem"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"git.wntrmute.dev/mc/mcp/internal/auth"
"git.wntrmute.dev/mc/mcp/internal/config"
)
// renewWindow is how far before expiry a cert is considered stale and
// should be re-issued.
const renewWindow = 30 * 24 * time.Hour // 30 days
// CertProvisioner requests TLS certificates from Metacrypt's CA API
// and writes them to the mc-proxy cert directory. It is nil-safe: all
// methods are no-ops when the receiver is nil.
type CertProvisioner struct {
serverURL string
token string
mount string
issuer string
certDir string
httpClient *http.Client
logger *slog.Logger
}
// NewCertProvisioner creates a CertProvisioner. Returns (nil, nil) if
// cfg.ServerURL is empty (cert provisioning disabled).
func NewCertProvisioner(cfg config.MetacryptConfig, certDir string, logger *slog.Logger) (*CertProvisioner, error) {
if cfg.ServerURL == "" {
logger.Info("metacrypt not configured, cert provisioning disabled")
return nil, nil
}
token, err := auth.LoadToken(cfg.TokenPath)
if err != nil {
return nil, fmt.Errorf("load metacrypt token: %w", err)
}
httpClient, err := newTLSClient(cfg.CACert)
if err != nil {
return nil, fmt.Errorf("create metacrypt HTTP client: %w", err)
}
logger.Info("metacrypt cert provisioner enabled", "server", cfg.ServerURL, "mount", cfg.Mount, "issuer", cfg.Issuer)
return &CertProvisioner{
serverURL: strings.TrimRight(cfg.ServerURL, "/"),
token: token,
mount: cfg.Mount,
issuer: cfg.Issuer,
certDir: certDir,
httpClient: httpClient,
logger: logger,
}, nil
}
// EnsureCert checks whether a valid TLS certificate exists for the
// service. If the cert is missing or near expiry, it requests a new
// one from Metacrypt.
func (p *CertProvisioner) EnsureCert(ctx context.Context, serviceName string, hostnames []string) error {
if p == nil || len(hostnames) == 0 {
return nil
}
certPath := filepath.Join(p.certDir, serviceName+".pem")
if remaining, ok := certTimeRemaining(certPath); ok {
if remaining > renewWindow {
p.logger.Debug("cert valid, skipping provisioning",
"service", serviceName,
"expires_in", remaining.Round(time.Hour),
)
return nil
}
p.logger.Info("cert near expiry, re-issuing",
"service", serviceName,
"expires_in", remaining.Round(time.Hour),
)
}
return p.issueCert(ctx, serviceName, hostnames[0], hostnames)
}
// issueCert calls Metacrypt's CA API to issue a certificate and writes
// the chain and key to the cert directory.
func (p *CertProvisioner) issueCert(ctx context.Context, serviceName, commonName string, dnsNames []string) error {
p.logger.Info("provisioning TLS cert",
"service", serviceName,
"cn", commonName,
"sans", dnsNames,
)
reqBody := map[string]interface{}{
"mount": p.mount,
"operation": "issue",
"data": map[string]interface{}{
"issuer": p.issuer,
"common_name": commonName,
"dns_names": dnsNames,
"profile": "server",
"ttl": "2160h",
},
}
body, err := json.Marshal(reqBody)
if err != nil {
return fmt.Errorf("marshal issue request: %w", err)
}
url := p.serverURL + "/v1/engine/request"
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return fmt.Errorf("create issue request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+p.token)
resp, err := p.httpClient.Do(req)
if err != nil {
return fmt.Errorf("issue cert: %w", err)
}
defer func() { _ = resp.Body.Close() }()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("read issue response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("issue cert: metacrypt returned %d: %s", resp.StatusCode, string(respBody))
}
var result struct {
ChainPEM string `json:"chain_pem"`
KeyPEM string `json:"key_pem"`
Serial string `json:"serial"`
ExpiresAt string `json:"expires_at"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return fmt.Errorf("parse issue response: %w", err)
}
if result.ChainPEM == "" || result.KeyPEM == "" {
return fmt.Errorf("issue cert: response missing chain_pem or key_pem")
}
// Write cert and key atomically (temp file + rename).
certPath := filepath.Join(p.certDir, serviceName+".pem")
keyPath := filepath.Join(p.certDir, serviceName+".key")
if err := atomicWrite(certPath, []byte(result.ChainPEM), 0644); err != nil {
return fmt.Errorf("write cert: %w", err)
}
if err := atomicWrite(keyPath, []byte(result.KeyPEM), 0600); err != nil {
return fmt.Errorf("write key: %w", err)
}
p.logger.Info("cert provisioned",
"service", serviceName,
"serial", result.Serial,
"expires_at", result.ExpiresAt,
)
return nil
}
// certTimeRemaining returns the time until the leaf certificate at
// path expires. Returns (0, false) if the cert cannot be read or parsed.
func certTimeRemaining(path string) (time.Duration, bool) {
data, err := os.ReadFile(path) //nolint:gosec // path from trusted config
if err != nil {
return 0, false
}
block, _ := pem.Decode(data)
if block == nil {
return 0, false
}
cert, err := x509.ParseCertificate(block.Bytes)
if err != nil {
return 0, false
}
remaining := time.Until(cert.NotAfter)
if remaining <= 0 {
return 0, true // expired
}
return remaining, true
}
// atomicWrite writes data to a temporary file then renames it to path,
// ensuring readers never see a partial file.
func atomicWrite(path string, data []byte, perm os.FileMode) error {
tmp := path + ".tmp"
if err := os.WriteFile(tmp, data, perm); err != nil {
return fmt.Errorf("write %s: %w", tmp, err)
}
if err := os.Rename(tmp, path); err != nil {
_ = os.Remove(tmp)
return fmt.Errorf("rename %s -> %s: %w", tmp, path, err)
}
return nil
}
// newTLSClient creates an HTTP client with TLS 1.3 minimum. If
// caCertPath is non-empty, the CA certificate is loaded into the
// root CA pool.
func newTLSClient(caCertPath string) (*http.Client, error) {
tlsConfig := &tls.Config{
MinVersion: tls.VersionTLS13,
}
if caCertPath != "" {
caCert, err := os.ReadFile(caCertPath) //nolint:gosec // path from trusted config
if err != nil {
return nil, fmt.Errorf("read CA cert %q: %w", caCertPath, err)
}
pool := x509.NewCertPool()
if !pool.AppendCertsFromPEM(caCert) {
return nil, fmt.Errorf("parse CA cert %q: no valid certificates found", caCertPath)
}
tlsConfig.RootCAs = pool
}
return &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{
TLSClientConfig: tlsConfig,
},
}, nil
}

View File

@@ -0,0 +1,392 @@
package agent
import (
"context"
"crypto/ecdsa"
"crypto/elliptic"
"crypto/rand"
"crypto/x509"
"crypto/x509/pkix"
"encoding/json"
"encoding/pem"
"log/slog"
"math/big"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
"time"
"git.wntrmute.dev/mc/mcp/internal/config"
"git.wntrmute.dev/mc/mcp/internal/registry"
)
func TestNilCertProvisionerIsNoop(t *testing.T) {
var p *CertProvisioner
if err := p.EnsureCert(context.Background(), "svc", []string{"svc.example.com"}); err != nil {
t.Fatalf("EnsureCert on nil: %v", err)
}
}
func TestNewCertProvisionerDisabledWhenUnconfigured(t *testing.T) {
p, err := NewCertProvisioner(config.MetacryptConfig{}, "/tmp", slog.Default())
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
if p != nil {
t.Fatal("expected nil provisioner for empty config")
}
}
func TestEnsureCertSkipsValidCert(t *testing.T) {
certDir := t.TempDir()
certPath := filepath.Join(certDir, "svc.pem")
keyPath := filepath.Join(certDir, "svc.key")
// Generate a cert that expires in 90 days.
writeSelfSignedCert(t, certPath, keyPath, "svc.example.com", 90*24*time.Hour)
// Create a provisioner that would fail if it tried to issue.
p := &CertProvisioner{
serverURL: "https://will-fail-if-called:9999",
certDir: certDir,
logger: slog.Default(),
}
if err := p.EnsureCert(context.Background(), "svc", []string{"svc.example.com"}); err != nil {
t.Fatalf("EnsureCert: %v", err)
}
}
func TestEnsureCertReissuesExpiring(t *testing.T) {
certDir := t.TempDir()
certPath := filepath.Join(certDir, "svc.pem")
keyPath := filepath.Join(certDir, "svc.key")
// Generate a cert that expires in 10 days (within 30-day renewal window).
writeSelfSignedCert(t, certPath, keyPath, "svc.example.com", 10*24*time.Hour)
// Mock Metacrypt API.
newCert, newKey := generateCertPEM(t, "svc.example.com", 90*24*time.Hour)
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
resp := map[string]string{
"chain_pem": newCert,
"key_pem": newKey,
"serial": "abc123",
"expires_at": time.Now().Add(90 * 24 * time.Hour).Format(time.RFC3339),
}
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(resp)
}))
defer srv.Close()
p := &CertProvisioner{
serverURL: srv.URL,
token: "test-token",
mount: "pki",
issuer: "infra",
certDir: certDir,
httpClient: srv.Client(),
logger: slog.Default(),
}
if err := p.EnsureCert(context.Background(), "svc", []string{"svc.example.com"}); err != nil {
t.Fatalf("EnsureCert: %v", err)
}
// Verify new cert was written.
got, err := os.ReadFile(certPath)
if err != nil {
t.Fatalf("read cert: %v", err)
}
if string(got) != newCert {
t.Fatal("cert file was not updated with new cert")
}
}
func TestIssueCertWritesFiles(t *testing.T) {
certDir := t.TempDir()
// Mock Metacrypt API.
certPEM, keyPEM := generateCertPEM(t, "svc.example.com", 90*24*time.Hour)
var gotAuth string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
gotAuth = r.Header.Get("Authorization")
var req map[string]interface{}
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
http.Error(w, "bad request", http.StatusBadRequest)
return
}
// Verify request structure.
if req["mount"] != "pki" || req["operation"] != "issue" {
t.Errorf("unexpected request: %v", req)
}
resp := map[string]string{
"chain_pem": certPEM,
"key_pem": keyPEM,
"serial": "deadbeef",
"expires_at": time.Now().Add(90 * 24 * time.Hour).Format(time.RFC3339),
}
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(resp)
}))
defer srv.Close()
p := &CertProvisioner{
serverURL: srv.URL,
token: "my-service-token",
mount: "pki",
issuer: "infra",
certDir: certDir,
httpClient: srv.Client(),
logger: slog.Default(),
}
if err := p.EnsureCert(context.Background(), "svc", []string{"svc.example.com"}); err != nil {
t.Fatalf("EnsureCert: %v", err)
}
// Verify auth header.
if gotAuth != "Bearer my-service-token" {
t.Fatalf("auth header: got %q, want %q", gotAuth, "Bearer my-service-token")
}
// Verify cert file.
certData, err := os.ReadFile(filepath.Join(certDir, "svc.pem"))
if err != nil {
t.Fatalf("read cert: %v", err)
}
if string(certData) != certPEM {
t.Fatal("cert content mismatch")
}
// Verify key file.
keyData, err := os.ReadFile(filepath.Join(certDir, "svc.key"))
if err != nil {
t.Fatalf("read key: %v", err)
}
if string(keyData) != keyPEM {
t.Fatal("key content mismatch")
}
// Verify key file permissions.
info, err := os.Stat(filepath.Join(certDir, "svc.key"))
if err != nil {
t.Fatalf("stat key: %v", err)
}
if perm := info.Mode().Perm(); perm != 0600 {
t.Fatalf("key permissions: got %o, want 0600", perm)
}
}
func TestIssueCertAPIError(t *testing.T) {
certDir := t.TempDir()
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, `{"error":"sealed"}`, http.StatusServiceUnavailable)
}))
defer srv.Close()
p := &CertProvisioner{
serverURL: srv.URL,
token: "test-token",
mount: "pki",
issuer: "infra",
certDir: certDir,
httpClient: srv.Client(),
logger: slog.Default(),
}
err := p.EnsureCert(context.Background(), "svc", []string{"svc.example.com"})
if err == nil {
t.Fatal("expected error for sealed metacrypt")
}
}
func TestCertTimeRemaining(t *testing.T) {
t.Run("missing file", func(t *testing.T) {
if _, ok := certTimeRemaining("/nonexistent/cert.pem"); ok {
t.Fatal("expected false for missing file")
}
})
t.Run("valid cert", func(t *testing.T) {
certDir := t.TempDir()
path := filepath.Join(certDir, "test.pem")
writeSelfSignedCert(t, path, filepath.Join(certDir, "test.key"), "test.example.com", 90*24*time.Hour)
remaining, ok := certTimeRemaining(path)
if !ok {
t.Fatal("expected true for valid cert")
}
// Should be close to 90 days.
if remaining < 89*24*time.Hour || remaining > 91*24*time.Hour {
t.Fatalf("remaining: got %v, want ~90 days", remaining)
}
})
t.Run("expired cert", func(t *testing.T) {
certDir := t.TempDir()
path := filepath.Join(certDir, "expired.pem")
// Write a cert that's already expired (valid from -2h to -1h).
writeExpiredCert(t, path, filepath.Join(certDir, "expired.key"), "expired.example.com")
remaining, ok := certTimeRemaining(path)
if !ok {
t.Fatal("expected true for expired cert")
}
if remaining > 0 {
t.Fatalf("remaining: got %v, want <= 0", remaining)
}
})
}
func TestHasL7Routes(t *testing.T) {
tests := []struct {
name string
routes []registry.Route
want bool
}{
{"nil", nil, false},
{"empty", []registry.Route{}, false},
{"l4 only", []registry.Route{{Mode: "l4"}}, false},
{"l7 only", []registry.Route{{Mode: "l7"}}, true},
{"mixed", []registry.Route{{Mode: "l4"}, {Mode: "l7"}}, true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := hasL7Routes(tt.routes); got != tt.want {
t.Fatalf("hasL7Routes = %v, want %v", got, tt.want)
}
})
}
}
func TestL7Hostnames(t *testing.T) {
routes := []registry.Route{
{Mode: "l7", Hostname: ""},
{Mode: "l4", Hostname: "ignored.example.com"},
{Mode: "l7", Hostname: "custom.example.com"},
{Mode: "l7", Hostname: ""}, // duplicate default
}
got := l7Hostnames("myservice", routes)
want := []string{"myservice.svc.mcp.metacircular.net", "custom.example.com"}
if len(got) != len(want) {
t.Fatalf("got %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("got[%d] = %q, want %q", i, got[i], want[i])
}
}
}
func TestAtomicWrite(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "test.txt")
if err := atomicWrite(path, []byte("hello"), 0644); err != nil {
t.Fatalf("atomicWrite: %v", err)
}
data, err := os.ReadFile(path)
if err != nil {
t.Fatalf("read: %v", err)
}
if string(data) != "hello" {
t.Fatalf("got %q, want %q", string(data), "hello")
}
// Verify no .tmp file left behind.
if _, err := os.Stat(path + ".tmp"); !os.IsNotExist(err) {
t.Fatal("temp file should not exist after atomic write")
}
}
// --- test helpers ---
// writeSelfSignedCert generates a self-signed cert/key and writes them to disk.
func writeSelfSignedCert(t *testing.T, certPath, keyPath, hostname string, validity time.Duration) {
t.Helper()
certPEM, keyPEM := generateCertPEM(t, hostname, validity)
if err := os.WriteFile(certPath, []byte(certPEM), 0644); err != nil {
t.Fatalf("write cert: %v", err)
}
if err := os.WriteFile(keyPath, []byte(keyPEM), 0600); err != nil {
t.Fatalf("write key: %v", err)
}
}
// writeExpiredCert generates a cert that is already expired.
func writeExpiredCert(t *testing.T, certPath, keyPath, hostname string) {
t.Helper()
key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
tmpl := &x509.Certificate{
SerialNumber: big.NewInt(1),
Subject: pkix.Name{CommonName: hostname},
DNSNames: []string{hostname},
NotBefore: time.Now().Add(-2 * time.Hour),
NotAfter: time.Now().Add(-1 * time.Hour),
}
der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key)
if err != nil {
t.Fatalf("create cert: %v", err)
}
certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
keyDER, err := x509.MarshalECPrivateKey(key)
if err != nil {
t.Fatalf("marshal key: %v", err)
}
keyPEM := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})
if err := os.WriteFile(certPath, certPEM, 0644); err != nil {
t.Fatalf("write cert: %v", err)
}
if err := os.WriteFile(keyPath, keyPEM, 0600); err != nil {
t.Fatalf("write key: %v", err)
}
}
// generateCertPEM generates a self-signed cert and returns PEM strings.
func generateCertPEM(t *testing.T, hostname string, validity time.Duration) (certPEM, keyPEM string) {
t.Helper()
key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
if err != nil {
t.Fatalf("generate key: %v", err)
}
tmpl := &x509.Certificate{
SerialNumber: big.NewInt(1),
Subject: pkix.Name{CommonName: hostname},
DNSNames: []string{hostname},
NotBefore: time.Now().Add(-1 * time.Hour),
NotAfter: time.Now().Add(validity),
}
der, err := x509.CreateCertificate(rand.Reader, tmpl, tmpl, &key.PublicKey, key)
if err != nil {
t.Fatalf("create cert: %v", err)
}
certBlock := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})
keyDER, err := x509.MarshalECPrivateKey(key)
if err != nil {
t.Fatalf("marshal key: %v", err)
}
keyBlock := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: keyDER})
return string(certBlock), string(keyBlock)
}

View File

@@ -146,6 +146,14 @@ func (a *Agent) deployComponent(ctx context.Context, serviceName string, cs *mcp
}
}
// Provision TLS certs for L7 routes before registering with mc-proxy.
if a.Certs != nil && hasL7Routes(regRoutes) {
hostnames := l7Hostnames(serviceName, regRoutes)
if err := a.Certs.EnsureCert(ctx, serviceName, hostnames); err != nil {
a.Logger.Warn("failed to provision TLS cert", "service", serviceName, "err", err)
}
}
// Register routes with mc-proxy after the container is running.
if len(regRoutes) > 0 && a.Proxy != nil {
hostPorts, err := registry.GetRouteHostPorts(a.DB, serviceName, compName)
@@ -209,6 +217,37 @@ func ensureService(db *sql.DB, name string, active bool) error {
return registry.UpdateServiceActive(db, name, active)
}
// hasL7Routes reports whether any route uses L7 (TLS-terminating) mode.
func hasL7Routes(routes []registry.Route) bool {
for _, r := range routes {
if r.Mode == "l7" {
return true
}
}
return false
}
// l7Hostnames returns the unique hostnames from L7 routes, applying
// the default hostname convention when a route has no explicit hostname.
func l7Hostnames(serviceName string, routes []registry.Route) []string {
seen := make(map[string]bool)
var hostnames []string
for _, r := range routes {
if r.Mode != "l7" {
continue
}
h := r.Hostname
if h == "" {
h = serviceName + ".svc.mcp.metacircular.net"
}
if !seen[h] {
seen[h] = true
hostnames = append(hostnames, h)
}
}
return hostnames
}
// ensureComponent creates the component if it does not exist, or updates its
// spec if it does.
func ensureComponent(db *sql.DB, c *registry.Component) error {

View File

@@ -33,8 +33,8 @@ func (pa *PortAllocator) Allocate() (int, error) {
pa.mu.Lock()
defer pa.mu.Unlock()
for i := range maxRetries {
port := portRangeMin + rand.IntN(portRangeMax-portRangeMin)
for range maxRetries {
port := portRangeMin + rand.IntN(portRangeMax-portRangeMin) //nolint:gosec // port selection, not security
if pa.allocated[port] {
continue
}
@@ -45,7 +45,6 @@ func (pa *PortAllocator) Allocate() (int, error) {
pa.allocated[port] = true
return port, nil
_ = i
}
return 0, fmt.Errorf("failed to allocate port after %d attempts", maxRetries)