Files
mcp/internal/agent/certs.go
Kyle Isom c7e1232f98 Phase C: Automated TLS cert provisioning for L7 routes
Add CertProvisioner that requests TLS certificates from Metacrypt's CA
API during deploy. When a service has L7 routes, the agent checks for
an existing cert, re-issues if missing or within 30 days of expiry,
and writes chain+key to mc-proxy's cert directory before registering
routes.

- Add MetacryptConfig to agent config (server_url, ca_cert, mount,
  issuer, token_path) with defaults and env overrides
- Add CertProvisioner (internal/agent/certs.go): REST client for
  Metacrypt IssueCert, atomic file writes, cert expiry checking
- Wire into Agent struct and deploy flow (before route registration)
- Add hasL7Routes/l7Hostnames helpers in deploy.go
- Fix pre-existing lint issues: unreachable code in portalloc.go,
  gofmt in servicedef.go, gosec suppressions, golangci v2 config
- Update vendored mc-proxy to fix protobuf init panic
- 10 new tests, make all passes with 0 issues

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 13:31:11 -07:00

245 lines
6.6 KiB
Go

package agent
import (
"bytes"
"context"
"crypto/tls"
"crypto/x509"
"encoding/json"
"encoding/pem"
"fmt"
"io"
"log/slog"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"git.wntrmute.dev/mc/mcp/internal/auth"
"git.wntrmute.dev/mc/mcp/internal/config"
)
// renewWindow is how far before expiry a cert is considered stale and
// should be re-issued.
const renewWindow = 30 * 24 * time.Hour // 30 days
// CertProvisioner requests TLS certificates from Metacrypt's CA API
// and writes them to the mc-proxy cert directory. It is nil-safe: all
// methods are no-ops when the receiver is nil.
type CertProvisioner struct {
serverURL string
token string
mount string
issuer string
certDir string
httpClient *http.Client
logger *slog.Logger
}
// NewCertProvisioner creates a CertProvisioner. Returns (nil, nil) if
// cfg.ServerURL is empty (cert provisioning disabled).
func NewCertProvisioner(cfg config.MetacryptConfig, certDir string, logger *slog.Logger) (*CertProvisioner, error) {
if cfg.ServerURL == "" {
logger.Info("metacrypt not configured, cert provisioning disabled")
return nil, nil
}
token, err := auth.LoadToken(cfg.TokenPath)
if err != nil {
return nil, fmt.Errorf("load metacrypt token: %w", err)
}
httpClient, err := newTLSClient(cfg.CACert)
if err != nil {
return nil, fmt.Errorf("create metacrypt HTTP client: %w", err)
}
logger.Info("metacrypt cert provisioner enabled", "server", cfg.ServerURL, "mount", cfg.Mount, "issuer", cfg.Issuer)
return &CertProvisioner{
serverURL: strings.TrimRight(cfg.ServerURL, "/"),
token: token,
mount: cfg.Mount,
issuer: cfg.Issuer,
certDir: certDir,
httpClient: httpClient,
logger: logger,
}, nil
}
// EnsureCert checks whether a valid TLS certificate exists for the
// service. If the cert is missing or near expiry, it requests a new
// one from Metacrypt.
func (p *CertProvisioner) EnsureCert(ctx context.Context, serviceName string, hostnames []string) error {
if p == nil || len(hostnames) == 0 {
return nil
}
certPath := filepath.Join(p.certDir, serviceName+".pem")
if remaining, ok := certTimeRemaining(certPath); ok {
if remaining > renewWindow {
p.logger.Debug("cert valid, skipping provisioning",
"service", serviceName,
"expires_in", remaining.Round(time.Hour),
)
return nil
}
p.logger.Info("cert near expiry, re-issuing",
"service", serviceName,
"expires_in", remaining.Round(time.Hour),
)
}
return p.issueCert(ctx, serviceName, hostnames[0], hostnames)
}
// issueCert calls Metacrypt's CA API to issue a certificate and writes
// the chain and key to the cert directory.
func (p *CertProvisioner) issueCert(ctx context.Context, serviceName, commonName string, dnsNames []string) error {
p.logger.Info("provisioning TLS cert",
"service", serviceName,
"cn", commonName,
"sans", dnsNames,
)
reqBody := map[string]interface{}{
"mount": p.mount,
"operation": "issue",
"data": map[string]interface{}{
"issuer": p.issuer,
"common_name": commonName,
"dns_names": dnsNames,
"profile": "server",
"ttl": "2160h",
},
}
body, err := json.Marshal(reqBody)
if err != nil {
return fmt.Errorf("marshal issue request: %w", err)
}
url := p.serverURL + "/v1/engine/request"
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return fmt.Errorf("create issue request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Authorization", "Bearer "+p.token)
resp, err := p.httpClient.Do(req)
if err != nil {
return fmt.Errorf("issue cert: %w", err)
}
defer func() { _ = resp.Body.Close() }()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("read issue response: %w", err)
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("issue cert: metacrypt returned %d: %s", resp.StatusCode, string(respBody))
}
var result struct {
ChainPEM string `json:"chain_pem"`
KeyPEM string `json:"key_pem"`
Serial string `json:"serial"`
ExpiresAt string `json:"expires_at"`
}
if err := json.Unmarshal(respBody, &result); err != nil {
return fmt.Errorf("parse issue response: %w", err)
}
if result.ChainPEM == "" || result.KeyPEM == "" {
return fmt.Errorf("issue cert: response missing chain_pem or key_pem")
}
// Write cert and key atomically (temp file + rename).
certPath := filepath.Join(p.certDir, serviceName+".pem")
keyPath := filepath.Join(p.certDir, serviceName+".key")
if err := atomicWrite(certPath, []byte(result.ChainPEM), 0644); err != nil {
return fmt.Errorf("write cert: %w", err)
}
if err := atomicWrite(keyPath, []byte(result.KeyPEM), 0600); err != nil {
return fmt.Errorf("write key: %w", err)
}
p.logger.Info("cert provisioned",
"service", serviceName,
"serial", result.Serial,
"expires_at", result.ExpiresAt,
)
return nil
}
// certTimeRemaining returns the time until the leaf certificate at
// path expires. Returns (0, false) if the cert cannot be read or parsed.
func certTimeRemaining(path string) (time.Duration, bool) {
data, err := os.ReadFile(path) //nolint:gosec // path from trusted config
if err != nil {
return 0, false
}
block, _ := pem.Decode(data)
if block == nil {
return 0, false
}
cert, err := x509.ParseCertificate(block.Bytes)
if err != nil {
return 0, false
}
remaining := time.Until(cert.NotAfter)
if remaining <= 0 {
return 0, true // expired
}
return remaining, true
}
// atomicWrite writes data to a temporary file then renames it to path,
// ensuring readers never see a partial file.
func atomicWrite(path string, data []byte, perm os.FileMode) error {
tmp := path + ".tmp"
if err := os.WriteFile(tmp, data, perm); err != nil {
return fmt.Errorf("write %s: %w", tmp, err)
}
if err := os.Rename(tmp, path); err != nil {
_ = os.Remove(tmp)
return fmt.Errorf("rename %s -> %s: %w", tmp, path, err)
}
return nil
}
// newTLSClient creates an HTTP client with TLS 1.3 minimum. If
// caCertPath is non-empty, the CA certificate is loaded into the
// root CA pool.
func newTLSClient(caCertPath string) (*http.Client, error) {
tlsConfig := &tls.Config{
MinVersion: tls.VersionTLS13,
}
if caCertPath != "" {
caCert, err := os.ReadFile(caCertPath) //nolint:gosec // path from trusted config
if err != nil {
return nil, fmt.Errorf("read CA cert %q: %w", caCertPath, err)
}
pool := x509.NewCertPool()
if !pool.AppendCertsFromPEM(caCert) {
return nil, fmt.Errorf("parse CA cert %q: no valid certificates found", caCertPath)
}
tlsConfig.RootCAs = pool
}
return &http.Client{
Timeout: 30 * time.Second,
Transport: &http.Transport{
TLSClientConfig: tlsConfig,
},
}, nil
}