Add edge routing and health check RPCs (Phase 2)
New agent RPCs for v2 multi-node orchestration: - SetupEdgeRoute: provisions TLS cert from Metacrypt, resolves backend hostname to Tailnet IP, validates it's in 100.64.0.0/10, registers L7 route in mc-proxy. Rejects backend_tls=false. - RemoveEdgeRoute: removes mc-proxy route, cleans up TLS cert, removes registry entry. - ListEdgeRoutes: returns all edge routes with cert serial/expiry. - HealthCheck: returns agent health and container count. New database table (migration 4): edge_routes stores hostname, backend info, and cert paths for persistence across agent restarts. ProxyRouter gains CertPath/KeyPath helpers for consistent cert path construction. Security: - Backend hostname must resolve to a Tailnet IP (100.64.0.0/10) - backend_tls=false is rejected (no cleartext to backends) - Cert provisioning failure fails the setup (no route to missing cert) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
196
internal/agent/edge_rpc.go
Normal file
196
internal/agent/edge_rpc.go
Normal file
@@ -0,0 +1,196 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/x509"
|
||||
"encoding/pem"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"google.golang.org/grpc/codes"
|
||||
"google.golang.org/grpc/status"
|
||||
|
||||
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
|
||||
mcproxy "git.wntrmute.dev/mc/mc-proxy/client/mcproxy"
|
||||
"git.wntrmute.dev/mc/mcp/internal/registry"
|
||||
)
|
||||
|
||||
// SetupEdgeRoute provisions a TLS cert and registers an mc-proxy route for a
|
||||
// public hostname. Called by the master on edge nodes.
|
||||
func (a *Agent) SetupEdgeRoute(ctx context.Context, req *mcpv1.SetupEdgeRouteRequest) (*mcpv1.SetupEdgeRouteResponse, error) {
|
||||
a.Logger.Info("SetupEdgeRoute", "hostname", req.GetHostname(),
|
||||
"backend_hostname", req.GetBackendHostname(), "backend_port", req.GetBackendPort())
|
||||
|
||||
// Validate required fields.
|
||||
if req.GetHostname() == "" {
|
||||
return nil, status.Error(codes.InvalidArgument, "hostname is required")
|
||||
}
|
||||
if req.GetBackendHostname() == "" {
|
||||
return nil, status.Error(codes.InvalidArgument, "backend_hostname is required")
|
||||
}
|
||||
if req.GetBackendPort() == 0 {
|
||||
return nil, status.Error(codes.InvalidArgument, "backend_port is required")
|
||||
}
|
||||
if !req.GetBackendTls() {
|
||||
return nil, status.Error(codes.InvalidArgument, "backend_tls must be true")
|
||||
}
|
||||
|
||||
if a.Proxy == nil {
|
||||
return nil, status.Error(codes.FailedPrecondition, "mc-proxy not configured")
|
||||
}
|
||||
|
||||
// Resolve the backend hostname to a Tailnet IP.
|
||||
ips, err := net.LookupHost(req.GetBackendHostname())
|
||||
if err != nil || len(ips) == 0 {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "cannot resolve backend_hostname %q: %v", req.GetBackendHostname(), err)
|
||||
}
|
||||
backendIP := ips[0]
|
||||
|
||||
// Validate the resolved IP is a Tailnet address (100.64.0.0/10).
|
||||
ip := net.ParseIP(backendIP)
|
||||
if ip == nil {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "resolved IP %q is not valid", backendIP)
|
||||
}
|
||||
_, tailnet, _ := net.ParseCIDR("100.64.0.0/10")
|
||||
if !tailnet.Contains(ip) {
|
||||
return nil, status.Errorf(codes.InvalidArgument, "resolved IP %s is not a Tailnet address", backendIP)
|
||||
}
|
||||
|
||||
backend := fmt.Sprintf("%s:%d", backendIP, req.GetBackendPort())
|
||||
|
||||
// Provision TLS cert for the public hostname if cert provisioner is available.
|
||||
certPath := ""
|
||||
keyPath := ""
|
||||
if a.Certs != nil {
|
||||
if err := a.Certs.EnsureCert(ctx, req.GetHostname(), []string{req.GetHostname()}); err != nil {
|
||||
return nil, status.Errorf(codes.Internal, "provision cert for %s: %v", req.GetHostname(), err)
|
||||
}
|
||||
certPath = a.Proxy.CertPath(req.GetHostname())
|
||||
keyPath = a.Proxy.KeyPath(req.GetHostname())
|
||||
} else {
|
||||
// No cert provisioner — check if certs already exist on disk.
|
||||
certPath = a.Proxy.CertPath(req.GetHostname())
|
||||
keyPath = a.Proxy.KeyPath(req.GetHostname())
|
||||
if _, err := os.Stat(certPath); err != nil {
|
||||
return nil, status.Errorf(codes.FailedPrecondition, "no cert provisioner and cert not found at %s", certPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Register the L7 route in mc-proxy.
|
||||
route := mcproxy.Route{
|
||||
Hostname: req.GetHostname(),
|
||||
Backend: backend,
|
||||
Mode: "l7",
|
||||
TLSCert: certPath,
|
||||
TLSKey: keyPath,
|
||||
BackendTLS: true,
|
||||
}
|
||||
if err := a.Proxy.AddRoute(ctx, ":443", route); err != nil {
|
||||
return nil, status.Errorf(codes.Internal, "add mc-proxy route: %v", err)
|
||||
}
|
||||
|
||||
// Persist the edge route in the registry.
|
||||
if err := registry.CreateEdgeRoute(a.DB, req.GetHostname(), req.GetBackendHostname(), int(req.GetBackendPort()), certPath, keyPath); err != nil {
|
||||
a.Logger.Warn("failed to persist edge route", "hostname", req.GetHostname(), "err", err)
|
||||
}
|
||||
|
||||
a.Logger.Info("edge route established",
|
||||
"hostname", req.GetHostname(), "backend", backend, "cert", certPath)
|
||||
|
||||
return &mcpv1.SetupEdgeRouteResponse{}, nil
|
||||
}
|
||||
|
||||
// RemoveEdgeRoute removes an mc-proxy route and cleans up the TLS cert for a
|
||||
// public hostname. Called by the master on edge nodes.
|
||||
func (a *Agent) RemoveEdgeRoute(ctx context.Context, req *mcpv1.RemoveEdgeRouteRequest) (*mcpv1.RemoveEdgeRouteResponse, error) {
|
||||
a.Logger.Info("RemoveEdgeRoute", "hostname", req.GetHostname())
|
||||
|
||||
if req.GetHostname() == "" {
|
||||
return nil, status.Error(codes.InvalidArgument, "hostname is required")
|
||||
}
|
||||
|
||||
if a.Proxy == nil {
|
||||
return nil, status.Error(codes.FailedPrecondition, "mc-proxy not configured")
|
||||
}
|
||||
|
||||
// Remove the mc-proxy route.
|
||||
if err := a.Proxy.RemoveRoute(ctx, ":443", req.GetHostname()); err != nil {
|
||||
a.Logger.Warn("remove mc-proxy route", "hostname", req.GetHostname(), "err", err)
|
||||
// Continue — clean up cert and registry even if route removal fails.
|
||||
}
|
||||
|
||||
// Remove the TLS cert.
|
||||
if a.Certs != nil {
|
||||
if err := a.Certs.RemoveCert(req.GetHostname()); err != nil {
|
||||
a.Logger.Warn("remove cert", "hostname", req.GetHostname(), "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Remove from registry.
|
||||
if err := registry.DeleteEdgeRoute(a.DB, req.GetHostname()); err != nil {
|
||||
a.Logger.Warn("delete edge route from registry", "hostname", req.GetHostname(), "err", err)
|
||||
}
|
||||
|
||||
a.Logger.Info("edge route removed", "hostname", req.GetHostname())
|
||||
return &mcpv1.RemoveEdgeRouteResponse{}, nil
|
||||
}
|
||||
|
||||
// ListEdgeRoutes returns all edge routes managed by this agent.
|
||||
func (a *Agent) ListEdgeRoutes(_ context.Context, _ *mcpv1.ListEdgeRoutesRequest) (*mcpv1.ListEdgeRoutesResponse, error) {
|
||||
a.Logger.Debug("ListEdgeRoutes called")
|
||||
|
||||
routes, err := registry.ListEdgeRoutes(a.DB)
|
||||
if err != nil {
|
||||
return nil, status.Errorf(codes.Internal, "list edge routes: %v", err)
|
||||
}
|
||||
|
||||
resp := &mcpv1.ListEdgeRoutesResponse{}
|
||||
for _, r := range routes {
|
||||
er := &mcpv1.EdgeRoute{
|
||||
Hostname: r.Hostname,
|
||||
BackendHostname: r.BackendHostname,
|
||||
BackendPort: int32(r.BackendPort), //nolint:gosec // port is a small positive integer
|
||||
}
|
||||
|
||||
// Read cert metadata if available.
|
||||
if r.TLSCert != "" {
|
||||
if certData, readErr := os.ReadFile(r.TLSCert); readErr == nil { //nolint:gosec // path from registry, not user input
|
||||
if block, _ := pem.Decode(certData); block != nil {
|
||||
if cert, parseErr := x509.ParseCertificate(block.Bytes); parseErr == nil {
|
||||
er.CertSerial = cert.SerialNumber.String()
|
||||
er.CertExpires = cert.NotAfter.UTC().Format(time.RFC3339)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resp.Routes = append(resp.Routes, er)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
// HealthCheck returns the agent's health status. Called by the master when
|
||||
// heartbeats are missed.
|
||||
func (a *Agent) HealthCheck(_ context.Context, _ *mcpv1.HealthCheckRequest) (*mcpv1.HealthCheckResponse, error) {
|
||||
a.Logger.Debug("HealthCheck called")
|
||||
|
||||
st := "healthy"
|
||||
containers := int32(0)
|
||||
|
||||
// Count running containers if the runtime is available.
|
||||
if a.Runtime != nil {
|
||||
if list, err := a.Runtime.List(context.Background()); err == nil {
|
||||
containers = int32(len(list)) //nolint:gosec // container count is small
|
||||
} else {
|
||||
st = "degraded"
|
||||
}
|
||||
}
|
||||
|
||||
return &mcpv1.HealthCheckResponse{
|
||||
Status: st,
|
||||
Containers: containers,
|
||||
}, nil
|
||||
}
|
||||
Reference in New Issue
Block a user