Files
mcp/internal/master/deploy.go
Kyle Isom 6fd81cacf2 Add master core: deploy, undeploy, status, placement, DNS
Master struct with Run() lifecycle following the agent pattern exactly:
open DB → bootstrap nodes → create agent pool → DNS client → TLS →
auth interceptor → gRPC server → signal handler.

RPC handlers:
- Deploy: place service (tier-aware), forward to agent, register DNS
  with Tailnet IP, detect public routes, validate against allowed
  domains, coordinate edge routing via SetupEdgeRoute, record placement
  and edge routes in master DB, return structured per-step results.
- Undeploy: undeploy on worker first, then remove edge routes, DNS,
  and DB records. Best-effort cleanup on failure.
- Status: query agents for service status, aggregate with placements
  and edge route info from master DB.
- ListNodes: return all nodes with placement counts.

Placement algorithm: fewest services, ties broken alphabetically.
DNS client: extracted from agent's DNSRegistrar with explicit nodeAddr
parameter (master registers for different nodes).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 15:39:46 -07:00

212 lines
6.3 KiB
Go

package master
import (
"context"
"fmt"
"net"
"strings"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/masterdb"
)
// Deploy handles the MasterDeployRequest: places the service, forwards to
// the agent, registers DNS, and coordinates edge routing.
func (m *Master) Deploy(ctx context.Context, req *mcpv1.MasterDeployRequest) (*mcpv1.MasterDeployResponse, error) {
spec := req.GetService()
if spec == nil || spec.GetName() == "" {
return nil, fmt.Errorf("service spec with name is required")
}
serviceName := spec.GetName()
tier := spec.GetTier()
if tier == "" {
tier = "worker"
}
m.Logger.Info("Deploy", "service", serviceName, "tier", tier, "node_override", spec.GetNode())
resp := &mcpv1.MasterDeployResponse{}
// Step 1: Place service.
nodeName := spec.GetNode()
if nodeName == "" {
var err error
switch tier {
case "core":
nodeName, err = FindMasterNode(m.DB)
default:
nodeName, err = PickNode(m.DB)
}
if err != nil {
resp.Error = fmt.Sprintf("placement failed: %v", err)
return resp, nil
}
}
resp.Node = nodeName
node, err := masterdb.GetNode(m.DB, nodeName)
if err != nil || node == nil {
resp.Error = fmt.Sprintf("node %q not found", nodeName)
return resp, nil
}
// Parse the node's Tailnet IP from its address (host:port).
nodeHost, _, err := net.SplitHostPort(node.Address)
if err != nil {
resp.Error = fmt.Sprintf("invalid node address %q: %v", node.Address, err)
return resp, nil
}
// Step 2: Forward deploy to the agent.
client, err := m.Pool.Get(nodeName)
if err != nil {
resp.Error = fmt.Sprintf("agent connection: %v", err)
return resp, nil
}
deployCtx, deployCancel := context.WithTimeout(ctx, m.Config.Timeouts.Deploy.Duration)
defer deployCancel()
deployResp, err := client.Deploy(deployCtx, &mcpv1.DeployRequest{
Service: spec,
})
if err != nil {
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Error: err.Error()}
resp.Error = fmt.Sprintf("agent deploy failed: %v", err)
return resp, nil
}
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Success: true}
// Check agent-side results for failures.
for _, cr := range deployResp.GetResults() {
if !cr.GetSuccess() {
resp.DeployResult.Success = false
resp.DeployResult.Error = fmt.Sprintf("component %s: %s", cr.GetName(), cr.GetError())
resp.Error = resp.DeployResult.Error
return resp, nil
}
}
// Step 3: Register DNS — Tailnet IP from the node address.
if m.DNS != nil {
if err := m.DNS.EnsureRecord(ctx, serviceName, nodeHost); err != nil {
m.Logger.Warn("DNS registration failed", "service", serviceName, "err", err)
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Error: err.Error()}
} else {
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Success: true}
}
}
// Record placement.
if err := masterdb.CreatePlacement(m.DB, serviceName, nodeName, tier); err != nil {
m.Logger.Error("record placement", "service", serviceName, "err", err)
}
// Steps 4-9: Detect public routes and coordinate edge routing.
edgeResult := m.setupEdgeRoutes(ctx, spec, serviceName, nodeHost)
if edgeResult != nil {
resp.EdgeRouteResult = edgeResult
}
// Compute overall success.
resp.Success = true
if resp.DeployResult != nil && !resp.DeployResult.Success {
resp.Success = false
}
if resp.EdgeRouteResult != nil && !resp.EdgeRouteResult.Success {
resp.Success = false
}
m.Logger.Info("deploy complete", "service", serviceName, "node", nodeName, "success", resp.Success)
return resp, nil
}
// setupEdgeRoutes detects public routes and coordinates edge routing.
func (m *Master) setupEdgeRoutes(ctx context.Context, spec *mcpv1.ServiceSpec, serviceName, nodeHost string) *mcpv1.StepResult {
var publicRoutes []*mcpv1.RouteSpec
for _, comp := range spec.GetComponents() {
for _, route := range comp.GetRoutes() {
if route.GetPublic() && route.GetHostname() != "" {
publicRoutes = append(publicRoutes, route)
}
}
}
if len(publicRoutes) == 0 {
return nil
}
// Find the edge node.
edgeNodeName, err := FindEdgeNode(m.DB)
if err != nil {
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("no edge node: %v", err)}
}
edgeClient, err := m.Pool.Get(edgeNodeName)
if err != nil {
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("edge agent connection: %v", err)}
}
var lastErr string
for _, route := range publicRoutes {
hostname := route.GetHostname()
// Validate hostname against allowed domains.
if !m.isAllowedDomain(hostname) {
lastErr = fmt.Sprintf("hostname %q not under an allowed domain", hostname)
m.Logger.Warn("edge route rejected", "hostname", hostname, "reason", lastErr)
continue
}
// Construct the backend hostname: <component>.svc.mcp.<zone>
// For simplicity, use the service name as the component name.
zone := "metacircular.net"
if m.DNS != nil && m.DNS.Zone() != "" {
zone = m.DNS.Zone()
}
backendHostname := serviceName + "." + zone
edgeCtx, edgeCancel := context.WithTimeout(ctx, m.Config.Timeouts.EdgeRoute.Duration)
_, setupErr := edgeClient.SetupEdgeRoute(edgeCtx, &mcpv1.SetupEdgeRouteRequest{
Hostname: hostname,
BackendHostname: backendHostname,
BackendPort: route.GetPort(),
BackendTls: true,
})
edgeCancel()
if setupErr != nil {
lastErr = fmt.Sprintf("setup edge route %s: %v", hostname, setupErr)
m.Logger.Warn("edge route setup failed", "hostname", hostname, "err", setupErr)
continue
}
// Record edge route in master DB.
if dbErr := masterdb.CreateEdgeRoute(m.DB, hostname, serviceName, edgeNodeName, backendHostname, int(route.GetPort())); dbErr != nil {
m.Logger.Warn("record edge route", "hostname", hostname, "err", dbErr)
}
m.Logger.Info("edge route established", "hostname", hostname, "edge_node", edgeNodeName)
}
if lastErr != "" {
return &mcpv1.StepResult{Step: "edge_route", Error: lastErr}
}
return &mcpv1.StepResult{Step: "edge_route", Success: true}
}
// isAllowedDomain checks if hostname falls under one of the configured
// allowed domains using proper domain label matching.
func (m *Master) isAllowedDomain(hostname string) bool {
if len(m.Config.Edge.AllowedDomains) == 0 {
return true // no restrictions configured
}
for _, domain := range m.Config.Edge.AllowedDomains {
if hostname == domain || strings.HasSuffix(hostname, "."+domain) {
return true
}
}
return false
}