Master struct with Run() lifecycle following the agent pattern exactly: open DB → bootstrap nodes → create agent pool → DNS client → TLS → auth interceptor → gRPC server → signal handler. RPC handlers: - Deploy: place service (tier-aware), forward to agent, register DNS with Tailnet IP, detect public routes, validate against allowed domains, coordinate edge routing via SetupEdgeRoute, record placement and edge routes in master DB, return structured per-step results. - Undeploy: undeploy on worker first, then remove edge routes, DNS, and DB records. Best-effort cleanup on failure. - Status: query agents for service status, aggregate with placements and edge route info from master DB. - ListNodes: return all nodes with placement counts. Placement algorithm: fewest services, ties broken alphabetically. DNS client: extracted from agent's DNSRegistrar with explicit nodeAddr parameter (master registers for different nodes). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
212 lines
6.3 KiB
Go
212 lines
6.3 KiB
Go
package master
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net"
|
|
"strings"
|
|
|
|
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
|
|
"git.wntrmute.dev/mc/mcp/internal/masterdb"
|
|
)
|
|
|
|
// Deploy handles the MasterDeployRequest: places the service, forwards to
|
|
// the agent, registers DNS, and coordinates edge routing.
|
|
func (m *Master) Deploy(ctx context.Context, req *mcpv1.MasterDeployRequest) (*mcpv1.MasterDeployResponse, error) {
|
|
spec := req.GetService()
|
|
if spec == nil || spec.GetName() == "" {
|
|
return nil, fmt.Errorf("service spec with name is required")
|
|
}
|
|
|
|
serviceName := spec.GetName()
|
|
tier := spec.GetTier()
|
|
if tier == "" {
|
|
tier = "worker"
|
|
}
|
|
|
|
m.Logger.Info("Deploy", "service", serviceName, "tier", tier, "node_override", spec.GetNode())
|
|
|
|
resp := &mcpv1.MasterDeployResponse{}
|
|
|
|
// Step 1: Place service.
|
|
nodeName := spec.GetNode()
|
|
if nodeName == "" {
|
|
var err error
|
|
switch tier {
|
|
case "core":
|
|
nodeName, err = FindMasterNode(m.DB)
|
|
default:
|
|
nodeName, err = PickNode(m.DB)
|
|
}
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("placement failed: %v", err)
|
|
return resp, nil
|
|
}
|
|
}
|
|
resp.Node = nodeName
|
|
|
|
node, err := masterdb.GetNode(m.DB, nodeName)
|
|
if err != nil || node == nil {
|
|
resp.Error = fmt.Sprintf("node %q not found", nodeName)
|
|
return resp, nil
|
|
}
|
|
|
|
// Parse the node's Tailnet IP from its address (host:port).
|
|
nodeHost, _, err := net.SplitHostPort(node.Address)
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("invalid node address %q: %v", node.Address, err)
|
|
return resp, nil
|
|
}
|
|
|
|
// Step 2: Forward deploy to the agent.
|
|
client, err := m.Pool.Get(nodeName)
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("agent connection: %v", err)
|
|
return resp, nil
|
|
}
|
|
|
|
deployCtx, deployCancel := context.WithTimeout(ctx, m.Config.Timeouts.Deploy.Duration)
|
|
defer deployCancel()
|
|
|
|
deployResp, err := client.Deploy(deployCtx, &mcpv1.DeployRequest{
|
|
Service: spec,
|
|
})
|
|
if err != nil {
|
|
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Error: err.Error()}
|
|
resp.Error = fmt.Sprintf("agent deploy failed: %v", err)
|
|
return resp, nil
|
|
}
|
|
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Success: true}
|
|
|
|
// Check agent-side results for failures.
|
|
for _, cr := range deployResp.GetResults() {
|
|
if !cr.GetSuccess() {
|
|
resp.DeployResult.Success = false
|
|
resp.DeployResult.Error = fmt.Sprintf("component %s: %s", cr.GetName(), cr.GetError())
|
|
resp.Error = resp.DeployResult.Error
|
|
return resp, nil
|
|
}
|
|
}
|
|
|
|
// Step 3: Register DNS — Tailnet IP from the node address.
|
|
if m.DNS != nil {
|
|
if err := m.DNS.EnsureRecord(ctx, serviceName, nodeHost); err != nil {
|
|
m.Logger.Warn("DNS registration failed", "service", serviceName, "err", err)
|
|
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Error: err.Error()}
|
|
} else {
|
|
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Success: true}
|
|
}
|
|
}
|
|
|
|
// Record placement.
|
|
if err := masterdb.CreatePlacement(m.DB, serviceName, nodeName, tier); err != nil {
|
|
m.Logger.Error("record placement", "service", serviceName, "err", err)
|
|
}
|
|
|
|
// Steps 4-9: Detect public routes and coordinate edge routing.
|
|
edgeResult := m.setupEdgeRoutes(ctx, spec, serviceName, nodeHost)
|
|
if edgeResult != nil {
|
|
resp.EdgeRouteResult = edgeResult
|
|
}
|
|
|
|
// Compute overall success.
|
|
resp.Success = true
|
|
if resp.DeployResult != nil && !resp.DeployResult.Success {
|
|
resp.Success = false
|
|
}
|
|
if resp.EdgeRouteResult != nil && !resp.EdgeRouteResult.Success {
|
|
resp.Success = false
|
|
}
|
|
|
|
m.Logger.Info("deploy complete", "service", serviceName, "node", nodeName, "success", resp.Success)
|
|
return resp, nil
|
|
}
|
|
|
|
// setupEdgeRoutes detects public routes and coordinates edge routing.
|
|
func (m *Master) setupEdgeRoutes(ctx context.Context, spec *mcpv1.ServiceSpec, serviceName, nodeHost string) *mcpv1.StepResult {
|
|
var publicRoutes []*mcpv1.RouteSpec
|
|
for _, comp := range spec.GetComponents() {
|
|
for _, route := range comp.GetRoutes() {
|
|
if route.GetPublic() && route.GetHostname() != "" {
|
|
publicRoutes = append(publicRoutes, route)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(publicRoutes) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Find the edge node.
|
|
edgeNodeName, err := FindEdgeNode(m.DB)
|
|
if err != nil {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("no edge node: %v", err)}
|
|
}
|
|
|
|
edgeClient, err := m.Pool.Get(edgeNodeName)
|
|
if err != nil {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("edge agent connection: %v", err)}
|
|
}
|
|
|
|
var lastErr string
|
|
for _, route := range publicRoutes {
|
|
hostname := route.GetHostname()
|
|
|
|
// Validate hostname against allowed domains.
|
|
if !m.isAllowedDomain(hostname) {
|
|
lastErr = fmt.Sprintf("hostname %q not under an allowed domain", hostname)
|
|
m.Logger.Warn("edge route rejected", "hostname", hostname, "reason", lastErr)
|
|
continue
|
|
}
|
|
|
|
// Construct the backend hostname: <component>.svc.mcp.<zone>
|
|
// For simplicity, use the service name as the component name.
|
|
zone := "metacircular.net"
|
|
if m.DNS != nil && m.DNS.Zone() != "" {
|
|
zone = m.DNS.Zone()
|
|
}
|
|
backendHostname := serviceName + "." + zone
|
|
|
|
edgeCtx, edgeCancel := context.WithTimeout(ctx, m.Config.Timeouts.EdgeRoute.Duration)
|
|
_, setupErr := edgeClient.SetupEdgeRoute(edgeCtx, &mcpv1.SetupEdgeRouteRequest{
|
|
Hostname: hostname,
|
|
BackendHostname: backendHostname,
|
|
BackendPort: route.GetPort(),
|
|
BackendTls: true,
|
|
})
|
|
edgeCancel()
|
|
|
|
if setupErr != nil {
|
|
lastErr = fmt.Sprintf("setup edge route %s: %v", hostname, setupErr)
|
|
m.Logger.Warn("edge route setup failed", "hostname", hostname, "err", setupErr)
|
|
continue
|
|
}
|
|
|
|
// Record edge route in master DB.
|
|
if dbErr := masterdb.CreateEdgeRoute(m.DB, hostname, serviceName, edgeNodeName, backendHostname, int(route.GetPort())); dbErr != nil {
|
|
m.Logger.Warn("record edge route", "hostname", hostname, "err", dbErr)
|
|
}
|
|
|
|
m.Logger.Info("edge route established", "hostname", hostname, "edge_node", edgeNodeName)
|
|
}
|
|
|
|
if lastErr != "" {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: lastErr}
|
|
}
|
|
return &mcpv1.StepResult{Step: "edge_route", Success: true}
|
|
}
|
|
|
|
// isAllowedDomain checks if hostname falls under one of the configured
|
|
// allowed domains using proper domain label matching.
|
|
func (m *Master) isAllowedDomain(hostname string) bool {
|
|
if len(m.Config.Edge.AllowedDomains) == 0 {
|
|
return true // no restrictions configured
|
|
}
|
|
for _, domain := range m.Config.Edge.AllowedDomains {
|
|
if hostname == domain || strings.HasSuffix(hostname, "."+domain) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|