Node addresses may be Tailscale DNS names (e.g., rift.scylla-hammerhead.ts.net:9444) but MCNS needs an IPv4 address for A records. The master now resolves the hostname via net.LookupHost before passing it to the DNS client. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
223 lines
6.7 KiB
Go
223 lines
6.7 KiB
Go
package master
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"net"
|
|
"strings"
|
|
|
|
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
|
|
"git.wntrmute.dev/mc/mcp/internal/masterdb"
|
|
)
|
|
|
|
// Deploy handles the MasterDeployRequest: places the service, forwards to
|
|
// the agent, registers DNS, and coordinates edge routing.
|
|
func (m *Master) Deploy(ctx context.Context, req *mcpv1.MasterDeployRequest) (*mcpv1.MasterDeployResponse, error) {
|
|
spec := req.GetService()
|
|
if spec == nil || spec.GetName() == "" {
|
|
return nil, fmt.Errorf("service spec with name is required")
|
|
}
|
|
|
|
serviceName := spec.GetName()
|
|
tier := spec.GetTier()
|
|
if tier == "" {
|
|
tier = "worker"
|
|
}
|
|
|
|
m.Logger.Info("Deploy", "service", serviceName, "tier", tier, "node_override", spec.GetNode())
|
|
|
|
resp := &mcpv1.MasterDeployResponse{}
|
|
|
|
// Step 1: Place service.
|
|
nodeName := spec.GetNode()
|
|
if nodeName == "" {
|
|
var err error
|
|
switch tier {
|
|
case "core":
|
|
nodeName, err = FindMasterNode(m.DB)
|
|
default:
|
|
nodeName, err = PickNode(m.DB)
|
|
}
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("placement failed: %v", err)
|
|
return resp, nil
|
|
}
|
|
}
|
|
resp.Node = nodeName
|
|
|
|
node, err := masterdb.GetNode(m.DB, nodeName)
|
|
if err != nil || node == nil {
|
|
resp.Error = fmt.Sprintf("node %q not found", nodeName)
|
|
return resp, nil
|
|
}
|
|
|
|
// Resolve the node's address to an IP for DNS registration.
|
|
// Node addresses may be Tailscale DNS names (e.g., rift.scylla-hammerhead.ts.net:9444)
|
|
// but MCNS needs an IP address for A records.
|
|
nodeHost, _, err := net.SplitHostPort(node.Address)
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("invalid node address %q: %v", node.Address, err)
|
|
return resp, nil
|
|
}
|
|
// If nodeHost is not an IP, resolve it.
|
|
if net.ParseIP(nodeHost) == nil {
|
|
ips, lookupErr := net.LookupHost(nodeHost)
|
|
if lookupErr != nil || len(ips) == 0 {
|
|
m.Logger.Warn("cannot resolve node address", "host", nodeHost, "err", lookupErr)
|
|
} else {
|
|
nodeHost = ips[0]
|
|
}
|
|
}
|
|
|
|
// Step 2: Forward deploy to the agent.
|
|
client, err := m.Pool.Get(nodeName)
|
|
if err != nil {
|
|
resp.Error = fmt.Sprintf("agent connection: %v", err)
|
|
return resp, nil
|
|
}
|
|
|
|
deployCtx, deployCancel := context.WithTimeout(ctx, m.Config.Timeouts.Deploy.Duration)
|
|
defer deployCancel()
|
|
|
|
deployResp, err := client.Deploy(deployCtx, &mcpv1.DeployRequest{
|
|
Service: spec,
|
|
})
|
|
if err != nil {
|
|
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Error: err.Error()}
|
|
resp.Error = fmt.Sprintf("agent deploy failed: %v", err)
|
|
return resp, nil
|
|
}
|
|
resp.DeployResult = &mcpv1.StepResult{Step: "deploy", Success: true}
|
|
|
|
// Check agent-side results for failures.
|
|
for _, cr := range deployResp.GetResults() {
|
|
if !cr.GetSuccess() {
|
|
resp.DeployResult.Success = false
|
|
resp.DeployResult.Error = fmt.Sprintf("component %s: %s", cr.GetName(), cr.GetError())
|
|
resp.Error = resp.DeployResult.Error
|
|
return resp, nil
|
|
}
|
|
}
|
|
|
|
// Step 3: Register DNS — Tailnet IP from the node address.
|
|
if m.DNS != nil {
|
|
if err := m.DNS.EnsureRecord(ctx, serviceName, nodeHost); err != nil {
|
|
m.Logger.Warn("DNS registration failed", "service", serviceName, "err", err)
|
|
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Error: err.Error()}
|
|
} else {
|
|
resp.DnsResult = &mcpv1.StepResult{Step: "dns", Success: true}
|
|
}
|
|
}
|
|
|
|
// Record placement.
|
|
if err := masterdb.CreatePlacement(m.DB, serviceName, nodeName, tier); err != nil {
|
|
m.Logger.Error("record placement", "service", serviceName, "err", err)
|
|
}
|
|
|
|
// Steps 4-9: Detect public routes and coordinate edge routing.
|
|
edgeResult := m.setupEdgeRoutes(ctx, spec, serviceName, nodeHost)
|
|
if edgeResult != nil {
|
|
resp.EdgeRouteResult = edgeResult
|
|
}
|
|
|
|
// Compute overall success.
|
|
resp.Success = true
|
|
if resp.DeployResult != nil && !resp.DeployResult.Success {
|
|
resp.Success = false
|
|
}
|
|
if resp.EdgeRouteResult != nil && !resp.EdgeRouteResult.Success {
|
|
resp.Success = false
|
|
}
|
|
|
|
m.Logger.Info("deploy complete", "service", serviceName, "node", nodeName, "success", resp.Success)
|
|
return resp, nil
|
|
}
|
|
|
|
// setupEdgeRoutes detects public routes and coordinates edge routing.
|
|
func (m *Master) setupEdgeRoutes(ctx context.Context, spec *mcpv1.ServiceSpec, serviceName, nodeHost string) *mcpv1.StepResult {
|
|
var publicRoutes []*mcpv1.RouteSpec
|
|
for _, comp := range spec.GetComponents() {
|
|
for _, route := range comp.GetRoutes() {
|
|
if route.GetPublic() && route.GetHostname() != "" {
|
|
publicRoutes = append(publicRoutes, route)
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(publicRoutes) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Find the edge node.
|
|
edgeNodeName, err := FindEdgeNode(m.DB)
|
|
if err != nil {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("no edge node: %v", err)}
|
|
}
|
|
|
|
edgeClient, err := m.Pool.Get(edgeNodeName)
|
|
if err != nil {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: fmt.Sprintf("edge agent connection: %v", err)}
|
|
}
|
|
|
|
var lastErr string
|
|
for _, route := range publicRoutes {
|
|
hostname := route.GetHostname()
|
|
|
|
// Validate hostname against allowed domains.
|
|
if !m.isAllowedDomain(hostname) {
|
|
lastErr = fmt.Sprintf("hostname %q not under an allowed domain", hostname)
|
|
m.Logger.Warn("edge route rejected", "hostname", hostname, "reason", lastErr)
|
|
continue
|
|
}
|
|
|
|
// Construct the backend hostname: <component>.svc.mcp.<zone>
|
|
// For simplicity, use the service name as the component name.
|
|
zone := "metacircular.net"
|
|
if m.DNS != nil && m.DNS.Zone() != "" {
|
|
zone = m.DNS.Zone()
|
|
}
|
|
backendHostname := serviceName + "." + zone
|
|
|
|
edgeCtx, edgeCancel := context.WithTimeout(ctx, m.Config.Timeouts.EdgeRoute.Duration)
|
|
_, setupErr := edgeClient.SetupEdgeRoute(edgeCtx, &mcpv1.SetupEdgeRouteRequest{
|
|
Hostname: hostname,
|
|
BackendHostname: backendHostname,
|
|
BackendPort: route.GetPort(),
|
|
BackendTls: true,
|
|
})
|
|
edgeCancel()
|
|
|
|
if setupErr != nil {
|
|
lastErr = fmt.Sprintf("setup edge route %s: %v", hostname, setupErr)
|
|
m.Logger.Warn("edge route setup failed", "hostname", hostname, "err", setupErr)
|
|
continue
|
|
}
|
|
|
|
// Record edge route in master DB.
|
|
if dbErr := masterdb.CreateEdgeRoute(m.DB, hostname, serviceName, edgeNodeName, backendHostname, int(route.GetPort())); dbErr != nil {
|
|
m.Logger.Warn("record edge route", "hostname", hostname, "err", dbErr)
|
|
}
|
|
|
|
m.Logger.Info("edge route established", "hostname", hostname, "edge_node", edgeNodeName)
|
|
}
|
|
|
|
if lastErr != "" {
|
|
return &mcpv1.StepResult{Step: "edge_route", Error: lastErr}
|
|
}
|
|
return &mcpv1.StepResult{Step: "edge_route", Success: true}
|
|
}
|
|
|
|
// isAllowedDomain checks if hostname falls under one of the configured
|
|
// allowed domains using proper domain label matching.
|
|
func (m *Master) isAllowedDomain(hostname string) bool {
|
|
if len(m.Config.Edge.AllowedDomains) == 0 {
|
|
return true // no restrictions configured
|
|
}
|
|
for _, domain := range m.Config.Edge.AllowedDomains {
|
|
if hostname == domain || strings.HasSuffix(hostname, "."+domain) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|