Files
mcp/internal/master/undeploy.go
Kyle Isom 6fd81cacf2 Add master core: deploy, undeploy, status, placement, DNS
Master struct with Run() lifecycle following the agent pattern exactly:
open DB → bootstrap nodes → create agent pool → DNS client → TLS →
auth interceptor → gRPC server → signal handler.

RPC handlers:
- Deploy: place service (tier-aware), forward to agent, register DNS
  with Tailnet IP, detect public routes, validate against allowed
  domains, coordinate edge routing via SetupEdgeRoute, record placement
  and edge routes in master DB, return structured per-step results.
- Undeploy: undeploy on worker first, then remove edge routes, DNS,
  and DB records. Best-effort cleanup on failure.
- Status: query agents for service status, aggregate with placements
  and edge route info from master DB.
- ListNodes: return all nodes with placement counts.

Placement algorithm: fewest services, ties broken alphabetically.
DNS client: extracted from agent's DNSRegistrar with explicit nodeAddr
parameter (master registers for different nodes).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 15:39:46 -07:00

95 lines
2.9 KiB
Go

package master
import (
"context"
"fmt"
mcpv1 "git.wntrmute.dev/mc/mcp/gen/mcp/v1"
"git.wntrmute.dev/mc/mcp/internal/masterdb"
)
// Undeploy handles MasterUndeployRequest: removes edge routes, DNS, then
// forwards the undeploy to the worker agent.
func (m *Master) Undeploy(ctx context.Context, req *mcpv1.MasterUndeployRequest) (*mcpv1.MasterUndeployResponse, error) {
serviceName := req.GetServiceName()
if serviceName == "" {
return nil, fmt.Errorf("service_name is required")
}
m.Logger.Info("Undeploy", "service", serviceName)
// Look up placement.
placement, err := masterdb.GetPlacement(m.DB, serviceName)
if err != nil {
return &mcpv1.MasterUndeployResponse{Error: fmt.Sprintf("lookup placement: %v", err)}, nil
}
if placement == nil {
return &mcpv1.MasterUndeployResponse{Error: fmt.Sprintf("service %q not found in placements", serviceName)}, nil
}
// Step 1: Undeploy on worker first (stops the backend).
client, err := m.Pool.Get(placement.Node)
if err != nil {
return &mcpv1.MasterUndeployResponse{Error: fmt.Sprintf("agent connection: %v", err)}, nil
}
undeployCtx, undeployCancel := context.WithTimeout(ctx, m.Config.Timeouts.Undeploy.Duration)
defer undeployCancel()
_, undeployErr := client.UndeployService(undeployCtx, &mcpv1.UndeployServiceRequest{
Name: serviceName,
})
if undeployErr != nil {
m.Logger.Warn("agent undeploy failed", "service", serviceName, "node", placement.Node, "err", undeployErr)
// Continue — still clean up edge routes and records.
}
// Step 2: Remove edge routes.
edgeRoutes, err := masterdb.ListEdgeRoutesForService(m.DB, serviceName)
if err != nil {
m.Logger.Warn("list edge routes for undeploy", "service", serviceName, "err", err)
}
for _, er := range edgeRoutes {
edgeClient, getErr := m.Pool.Get(er.EdgeNode)
if getErr != nil {
m.Logger.Warn("edge agent connection", "edge_node", er.EdgeNode, "err", getErr)
continue
}
edgeCtx, edgeCancel := context.WithTimeout(ctx, m.Config.Timeouts.EdgeRoute.Duration)
_, removeErr := edgeClient.RemoveEdgeRoute(edgeCtx, &mcpv1.RemoveEdgeRouteRequest{
Hostname: er.Hostname,
})
edgeCancel()
if removeErr != nil {
m.Logger.Warn("remove edge route", "hostname", er.Hostname, "err", removeErr)
} else {
m.Logger.Info("edge route removed", "hostname", er.Hostname, "edge_node", er.EdgeNode)
}
}
// Step 3: Remove DNS.
if m.DNS != nil {
if dnsErr := m.DNS.RemoveRecord(ctx, serviceName); dnsErr != nil {
m.Logger.Warn("DNS removal failed", "service", serviceName, "err", dnsErr)
}
}
// Step 4: Clean up records.
_ = masterdb.DeleteEdgeRoutesForService(m.DB, serviceName)
_ = masterdb.DeletePlacement(m.DB, serviceName)
success := undeployErr == nil
var errMsg string
if !success {
errMsg = fmt.Sprintf("agent undeploy: %v", undeployErr)
}
m.Logger.Info("undeploy complete", "service", serviceName, "success", success)
return &mcpv1.MasterUndeployResponse{
Success: success,
Error: errMsg,
}, nil
}