Add multi-address fallback for node connectivity
NodeConfig and MasterNodeConfig gain an optional addresses[] field for fallback addresses, which are tried in order after the primary address. This provides resilience when Tailscale DNS is down or a node is only reachable via LAN.

- dialAgentMulti: tries each address with a 3s health check and returns the first success
- forEachNode: uses multi-address dialing
- AgentPool.AddNodeMulti: the master tries all addresses when connecting
- AllAddresses(): deduplicates primary + fallback addresses

Config example:

    [[nodes]]
    name = "rift"
    address = "rift.scylla-hammerhead.ts.net:9444"
    addresses = ["100.95.252.120:9444", "192.168.88.181:9444"]

Existing configs without addresses[] work unchanged.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -140,21 +140,30 @@ func NewAgentPool(caCertPath, token string) *AgentPool {
|
||||
|
||||
// AddNode dials an agent and adds it to the pool.
|
||||
func (p *AgentPool) AddNode(name, address string) error {
|
||||
client, err := DialAgent(address, p.caCert, p.token)
|
||||
if err != nil {
|
||||
return fmt.Errorf("add node %s: %w", name, err)
|
||||
}
|
||||
client.Node = name
|
||||
return p.AddNodeMulti(name, []string{address})
|
||||
}
|
||||
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
// AddNodeMulti tries each address in order and adds the first successful
|
||||
// connection to the pool.
|
||||
func (p *AgentPool) AddNodeMulti(name string, addresses []string) error {
|
||||
var lastErr error
|
||||
for _, addr := range addresses {
|
||||
client, err := DialAgent(addr, p.caCert, p.token)
|
||||
if err != nil {
|
||||
lastErr = fmt.Errorf("%s: %w", addr, err)
|
||||
continue
|
||||
}
|
||||
client.Node = name
|
||||
|
||||
// Close existing connection if re-adding.
|
||||
if old, ok := p.clients[name]; ok {
|
||||
_ = old.Close()
|
||||
p.mu.Lock()
|
||||
if old, ok := p.clients[name]; ok {
|
||||
_ = old.Close()
|
||||
}
|
||||
p.clients[name] = client
|
||||
p.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
p.clients[name] = client
|
||||
return nil
|
||||
return fmt.Errorf("add node %s: all addresses failed: %w", name, lastErr)
|
||||
}
|
||||
|
||||
// Get returns the agent client for a node.
|
||||
|
||||
@@ -63,7 +63,7 @@ func Run(cfg *config.MasterConfig, version string) error {
|
||||
// Create agent connection pool.
|
||||
pool := NewAgentPool(cfg.Master.CACert, token)
|
||||
for _, n := range cfg.Nodes {
|
||||
if addErr := pool.AddNode(n.Name, n.Address); addErr != nil {
|
||||
if addErr := pool.AddNodeMulti(n.Name, n.AllAddresses()); addErr != nil {
|
||||
logger.Warn("failed to connect to agent", "node", n.Name, "err", addErr)
|
||||
// Non-fatal: the node may come up later.
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user