Add agent registration, heartbeats, and monitoring (Phase 4)
Master side:
- Register RPC: identity-bound (agent-rift → rift), allowlist check, max-nodes limit; upserts the node in the registry and updates the agent pool.
- Heartbeat RPC: derives the node name from the MCIAS identity (not from the request); updates the container count and last-heartbeat timestamp.
- HeartbeatMonitor: background goroutine that checks for missed heartbeats (90s threshold), probes agents via HealthCheck, marks unhealthy.

Agent side:
- HeartbeatClient: connects to the master via env vars (MCP_MASTER_ADDRESS, MCP_MASTER_CA_CERT, MCP_MASTER_TOKEN_PATH), registers on startup with exponential backoff, and sends heartbeats every 30s.

Proto: added Register and Heartbeat RPCs and their messages to master.proto.

Architecture v2 Phase 4.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,10 @@ service McpMasterService {
|
||||
rpc Undeploy(MasterUndeployRequest) returns (MasterUndeployResponse);
|
||||
rpc Status(MasterStatusRequest) returns (MasterStatusResponse);
|
||||
rpc ListNodes(ListNodesRequest) returns (ListNodesResponse);
|
||||
|
||||
// Agent registration and health (called by agents).
|
||||
rpc Register(RegisterRequest) returns (RegisterResponse);
|
||||
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);
|
||||
}
|
||||
|
||||
// --- Deploy ---
|
||||
@@ -93,3 +97,30 @@ message NodeInfo {
|
||||
string last_heartbeat = 7; // RFC3339
|
||||
int32 services = 8; // placement count
|
||||
}
|
||||
|
||||
// --- Registration ---
|
||||
|
||||
// Request for McpMasterService.Register, sent by an agent to register its
// node with the master.
message RegisterRequest {
|
||||
// Node name the agent registers under.
// NOTE(review): the commit description calls Register "identity-bound"
// (agent-rift → rift); presumably the master checks this name against the
// caller's identity. Confirm in the handler.
string name = 1;
|
||||
// Role of the node.
string role = 2; // "worker", "edge", or "master"
|
||||
// Address of the agent's gRPC endpoint.
string address = 3; // agent gRPC address
|
||||
// CPU architecture of the node.
string arch = 4; // "amd64" or "arm64"
|
||||
}
|
||||
|
||||
// Response for McpMasterService.Register.
message RegisterResponse {
|
||||
// Whether the master accepted the registration.
// NOTE(review): it is not visible here whether a rejection is reported as
// accepted=false or as a gRPC error. Confirm in the handler.
bool accepted = 1;
|
||||
}
|
||||
|
||||
// --- Heartbeat ---
|
||||
|
||||
// Request for McpMasterService.Heartbeat, sent periodically by an agent to
// the master.
message HeartbeatRequest {
|
||||
// Node name as reported by the agent. Per the commit description, the master
// derives the node name from the caller's MCIAS identity rather than from
// the request, so this value should not be treated as authoritative.
string name = 1;
|
||||
// CPU figure in millicores.
// NOTE(review): usage vs. capacity is not stated here; confirm with the
// agent's HeartbeatClient.
int64 cpu_millicores = 2;
|
||||
// Memory figure in bytes.
// NOTE(review): usage vs. capacity is not stated here; confirm.
int64 memory_bytes = 3;
|
||||
// Disk figure in bytes.
// NOTE(review): usage vs. capacity is not stated here; confirm.
int64 disk_bytes = 4;
|
||||
// Container count reported by the agent; the commit description says the
// master updates the node's container count on each heartbeat.
int32 containers = 5;
|
||||
}
|
||||
|
||||
// Response for McpMasterService.Heartbeat.
message HeartbeatResponse {
|
||||
// Whether the master acknowledged the heartbeat.
bool acknowledged = 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user