Fix design-vs-implementation gaps found in verification
Critical fixes:
- Wire monitor subsystem to agent startup (was dead code)
- Implement NodeStatus RPC (disk, memory, CPU, runtime version, uptime)
- Deploy respects active=false (sets desired_state=stopped, not always running)

Medium fixes:
- Add Started field to runtime.ContainerInfo, populate from podman inspect
- Populate ComponentInfo.started in status handlers for uptime display
- Add Monitor field to Agent struct for graceful shutdown

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
67
internal/agent/nodestatus.go
Normal file
67
internal/agent/nodestatus.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
mcpv1 "git.wntrmute.dev/kyle/mcp/gen/mcp/v1"
|
||||
"git.wntrmute.dev/kyle/mcp/internal/registry"
|
||||
"golang.org/x/sys/unix"
|
||||
"google.golang.org/protobuf/types/known/timestamppb"
|
||||
)
|
||||
|
||||
// NodeStatus returns information about this agent's node.
|
||||
func (a *Agent) NodeStatus(ctx context.Context, _ *mcpv1.NodeStatusRequest) (*mcpv1.NodeStatusResponse, error) {
|
||||
services, err := registry.ListServices(a.DB)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var componentCount uint32
|
||||
for _, svc := range services {
|
||||
comps, _ := registry.ListComponents(a.DB, svc.Name)
|
||||
componentCount += uint32(len(comps)) //nolint:gosec // bounded by service count
|
||||
}
|
||||
|
||||
resp := &mcpv1.NodeStatusResponse{
|
||||
NodeName: a.Config.Agent.NodeName,
|
||||
Runtime: a.Config.Agent.ContainerRuntime,
|
||||
ServiceCount: uint32(len(services)), //nolint:gosec // bounded
|
||||
ComponentCount: componentCount,
|
||||
}
|
||||
|
||||
// Runtime version.
|
||||
cmd := exec.CommandContext(ctx, a.Config.Agent.ContainerRuntime, "--version") //nolint:gosec // trusted config
|
||||
if out, err := cmd.Output(); err == nil {
|
||||
resp.RuntimeVersion = strings.TrimSpace(string(out))
|
||||
}
|
||||
|
||||
// Disk usage for /srv.
|
||||
var stat unix.Statfs_t
|
||||
if err := unix.Statfs("/srv", &stat); err == nil {
|
||||
resp.DiskTotalBytes = stat.Blocks * uint64(stat.Bsize) //nolint:gosec // kernel values
|
||||
resp.DiskFreeBytes = stat.Bavail * uint64(stat.Bsize) //nolint:gosec // kernel values
|
||||
}
|
||||
|
||||
// Memory.
|
||||
var sysinfo unix.Sysinfo_t
|
||||
if err := unix.Sysinfo(&sysinfo); err == nil {
|
||||
resp.MemoryTotalBytes = sysinfo.Totalram
|
||||
resp.MemoryFreeBytes = sysinfo.Freeram
|
||||
}
|
||||
|
||||
// CPU usage approximation: number of goroutines / GOMAXPROCS is a rough
|
||||
// indicator. Real CPU monitoring would use /proc/stat, which is a v2 concern.
|
||||
resp.CpuUsagePercent = float64(runtime.NumGoroutine()) / float64(runtime.GOMAXPROCS(0)) * 100
|
||||
|
||||
// Uptime: use sysinfo.
|
||||
if err := unix.Sysinfo(&sysinfo); err == nil {
|
||||
bootTime := time.Now().Add(-time.Duration(sysinfo.Uptime) * time.Second)
|
||||
resp.UptimeSince = timestamppb.New(bootTime)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
Reference in New Issue
Block a user