Add Prometheus metrics for connections, firewall, L7, and bytes transferred

Instrument mc-proxy with prometheus/client_golang. New internal/metrics/
package defines counters, gauges, and histograms for connection totals,
active connections, firewall blocks by reason, backend dial latency,
bytes transferred, L7 HTTP status codes, and L7 policy blocks. Optional
[metrics] config section starts a scrape endpoint. Firewall gains
BlockedWithReason() to report block cause. L7 handler wraps
ResponseWriter to record status codes per hostname.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 18:05:25 -07:00
parent 42c7fffc3e
commit ffc31f7d55
16 changed files with 439 additions and 32 deletions

View File

@@ -0,0 +1,95 @@
// Package metrics defines Prometheus metrics for mc-proxy and provides
// an HTTP server for the /metrics endpoint.
package metrics
import (
	"context"
	"errors"
	"net"
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Metric collectors exposed by mc-proxy. Every collector lives in the
// "mcproxy" namespace and is created through promauto.
var (
	// ConnectionsTotal is a counter of accepted connections, labelled by
	// the accepting listener and the proxy mode.
	ConnectionsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "mcproxy",
			Name:      "connections_total",
			Help:      "Total connections accepted.",
		},
		[]string{"listener", "mode"},
	)

	// ConnectionsActive is a gauge of connections currently open, labelled
	// by listener.
	ConnectionsActive = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "mcproxy",
			Name:      "connections_active",
			Help:      "Currently active connections.",
		},
		[]string{"listener"},
	)

	// FirewallBlockedTotal is a counter of connections rejected by the
	// firewall, labelled by the reason for the block.
	FirewallBlockedTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "mcproxy",
			Name:      "firewall_blocked_total",
			Help:      "Total connections blocked by the firewall.",
		},
		[]string{"reason"},
	)

	// BackendDialDuration is a histogram of the time taken to dial a
	// backend, in seconds, labelled by backend. Buckets span 1ms to 5s.
	BackendDialDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "mcproxy",
			Name:      "backend_dial_duration_seconds",
			Help:      "Backend dial latency in seconds.",
			Buckets:   []float64{0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5},
		},
		[]string{"backend"},
	)

	// TransferredBytesTotal is a counter of proxied bytes, labelled by
	// transfer direction and hostname.
	TransferredBytesTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "mcproxy",
			Name:      "transferred_bytes_total",
			Help:      "Total bytes transferred.",
		},
		[]string{"direction", "hostname"},
	)

	// L7ResponsesTotal is a counter of L7 HTTP responses, labelled by
	// hostname and HTTP status code.
	L7ResponsesTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "mcproxy",
			Name:      "l7_responses_total",
			Help:      "Total L7 HTTP responses.",
		},
		[]string{"hostname", "code"},
	)

	// L7PolicyBlocksTotal is a counter of requests rejected by an L7
	// policy, labelled by hostname and the type of policy that fired.
	L7PolicyBlocksTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "mcproxy",
			Name:      "l7_policy_blocks_total",
			Help:      "Total L7 policy blocks.",
		},
		[]string{"hostname", "policy_type"},
	)
)
// ListenAndServe starts a Prometheus metrics HTTP server listening on addr
// and serving the default Prometheus handler at path. An empty path defaults
// to "/metrics".
//
// It blocks until ctx is cancelled or the server stops on its own. When ctx
// is cancelled the server stops accepting new connections and in-flight
// requests are given a bounded grace period to finish; any still running
// after that are closed forcibly. In that case ListenAndServe returns nil.
//
// A non-nil error is returned only when the listener cannot be opened or
// the server stops for a reason other than shutdown.
func ListenAndServe(ctx context.Context, addr, path string) error {
	const (
		// readHeaderTimeout bounds how long a client may take to send its
		// request headers, so a stalled client cannot pin a connection.
		readHeaderTimeout = 5 * time.Second
		// shutdownGrace bounds how long in-flight requests may keep
		// running once ctx has been cancelled.
		shutdownGrace = 5 * time.Second
	)

	if path == "" {
		path = "/metrics"
	}

	mux := http.NewServeMux()
	mux.Handle(path, promhttp.Handler())

	ln, err := net.Listen("tcp", addr)
	if err != nil {
		return err
	}

	srv := &http.Server{
		Handler:           mux,
		ReadHeaderTimeout: readHeaderTimeout,
	}

	// Serve runs in its own goroutine so this function can react to
	// whichever happens first: a server failure or context cancellation.
	// The channel is buffered so the goroutine never blocks on send.
	serveErr := make(chan error, 1)
	go func() { serveErr <- srv.Serve(ln) }()

	select {
	case err := <-serveErr:
		// The server stopped without being asked to.
		if errors.Is(err, http.ErrServerClosed) {
			return nil
		}
		return err
	case <-ctx.Done():
	}

	// ctx is already cancelled, so the grace period needs a fresh context.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownGrace)
	defer cancel()
	if err := srv.Shutdown(shutdownCtx); err != nil {
		// The grace period expired; drop whatever is still connected.
		// The Close error is ignored because there is nothing left to do.
		_ = srv.Close()
	}

	// Serve returns http.ErrServerClosed once Shutdown has been called;
	// waiting for it guarantees the serving goroutine has exited.
	<-serveErr
	return nil
}

View File

@@ -0,0 +1,121 @@
package metrics
import (
"context"
"io"
"net"
"net/http"
"strings"
"testing"
"time"
)
// TestListenAndServeShutdown verifies that ListenAndServe returns a nil
// error promptly once its context has been cancelled.
func TestListenAndServeShutdown(t *testing.T) {
	ctx, stop := context.WithCancel(context.Background())
	defer stop()

	// Buffered so the server goroutine can always deliver its result.
	done := make(chan error, 1)
	go func() {
		done <- ListenAndServe(ctx, "127.0.0.1:0", "/metrics")
	}()

	// Let the server get going before asking it to stop.
	time.Sleep(50 * time.Millisecond)
	stop()

	timeout := time.After(2 * time.Second)
	select {
	case <-timeout:
		t.Fatal("ListenAndServe did not return after context cancel")
	case err := <-done:
		if err != nil {
			t.Fatalf("ListenAndServe returned error: %v", err)
		}
	}
}
// TestMetricsEndpoint starts the metrics server on a free loopback port and
// verifies that a scrape of /metrics returns 200 and includes the metric
// families that were touched beforehand.
func TestMetricsEndpoint(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Reserve a free port, then release it for ListenAndServe to bind.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	addr := ln.Addr().String()
	_ = ln.Close()

	// Increment counters so they appear in output.
	ConnectionsTotal.WithLabelValues("127.0.0.1:4430", "l4").Inc()
	FirewallBlockedTotal.WithLabelValues("ip").Inc()
	ConnectionsActive.WithLabelValues("127.0.0.1:4430").Set(1)

	// Keep the server's result so an early failure (for example the port
	// being taken again) is reported instead of silently dropped.
	errCh := make(chan error, 1)
	go func() { errCh <- ListenAndServe(ctx, addr, "/metrics") }()

	// Poll until the server answers rather than sleeping a fixed amount;
	// a fixed sleep is either too short on a loaded machine or wasted time.
	client := &http.Client{Timeout: 2 * time.Second}
	url := "http://" + addr + "/metrics"
	deadline := time.Now().Add(2 * time.Second)
	var resp *http.Response
	for {
		resp, err = client.Get(url)
		if err == nil {
			break
		}
		select {
		case serveErr := <-errCh:
			t.Fatalf("ListenAndServe exited before serving: %v", serveErr)
		default:
		}
		if time.Now().After(deadline) {
			t.Fatalf("GET /metrics: %v", err)
		}
		time.Sleep(10 * time.Millisecond)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		t.Fatalf("status = %d, want 200", resp.StatusCode)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatalf("reading body: %v", err)
	}
	text := string(body)

	for _, want := range []string{
		"mcproxy_connections_total",
		"mcproxy_firewall_blocked_total",
		"mcproxy_connections_active",
	} {
		if !strings.Contains(text, want) {
			t.Errorf("response missing %s", want)
		}
	}
}
// TestMetricsDefaultPath verifies that passing an empty path to
// ListenAndServe serves the metrics handler at "/metrics".
func TestMetricsDefaultPath(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Reserve a free port, then release it for ListenAndServe to bind.
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatal(err)
	}
	addr := ln.Addr().String()
	_ = ln.Close()

	// Keep the server's result so an early failure is reported instead of
	// silently dropped.
	errCh := make(chan error, 1)
	go func() { errCh <- ListenAndServe(ctx, addr, "") }()

	// Poll until the server answers rather than sleeping a fixed amount;
	// a fixed sleep is either too short on a loaded machine or wasted time.
	client := &http.Client{Timeout: 2 * time.Second}
	url := "http://" + addr + "/metrics"
	deadline := time.Now().Add(2 * time.Second)
	var resp *http.Response
	for {
		resp, err = client.Get(url)
		if err == nil {
			break
		}
		select {
		case serveErr := <-errCh:
			t.Fatalf("ListenAndServe exited before serving: %v", serveErr)
		default:
		}
		if time.Now().After(deadline) {
			t.Fatalf("GET /metrics: %v", err)
		}
		time.Sleep(10 * time.Millisecond)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		t.Fatalf("status = %d, want 200", resp.StatusCode)
	}
}
// TestMetricsSanity exercises every exported collector with representative
// label values; the test passes as long as none of the calls panics.
func TestMetricsSanity(t *testing.T) {
	const (
		listener = "test:443"
		host     = "example.com"
	)

	ConnectionsTotal.WithLabelValues(listener, "l4").Inc()
	ConnectionsActive.WithLabelValues(listener).Set(5)

	for _, reason := range []string{"ip", "cidr", "country", "rate_limit"} {
		FirewallBlockedTotal.WithLabelValues(reason).Inc()
	}

	BackendDialDuration.WithLabelValues("127.0.0.1:8080").Observe(0.005)

	TransferredBytesTotal.WithLabelValues("client_to_backend", host).Add(1024)
	TransferredBytesTotal.WithLabelValues("backend_to_client", host).Add(2048)

	for _, code := range []string{"200", "502"} {
		L7ResponsesTotal.WithLabelValues(host, code).Inc()
	}

	for _, policy := range []string{"block_user_agent", "require_header"} {
		L7PolicyBlocksTotal.WithLabelValues(host, policy).Inc()
	}
}