From 17e3eab464261d8e7710971ebb5e8c5686c7ba7c Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Thu, 30 Oct 2025 16:12:24 -0700 Subject: [PATCH] Initial import. --- README.md | 176 ++++++++++++++++++++++++++++++++++ caddyhole.go | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++ go.mod | 133 ++++++++++++++++++++++++++ 3 files changed, 574 insertions(+) create mode 100644 README.md create mode 100644 caddyhole.go create mode 100644 go.mod diff --git a/README.md b/README.md new file mode 100644 index 0000000..dbc7d99 --- /dev/null +++ b/README.md @@ -0,0 +1,176 @@ +# CaddyHole + +A Caddy module that blocks clients based on country and sends crawlers/bots a random amount of data from `/dev/random`. + +## Features + +- **Country-based blocking**: Block requests from specific countries using GeoIP2 database +- **Bot/Crawler detection**: Automatically detect bots and crawlers based on User-Agent +- **Bot tarpit**: Send bots/crawlers random data from `/dev/random` to waste their resources +- **Configurable**: Customize blocked countries and amount of data sent to bots + +## Installation + +To use this module, you need to build Caddy with this plugin included. You can use [xcaddy](https://github.com/caddyserver/xcaddy): + +```bash +xcaddy build --with git.wntrmute.dev/kyle/caddyhole +``` + +## Configuration + +### Caddyfile + +```caddyfile +example.com { + caddyhole { + # Path to GeoIP2 database (optional, required for country blocking) + database /path/to/GeoLite2-Country.mmdb + + # List of country ISO codes to block (optional) + block_countries CN RU KP IN IL + + # Minimum bytes to send to bots (optional, default: 1MB) + min_bot_bytes 1048576 + + # Maximum bytes to send to bots (optional, default: 100MB) + max_bot_bytes 104857600 + } + + # Your other handlers here + respond "Hello, World!" 
+} +``` + +### JSON Config + +```json +{ + "apps": { + "http": { + "servers": { + "srv0": { + "listen": [":443"], + "routes": [ + { + "handle": [ + { + "handler": "caddyhole", + "database_path": "/path/to/GeoLite2-Country.mmdb", + "blocked_countries": ["CN", "RU", "KP"], + "min_bot_bytes": 1048576, + "max_bot_bytes": 104857600 + }, + { + "handler": "static_response", + "body": "Hello, World!" + } + ] + } + ] + } + } + } + } +} +``` + +## How It Works + +### Bot Detection + +The module detects bots/crawlers by examining the `User-Agent` header. It looks for common bot signatures including: + +- bot, crawler, spider, scraper +- curl, wget +- python-requests, python-urllib +- go-http-client +- java, perl, ruby, php +- http_request + +When a bot is detected, the module: +1. Generates a random amount of data between `min_bot_bytes` and `max_bot_bytes` +2. Streams that amount of random data from `/dev/random` (or `/dev/urandom` as fallback) +3. Returns HTTP 200 OK to make the bot think it succeeded +4. Never passes the request to downstream handlers + +### Country Blocking + +The module uses MaxMind's GeoIP2 database to determine the country of the client based on their IP address. If the country code matches any in the `blocked_countries` list: + +1. Returns HTTP 403 Forbidden +2. Sends "Access denied" message +3. Never passes the request to downstream handlers + +The module checks the following headers for the client IP (in order): +1. `X-Forwarded-For` (first IP) +2. `X-Real-IP` +3. `RemoteAddr` + +### Execution Order + +The module processes requests in this order: +1. Check if request is from a bot → If yes, send random data +2. Check if request is from a blocked country → If yes, return 403 +3. Otherwise, pass to the next handler + +## GeoIP2 Database + +To use country blocking, you need a GeoIP2 database. You can download the free GeoLite2 Country database from MaxMind: + +1. Sign up for a free account at [MaxMind](https://www.maxmind.com/en/geolite2/signup) +2. 
Download the GeoLite2 Country database in MMDB format +3. Extract the `.mmdb` file +4. Configure the `database` path in your Caddyfile + +## Examples + +### Block specific countries (bot tarpit keeps its default sizes) + +```caddyfile +example.com { + caddyhole { + database /path/to/GeoLite2-Country.mmdb + block_countries CN RU + } + respond "Hello, World!" +} +``` + +### Bot tarpit only (no country blocking) + +```caddyfile +example.com { + caddyhole { + min_bot_bytes 10485760 # 10MB + max_bot_bytes 1073741824 # 1GB + } + respond "Hello, World!" +} +``` + +### Full protection + +```caddyfile +example.com { + caddyhole { + database /path/to/GeoLite2-Country.mmdb + block_countries CN RU KP IR + min_bot_bytes 52428800 # 50MB + max_bot_bytes 524288000 # 500MB + } + respond "Hello, World!" +} +``` + +## Notes + +- Bot detection happens before country blocking, so bots will get random data regardless of their country +- The random data is streamed directly from `/dev/random` (or `/dev/urandom`), which may impact system entropy on some systems +- The `Content-Type` is set to `application/octet-stream` and `Content-Length` is set to make the response appear legitimate +- Country blocking requires a GeoIP2 database; without it, no country blocking occurs +- All configuration parameters are optional, but you need at least `database` and `block_countries` for country blocking to work + +## License + +This module is provided as-is for use with Caddy. 
diff --git a/caddyhole.go b/caddyhole.go new file mode 100644 index 0000000..b4c3231 --- /dev/null +++ b/caddyhole.go @@ -0,0 +1,265 @@ +package caddyhole + +import ( + "fmt" + "io" + "math/rand" + "net" + "net/http" + "os" + "strings" + + "github.com/caddyserver/caddy/v2" + "github.com/caddyserver/caddy/v2/caddyconfig/caddyfile" + "github.com/caddyserver/caddy/v2/caddyconfig/httpcaddyfile" + "github.com/caddyserver/caddy/v2/modules/caddyhttp" + "github.com/oschwald/geoip2-golang" +) + +func init() { + caddy.RegisterModule(CaddyHole{}) + httpcaddyfile.RegisterHandlerDirective("caddyhole", parseCaddyfile) +} + +// CaddyHole implements a Caddy module that blocks clients based on country +// and sends crawlers/bots random data from /dev/random. +type CaddyHole struct { + // Path to the GeoIP2 database file + DatabasePath string `json:"database_path,omitempty"` + + // List of country ISO codes to block + BlockedCountries []string `json:"blocked_countries,omitempty"` + + // Minimum bytes to send to bots (default: 1MB) + MinBotBytes int64 `json:"min_bot_bytes,omitempty"` + + // Maximum bytes to send to bots (default: 100MB) + MaxBotBytes int64 `json:"max_bot_bytes,omitempty"` + + db *geoip2.Reader +} + +// CaddyModule returns the Caddy module information. +func (CaddyHole) CaddyModule() caddy.ModuleInfo { + return caddy.ModuleInfo{ + ID: "http.handlers.caddyhole", + New: func() caddy.Module { return new(CaddyHole) }, + } +} + +// Provision sets up the CaddyHole module. +func (c *CaddyHole) Provision(ctx caddy.Context) error { + // Set defaults + if c.MinBotBytes == 0 { + c.MinBotBytes = 1024 * 1024 // 1MB + } + if c.MaxBotBytes == 0 { + c.MaxBotBytes = 100 * 1024 * 1024 // 100MB + } + + // Open GeoIP2 database if path is provided + if c.DatabasePath != "" { + db, err := geoip2.Open(c.DatabasePath) + if err != nil { + return fmt.Errorf("failed to open GeoIP2 database: %v", err) + } + c.db = db + } + + return nil +} + +// Cleanup closes the GeoIP2 database. 
+func (c *CaddyHole) Cleanup() error { + if c.db != nil { + return c.db.Close() + } + return nil +} + +// ServeHTTP implements caddyhttp.MiddlewareHandler. +func (c *CaddyHole) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.Handler) error { + // Check if the request is from a bot/crawler + if c.isBot(r) { + c.feedBot(w, r) + return nil + } + + // Check if the request should be blocked based on country + if c.shouldBlock(r) { + w.WriteHeader(http.StatusForbidden) + w.Write([]byte("Access denied")) + return nil + } + + // Continue to the next handler + return next.ServeHTTP(w, r) +} + +// isBot checks if the request is from a bot/crawler based on User-Agent. +func (c *CaddyHole) isBot(r *http.Request) bool { + userAgent := strings.ToLower(r.Header.Get("User-Agent")) + + // Common bot/crawler identifiers + botSignatures := []string{ + "bot", "crawler", "spider", "scraper", "curl", "wget", + "python-requests", "python-urllib", "go-http-client", + "java", "perl", "ruby", "php", "http_request", + } + + for _, sig := range botSignatures { + if strings.Contains(userAgent, sig) { + return true + } + } + + return false +} + +// feedBot sends random data from /dev/random to the bot. 
+func (c *CaddyHole) feedBot(w http.ResponseWriter, r *http.Request) { + // Calculate random amount of bytes to send + bytesToSend := c.MinBotBytes + if c.MaxBotBytes > c.MinBotBytes { + bytesToSend += rand.Int63n(c.MaxBotBytes - c.MinBotBytes) + } + + // Open /dev/random + devRandom, err := os.Open("/dev/random") + if err != nil { + // Fallback to /dev/urandom if /dev/random is not available + devRandom, err = os.Open("/dev/urandom") + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + return + } + } + defer devRandom.Close() + + // Set headers to make it look like a legitimate response + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Content-Length", fmt.Sprintf("%d", bytesToSend)) + w.WriteHeader(http.StatusOK) + + // Copy random data to the response + io.CopyN(w, devRandom, bytesToSend) +} + +// shouldBlock checks if the request should be blocked based on country. +func (c *CaddyHole) shouldBlock(r *http.Request) bool { + if c.db == nil || len(c.BlockedCountries) == 0 { + return false + } + + // Get the client IP address + ip := c.getClientIP(r) + if ip == nil { + return false + } + + // Look up the country for the IP + record, err := c.db.Country(ip) + if err != nil { + return false + } + + // Check if the country is in the blocked list + for _, blocked := range c.BlockedCountries { + if strings.EqualFold(record.Country.IsoCode, blocked) { + return true + } + } + + return false +} + +// getClientIP extracts the client IP address from the request. 
+func (c *CaddyHole) getClientIP(r *http.Request) net.IP { + // Check X-Forwarded-For header first + xff := r.Header.Get("X-Forwarded-For") + if xff != "" { + ips := strings.Split(xff, ",") + if len(ips) > 0 { + ipStr := strings.TrimSpace(ips[0]) + if ip := net.ParseIP(ipStr); ip != nil { + return ip + } + } + } + + // Check X-Real-IP header + xri := r.Header.Get("X-Real-IP") + if xri != "" { + if ip := net.ParseIP(xri); ip != nil { + return ip + } + } + + // Fall back to RemoteAddr + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + return nil + } + + return net.ParseIP(host) +} + +// UnmarshalCaddyfile implements caddyfile.Unmarshaler. +func (c *CaddyHole) UnmarshalCaddyfile(d *caddyfile.Dispenser) error { + for d.Next() { + for d.NextBlock(0) { + switch d.Val() { + case "database": + if !d.NextArg() { + return d.ArgErr() + } + c.DatabasePath = d.Val() + + case "block_countries": + c.BlockedCountries = d.RemainingArgs() + if len(c.BlockedCountries) == 0 { + return d.ArgErr() + } + + case "min_bot_bytes": + if !d.NextArg() { + return d.ArgErr() + } + var err error + _, err = fmt.Sscanf(d.Val(), "%d", &c.MinBotBytes) + if err != nil { + return d.Errf("invalid min_bot_bytes value: %v", err) + } + + case "max_bot_bytes": + if !d.NextArg() { + return d.ArgErr() + } + var err error + _, err = fmt.Sscanf(d.Val(), "%d", &c.MaxBotBytes) + if err != nil { + return d.Errf("invalid max_bot_bytes value: %v", err) + } + + default: + return d.Errf("unrecognized subdirective: %s", d.Val()) + } + } + } + return nil +} + +// parseCaddyfile unmarshals tokens from h into a new Middleware. 
+func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error) { + var c CaddyHole + err := c.UnmarshalCaddyfile(h.Dispenser) + return &c, err +} + +// Interface guards +var ( + _ caddy.Provisioner = (*CaddyHole)(nil) + _ caddy.CleanerUpper = (*CaddyHole)(nil) + _ caddyhttp.MiddlewareHandler = (*CaddyHole)(nil) + _ caddyfile.Unmarshaler = (*CaddyHole)(nil) +) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..71f66e3 --- /dev/null +++ b/go.mod @@ -0,0 +1,133 @@ +module git.wntrmute.dev/kyle/caddyhole + +go 1.25.1 + +require ( + github.com/caddyserver/caddy/v2 v2.10.2 + github.com/oschwald/geoip2-golang v1.13.0 +) + +require ( + cel.dev/expr v0.24.0 // indirect + cloud.google.com/go/auth v0.16.2 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.7.0 // indirect + dario.cat/mergo v1.0.1 // indirect + filippo.io/edwards25519 v1.1.0 // indirect + github.com/AndreasBriese/bbloom v0.0.0-20190825152654-46b345b51c96 // indirect + github.com/KimMachineGun/automemlimit v0.7.4 // indirect + github.com/Masterminds/goutils v1.1.1 // indirect + github.com/Masterminds/semver/v3 v3.3.0 // indirect + github.com/Masterminds/sprig/v3 v3.3.0 // indirect + github.com/Microsoft/go-winio v0.6.0 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/aryann/difflib v0.0.0-20210328193216-ff5ff6dc229b // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/caddyserver/certmagic v0.24.0 // indirect + github.com/caddyserver/zerossl v0.1.3 // indirect + github.com/ccoveille/go-safecast v1.6.1 // indirect + github.com/cespare/xxhash v1.1.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/chzyer/readline v1.5.1 // indirect + github.com/cloudflare/circl v1.6.1 // indirect + github.com/coreos/go-oidc/v3 v3.14.1 // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect + github.com/dgraph-io/badger v1.6.2 // indirect + 
github.com/dgraph-io/badger/v2 v2.2007.4 // indirect + github.com/dgraph-io/ristretto v0.2.0 // indirect + github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/francoispqt/gojay v1.2.13 // indirect + github.com/go-jose/go-jose/v3 v3.0.4 // indirect + github.com/go-jose/go-jose/v4 v4.0.5 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-sql-driver/mysql v1.8.1 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/google/cel-go v0.26.0 // indirect + github.com/google/s2a-go v0.1.9 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect + github.com/googleapis/gax-go/v2 v2.14.2 // indirect + github.com/huandu/xstrings v1.5.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect + github.com/jackc/pgx/v5 v5.6.0 // indirect + github.com/jackc/puddle/v2 v2.2.1 // indirect + github.com/klauspost/compress v1.18.0 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/libdns/libdns v1.1.0 // indirect + github.com/manifoldco/promptui v0.9.0 // indirect + github.com/mattn/go-colorable v0.1.13 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect + github.com/mholt/acmez/v3 v3.1.2 // indirect + github.com/miekg/dns v1.1.63 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/go-ps v1.0.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/oschwald/maxminddb-golang 
v1.13.0 // indirect + github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/prometheus/client_golang v1.23.0 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.65.0 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + github.com/quic-go/qpack v0.5.1 // indirect + github.com/quic-go/quic-go v0.54.0 // indirect + github.com/rs/xid v1.6.0 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/shopspring/decimal v1.4.0 // indirect + github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect + github.com/slackhq/nebula v1.9.5 // indirect + github.com/smallstep/certificates v0.28.4 // indirect + github.com/smallstep/cli-utils v0.12.1 // indirect + github.com/smallstep/linkedca v0.23.0 // indirect + github.com/smallstep/nosql v0.7.0 // indirect + github.com/smallstep/pkcs7 v0.2.1 // indirect + github.com/smallstep/scep v0.0.0-20240926084937-8cf1ca453101 // indirect + github.com/smallstep/truststore v0.13.0 // indirect + github.com/spf13/cast v1.7.0 // indirect + github.com/spf13/cobra v1.9.1 // indirect + github.com/spf13/pflag v1.0.7 // indirect + github.com/stoewer/go-strcase v1.2.0 // indirect + github.com/tailscale/tscert v0.0.0-20240608151842-d3f834017e53 // indirect + github.com/urfave/cli v1.22.17 // indirect + github.com/zeebo/blake3 v0.2.4 // indirect + go.etcd.io/bbolt v1.3.10 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect + go.opentelemetry.io/otel v1.37.0 // indirect + go.opentelemetry.io/otel/metric v1.37.0 // indirect + go.opentelemetry.io/otel/trace v1.37.0 // indirect + go.step.sm/crypto v0.67.0 // indirect + go.uber.org/automaxprocs v1.6.0 // indirect + go.uber.org/mock v0.5.2 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + go.uber.org/zap/exp v0.3.0 
// indirect + golang.org/x/crypto v0.40.0 // indirect + golang.org/x/crypto/x509roots/fallback v0.0.0-20250305170421-49bf5b80c810 // indirect + golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect + golang.org/x/mod v0.25.0 // indirect + golang.org/x/net v0.42.0 // indirect + golang.org/x/oauth2 v0.30.0 // indirect + golang.org/x/sync v0.16.0 // indirect + golang.org/x/sys v0.34.0 // indirect + golang.org/x/term v0.33.0 // indirect + golang.org/x/text v0.27.0 // indirect + golang.org/x/time v0.12.0 // indirect + golang.org/x/tools v0.34.0 // indirect + google.golang.org/api v0.240.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/grpc v1.73.0 // indirect + google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.5.1 // indirect + google.golang.org/protobuf v1.36.6 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + howett.net/plist v1.0.0 // indirect +)