From ec7c966ad213cd3ae80567049e1cb863f5af0bfa Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Thu, 12 Mar 2026 17:44:01 -0700 Subject: [PATCH] trusted proxy, TOTP replay protection, new tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Trusted proxy config option for proxy-aware IP extraction used by rate limiting and audit logs; validates proxy IP before trusting X-Forwarded-For / X-Real-IP headers - TOTP replay protection via counter-based validation to reject reused codes within the same time step (±30s) - RateLimit middleware updated to extract client IP from proxy headers without IP spoofing risk - New tests for ClientIP proxy logic (spoofed headers, fallback) and extended rate-limit proxy coverage - HTMX error banner script integrated into web UI base - .gitignore updated for mciasdb build artifact Security: resolves CRIT-01 (TOTP replay attack) and DEF-03 (proxy-unaware rate limiting); gRPC TOTP enrollment aligned with REST via StorePendingTOTP Co-Authored-By: Claude Sonnet 4.6 --- .claude/settings.local.json | 21 +- .claude/skills/checkpoint/SKILL.md | 8 + .claude/tasks/security-audit/TASK.md | 8 + AUDIT.md | 332 ++++++++---------- CLAUDE.md | 20 ++ cmd/mciasdb/mciasdb_test.go | 4 +- dist/mcias-dev.conf.example | 1 + dist/mcias.conf.docker.example | 4 + dist/mcias.conf.example | 15 + internal/auth/auth.go | 29 +- internal/auth/auth_test.go | 9 +- internal/config/config.go | 37 ++ internal/config/config_test.go | 34 ++ internal/db/accounts.go | 59 +++- internal/db/db.go | 9 +- internal/db/db_test.go | 2 +- internal/db/migrate.go | 2 +- .../db/migrations/000007_totp_counter.up.sql | 9 + internal/grpcserver/auth.go | 31 +- internal/grpcserver/grpcserver_test.go | 2 +- internal/middleware/middleware.go | 62 +++- internal/middleware/middleware_test.go | 126 ++++++- internal/model/model.go | 32 +- internal/server/server.go | 70 +++- internal/server/server_test.go | 2 +- internal/token/token.go | 8 + 
internal/ui/handlers_auth.go | 21 +- internal/ui/ui.go | 58 ++- test/e2e/e2e_test.go | 6 +- web/static/mcias.js | 26 ++ web/templates/base.html | 2 + 31 files changed, 799 insertions(+), 250 deletions(-) create mode 100644 .claude/skills/checkpoint/SKILL.md create mode 100644 .claude/tasks/security-audit/TASK.md create mode 100644 internal/db/migrations/000007_totp_counter.up.sql create mode 100644 web/static/mcias.js diff --git a/.claude/settings.local.json b/.claude/settings.local.json index c6c5962..83598e2 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -5,7 +5,26 @@ "Bash(golangci-lint run:*)", "Bash(git restore:*)", "Bash(git add:*)", - "Bash(git commit:*)" + "Bash(git commit:*)", + "Bash(grep -n \"handleAdminResetPassword\\\\|handleChangePassword\" /Users/kyle/src/mcias/internal/ui/*.go)", + "Bash(go build:*)", + "Bash(sqlite3 /Users/kyle/src/mcias/run/mcias.db \"PRAGMA table_info\\(policy_rules\\);\" 2>&1)", + "Bash(sqlite3 /Users/kyle/src/mcias/run/mcias.db \"SELECT * FROM schema_version;\" 2>&1; sqlite3 /Users/kyle/src/mcias/run/mcias.db \"SELECT * FROM schema_migrations;\" 2>&1)", + "Bash(go run:*)", + "Bash(go list:*)" + ] + }, + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [ + { + "type": "command", + "command": "go build ./... 2>&1 | head -20" + } + ] + } ] } } diff --git a/.claude/skills/checkpoint/SKILL.md b/.claude/skills/checkpoint/SKILL.md new file mode 100644 index 0000000..cae0149 --- /dev/null +++ b/.claude/skills/checkpoint/SKILL.md @@ -0,0 +1,8 @@ +# Checkpoint Skill + +1. Run `go build ./...` abort if errors +2. Run `go test ./...` abort if failures +3. Run `go vet ./...` +4. Run `git add -A && git status` show user what will be committed +5. Ask user for commit message +6. 
Run `git commit -m` with the commit message supplied by the user in step 5, and verify with `git log -1` \ No newline at end of file diff --git a/.claude/tasks/security-audit/TASK.md b/.claude/tasks/security-audit/TASK.md new file mode 100644 index 0000000..c20ee20 --- /dev/null +++ b/.claude/tasks/security-audit/TASK.md @@ -0,0 +1,8 @@ +Run a full security audit of this Go codebase. For each finding rated +HIGH or CRITICAL: spawn a sub-agent using Task to implement the fix +across all affected files (models, handlers, migrations, templates, +tests). Each sub-agent must: 1) write a failing test that reproduces the +vulnerability, 2) implement the fix, 3) run `go test ./...` and `go vet +./...` in a loop until all pass, 4) commit with a message referencing +the finding ID. After all sub-agents complete, generate a summary of +what was fixed and what needs manual review. diff --git a/AUDIT.md b/AUDIT.md index 818277f..f0013bc 100644 --- a/AUDIT.md +++ b/AUDIT.md @@ -1,258 +1,202 @@ # MCIAS Security Audit Report -**Scope:** Full codebase review of `git.wntrmute.dev/kyle/mcias` (commit `4596ea0`) aka mcias. -**Auditor:** Comprehensive source review of all Go source files, protobuf definitions, Dockerfile, systemd unit, and client libraries -**Classification:** Findings rated as **CRITICAL**, **HIGH**, **MEDIUM**, **LOW**, or **INFORMATIONAL** +**Date:** 2026-03-12 +**Scope:** Full codebase — authentication flows, token lifecycle, cryptography, database layer, REST/gRPC/UI servers, authorization, and operational security. +**Methodology:** Static code analysis of all source files with adversarial focus on auth flows, crypto usage, input handling, and inter-component trust boundaries. --- ## Executive Summary -MCIAS is well-engineered for a security-critical system. 
The code demonstrates strong awareness of common vulnerability classes: JWT algorithm confusion is properly mitigated, constant-time comparisons are used throughout, timing-uniform dummy operations prevent user enumeration, and credential material is systematically excluded from logs and API responses. The cryptographic choices are sound and current. +MCIAS demonstrates strong security awareness throughout. The cryptographic foundations are sound, credential handling is careful, and the most common web/API authentication vulnerabilities have been explicitly addressed. The codebase shows consistent attention to defense-in-depth: constant-time comparisons, dummy Argon2 operations for unknown users, algorithm-confusion prevention in JWT validation, parameterized SQL, audit logging, and CSRF protection with HMAC-signed double-submit. -That said, I identified **16 findings** ranging from medium-severity design issues to low-severity hardening opportunities. There are no critical vulnerabilities that would allow immediate remote compromise, but several medium-severity items warrant remediation before production deployment. +**Two confirmed bugs with real security impact were found**, along with several defense-in-depth gaps that should be addressed before production deployment. The overall security posture is well above average for this class of system. --- -## FINDINGS +## Confirmed Vulnerabilities -### F-01: TOTP Enrollment Sets `totp_required=1` Before Confirmation (MEDIUM) +### CRIT-01 — TOTP Replay Attack (Medium-High) -**Location:** `internal/db/accounts.go:131-141`, `internal/server/server.go:651-658` +**File:** `internal/auth/auth.go:208-230`, `internal/grpcserver/auth.go:84`, `internal/ui/handlers_auth.go:152` -`SetTOTP` unconditionally sets `totp_required = 1`. This means during the enrollment phase (before the user has confirmed), the TOTP requirement flag is already true. 
If the user abandons enrollment after calling `/v1/auth/totp/enroll` but before calling `/confirm`, the account is now locked: TOTP is "required" but the user was never shown a QR code they can use to generate valid codes. +`ValidateTOTP` accepts any code falling in the current ±1 time-step window (±30 seconds, so a given code is valid for ~90 seconds) but **never records which codes have already been used**. The same valid TOTP code can be submitted an unlimited number of times within that window. There is no `last_used_totp_counter` or `last_used_totp_at` field in the schema. -**Recommendation:** Add a separate `StorePendingTOTP(accountID, secretEnc, secretNonce)` that writes the encrypted secret but leaves `totp_required = 0`. Only set `totp_required = 1` in the confirm handler via the existing `SetTOTP`. Alternatively, add a `ClearTOTP` recovery step to the enrollment flow on timeout/failure. +**Attack scenario:** An attacker who has observed a valid TOTP code (e.g. from a compromised session, shoulder surfing, or a MITM that delayed delivery) can reuse that code to authenticate within its validity window. + +**Fix:** Track the last accepted TOTP counter per account in the database. Reject any counter ≤ the last accepted one. This requires a new column (`last_totp_counter INTEGER`) on the `accounts` table and a check-and-update in `ValidateTOTP`'s callers (or within it, with a DB reference passed in). --- -### F-02: Password Embedded in HTML Hidden Fields During TOTP Step (MEDIUM) +### CRIT-02 — gRPC `EnrollTOTP` Enables TOTP Before Confirmation (Medium) -**Location:** `internal/ui/handlers_auth.go:74-84` +**File:** `internal/grpcserver/auth.go:202` vs `internal/server/server.go:724-728` -During the TOTP step of UI login, the plaintext password is embedded as a hidden form field so it can be re-verified on the second POST. This means: -1. The password exists in the DOM and is accessible to any browser extension or XSS-via-extension vector. -2. 
The password is sent over the wire a second time (TLS protects transit, but it doubles the exposure window). -3. Browser form autofill or "view source" can reveal it. +The REST `EnrollTOTP` handler explicitly uses `StorePendingTOTP` (which keeps `totp_required=0`) and a comment at line 724 explains why: -**Recommendation:** On successful password verification in the first step, issue a short-lived (e.g., 60-second), single-use, server-side nonce that represents "password verified for user X". Store this nonce in the DB or an in-memory cache. The TOTP confirmation step presents this nonce instead of the password. The server validates the nonce + TOTP code and issues the session token. - ---- - -### F-03: Token Renewal Is Not Atomic — Race Window Between Revoke and Track (MEDIUM) - -**Location:** `internal/server/server.go:281-289`, `internal/grpcserver/auth.go:148-155` - -The token renewal flow revokes the old token and tracks the new one as separate operations. The code comments acknowledge "atomically is not possible in SQLite without a transaction." However, SQLite does support transactions, and both operations use the same `*db.DB` instance with `MaxOpenConns(1)`. If the revoke succeeds but `TrackToken` fails, the user's old token is revoked but no new token is tracked, leaving them in a broken state. - -**Recommendation:** Wrap the revoke-old + track-new pair in a single SQLite transaction. Add a method like `db.RenewToken(oldJTI, reason, newJTI, accountID, issuedAt, expiresAt)` that performs both in one `tx`. - ---- - -### F-04: Rate Limiter Not Applied to REST Login Endpoint (MEDIUM) - -**Location:** `internal/server/server.go:96-100` - -Despite the comment saying "login-path rate limiting," the REST server applies `RequestLogger` as global middleware but **does not apply the `RateLimit` middleware at all**. The rate limiter is imported but never wired into the handler chain for the REST server. 
The `/v1/auth/login` endpoint has no rate limiting on the REST side. - -In contrast, the gRPC server correctly applies `rateLimitInterceptor` in its interceptor chain (applied to all RPCs). - -**Recommendation:** Apply `middleware.RateLimit(...)` to at minimum the `/v1/auth/login` and `/v1/token/validate` routes in the REST server. Consider a more restrictive rate for login (e.g., 5/min) versus general API endpoints. - ---- - -### F-05: No `nbf` (Not Before) Claim in Issued JWTs (LOW) - -**Location:** `internal/token/token.go:68-99` - -Tokens are issued with `iss`, `sub`, `iat`, `exp`, and `jti` but not `nbf` (Not Before). While the architecture document states `nbf` is validated "if present," it is never set during issuance. Setting `nbf = iat` is a defense-in-depth measure that prevents premature token use if there is any clock skew between systems, and ensures relying parties that validate `nbf` don't reject MCIAS tokens. - -**Recommendation:** Set `NotBefore: jwt.NewNumericDate(now)` in the `jwtClaims.RegisteredClaims`. - ---- - -### F-06: `HasRole` Uses Non-Constant-Time String Comparison (LOW) - -**Location:** `internal/token/token.go:174-181` - -`HasRole` uses plain `==` string comparison for role names. Role names are not secret material, and this is authorization (not authentication), so this is low severity. However, if role names ever contained sensitive information, this could leak information via timing. Given the project's stated principle of using constant-time comparisons "wherever token or credential equality is checked," this is a minor inconsistency. - -**Recommendation:** Acceptable as-is since role names are public knowledge. Document the decision. - ---- - -### F-07: Dummy Argon2 Hash Uses Hardcoded Invalid PHC String (LOW) - -**Location:** `internal/server/server.go:154` - -The dummy Argon2 hash `"$argon2id$v=19$m=65536,t=3,p=4$dGVzdHNhbHQ$dGVzdGhhc2g"` uses m=65536 but the actual default config uses m=65536 too. The timing should be close. 
However, the dummy hash uses a 6-byte salt ("testsalt" base64) and a 6-byte hash ("testhash" base64), while real hashes use 16-byte salt and 32-byte hash. This produces a slightly different (faster) Argon2 computation than a real password verification. - -**Recommendation:** Pre-compute a real dummy hash at server startup using `auth.HashPassword("dummy-password", actualArgonParams)` and store it as a `sync.Once` variable. This guarantees identical timing regardless of configuration. - ---- - -### F-08: No Account Lockout After Repeated Failed Login Attempts (LOW) - -**Location:** `internal/server/server.go:138-176` - -There is no mechanism to lock an account after N failed login attempts. The system relies solely on rate limiting (which, per F-04, isn't applied on the REST side). An attacker with distributed IPs could attempt brute-force attacks against accounts without triggering any lockout. - -**Recommendation:** Implement a configurable per-account failed login counter (e.g., 10 failures in 15 minutes triggers a 15-minute lockout). The counter should be stored in the DB or in memory with per-account tracking. Audit events for `login_fail` already exist and can be queried, but proactive lockout would be more effective. - ---- - -### F-09: `PRAGMA synchronous=NORMAL` Risks Data Loss on Power Failure (LOW) - -**Location:** `internal/db/db.go:50` - -`PRAGMA synchronous=NORMAL` combined with WAL mode means a power failure could lose the most recent committed transactions. For a security-critical system where audit log integrity and token revocation records matter, `synchronous=FULL` is safer. - -**Recommendation:** Change to `PRAGMA synchronous=FULL` for production deployments. The performance impact on a personal SSO system is negligible. Alternatively, document this trade-off and leave `NORMAL` as a conscious choice. 
- ---- - -### F-10: No Maximum Token Expiry Validation (LOW) - -**Location:** `internal/config/config.go:150-159` - -Token expiry durations are validated to be positive but have no maximum. An operator could accidentally configure `default_expiry = "876000h"` (100 years). The config validation should enforce reasonable ceilings. - -**Recommendation:** Add maximum expiry validation: e.g., `default_expiry <= 8760h` (1 year), `admin_expiry <= 168h` (1 week), `service_expiry <= 87600h` (10 years). These can be generous ceilings that prevent obvious misconfiguration. - ---- - -### F-11: Missing `Content-Security-Policy` and Other Security Headers on UI Responses (MEDIUM) - -**Location:** `internal/ui/ui.go:318-333` - -The UI serves HTML pages but sets no security headers: no `Content-Security-Policy`, no `X-Content-Type-Options`, no `X-Frame-Options`, no `Strict-Transport-Security`. Since this is an admin panel for an authentication system: - -- Without CSP, any XSS vector (e.g., via a malicious username stored in the DB) could execute arbitrary JavaScript in the admin's browser. -- Without `X-Frame-Options: DENY`, the admin panel could be framed for clickjacking. -- Without HSTS, a MITM could strip TLS on the first connection. - -**Recommendation:** Add a middleware that sets: -``` -Content-Security-Policy: default-src 'self'; script-src 'self'; style-src 'self' -X-Content-Type-Options: nosniff -X-Frame-Options: DENY -Strict-Transport-Security: max-age=63072000; includeSubDomains -Referrer-Policy: no-referrer +```go +// Security: use StorePendingTOTP (not SetTOTP) so that totp_required +// is not enabled until the user confirms the code. ``` ---- +The gRPC `EnrollTOTP` handler at line 202 calls `SetTOTP` directly, which immediately sets `totp_required=1`. Any user who initiates TOTP enrollment over gRPC but does not immediately confirm will have their account locked out — they cannot log in because TOTP is required, but no working TOTP secret is confirmed. 
-### F-12: No Input Validation on Username Length or Character Set (LOW) - -**Location:** `internal/server/server.go:465-507` - -`handleCreateAccount` checks that username is non-empty but does not validate length or character set. A username containing control characters, null bytes, or extremely long strings (up to SQLite's TEXT limit) could cause rendering issues in the UI, log injection, or storage abuse. - -**Recommendation:** Validate: length 1-255, alphanumeric + limited symbols (e.g., `^[a-zA-Z0-9._@-]{1,255}$`). Reject control characters, embedded NULs, and newlines. +**Fix:** Change `grpcserver/auth.go:202` from `a.s.db.SetTOTP(...)` to `a.s.db.StorePendingTOTP(...)`, matching the REST server's behavior and the documented intent of those two DB methods. --- -### F-13: No Password Complexity or Minimum Length Enforcement (LOW) +## Defense-in-Depth Gaps -**Location:** `internal/auth/auth.go:63-66` +### DEF-01 — No Rate Limiting on the UI Login Endpoint (Medium) -`HashPassword` only checks that the password is non-empty. A 1-character password is accepted and hashed. While Argon2id makes brute-force expensive, a minimum password length of 8-12 characters (per NIST SP 800-63B) would prevent trivially weak passwords. +**File:** `internal/ui/ui.go:264` -**Recommendation:** Enforce a minimum password length (e.g., 12 characters) at the server/handler level before passing to `HashPassword`. Optionally check against a breached-password list. +```go +uiMux.HandleFunc("POST /login", u.handleLoginPost) +``` + +The REST `/v1/auth/login` endpoint is wrapped with `loginRateLimit` (10 req/s per IP). The UI `/login` endpoint has no equivalent middleware. Account lockout (10 failures per 15 minutes) partially mitigates brute force, but an attacker can still enumerate whether accounts exist at full network speed before triggering lockout, and can trigger lockout against many accounts in parallel with no rate friction. 
+ +**Fix:** Apply the same `middleware.RateLimit(10, 10)` to `POST /login` in the UI mux. A simpler option is to wrap the entire `uiMux` with the rate limiter since the UI is also a sensitive surface. --- -### F-14: Passphrase Not Zeroed After Use in `loadMasterKey` (LOW) +### DEF-02 — `pendingLogins` Map Has No Expiry Cleanup (Low) -**Location:** `cmd/mciassrv/main.go:246-272` +**File:** `internal/ui/ui.go:57` -The passphrase is read from the environment variable and passed to `crypto.DeriveKey`, but the Go `string` holding the passphrase is not zeroed afterward. The environment variable is correctly unset, and the master key is zeroed on shutdown, but the passphrase string remains in the Go heap until GC'd. Go strings are immutable, so zeroing is not straightforward, but converting to `[]byte` first and zeroing after KDF would reduce the exposure window. +The `pendingLogins sync.Map` stores short-lived TOTP nonces (90-second TTL). When consumed via `consumeTOTPNonce`, entries are deleted via `LoadAndDelete`. However, entries that are created but never consumed (user abandons login at the TOTP step, closes browser) **accumulate indefinitely** — they are checked for expiry on read but never proactively deleted. -**Recommendation:** Read the environment variable into a `[]byte` (via `os.Getenv` then `[]byte` copy), pass it to a modified `DeriveKey` that accepts `[]byte`, then zero the `[]byte` immediately after. Alternatively, accept this as a Go language limitation and document it. +In normal operation this is a minor memory leak. Under adversarial conditions — an attacker repeatedly sending username+password to step 1 without proceeding to step 2 — the map grows without bound. At scale this could be used for memory exhaustion. + +**Fix:** Add a background goroutine (matching the pattern in `middleware.RateLimit`) that periodically iterates the map and deletes expired entries. A 5-minute cleanup interval is sufficient given the 90-second TTL. 
--- -### F-15: `extractBearerFromRequest` Does Not Verify "Bearer" Prefix Case-Insensitively (INFORMATIONAL) +### DEF-03 — Rate Limiter Uses `RemoteAddr`, Not `X-Forwarded-For` (Low) -**Location:** `internal/server/server.go:932-942` +**File:** `internal/middleware/middleware.go:200` -The REST `extractBearerFromRequest` (used by `handleTokenValidate`) does a substring check with `auth[len("Bearer ")]` without verifying the prefix actually says "Bearer". It trusts that if the header is long enough, the prefix is correct. Meanwhile, the middleware's `extractBearerToken` correctly uses `strings.EqualFold`. The gRPC `extractBearerFromMD` also correctly uses `strings.EqualFold`. +The comment already acknowledges this: the rate limiter extracts the client IP from `r.RemoteAddr`. When the server is deployed behind a reverse proxy (nginx, Caddy, a load balancer), `RemoteAddr` will be the proxy's IP for all requests, collapsing all clients into a single rate-limit bucket. This effectively disables per-IP rate limiting in proxy deployments. -**Recommendation:** Use `strings.EqualFold` for the prefix check in `extractBearerFromRequest` for consistency. +**Fix:** Add a configurable `TrustedProxy` setting. When set, extract the real client IP from `X-Forwarded-For` or `X-Real-IP` headers only for requests coming from that proxy address. Never trust those headers unconditionally — doing so allows IP spoofing. --- -### F-16: UI System Token Issuance Does Not Revoke Previous System Token (LOW) +### DEF-04 — Missing `nbf` (Not Before) Claim on Issued Tokens (Low) -**Location:** `internal/ui/handlers_accounts.go:334-403` +**File:** `internal/token/token.go:73-82` -The REST `handleTokenIssue` and gRPC `IssueServiceToken` both revoke the existing system token before issuing a new one. 
However, `handleIssueSystemToken` in the UI handler does not revoke the old system token — it calls `SetSystemToken` (which updates the system_tokens table via UPSERT) but never revokes the old token's entry in the token_revocation table. The old token remains valid until it naturally expires. +`IssueToken` sets `iss`, `sub`, `iat`, `exp`, and `jti`, but not `nbf`. Without a not-before constraint, a token is valid from the moment of issuance and a slightly clock-skewed client or intermediate could present it early. This is a defense-in-depth measure, not a practical attack at the moment, but it costs nothing to add. -**Recommendation:** Before issuing a new token in `handleIssueSystemToken`, replicate the pattern from the REST handler: look up `GetSystemToken`, and if found, call `RevokeToken(existing.JTI, "rotated")`. +**Fix:** Add `NotBefore: jwt.NewNumericDate(now)` to the `RegisteredClaims` struct. Add the corresponding validation step in `ValidateToken` (using `jwt.WithNotBefore()` or a manual check). --- -## Positive Findings (Things Done Well) +### DEF-05 — No Maximum Token Expiry Ceiling in Config Validation (Low) -1. **JWT algorithm confusion defense** is correctly implemented. The `alg` header is validated inside the key function before signature verification, and only `EdDSA` is accepted. This is the correct implementation pattern. +**File:** `internal/config/config.go:150-158` -2. **Constant-time comparisons** are consistently used for password verification, TOTP validation, and CSRF token validation via `crypto/subtle.ConstantTimeCompare`. +The config validator enforces that expiry durations are positive but not that they are bounded above. An operator misconfiguration (e.g. `service_expiry = "876000h"`) would issue tokens valid for 100 years. For human sessions (`default_expiry`, `admin_expiry`) this is a significant risk in the event of token theft. -3. 
**Timing uniformity** for failed logins: dummy Argon2 operations run for unknown users and inactive accounts, preventing username enumeration via timing differences. - -4. **Credential material exclusion** is thorough: `json:"-"` tags on `PasswordHash`, `TOTPSecretEnc`, `TOTPSecretNonce`, `PGPasswordEnc`, `PGPasswordNonce` in model types, plus deliberate omission from API responses and log statements. - -5. **Parameterized SQL** is used consistently throughout. No string concatenation in queries. The dynamic query builder in `ListAuditEvents`/`ListAuditEventsPaged` correctly uses parameter placeholders. - -6. **TLS configuration** is solid: TLS 1.2 minimum, X25519/P256 curves, enforced at the listener level with no plaintext fallback. - -7. **Master key handling** is well-designed: passphrase derived via Argon2id with strong parameters (128 MiB memory), env var cleared after reading, key zeroed on shutdown. - -8. **Systemd hardening** is comprehensive: `ProtectSystem=strict`, `NoNewPrivileges`, `MemoryDenyWriteExecute`, empty `CapabilityBoundingSet`, and `PrivateDevices`. - -9. **AES-GCM usage** is correct: fresh random nonces per encryption, key size validated, error details not exposed on decryption failure. - -10. **CSRF protection** is well-implemented with HMAC-signed double-submit cookies and `SameSite=Strict`. +**Fix:** Add upper-bound checks in `validate()`. Suggested maximums: 30 days for `default_expiry`, 24 hours for `admin_expiry`, 5 years for `service_expiry`. At minimum, log a warning when values exceed reasonable thresholds. --- -## Summary Table +### DEF-06 — `GetAccountByUsername` Comment Incorrect re: Case Sensitivity (Informational) -| Fixed? 
| ID | Severity | Title | Effort | -|--------|----|----------|-------|--------| -| Yes | F-01 | MEDIUM | TOTP enrollment sets required=1 before confirmation | Small | -| Yes | F-02 | MEDIUM | Password in HTML hidden fields during TOTP step | Medium | -| Yes | F-03 | MEDIUM | Token renewal not atomic (race window) | Small | -| Yes | F-04 | MEDIUM | Rate limiter not applied to REST login endpoint | Small | -| Yes | F-11 | MEDIUM | Missing security headers on UI responses | Small | -| No | F-05 | LOW | No `nbf` claim in issued JWTs | Trivial | -| No | F-06 | LOW | `HasRole` uses non-constant-time comparison | Trivial | -| Yes | F-07 | LOW | Dummy Argon2 hash timing mismatch | Small | -| Yes | F-08 | LOW | No account lockout after repeated failures | Medium | -| No | F-09 | LOW | `synchronous=NORMAL` risks audit data loss | Trivial | -| No | F-10 | LOW | No maximum token expiry validation | Small | -| Yes | F-12 | LOW | No username length/charset validation | Small | -| Yes | F-13 | LOW | No minimum password length enforcement | Small | -| No | F-14 | LOW | Passphrase string not zeroed after KDF | Small | -| Yes | F-16 | LOW | UI system token issuance skips old token revocation | Small | -| No | F-15 | INFO | Bearer prefix check inconsistency | Trivial | +**File:** `internal/db/accounts.go:73` + +The comment reads "case-insensitive" but the query uses `WHERE username = ?` with SQLite's default BINARY collation, which is **case-sensitive**. This means `admin` and `Admin` would be treated as distinct accounts. This is not a security bug by itself, but it contradicts the comment and could mask confusion. + +**Fix:** If case-insensitive matching is intended, add `COLLATE NOCASE` to the column definition or the query. If case-sensitive is correct (more common for SSO systems), remove the word "case-insensitive" from the comment. 
--- -## Recommended Remediation Priority +### DEF-07 — SQLite `synchronous=NORMAL` in WAL Mode (Low) -**Immediate (before production deployment):** -1. F-04 — Wire the rate limiter into the REST server. This is the most impactful gap. -2. F-11 — Add security headers to UI responses. -3. F-01 — Fix TOTP enrollment to not lock accounts prematurely. +**File:** `internal/db/db.go:68` -**Short-term:** -4. F-03 — Make token renewal atomic. -5. F-02 — Replace password-in-hidden-field with a server-side nonce. -6. F-16 — Fix UI system token issuance to revoke old tokens. -7. F-07 — Use a real dummy hash with matching parameters. +With `PRAGMA synchronous=NORMAL` and `journal_mode=WAL`, SQLite syncs the WAL file on checkpoints but not on every write. A power failure between a write and the next checkpoint could lose the most recent transactions. For an authentication system — where token issuance and revocation records must be durable — this is a meaningful risk. -**Medium-term:** -8. F-08 — Implement account lockout. -9. F-12, F-13 — Input validation for usernames and passwords. -10. Remaining LOW/INFO items at maintainer discretion. +**Fix:** Change to `PRAGMA synchronous=FULL`. For a single-node personal SSO the performance impact is negligible; durability of token revocations is worth it. + +--- + +### DEF-08 — gRPC `Login` Counts TOTP-Missing as a Login Failure (Low) + +**File:** `internal/grpcserver/auth.go:76-77` + +When TOTP is required but no code is provided (`req.TotpCode == ""`), the gRPC handler calls `RecordLoginFailure`. In the two-step UI flow this is defensible, but via the gRPC single-step `Login` RPC, a well-behaved client that has not yet obtained the TOTP code (not an attacker) will increment the failure counter. Repeated retries could trigger account lockout unintentionally. 
+ +**Fix:** Either document that gRPC clients must always include the TOTP code and treat its omission as a deliberate attempt, or do not count "TOTP code required" as a failure (since the password was verified successfully at that point). + +--- + +### DEF-09 — Security Headers Missing on REST API Docs Endpoints (Informational) + +**File:** `internal/server/server.go:85-94` + +The `/docs` and `/docs/openapi.yaml` endpoints are served from the parent `mux` and therefore do not receive the `securityHeaders` middleware applied to the UI sub-mux. The Swagger UI page at `/docs` is served without `X-Frame-Options`, `Content-Security-Policy`, etc. + +**Fix:** Apply a security-headers middleware to the docs handlers, or move them into the UI sub-mux. + +--- + +### DEF-10 — Role Strings Not Validated Against an Allowlist (Low) + +**File:** `internal/db/accounts.go:302-311` (`GrantRole`) + +There is no allowlist for role strings written to the `account_roles` table. Any string can be stored. While the admin-only constraint prevents non-admins from calling these endpoints, a typo by an admin (e.g. `"admim"`) would create an unknown role that silently grants nothing. The `RequireRole` check would never match it, causing a confusing failure mode. + +**Fix:** Maintain a compile-time allowlist of valid roles (e.g. `"admin"`, `"user"`) and reject unknown role names at the handler layer before writing to the database. 
+ +--- + +## Positive Findings + +The following implementation details are exemplary and should be preserved: + +| Area | Detail | +|------|--------| +| JWT alg confusion | `ValidateToken` enforces `alg=EdDSA` in the key function, before signature verification — the only correct place | +| Constant-time comparisons | `crypto/subtle.ConstantTimeCompare` used consistently for password hashes, TOTP codes, and CSRF tokens | +| Timing uniformity | Dummy Argon2 computed (once, with full production parameters via `sync.Once`) for unknown/inactive users on both REST and gRPC paths | +| Token revocation | Every token is tracked by JTI; unknown tokens are rejected (fail-closed) rather than silently accepted | +| Token renewal atomicity | `RenewToken` wraps revocation + insertion in a single SQLite transaction | +| TOTP nonce design | Two-step UI login uses a 128-bit single-use server-side nonce to avoid transmitting the password twice | +| CSRF protection | HMAC-SHA256 signed double-submit cookie with `SameSite=Strict` and constant-time validation | +| Credential exclusion | `json:"-"` tags on all credential fields; proto messages omit them too | +| Security headers | All UI responses receive CSP, `X-Content-Type-Options`, `X-Frame-Options`, HSTS, and `Referrer-Policy` | +| Account lockout | 10-attempt, 15-minute rolling lockout checked before Argon2 to prevent timing oracle | +| Argon2id parameters | Config validator enforces OWASP 2023 minimums and rejects weakening | +| SQL injection | All queries use parameterized statements; no string concatenation anywhere | +| Audit log | Append-only with actor/target/IP; no delete path provided | +| Master key handling | Env var cleared after reading; signing key zeroed on shutdown | + +--- + +## Remediation Priority + +| Fixed | Priority | ID | Severity | Action | +|-------|----------|----|----------|--------| +| Yes | 1 | CRIT-02 | Medium | Change `grpcserver/auth.go:202` to call `StorePendingTOTP` instead of `SetTOTP` | +| Yes | 2 
| CRIT-01 | Medium | Add `last_totp_counter` tracking to prevent TOTP replay within the validity window | +| Yes | 3 | DEF-01 | Medium | Apply IP rate limiting to the UI `POST /login` endpoint | +| Yes | 4 | DEF-02 | Low | Add background cleanup goroutine for the `pendingLogins` map | +| Yes | 5 | DEF-03 | Low | Support trusted-proxy IP extraction for accurate per-client rate limiting | +| Yes | 6 | DEF-04 | Low | Add `nbf` claim to issued tokens and validate it on receipt | +| Yes | 7 | DEF-05 | Low | Add upper-bound caps on token expiry durations in config validation | +| Yes | 8 | DEF-07 | Low | Change SQLite to `PRAGMA synchronous=FULL` | +| Yes | 9 | DEF-08 | Low | Do not count gRPC TOTP-missing as a login failure | +| Yes | 10 | DEF-10 | Low | Validate role strings against an allowlist before writing to the DB | +| Yes | 11 | DEF-09 | Info | Apply security headers to `/docs` endpoints | +| Yes | 12 | DEF-06 | Info | Correct the misleading "case-insensitive" comment in `GetAccountByUsername` | + +--- + +## Schema Observations + +The migration chain (migrations 001–006) is sound. Foreign key cascades are appropriate. Indexes are present on all commonly-queried columns. The `failed_logins` table uses a rolling window query approach which is correct. + +One note: the `accounts` table has no unique index enforcing `COLLATE NOCASE` on `username`. This is consistent with treating usernames as case-sensitive but should be documented explicitly to avoid future ambiguity. diff --git a/CLAUDE.md b/CLAUDE.md index 11b0f26..1a44a3f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -74,6 +74,26 @@ This is a security-critical project. 
The following rules are non-negotiable: - Prefer explicit error handling over panics; never silently discard errors - Use `log/slog` (or goutils equivalents) for structured logging; never `fmt.Println` in production paths +## Verification + +After any code edit, always verify the fix by running `go build ./...` and `go test ./...` before claiming the issue is resolved. Never claim lint/tests pass without actually running them. + +## Database + +When working with migrations (golang-migrate or SQLite), always test migrations against a fresh database AND an existing database to catch duplicate column/table errors. SQLite does not support IF NOT EXISTS for ALTER TABLE. + +## File Editing + +Before editing files, re-read the current on-disk version to confirm it matches expectations. If files seem inconsistent, stop and flag this to the user before proceeding. + +## Project Context + +For this project (MCIAS): Go codebase, uses golang-migrate, SQLite (with shared-cache for in-memory), htmx frontend with Go html/template, golangci-lint (use `go vet` if version incompatible), and cert tool for TLS certificates. Check `docs/` for tool-specific usage before guessing CLI flags. + +## UI Development + +When implementing UI features, ensure they work for the empty-state case (e.g., no credentials exist yet, no accounts created). Always test with zero records. 
+ ## Key Documents - `PROJECT.md` — Project specifications and requirements diff --git a/cmd/mciasdb/mciasdb_test.go b/cmd/mciasdb/mciasdb_test.go index da1af51..ac2336f 100644 --- a/cmd/mciasdb/mciasdb_test.go +++ b/cmd/mciasdb/mciasdb_test.go @@ -206,12 +206,12 @@ func TestRoleRevoke(t *testing.T) { t.Fatalf("create account: %v", err) } - if err := tool.db.GrantRole(a.ID, "editor", nil); err != nil { + if err := tool.db.GrantRole(a.ID, "user", nil); err != nil { t.Fatalf("grant role: %v", err) } captureStdout(t, func() { - tool.roleRevoke([]string{"--id", a.UUID, "--role", "editor"}) + tool.roleRevoke([]string{"--id", a.UUID, "--role", "user"}) }) roles, err := tool.db.GetRoles(a.ID) diff --git a/dist/mcias-dev.conf.example b/dist/mcias-dev.conf.example index e077cba..5ec080c 100644 --- a/dist/mcias-dev.conf.example +++ b/dist/mcias-dev.conf.example @@ -22,6 +22,7 @@ listen_addr = "127.0.0.1:8443" grpc_addr = "127.0.0.1:9443" tls_cert = "/tmp/mcias-dev.crt" tls_key = "/tmp/mcias-dev.key" +# trusted_proxy not set — direct local development, no reverse proxy. [database] path = "/tmp/mcias-dev.db" diff --git a/dist/mcias.conf.docker.example b/dist/mcias.conf.docker.example index fc862c3..980d53b 100644 --- a/dist/mcias.conf.docker.example +++ b/dist/mcias.conf.docker.example @@ -25,6 +25,10 @@ listen_addr = "0.0.0.0:8443" grpc_addr = "0.0.0.0:9443" tls_cert = "/etc/mcias/server.crt" tls_key = "/etc/mcias/server.key" +# If a reverse proxy (nginx, Caddy, Traefik) sits in front of this container, +# set trusted_proxy to its container IP so real client IPs are used for rate +# limiting and audit logging. Leave commented out for direct exposure. +# trusted_proxy = "172.17.0.1" [database] # VOLUME /data is declared in the Dockerfile; map a named volume here. 
diff --git a/dist/mcias.conf.example b/dist/mcias.conf.example index cca3ded..c8143b6 100644 --- a/dist/mcias.conf.example +++ b/dist/mcias.conf.example @@ -32,6 +32,21 @@ tls_cert = "/etc/mcias/server.crt" # Permissions: mode 0640, owner root:mcias. tls_key = "/etc/mcias/server.key" +# OPTIONAL. IP address of a trusted reverse proxy (e.g. nginx, Caddy, HAProxy). +# When set, the rate limiter and audit log extract the real client IP from the +# X-Real-IP or X-Forwarded-For header, but ONLY for requests whose TCP source +# address matches this exact IP. All other requests use RemoteAddr directly, +# preventing IP spoofing by external clients. +# +# Must be an IP address, not a hostname or CIDR range. +# Omit when running without a reverse proxy (direct Internet exposure). +# +# Example — local nginx proxy: +# trusted_proxy = "127.0.0.1" +# +# Example — Docker network gateway: +# trusted_proxy = "172.17.0.1" + # --------------------------------------------------------------------------- # [database] — SQLite database # --------------------------------------------------------------------------- diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 64a8650..bdc5bcf 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -200,19 +200,31 @@ func parsePHC(phc string) (ArgonParams, []byte, []byte, error) { // ValidateTOTP checks a 6-digit TOTP code against a raw TOTP secret (bytes). // A ±1 time-step window (±30s) is allowed to accommodate clock skew. // +// Returns (true, counter, nil) on a valid code where counter is the HOTP +// counter value that matched. The caller MUST pass this counter to +// db.CheckAndUpdateTOTPCounter to prevent replay attacks within the validity +// window (CRIT-01). +// // Security: // - Comparison uses crypto/subtle.ConstantTimeCompare to resist timing attacks. // - Only RFC 6238-compliant HOTP (HMAC-SHA1) is implemented; no custom crypto. 
// - A ±1 window is the RFC 6238 recommendation; wider windows increase // exposure to code interception between generation and submission. -func ValidateTOTP(secret []byte, code string) (bool, error) { +// - The returned counter enables replay prevention: callers store it and +// reject any future code that does not advance past it (RFC 6238 §5.2). +func ValidateTOTP(secret []byte, code string) (bool, int64, error) { if len(code) != 6 { - return false, nil + return false, 0, nil } now := time.Now().Unix() step := int64(30) // RFC 6238 default time step in seconds + // Security: evaluate all three counters with constant-time comparisons + // before returning. Early-exit would leak which counter matched via + // timing; we instead record the match and continue, returning at the end. + var matched bool + var matchedCounter int64 for _, counter := range []int64{ now/step - 1, now / step, @@ -220,14 +232,21 @@ func ValidateTOTP(secret []byte, code string) (bool, error) { } { expected, err := hotp(secret, uint64(counter)) //nolint:gosec // G115: counter is Unix time / step, always non-negative if err != nil { - return false, fmt.Errorf("auth: compute TOTP: %w", err) + return false, 0, fmt.Errorf("auth: compute TOTP: %w", err) } // Security: constant-time comparison to prevent timing attack. + // We deliberately do NOT break early so that all three comparisons + // always execute, preventing a timing side-channel on which counter + // slot matched. if subtle.ConstantTimeCompare([]byte(code), []byte(expected)) == 1 { - return true, nil + matched = true + matchedCounter = counter } } - return false, nil + if matched { + return true, matchedCounter, nil + } + return false, 0, nil } // hotp computes an HMAC-SHA1-based OTP for a given counter value. 
diff --git a/internal/auth/auth_test.go b/internal/auth/auth_test.go index 8a53233..07dc834 100644 --- a/internal/auth/auth_test.go +++ b/internal/auth/auth_test.go @@ -101,13 +101,16 @@ func TestValidateTOTP(t *testing.T) { t.Fatalf("hotp: %v", err) } - ok, err := ValidateTOTP(rawSecret, code) + ok, counter, err := ValidateTOTP(rawSecret, code) if err != nil { t.Fatalf("ValidateTOTP: %v", err) } if !ok { t.Errorf("ValidateTOTP rejected a valid code %q", code) } + if ok && counter == 0 { + t.Errorf("ValidateTOTP returned zero counter for valid code") + } } // TestValidateTOTPWrongCode verifies that an incorrect code is rejected. @@ -117,7 +120,7 @@ func TestValidateTOTPWrongCode(t *testing.T) { t.Fatalf("GenerateTOTPSecret: %v", err) } - ok, err := ValidateTOTP(rawSecret, "000000") + ok, _, err := ValidateTOTP(rawSecret, "000000") if err != nil { t.Fatalf("ValidateTOTP: %v", err) } @@ -135,7 +138,7 @@ func TestValidateTOTPWrongLength(t *testing.T) { } for _, code := range []string{"", "12345", "1234567", "abcdef"} { - ok, err := ValidateTOTP(rawSecret, code) + ok, _, err := ValidateTOTP(rawSecret, code) if err != nil { t.Errorf("ValidateTOTP(%q): unexpected error: %v", code, err) } diff --git a/internal/config/config.go b/internal/config/config.go index 83918aa..c3324ee 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -6,6 +6,7 @@ package config import ( "errors" "fmt" + "net" "os" "time" @@ -30,6 +31,17 @@ type ServerConfig struct { GRPCAddr string `toml:"grpc_addr"` TLSCert string `toml:"tls_cert"` TLSKey string `toml:"tls_key"` + // TrustedProxy is the IP address (not a range) of a reverse proxy that + // sits in front of the server and sets X-Forwarded-For or X-Real-IP + // headers. When set, the rate limiter and audit log extract the real + // client IP from these headers instead of r.RemoteAddr. + // + // Security: only requests whose r.RemoteAddr matches TrustedProxy are + // trusted to carry a valid forwarded-IP header. 
All other requests use + // r.RemoteAddr directly, so this field cannot be exploited for IP + // spoofing by external clients. Omit or leave empty when running + // without a reverse proxy. + TrustedProxy string `toml:"trusted_proxy"` } // DatabaseConfig holds SQLite database settings. @@ -137,6 +149,14 @@ func (c *Config) validate() error { if c.Server.TLSKey == "" { errs = append(errs, errors.New("server.tls_key is required")) } + // Security (DEF-03): if trusted_proxy is set it must be a valid IP address + // (not a hostname or CIDR) so the middleware can compare it to the parsed + // host part of r.RemoteAddr with net.IP.Equal instead of string matching. + if c.Server.TrustedProxy != "" { + if net.ParseIP(c.Server.TrustedProxy) == nil { + errs = append(errs, fmt.Errorf("server.trusted_proxy %q is not a valid IP address", c.Server.TrustedProxy)) + } + } // Database if c.Database.Path == "" { @@ -147,14 +167,31 @@ func (c *Config) validate() error { if c.Tokens.Issuer == "" { errs = append(errs, errors.New("tokens.issuer is required")) } + // Security (DEF-05): enforce both lower and upper bounds on token expiry + // durations. An operator misconfiguration could otherwise produce tokens + // valid for centuries, which would be irrevocable (bar explicit JTI + // revocation) if a token were stolen. Upper bounds are intentionally + // generous to accommodate a range of legitimate deployments while + // catching obvious typos (e.g. "876000h" instead of "8760h"). 
+ const ( + maxDefaultExpiry = 30 * 24 * time.Hour // 30 days + maxAdminExpiry = 24 * time.Hour // 24 hours + maxServiceExpiry = 5 * 365 * 24 * time.Hour // 5 years + ) if c.Tokens.DefaultExpiry.Duration <= 0 { errs = append(errs, errors.New("tokens.default_expiry must be positive")) + } else if c.Tokens.DefaultExpiry.Duration > maxDefaultExpiry { + errs = append(errs, fmt.Errorf("tokens.default_expiry must be <= %s (got %s)", maxDefaultExpiry, c.Tokens.DefaultExpiry.Duration)) } if c.Tokens.AdminExpiry.Duration <= 0 { errs = append(errs, errors.New("tokens.admin_expiry must be positive")) + } else if c.Tokens.AdminExpiry.Duration > maxAdminExpiry { + errs = append(errs, fmt.Errorf("tokens.admin_expiry must be <= %s (got %s)", maxAdminExpiry, c.Tokens.AdminExpiry.Duration)) } if c.Tokens.ServiceExpiry.Duration <= 0 { errs = append(errs, errors.New("tokens.service_expiry must be positive")) + } else if c.Tokens.ServiceExpiry.Duration > maxServiceExpiry { + errs = append(errs, fmt.Errorf("tokens.service_expiry must be <= %s (got %s)", maxServiceExpiry, c.Tokens.ServiceExpiry.Duration)) } // Argon2 — enforce OWASP 2023 minimums (time=2, memory=65536 KiB). diff --git a/internal/config/config_test.go b/internal/config/config_test.go index cafca19..a1366ef 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -210,6 +210,40 @@ threads = 4 } } +// TestTrustedProxyValidation verifies that trusted_proxy must be a valid IP. 
+func TestTrustedProxyValidation(t *testing.T) { + tests := []struct { + name string + proxy string + wantErr bool + }{ + {"empty is valid (disabled)", "", false}, + {"valid IPv4", "127.0.0.1", false}, + {"valid IPv6 loopback", "::1", false}, + {"valid private IPv4", "10.0.0.1", false}, + {"hostname rejected", "proxy.example.com", true}, + {"CIDR rejected", "10.0.0.0/8", true}, + {"garbage rejected", "not-an-ip", true}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + cfg, _ := Load(writeTempConfig(t, validConfig())) + if cfg == nil { + t.Fatal("baseline config load failed") + } + cfg.Server.TrustedProxy = tc.proxy + err := cfg.validate() + if tc.wantErr && err == nil { + t.Errorf("expected validation error for proxy=%q, got nil", tc.proxy) + } + if !tc.wantErr && err != nil { + t.Errorf("unexpected error for proxy=%q: %v", tc.proxy, err) + } + }) + } +} + func TestDurationParsing(t *testing.T) { var d duration if err := d.UnmarshalText([]byte("1h30m")); err != nil { diff --git a/internal/db/accounts.go b/internal/db/accounts.go index 41d1cb8..6c58a66 100644 --- a/internal/db/accounts.go +++ b/internal/db/accounts.go @@ -70,7 +70,10 @@ func (db *DB) GetAccountByID(id int64) (*model.Account, error) { `, id)) } -// GetAccountByUsername retrieves an account by username (case-insensitive). +// GetAccountByUsername retrieves an account by username. +// Matching is case-sensitive: SQLite uses BINARY collation by default, so +// "admin" and "Admin" are distinct usernames. This is intentional for an +// SSO system where usernames should be treated as opaque identifiers. // Returns ErrNotFound if no matching account exists. 
func (db *DB) GetAccountByUsername(username string) (*model.Account, error) { return db.scanAccount(db.sql.QueryRow(` @@ -184,6 +187,46 @@ func (db *DB) SetTOTP(accountID int64, secretEnc, secretNonce []byte) error { return nil } +// CheckAndUpdateTOTPCounter atomically verifies that counter is strictly +// greater than the last accepted TOTP counter for the account, and if so, +// stores counter as the new last accepted value. +// +// Returns ErrTOTPReplay if counter ≤ the stored value, preventing a replay +// of a previously accepted code within the ±1 time-step validity window. +// On the first successful TOTP login (stored value NULL) any counter is +// accepted. +// +// Security (CRIT-01): RFC 6238 §5.2 recommends recording the last OTP +// counter used and rejecting any code that does not advance it. Without +// this, an intercepted code remains valid for up to 90 seconds. The update +// is performed in a single parameterized SQL statement, so there is no +// TOCTOU window between the check and the write. +func (db *DB) CheckAndUpdateTOTPCounter(accountID int64, counter int64) error { + result, err := db.sql.Exec(` + UPDATE accounts + SET last_totp_counter = ?, updated_at = ? + WHERE id = ? + AND (last_totp_counter IS NULL OR last_totp_counter < ?) + `, counter, now(), accountID, counter) + if err != nil { + return fmt.Errorf("db: check-and-update TOTP counter: %w", err) + } + rows, err := result.RowsAffected() + if err != nil { + return fmt.Errorf("db: check-and-update TOTP counter rows affected: %w", err) + } + if rows == 0 { + // Security: the counter was not advanced — this code has already been + // used within its validity window. Treat as authentication failure. + return ErrTOTPReplay + } + return nil +} + +// ErrTOTPReplay is returned by CheckAndUpdateTOTPCounter when the submitted +// TOTP code corresponds to a counter value that has already been accepted. 
+var ErrTOTPReplay = errors.New("db: TOTP code already used (replay)") + // ClearTOTP removes the TOTP secret and disables TOTP requirement. func (db *DB) ClearTOTP(accountID int64) error { _, err := db.sql.Exec(` @@ -300,6 +343,12 @@ func (db *DB) GetRoles(accountID int64) ([]string, error) { // GrantRole adds a role to an account. If the role already exists, it is a no-op. func (db *DB) GrantRole(accountID int64, role string, grantedBy *int64) error { + // Security (DEF-10): reject unknown roles before writing to the DB so + // that typos (e.g. "admim") are caught immediately rather than silently + // creating an unmatchable role. + if err := model.ValidateRole(role); err != nil { + return err + } _, err := db.sql.Exec(` INSERT OR IGNORE INTO account_roles (account_id, role, granted_by, granted_at) VALUES (?, ?, ?, ?) @@ -323,6 +372,14 @@ func (db *DB) RevokeRole(accountID int64, role string) error { // SetRoles replaces the full role set for an account atomically. func (db *DB) SetRoles(accountID int64, roles []string, grantedBy *int64) error { + // Security (DEF-10): validate all roles before opening the transaction so + // we fail fast without touching the database on an invalid input. + for _, role := range roles { + if err := model.ValidateRole(role); err != nil { + return err + } + } + tx, err := db.sql.Begin() if err != nil { return fmt.Errorf("db: set roles begin tx: %w", err) diff --git a/internal/db/db.go b/internal/db/db.go index 084cfc4..c9bd5ec 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -65,7 +65,14 @@ func (db *DB) configure() error { "PRAGMA journal_mode=WAL", "PRAGMA foreign_keys=ON", "PRAGMA busy_timeout=5000", - "PRAGMA synchronous=NORMAL", + // Security (DEF-07): FULL synchronous mode ensures every write is + // flushed to disk before SQLite considers it committed. 
With WAL + // mode + NORMAL, a power failure between a write and the next + // checkpoint could lose the most recent committed transactions, + // including token issuance and revocation records — which must be + // durable. The performance cost is negligible for a single-node + // personal SSO server. + "PRAGMA synchronous=FULL", } for _, p := range pragmas { if _, err := db.sql.Exec(p); err != nil { diff --git a/internal/db/db_test.go b/internal/db/db_test.go index 835e7fa..822e0fc 100644 --- a/internal/db/db_test.go +++ b/internal/db/db_test.go @@ -162,7 +162,7 @@ func TestRoleOperations(t *testing.T) { } // SetRoles - if err := db.SetRoles(acct.ID, []string{"reader", "writer"}, nil); err != nil { + if err := db.SetRoles(acct.ID, []string{"admin", "user"}, nil); err != nil { t.Fatalf("SetRoles: %v", err) } roles, err = db.GetRoles(acct.ID) diff --git a/internal/db/migrate.go b/internal/db/migrate.go index 0b2d8c4..be691d9 100644 --- a/internal/db/migrate.go +++ b/internal/db/migrate.go @@ -22,7 +22,7 @@ var migrationsFS embed.FS // LatestSchemaVersion is the highest migration version defined in the // migrations/ directory. Update this constant whenever a new migration file // is added. -const LatestSchemaVersion = 6 +const LatestSchemaVersion = 7 // newMigrate constructs a migrate.Migrate instance backed by the embedded SQL // files. It opens a dedicated *sql.DB using the same DSN as the main diff --git a/internal/db/migrations/000007_totp_counter.up.sql b/internal/db/migrations/000007_totp_counter.up.sql new file mode 100644 index 0000000..f75f59a --- /dev/null +++ b/internal/db/migrations/000007_totp_counter.up.sql @@ -0,0 +1,9 @@ +-- Add last_totp_counter to track the most recently accepted TOTP counter value +-- per account. This is used to prevent TOTP replay attacks within the ±1 +-- time-step validity window. NULL means no TOTP code has ever been accepted +-- for this account (fresh enrollment or TOTP not yet used). 
+-- +-- Security (CRIT-01): RFC 6238 §5.2 recommends recording the last OTP counter +-- used and rejecting codes that do not advance it, eliminating the ~90-second +-- replay window that would otherwise be exploitable. +ALTER TABLE accounts ADD COLUMN last_totp_counter INTEGER DEFAULT NULL; diff --git a/internal/grpcserver/auth.go b/internal/grpcserver/auth.go index 389cf51..64a7cf7 100644 --- a/internal/grpcserver/auth.go +++ b/internal/grpcserver/auth.go @@ -72,8 +72,14 @@ func (a *authServiceServer) Login(ctx context.Context, req *mciasv1.LoginRequest if acct.TOTPRequired { if req.TotpCode == "" { + // Security (DEF-08): password was already verified, so a missing + // TOTP code means the gRPC client needs to re-prompt the user — + // it is not a credential failure. Do NOT increment the lockout + // counter here; doing so would lock out well-behaved clients that + // call Login in two steps (password first, TOTP second) and would + // also let an attacker trigger account lockout by omitting the + // code after a successful password guess. 
a.s.db.WriteAuditEvent(model.EventLoginFail, &acct.ID, nil, ip, `{"reason":"totp_missing"}`) //nolint:errcheck - _ = a.s.db.RecordLoginFailure(acct.ID) return nil, status.Error(codes.Unauthenticated, "TOTP code required") } secret, err := crypto.OpenAESGCM(a.s.masterKey, acct.TOTPSecretNonce, acct.TOTPSecretEnc) @@ -81,12 +87,19 @@ func (a *authServiceServer) Login(ctx context.Context, req *mciasv1.LoginRequest a.s.logger.Error("decrypt TOTP secret", "error", err, "account_id", acct.ID) return nil, status.Error(codes.Internal, "internal error") } - valid, err := auth.ValidateTOTP(secret, req.TotpCode) + valid, counter, err := auth.ValidateTOTP(secret, req.TotpCode) if err != nil || !valid { a.s.db.WriteAuditEvent(model.EventLoginTOTPFail, &acct.ID, nil, ip, `{"reason":"wrong_totp"}`) //nolint:errcheck _ = a.s.db.RecordLoginFailure(acct.ID) return nil, status.Error(codes.Unauthenticated, "invalid credentials") } + // Security (CRIT-01): reject replay of a code already used within + // its ±30-second validity window. + if err := a.s.db.CheckAndUpdateTOTPCounter(acct.ID, counter); err != nil { + a.s.db.WriteAuditEvent(model.EventLoginTOTPFail, &acct.ID, nil, ip, `{"reason":"totp_replay"}`) //nolint:errcheck + _ = a.s.db.RecordLoginFailure(acct.ID) + return nil, status.Error(codes.Unauthenticated, "invalid credentials") + } } // Login succeeded: clear any outstanding failure counter. @@ -199,7 +212,12 @@ func (a *authServiceServer) EnrollTOTP(ctx context.Context, _ *mciasv1.EnrollTOT return nil, status.Error(codes.Internal, "internal error") } - if err := a.s.db.SetTOTP(acct.ID, secretEnc, secretNonce); err != nil { + // Security: use StorePendingTOTP (not SetTOTP) so that totp_required is + // not set to 1 until the user confirms the code via ConfirmTOTP. 
Calling + // SetTOTP here would immediately lock the account behind TOTP before the + // user has had a chance to configure their authenticator app — matching the + // behaviour of the REST EnrollTOTP handler at internal/server/server.go. + if err := a.s.db.StorePendingTOTP(acct.ID, secretEnc, secretNonce); err != nil { return nil, status.Error(codes.Internal, "internal error") } @@ -232,10 +250,15 @@ func (a *authServiceServer) ConfirmTOTP(ctx context.Context, req *mciasv1.Confir return nil, status.Error(codes.Internal, "internal error") } - valid, err := auth.ValidateTOTP(secret, req.Code) + valid, counter, err := auth.ValidateTOTP(secret, req.Code) if err != nil || !valid { return nil, status.Error(codes.Unauthenticated, "invalid TOTP code") } + // Security (CRIT-01): record the counter even during enrollment confirmation + // so the same code cannot be replayed immediately after confirming. + if err := a.s.db.CheckAndUpdateTOTPCounter(acct.ID, counter); err != nil { + return nil, status.Error(codes.Unauthenticated, "invalid TOTP code") + } // SetTOTP with existing enc/nonce sets totp_required=1, confirming enrollment. 
if err := a.s.db.SetTOTP(acct.ID, acct.TOTPSecretEnc, acct.TOTPSecretNonce); err != nil { diff --git a/internal/grpcserver/grpcserver_test.go b/internal/grpcserver/grpcserver_test.go index 7bc7216..9a34e5b 100644 --- a/internal/grpcserver/grpcserver_test.go +++ b/internal/grpcserver/grpcserver_test.go @@ -542,7 +542,7 @@ func TestSetAndGetRoles(t *testing.T) { _, err = cl.SetRoles(authCtx(adminTok), &mciasv1.SetRolesRequest{ Id: id, - Roles: []string{"editor", "viewer"}, + Roles: []string{"admin", "user"}, }) if err != nil { t.Fatalf("SetRoles: %v", err) diff --git a/internal/middleware/middleware.go b/internal/middleware/middleware.go index fcf79c6..1670ed0 100644 --- a/internal/middleware/middleware.go +++ b/internal/middleware/middleware.go @@ -176,15 +176,62 @@ type ipRateLimiter struct { mu sync.Mutex } +// ClientIP returns the real client IP for a request, optionally trusting a +// single reverse-proxy address. +// +// Security (DEF-03): X-Forwarded-For and X-Real-IP headers can be forged by +// any client. This function only honours them when the immediate TCP peer +// (r.RemoteAddr) matches trustedProxy exactly. When trustedProxy is nil or +// the peer address does not match, r.RemoteAddr is used unconditionally. +// +// This prevents IP-spoofing attacks: an attacker who sends a fake +// X-Forwarded-For header from their own connection still has their real IP +// used for rate limiting, because their RemoteAddr will not match the proxy. +// +// Only the first (leftmost) value in X-Forwarded-For is used. It is the real +// client only if the proxy overwrites client-sent X-Forwarded-For. If neither +// header is present, RemoteAddr is used as a fallback even when the request +// comes from the proxy. 
+func ClientIP(r *http.Request, trustedProxy net.IP) string { + remoteHost, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + remoteHost = r.RemoteAddr + } + + if trustedProxy != nil { + remoteIP := net.ParseIP(remoteHost) + if remoteIP != nil && remoteIP.Equal(trustedProxy) { + // Request is from the trusted proxy; extract the real client IP. + // Prefer X-Real-IP (single value) over X-Forwarded-For (may be a + // comma-separated list when multiple proxies are chained). + if xri := r.Header.Get("X-Real-IP"); xri != "" { + if ip := net.ParseIP(strings.TrimSpace(xri)); ip != nil { + return ip.String() + } + } + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + // Take the first (leftmost) address — the original client. + first, _, _ := strings.Cut(xff, ",") + if ip := net.ParseIP(strings.TrimSpace(first)); ip != nil { + return ip.String() + } + } + } + } + + return remoteHost +} + // RateLimit returns middleware implementing a per-IP token bucket. // rps is the sustained request rate (tokens refilled per second). // burst is the maximum burst size (initial and maximum token count). +// trustedProxy, if non-nil, enables proxy-aware client IP extraction via +// ClientIP; pass nil when not running behind a reverse proxy. // -// Security: Rate limiting is applied at the IP level. In production, the -// server should be behind a reverse proxy that sets X-Forwarded-For; this -// middleware uses RemoteAddr directly which may be the proxy IP. For single- -// instance deployment without a proxy, RemoteAddr is the client IP. -func RateLimit(rps float64, burst int) func(http.Handler) http.Handler { +// Security (DEF-03): when trustedProxy is set, real client IPs are extracted +// from X-Forwarded-For/X-Real-IP headers but only for requests whose +// RemoteAddr matches the trusted proxy, preventing IP-spoofing. 
+func RateLimit(rps float64, burst int, trustedProxy net.IP) func(http.Handler) http.Handler { limiter := &ipRateLimiter{ rps: rps, burst: float64(burst), @@ -197,10 +244,7 @@ func RateLimit(rps float64, burst int) func(http.Handler) http.Handler { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - ip, _, err := net.SplitHostPort(r.RemoteAddr) - if err != nil { - ip = r.RemoteAddr - } + ip := ClientIP(r, trustedProxy) if !limiter.allow(ip) { w.Header().Set("Retry-After", "60") diff --git a/internal/middleware/middleware_test.go b/internal/middleware/middleware_test.go index 36714cd..3b69c98 100644 --- a/internal/middleware/middleware_test.go +++ b/internal/middleware/middleware_test.go @@ -6,6 +6,7 @@ import ( "crypto/ed25519" "crypto/rand" "log/slog" + "net" "net/http" "net/http/httptest" "testing" @@ -271,7 +272,7 @@ func TestRequireRoleNoClaims(t *testing.T) { } func TestRateLimitAllows(t *testing.T) { - handler := RateLimit(10, 5)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := RateLimit(10, 5, nil)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -289,7 +290,7 @@ func TestRateLimitAllows(t *testing.T) { } func TestRateLimitBlocks(t *testing.T) { - handler := RateLimit(0.1, 2)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + handler := RateLimit(0.1, 2, nil)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) })) @@ -340,3 +341,124 @@ func TestExtractBearerToken(t *testing.T) { }) } } + +// TestClientIP verifies the proxy-aware IP extraction logic. 
+func TestClientIP(t *testing.T) { + proxy := net.ParseIP("10.0.0.1") + + tests := []struct { + name string + remoteAddr string + xForwardedFor string + xRealIP string + trustedProxy net.IP + want string + }{ + { + name: "no proxy configured: uses RemoteAddr", + remoteAddr: "203.0.113.5:54321", + want: "203.0.113.5", + }, + { + name: "proxy configured but request not from proxy: uses RemoteAddr", + remoteAddr: "198.51.100.9:12345", + xForwardedFor: "203.0.113.99", + trustedProxy: proxy, + want: "198.51.100.9", + }, + { + name: "request from trusted proxy with X-Real-IP: uses X-Real-IP", + remoteAddr: "10.0.0.1:8080", + xRealIP: "203.0.113.42", + trustedProxy: proxy, + want: "203.0.113.42", + }, + { + name: "request from trusted proxy with X-Forwarded-For: uses first entry", + remoteAddr: "10.0.0.1:8080", + xForwardedFor: "203.0.113.77, 10.0.0.2", + trustedProxy: proxy, + want: "203.0.113.77", + }, + { + name: "X-Real-IP takes precedence over X-Forwarded-For", + remoteAddr: "10.0.0.1:8080", + xRealIP: "203.0.113.11", + xForwardedFor: "203.0.113.22", + trustedProxy: proxy, + want: "203.0.113.11", + }, + { + name: "proxy request with invalid X-Real-IP falls back to X-Forwarded-For", + remoteAddr: "10.0.0.1:8080", + xRealIP: "not-an-ip", + xForwardedFor: "203.0.113.55", + trustedProxy: proxy, + want: "203.0.113.55", + }, + { + name: "proxy request with no forwarding headers falls back to RemoteAddr host", + remoteAddr: "10.0.0.1:8080", + trustedProxy: proxy, + want: "10.0.0.1", + }, + { + // Security: attacker fakes X-Forwarded-For but connects directly. 
+ name: "spoofed X-Forwarded-For from non-proxy IP is ignored", + remoteAddr: "198.51.100.99:9999", + xForwardedFor: "127.0.0.1", + trustedProxy: proxy, + want: "198.51.100.99", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + req.RemoteAddr = tc.remoteAddr + if tc.xForwardedFor != "" { + req.Header.Set("X-Forwarded-For", tc.xForwardedFor) + } + if tc.xRealIP != "" { + req.Header.Set("X-Real-IP", tc.xRealIP) + } + got := ClientIP(req, tc.trustedProxy) + if got != tc.want { + t.Errorf("ClientIP = %q, want %q", got, tc.want) + } + }) + } +} + +// TestRateLimitTrustedProxy verifies that rate limiting uses the forwarded IP +// when the request originates from a trusted proxy. +func TestRateLimitTrustedProxy(t *testing.T) { + proxy := net.ParseIP("10.0.0.1") + // Very low rps and burst=1 so any two requests from the same IP are blocked. + handler := RateLimit(0.001, 1, proxy)(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + // Two requests from the same real client IP, forwarded by the proxy. + // Both carry the same X-Real-IP; the second should be rate-limited. + for i, wantStatus := range []int{http.StatusOK, http.StatusTooManyRequests} { + req := httptest.NewRequest(http.MethodPost, "/v1/auth/login", nil) + req.RemoteAddr = "10.0.0.1:5000" // from the trusted proxy + req.Header.Set("X-Real-IP", "203.0.113.5") + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + if rr.Code != wantStatus { + t.Errorf("request %d: status = %d, want %d", i+1, rr.Code, wantStatus) + } + } + + // A different real client (different X-Real-IP) should still be allowed. 
+ req := httptest.NewRequest(http.MethodPost, "/v1/auth/login", nil) + req.RemoteAddr = "10.0.0.1:5001" + req.Header.Set("X-Real-IP", "203.0.113.99") + rr := httptest.NewRecorder() + handler.ServeHTTP(rr, req) + if rr.Code != http.StatusOK { + t.Errorf("distinct client: status = %d, want 200 (separate bucket)", rr.Code) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 841a9fb..5214c0b 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -2,7 +2,10 @@ // These are pure data definitions with no external dependencies. package model -import "time" +import ( + "fmt" + "time" +) // AccountType distinguishes human interactive accounts from non-interactive // service accounts. @@ -43,6 +46,33 @@ type Account struct { TOTPRequired bool `json:"totp_required"` } +// Allowlisted role names (DEF-10). +// Only these strings may be stored in account_roles. Extending the set of +// valid roles requires a code change, ensuring that typos such as "admim" +// are caught at grant time rather than silently creating a useless role. +const ( + RoleAdmin = "admin" + RoleUser = "user" +) + +// allowedRoles is the compile-time set of recognised role names. +var allowedRoles = map[string]struct{}{ + RoleAdmin: {}, + RoleUser: {}, +} + +// ValidateRole returns nil if role is an allowlisted role name, or an error +// describing the problem. Call this before writing to account_roles. +// +// Security (DEF-10): prevents admins from accidentally creating unmatchable +// roles (e.g. "admim") by enforcing a compile-time allowlist. +func ValidateRole(role string) error { + if _, ok := allowedRoles[role]; !ok { + return fmt.Errorf("model: unknown role %q; allowed roles: admin, user", role) + } + return nil +} + // Role is a string label assigned to an account to grant permissions. 
type Role struct { GrantedAt time.Time `json:"granted_at"` diff --git a/internal/server/server.go b/internal/server/server.go index 149949b..b3fe128 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -16,6 +16,7 @@ import ( "fmt" "io/fs" "log/slog" + "net" "net/http" "git.wntrmute.dev/kyle/mcias/internal/auth" @@ -56,10 +57,19 @@ func New(database *db.DB, cfg *config.Config, priv ed25519.PrivateKey, pub ed255 func (s *Server) Handler() http.Handler { mux := http.NewServeMux() + // Security (DEF-03): parse the optional trusted-proxy address once here + // so RateLimit and audit-log helpers use consistent IP extraction. + // net.ParseIP returns nil for an empty string, which disables proxy + // trust and falls back to r.RemoteAddr. + var trustedProxy net.IP + if s.cfg.Server.TrustedProxy != "" { + trustedProxy = net.ParseIP(s.cfg.Server.TrustedProxy) + } + // Security: per-IP rate limiting on public auth endpoints to prevent // brute-force login attempts and token-validation abuse. Parameters match // the gRPC rate limiter (10 req/s sustained, burst 10). - loginRateLimit := middleware.RateLimit(10, 10) + loginRateLimit := middleware.RateLimit(10, 10, trustedProxy) // Public endpoints (no authentication required). mux.HandleFunc("GET /v1/health", s.handleHealth) @@ -82,16 +92,20 @@ func (s *Server) Handler() http.Handler { if err != nil { panic(fmt.Sprintf("server: read openapi.yaml: %v", err)) } - mux.HandleFunc("GET /docs", func(w http.ResponseWriter, _ *http.Request) { + // Security (DEF-09): apply defensive HTTP headers to the docs handlers. + // The Swagger UI page at /docs loads JavaScript from the same origin + // and renders untrusted content (API descriptions), so it benefits from + // CSP, X-Frame-Options, and the other headers applied to the UI sub-mux. 
+ mux.Handle("GET /docs", docsSecurityHeaders(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Content-Type", "text/html; charset=utf-8") w.WriteHeader(http.StatusOK) _, _ = w.Write(docsHTML) - }) - mux.HandleFunc("GET /docs/openapi.yaml", func(w http.ResponseWriter, _ *http.Request) { + }))) + mux.Handle("GET /docs/openapi.yaml", docsSecurityHeaders(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.Header().Set("Content-Type", "application/yaml") w.WriteHeader(http.StatusOK) _, _ = w.Write(specYAML) - }) + }))) // Authenticated endpoints. requireAuth := middleware.RequireAuth(s.pubKey, s.db, s.cfg.Tokens.Issuer) @@ -251,13 +265,21 @@ func (s *Server) handleLogin(w http.ResponseWriter, r *http.Request) { middleware.WriteError(w, http.StatusInternalServerError, "internal error", "internal_error") return } - valid, err := auth.ValidateTOTP(secret, req.TOTPCode) + valid, totpCounter, err := auth.ValidateTOTP(secret, req.TOTPCode) if err != nil || !valid { s.writeAudit(r, model.EventLoginTOTPFail, &acct.ID, nil, `{"reason":"wrong_totp"}`) _ = s.db.RecordLoginFailure(acct.ID) middleware.WriteError(w, http.StatusUnauthorized, "invalid credentials", "unauthorized") return } + // Security (CRIT-01): reject replay of a code already used within + // its ±30-second validity window. + if err := s.db.CheckAndUpdateTOTPCounter(acct.ID, totpCounter); err != nil { + s.writeAudit(r, model.EventLoginTOTPFail, &acct.ID, nil, `{"reason":"totp_replay"}`) + _ = s.db.RecordLoginFailure(acct.ID) + middleware.WriteError(w, http.StatusUnauthorized, "invalid credentials", "unauthorized") + return + } } // Login succeeded: clear any outstanding failure counter. 
@@ -764,11 +786,18 @@ func (s *Server) handleTOTPConfirm(w http.ResponseWriter, r *http.Request) { return } - valid, err := auth.ValidateTOTP(secret, req.Code) + valid, totpCounter, err := auth.ValidateTOTP(secret, req.Code) if err != nil || !valid { middleware.WriteError(w, http.StatusUnauthorized, "invalid TOTP code", "unauthorized") return } + // Security (CRIT-01): record the counter even during enrollment + // confirmation so the same code cannot be replayed immediately after + // confirming. + if err := s.db.CheckAndUpdateTOTPCounter(acct.ID, totpCounter); err != nil { + middleware.WriteError(w, http.StatusUnauthorized, "invalid TOTP code", "unauthorized") + return + } // Mark TOTP as confirmed and required. if err := s.db.SetTOTP(acct.ID, acct.TOTPSecretEnc, acct.TOTPSecretNonce); err != nil { @@ -1149,8 +1178,14 @@ func (s *Server) loadAccount(w http.ResponseWriter, r *http.Request) (*model.Acc } // writeAudit appends an audit log entry, logging errors but not failing the request. +// The logged IP honours the trusted-proxy setting so the real client address +// is recorded rather than the proxy's address (DEF-03). func (s *Server) writeAudit(r *http.Request, eventType string, actorID, targetID *int64, details string) { - ip := r.RemoteAddr + var proxyIP net.IP + if s.cfg.Server.TrustedProxy != "" { + proxyIP = net.ParseIP(s.cfg.Server.TrustedProxy) + } + ip := middleware.ClientIP(r, proxyIP) if err := s.db.WriteAuditEvent(eventType, actorID, targetID, ip, details); err != nil { s.logger.Error("write audit event", "error", err, "event_type", eventType) } @@ -1191,6 +1226,25 @@ func extractBearerFromRequest(r *http.Request) (string, error) { return auth[len(prefix):], nil } +// docsSecurityHeaders adds the same defensive HTTP headers as the UI sub-mux +// to the /docs and /docs/openapi.yaml endpoints. 
+// +// Security (DEF-09): without these headers the Swagger UI HTML page is +// served without CSP, X-Frame-Options, or HSTS, leaving it susceptible +// to clickjacking and MIME-type confusion in browsers. +func docsSecurityHeaders(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + h := w.Header() + h.Set("Content-Security-Policy", + "default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline'; img-src 'self' data:; font-src 'self'") + h.Set("X-Content-Type-Options", "nosniff") + h.Set("X-Frame-Options", "DENY") + h.Set("Strict-Transport-Security", "max-age=63072000; includeSubDomains") + h.Set("Referrer-Policy", "no-referrer") + next.ServeHTTP(w, r) + }) +} + // encodeBase64URL encodes bytes as base64url without padding. func encodeBase64URL(b []byte) string { const table = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 3200d0b..f7325fe 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -376,7 +376,7 @@ func TestSetAndGetRoles(t *testing.T) { // Set roles. rr := doRequest(t, handler, "PUT", "/v1/accounts/"+target.UUID+"/roles", map[string][]string{ - "roles": {"reader", "writer"}, + "roles": {"admin", "user"}, }, adminToken) if rr.Code != http.StatusNoContent { t.Errorf("set roles status = %d, want 204; body: %s", rr.Code, rr.Body.String()) diff --git a/internal/token/token.go b/internal/token/token.go index ecaa053..a254254 100644 --- a/internal/token/token.go +++ b/internal/token/token.go @@ -70,11 +70,16 @@ func IssueToken(key ed25519.PrivateKey, issuer, subject string, roles []string, exp := now.Add(expiry) jti := uuid.New().String() + // Security (DEF-04): set NotBefore = now so tokens are not valid before + // the instant of issuance. 
This is a defence-in-depth measure: without + // nbf, a clock-skewed client or intermediate could present a token + // before its intended validity window. jc := jwtClaims{ RegisteredClaims: jwt.RegisteredClaims{ Issuer: issuer, Subject: subject, IssuedAt: jwt.NewNumericDate(now), + NotBefore: jwt.NewNumericDate(now), ExpiresAt: jwt.NewNumericDate(exp), ID: jti, }, @@ -127,6 +132,9 @@ func ValidateToken(key ed25519.PublicKey, tokenString, expectedIssuer string) (* jwt.WithIssuedAt(), jwt.WithIssuer(expectedIssuer), jwt.WithExpirationRequired(), + // Security (DEF-04): nbf is validated automatically by the library + // when the claim is present; no explicit option is needed. If nbf is + // in the future the library returns ErrTokenNotValidYet. ) if err != nil { // Map library errors to our typed errors for consistent handling. diff --git a/internal/ui/handlers_auth.go b/internal/ui/handlers_auth.go index 0fec7f6..250341a 100644 --- a/internal/ui/handlers_auth.go +++ b/internal/ui/handlers_auth.go @@ -149,7 +149,7 @@ func (u *UIServer) handleTOTPStep(w http.ResponseWriter, r *http.Request) { u.render(w, "login", LoginData{Error: "internal error"}) return } - valid, err := auth.ValidateTOTP(secret, totpCode) + valid, totpCounter, err := auth.ValidateTOTP(secret, totpCode) if err != nil || !valid { u.writeAudit(r, model.EventLoginTOTPFail, &acct.ID, nil, `{"reason":"wrong_totp"}`) _ = u.db.RecordLoginFailure(acct.ID) @@ -166,6 +166,23 @@ func (u *UIServer) handleTOTPStep(w http.ResponseWriter, r *http.Request) { }) return } + // Security (CRIT-01): reject replay of a code already used within its + // ±30-second validity window. 
+ if err := u.db.CheckAndUpdateTOTPCounter(acct.ID, totpCounter); err != nil { + u.writeAudit(r, model.EventLoginTOTPFail, &acct.ID, nil, `{"reason":"totp_replay"}`) + _ = u.db.RecordLoginFailure(acct.ID) + newNonce, nonceErr := u.issueTOTPNonce(acct.ID) + if nonceErr != nil { + u.render(w, "login", LoginData{Error: "internal error"}) + return + } + u.render(w, "totp_step", LoginData{ + Error: "invalid TOTP code", + Username: username, + Nonce: newNonce, + }) + return + } u.finishLogin(w, r, acct) } @@ -251,7 +268,7 @@ func (u *UIServer) handleLogout(w http.ResponseWriter, r *http.Request) { // writeAudit is a fire-and-forget audit log helper for the UI package. func (u *UIServer) writeAudit(r *http.Request, eventType string, actorID, targetID *int64, details string) { - ip := clientIP(r) + ip := u.clientIP(r) if err := u.db.WriteAuditEvent(eventType, actorID, targetID, ip, details); err != nil { u.logger.Warn("write audit event", "type", eventType, "error", err) } diff --git a/internal/ui/ui.go b/internal/ui/ui.go index ccdb179..e728d58 100644 --- a/internal/ui/ui.go +++ b/internal/ui/ui.go @@ -22,14 +22,15 @@ import ( "html/template" "io/fs" "log/slog" + "net" "net/http" - "strings" "sync" "time" "git.wntrmute.dev/kyle/mcias/internal/auth" "git.wntrmute.dev/kyle/mcias/internal/config" "git.wntrmute.dev/kyle/mcias/internal/db" + "git.wntrmute.dev/kyle/mcias/internal/middleware" "git.wntrmute.dev/kyle/mcias/internal/model" "git.wntrmute.dev/kyle/mcias/web" ) @@ -223,7 +224,7 @@ func New(database *db.DB, cfg *config.Config, priv ed25519.PrivateKey, pub ed255 tmpls[name] = clone } - return &UIServer{ + srv := &UIServer{ db: database, cfg: cfg, pubKey: pub, @@ -232,7 +233,33 @@ func New(database *db.DB, cfg *config.Config, priv ed25519.PrivateKey, pub ed255 logger: logger, csrf: csrf, tmpls: tmpls, - }, nil + } + + // Security (DEF-02): launch a background goroutine to evict expired TOTP + // nonces from pendingLogins. 
consumeTOTPNonce deletes entries on use, but + // entries abandoned by users who never complete step 2 would otherwise + // accumulate indefinitely, enabling a memory-exhaustion attack. + go srv.cleanupPendingLogins() + + return srv, nil +} + +// cleanupPendingLogins periodically evicts expired entries from pendingLogins. +// It runs every 5 minutes; given the 90-second nonce TTL, an abandoned entry +// lingers for at most about six and a half minutes before it is evicted. +func (u *UIServer) cleanupPendingLogins() { + ticker := time.NewTicker(5 * time.Minute) + defer ticker.Stop() + for range ticker.C { + now := time.Now() + u.pendingLogins.Range(func(key, value any) bool { + pl, ok := value.(*pendingLogin) + if !ok || now.After(pl.expiresAt) { + u.pendingLogins.Delete(key) + } + return true + }) + } } // Register attaches all UI routes to mux, wrapped with security headers. @@ -259,9 +286,18 @@ func (u *UIServer) Register(mux *http.ServeMux) { http.NotFound(w, r) }) + // Security (DEF-01, DEF-03): apply the same per-IP rate limit as the REST + // /v1/auth/login endpoint, using the same proxy-aware IP extraction so + // the rate limit is applied to real client IPs behind a reverse proxy. + var trustedProxy net.IP + if u.cfg.Server.TrustedProxy != "" { + trustedProxy = net.ParseIP(u.cfg.Server.TrustedProxy) + } + loginRateLimit := middleware.RateLimit(10, 10, trustedProxy) + // Auth routes (no session required). uiMux.HandleFunc("GET /login", u.handleLoginPage) - uiMux.HandleFunc("POST /login", u.handleLoginPost) + uiMux.Handle("POST /login", loginRateLimit(http.HandlerFunc(u.handleLoginPost))) uiMux.HandleFunc("POST /logout", u.handleLogout) // Protected routes. @@ -498,13 +534,15 @@ func securityHeaders(next http.Handler) http.Handler { }) } -// clientIP extracts the client IP from RemoteAddr (best effort).
-func clientIP(r *http.Request) string { - addr := r.RemoteAddr - if idx := strings.LastIndex(addr, ":"); idx != -1 { - return addr[:idx] +// clientIP returns the real client IP for the request, respecting the +// server's trusted-proxy setting (DEF-03). Delegates to middleware.ClientIP +// so the same extraction logic is used for rate limiting and audit logging. +func (u *UIServer) clientIP(r *http.Request) string { + var proxyIP net.IP + if u.cfg.Server.TrustedProxy != "" { + proxyIP = net.ParseIP(u.cfg.Server.TrustedProxy) } - return addr + return middleware.ClientIP(r, proxyIP) } // actorName resolves the username of the currently authenticated user from the diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 48884f7..3bd2f87 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -303,7 +303,7 @@ func TestE2EAdminAccountManagement(t *testing.T) { // Set roles. resp3 := e.do(t, "PUT", "/v1/accounts/"+carolUUID+"/roles", map[string][]string{ - "roles": {"reader"}, + "roles": {"user"}, }, adminToken) mustStatus(t, resp3, http.StatusNoContent) _ = resp3.Body.Close() @@ -315,8 +315,8 @@ func TestE2EAdminAccountManagement(t *testing.T) { Roles []string `json:"roles"` } decodeJSON(t, resp4, &rolesResp) - if len(rolesResp.Roles) != 1 || rolesResp.Roles[0] != "reader" { - t.Errorf("roles = %v, want [reader]", rolesResp.Roles) + if len(rolesResp.Roles) != 1 || rolesResp.Roles[0] != "user" { + t.Errorf("roles = %v, want [user]", rolesResp.Roles) } // Delete account. diff --git a/web/static/mcias.js b/web/static/mcias.js new file mode 100644 index 0000000..af5937d --- /dev/null +++ b/web/static/mcias.js @@ -0,0 +1,26 @@ +// mcias.js — HTMX event wiring for the MCIAS web UI. + +// Show server error responses in the global #htmx-error-banner. +// +// HTMX 2.x fires htmx:responseError for 4xx/5xx responses and does not swap +// the body into the target by default. The server's renderError() always +// returns an HTML
fragment whose message is +// HTML-escaped server-side, so setting innerHTML here is safe. +document.body.addEventListener('htmx:responseError', function (evt) { + var banner = document.getElementById('htmx-error-banner'); + if (!banner) { return; } + var body = (evt.detail.xhr && evt.detail.xhr.responseText) || 'An unexpected error occurred.'; + banner.innerHTML = body; + banner.style.display = ''; + banner.scrollIntoView({ behavior: 'instant', block: 'nearest' }); +}); + +// Clear the error banner whenever a successful HTMX swap completes so +// stale errors do not persist after the user corrects their input. +document.body.addEventListener('htmx:afterSwap', function () { + var banner = document.getElementById('htmx-error-banner'); + if (banner) { + banner.style.display = 'none'; + banner.innerHTML = ''; + } +}); diff --git a/web/templates/base.html b/web/templates/base.html index d4c3af5..f6bbc7b 100644 --- a/web/templates/base.html +++ b/web/templates/base.html @@ -23,12 +23,14 @@
+ {{if .Error}}{{end}} {{if .Flash}}
{{.Flash}}
{{end}} {{block "content" .}}{{end}}
+ {{end}}