From b1b52000c45c65ab09506a76f7dbd6937882eef6 Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Sun, 15 Mar 2026 00:39:41 -0700 Subject: [PATCH] Sync docs and fix flaky renewal e2e test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ARCHITECTURE.md: add Vault Endpoints section, /unseal UI page, vault_sealed/vault_unsealed audit events, sealed interceptor in gRPC chain - openapi.yaml: add /v1/vault/{status,unseal,seal} endpoints, update /v1/health sealed-state docs, add VaultSealed response component, add vault audit event types and Admin — Vault tag - web/static/openapi.yaml: kept in sync with root - test/e2e: increase renewal test token lifetime from 2s to 10s (sleep 6s) to eliminate race between token expiry and HTTP round-trip Co-Authored-By: Claude Opus 4.6 --- ARCHITECTURE.md | 22 ++++++- openapi.yaml | 141 ++++++++++++++++++++++++++++++++++++++++++- test/e2e/e2e_test.go | 10 +-- 3 files changed, 164 insertions(+), 9 deletions(-) diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index 019f8fe..8f2cee6 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -431,11 +431,23 @@ All endpoints use JSON request/response bodies. All responses include a |---|---|---|---| | GET | `/v1/audit` | admin JWT | List audit log events | +### Vault Endpoints + +| Method | Path | Auth required | Description | +|---|---|---|---| +| GET | `/v1/vault/status` | none | Returns `{"sealed": bool}`; always accessible | +| POST | `/v1/vault/unseal` | none | Accept passphrase, derive key, unseal (rate-limited 3/s burst 5) | +| POST | `/v1/vault/seal` | admin JWT | Zero key material and seal the vault; invalidates all JWTs | + +When the vault is sealed, all endpoints except health, vault status, and unseal +return 503 with `{"error":"vault is sealed","code":"vault_sealed"}`. The UI +redirects non-exempt paths to `/unseal`. 
+ ### Admin / Server Endpoints | Method | Path | Auth required | Description | |---|---|---|---| -| GET | `/v1/health` | none | Health check | +| GET | `/v1/health` | none | Health check — returns `{"status":"ok"}` or `{"status":"sealed"}` | | GET | `/v1/keys/public` | none | Ed25519 public key (JWK format) | ### Web Management UI @@ -458,6 +470,7 @@ cookie pattern (`mcias_csrf`). | Path | Description | |---|---| +| `/unseal` | Passphrase form to unseal the vault; shown for all paths when sealed | | `/login` | Username/password login with optional TOTP step | | `/` | Dashboard (account summary) | | `/accounts` | Account list | @@ -797,6 +810,8 @@ The `cmd/` packages are thin wrappers that wire dependencies and call into | `policy_rule_updated` | Policy rule updated (priority, enabled, description) | | `policy_rule_deleted` | Policy rule deleted | | `policy_deny` | Policy engine denied a request (logged for every explicit deny) | +| `vault_unsealed` | Vault unsealed via REST API or web UI; details include `source` (api\|ui) and `ip` | +| `vault_sealed` | Vault sealed via REST API; details include actor ID, `source`, and `ip` | --- @@ -1010,9 +1025,12 @@ details. ### Interceptor Chain ``` -[Request Logger] → [Auth Interceptor] → [Rate Limiter] → [Handler] +[Sealed Interceptor] → [Request Logger] → [Auth Interceptor] → [Rate Limiter] → [Handler] ``` +- **Sealed Interceptor**: first in chain; blocks all RPCs with + `codes.Unavailable` ("vault sealed") when the vault is sealed, except + `AdminService/Health` which returns the sealed status. - **Request Logger**: logs method, peer IP, status code, duration; never logs the `authorization` metadata value. - **Auth Interceptor**: validates Bearer JWT, injects claims. Public RPCs diff --git a/openapi.yaml b/openapi.yaml index 4fb3ef3..2d6dbce 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -307,6 +307,18 @@ components: error: rate limit exceeded code: rate_limited + VaultSealed: + description: | + The vault is sealed. 
The server is running but has no key material. + Unseal via `POST /v1/vault/unseal` before retrying. + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + example: + error: vault is sealed + code: vault_sealed + paths: # ── Public ──────────────────────────────────────────────────────────────── @@ -314,12 +326,17 @@ paths: /v1/health: get: summary: Health check - description: Returns `{"status":"ok"}` if the server is running. No auth required. + description: | + Returns server health status. Always returns HTTP 200, even when the + vault is sealed. No auth required. + + When the vault is sealed, `status` is `"sealed"` and most other + endpoints return 503. When healthy, `status` is `"ok"`. operationId: getHealth tags: [Public] responses: "200": - description: Server is healthy. + description: Server is running (check `status` for sealed state). content: application/json: schema: @@ -327,6 +344,7 @@ paths: properties: status: type: string + enum: [ok, sealed] example: ok /v1/keys/public: @@ -369,6 +387,121 @@ paths: description: Base64url-encoded public key bytes. example: 11qYAYKxCrfVS_7TyWQHOg7hcvPapiMlrwIaaPcHURo + /v1/vault/status: + get: + summary: Vault seal status + description: | + Returns whether the vault is currently sealed. Always accessible, + even when sealed. No auth required. + + Clients should poll this after startup or after a 503 `vault_sealed` + response to determine when to attempt an unseal. + operationId: getVaultStatus + tags: [Public] + responses: + "200": + description: Current vault seal state. + content: + application/json: + schema: + type: object + required: [sealed] + properties: + sealed: + type: boolean + example: false + + /v1/vault/unseal: + post: + summary: Unseal the vault + description: | + Provide the master passphrase to derive the encryption key, decrypt + the Ed25519 signing key, and unseal the vault. Once unsealed, all + other endpoints become available. 
+ + Rate limited to 3 requests per second per IP (burst 5) to limit + brute-force attempts against the passphrase. + + The passphrase is never logged. A generic `"unseal failed"` error + is returned for any failure (wrong passphrase, vault unsealed by a + concurrent request mid-flight, etc.) to avoid leaking information. + operationId: unsealVault + tags: [Public] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: [passphrase] + properties: + passphrase: + type: string + description: Master passphrase used to derive the encryption key. + example: correct-horse-battery-staple + responses: + "200": + description: Vault unsealed (or was already unsealed). + content: + application/json: + schema: + type: object + properties: + status: + type: string + enum: [unsealed, already unsealed] + example: unsealed + "400": + $ref: "#/components/responses/BadRequest" + "401": + description: Wrong passphrase or key decryption failure. + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + example: + error: unseal failed + code: unauthorized + "429": + $ref: "#/components/responses/RateLimited" + + /v1/vault/seal: + post: + summary: Seal the vault (admin) + description: | + Zero all key material in memory and transition the server to the + sealed state. After this call: + + - All subsequent requests (except health, vault status, and unseal) + return 503 `vault_sealed`. + - The caller's own JWT is immediately invalidated because the public + key needed to verify it is no longer held in memory. + - The server can be unsealed again via `POST /v1/vault/unseal`. + + This is an emergency operation. Use it to protect key material if a + compromise is suspected. It does **not** restart the server or wipe + the database. + operationId: sealVault + tags: [Admin — Vault] + security: + - bearerAuth: [] + responses: + "200": + description: Vault sealed (or was already sealed). 
+ content: + application/json: + schema: + type: object + properties: + status: + type: string + enum: [sealed, already sealed] + example: sealed + "401": + $ref: "#/components/responses/Unauthorized" + "403": + $ref: "#/components/responses/Forbidden" + /v1/auth/login: post: summary: Login @@ -1148,7 +1281,7 @@ paths: `pgcred_accessed`, `pgcred_updated`, `pgcred_access_granted`, `pgcred_access_revoked`, `tag_added`, `tag_removed`, `policy_rule_created`, `policy_rule_updated`, `policy_rule_deleted`, - `policy_deny`. + `policy_deny`, `vault_sealed`, `vault_unsealed`. operationId: listAudit tags: [Admin — Audit] security: @@ -1530,3 +1663,5 @@ tags: description: Requires admin role. - name: Admin — Policy description: Requires admin role. Manage policy rules and account tags. + - name: Admin — Vault + description: Requires admin role. Emergency vault seal operation. diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 7859b16..820e8d2 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -227,9 +227,11 @@ func TestE2ETokenRenewal(t *testing.T) { e := newTestEnv(t) acct := e.createAccount(t, "bob") - // Issue a short-lived token (2s) directly so we can wait past the 50% + // Issue a short-lived token (10s) directly so we can wait past the 50% // renewal threshold (SEC-03) without blocking the test for minutes. - oldToken, claims, err := token.IssueToken(e.privKey, e2eIssuer, acct.UUID, nil, 2*time.Second) + // 10s gives ample headroom: we sleep 6s (>50%), leaving 4s for the HTTP + // round-trip before expiry — eliminating the race that plagued the 2s token. + oldToken, claims, err := token.IssueToken(e.privKey, e2eIssuer, acct.UUID, nil, 10*time.Second) if err != nil { t.Fatalf("IssueToken: %v", err) } @@ -237,8 +239,8 @@ func TestE2ETokenRenewal(t *testing.T) { t.Fatalf("TrackToken: %v", err) } - // Wait for >50% of the 2s lifetime to elapse. - time.Sleep(1100 * time.Millisecond) + // Wait for >50% of the 10s lifetime to elapse. 
+ time.Sleep(6 * time.Second) // Renew. resp2 := e.do(t, "POST", "/v1/auth/renew", nil, oldToken)