Files
metacrypt/internal/barrier/barrier.go
Kyle Isom 64d921827e Add MEK rotation, per-engine DEKs, and v2 ciphertext format (audit #6, #22)
Implement a two-level key hierarchy: the MEK now wraps per-engine DEKs
stored in a new barrier_keys table, rather than encrypting all barrier
entries directly. A v2 ciphertext format (0x02) embeds the key ID so the
barrier can resolve which DEK to use on decryption. v1 ciphertext remains
supported for backward compatibility.

Key changes:
- crypto: EncryptV2/DecryptV2/ExtractKeyID for v2 ciphertext with key IDs
- barrier: key registry (CreateKey, RotateKey, ListKeys, MigrateToV2, ReWrapKeys)
- seal: RotateMEK re-wraps DEKs without re-encrypting data
- engine: Mount auto-creates per-engine DEK
- REST + gRPC: barrier/keys, barrier/rotate-mek, barrier/rotate-key, barrier/migrate
- proto: BarrierService (v1 + v2) with ListKeys, RotateMEK, RotateKey, Migrate
- db: migration v2 adds barrier_keys table

Also includes: security audit report, CSRF protection, engine design specs
(sshca, transit, user), path-bound AAD migration tool, policy engine
enhancements, and ARCHITECTURE.md updates.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 18:27:44 -07:00

647 lines
16 KiB
Go

// Package barrier provides an encrypted storage barrier backed by SQLite.
package barrier
import (
"context"
"database/sql"
"errors"
"fmt"
"strings"
"sync"
"git.wntrmute.dev/kyle/metacrypt/internal/crypto"
)
// Sentinel errors returned by the barrier. Compare with errors.Is: some call
// sites return them wrapped with additional context.
var (
// ErrSealed is returned when an operation is attempted while no MEK is loaded.
ErrSealed = errors.New("barrier: sealed")
// ErrNotFound is returned by Get when no entry exists at the requested path.
ErrNotFound = errors.New("barrier: entry not found")
// ErrKeyNotFound is returned (wrapped) when a DEK referenced by key ID is not
// present in the in-memory key registry.
ErrKeyNotFound = errors.New("barrier: key not found")
)
// Barrier is the encrypted storage barrier interface. Values are addressed by
// slash-separated paths and are only accessible while the barrier is unsealed.
type Barrier interface {
// Unseal opens the barrier with the given master encryption key.
Unseal(mek []byte) error
// Seal closes the barrier and zeroizes the key material.
Seal() error
// IsSealed returns true if the barrier is sealed.
IsSealed() bool
// Get retrieves and decrypts a value by path.
// AESGCMBarrier returns ErrNotFound when no entry exists at path.
Get(ctx context.Context, path string) ([]byte, error)
// Put encrypts and stores a value at the given path, replacing any
// existing entry.
Put(ctx context.Context, path string, value []byte) error
// Delete removes an entry by path.
Delete(ctx context.Context, path string) error
// List returns the entries stored under the given prefix.
// AESGCMBarrier returns each path with the prefix stripped.
List(ctx context.Context, prefix string) ([]string, error)
}
// KeyInfo holds metadata about a barrier key (DEK). It mirrors the non-secret
// columns of the barrier_keys table; the key material itself is never exposed.
type KeyInfo struct {
// KeyID is the registry identifier of the DEK, e.g. "system" or
// "engine/{type}/{mount}".
KeyID string `json:"key_id"`
// Version starts at 1 when the key is created and is incremented by
// RotateKey.
Version int `json:"version"`
// CreatedAt is the created_at column as scanned from the database.
// NOTE(review): the timestamp format is defined by the schema, which is
// not visible in this file.
CreatedAt string `json:"created_at"`
// RotatedAt is the rotated_at column as scanned from the database; it is
// refreshed by RotateKey and ReWrapKeys.
RotatedAt string `json:"rotated_at"`
}
// AESGCMBarrier implements Barrier using AES-256-GCM encryption.
//
// It keeps a two-level key hierarchy in memory: the MEK, which wraps the DEKs
// stored in the barrier_keys table and decrypts legacy v1 entries directly,
// and the unwrapped DEKs, which encrypt v2 entries. A nil mek means the
// barrier is sealed.
type AESGCMBarrier struct {
// db is the SQLite handle holding the barrier_entries and barrier_keys tables.
db *sql.DB
// mek is the master encryption key; nil while sealed.
mek []byte
keys map[string][]byte // key_id → plaintext DEK
// mu guards mek and keys.
mu sync.RWMutex
}
// NewAESGCMBarrier returns an AES-GCM barrier that stores its data in db.
// No key material is loaded, so the barrier reports itself as sealed until
// Unseal is called.
func NewAESGCMBarrier(db *sql.DB) *AESGCMBarrier {
	barrier := new(AESGCMBarrier)
	barrier.db = db
	return barrier
}
// Unseal opens the barrier with the given master encryption key (MEK) and
// loads every wrapped DEK from the barrier_keys table into memory.
//
// The MEK is copied, so the caller remains free to zeroize its own slice.
//
// A missing barrier_keys table (a database that predates the key registry) is
// tolerated: the barrier unseals with an empty key registry and uses the MEK
// directly for v1 entries. Any other failure to load the DEKs — a query
// error, or a DEK that does not unwrap under mek — is returned, and the
// barrier is left in whatever state it was in before the call. Swallowing
// those errors would unseal with no DEKs, making v2 entries unreadable and
// silently downgrading new writes to the v1/MEK format.
func (b *AESGCMBarrier) Unseal(mek []byte) error {
	b.mu.Lock()
	defer b.mu.Unlock()

	prevMEK, prevKeys := b.mek, b.keys

	k := make([]byte, len(mek))
	copy(k, mek)
	b.mek = k
	b.keys = make(map[string][]byte)

	// Load DEKs from barrier_keys table.
	if err := b.loadKeys(); err != nil {
		// Whatever was loaded before the failure must not linger in memory.
		for _, dek := range b.keys {
			crypto.Zeroize(dek)
		}
		// NOTE(review): the missing-table case is detected by SQLite's
		// error text because database/sql exposes no portable error code.
		if !strings.Contains(err.Error(), "no such table") {
			crypto.Zeroize(b.mek)
			b.mek, b.keys = prevMEK, prevKeys
			return fmt.Errorf("barrier: load keys: %w", err)
		}
		// If the table doesn't exist yet (pre-migration), that's OK.
		// The barrier will use MEK directly for v1 entries.
		b.keys = make(map[string][]byte)
	}

	// The new state is installed; scrub the key material it replaced.
	for _, dek := range prevKeys {
		crypto.Zeroize(dek)
	}
	if prevMEK != nil {
		crypto.Zeroize(prevMEK)
	}
	return nil
}
// loadKeys reads every row of the barrier_keys table, unwraps its DEK with
// the current MEK (the key ID is passed along to the decrypt call), and
// records the plaintext DEK in b.keys. On error, DEKs loaded before the
// failure remain in b.keys.
// Caller must hold b.mu.
func (b *AESGCMBarrier) loadKeys() error {
	const query = "SELECT key_id, encrypted_dek FROM barrier_keys"

	result, queryErr := b.db.Query(query)
	if queryErr != nil {
		return queryErr
	}
	defer func() { _ = result.Close() }()

	for result.Next() {
		var (
			id      string
			wrapped []byte
		)
		scanErr := result.Scan(&id, &wrapped)
		if scanErr != nil {
			return fmt.Errorf("barrier: scan key %q: %w", id, scanErr)
		}

		plain, unwrapErr := crypto.Decrypt(b.mek, wrapped, []byte(id))
		if unwrapErr != nil {
			return fmt.Errorf("barrier: decrypt key %q: %w", id, unwrapErr)
		}
		b.keys[id] = plain
	}
	return result.Err()
}
// Seal closes the barrier: every in-memory DEK and the MEK are zeroized and
// dropped. Sealing an already sealed barrier is a no-op. It always returns nil.
func (b *AESGCMBarrier) Seal() error {
	b.mu.Lock()
	defer b.mu.Unlock()

	// Scrub the DEKs before releasing the registry itself.
	for id := range b.keys {
		crypto.Zeroize(b.keys[id])
	}
	b.keys = nil

	if b.mek == nil {
		return nil
	}
	crypto.Zeroize(b.mek)
	b.mek = nil
	return nil
}
// IsSealed reports whether the barrier is sealed, i.e. no MEK is loaded.
func (b *AESGCMBarrier) IsSealed() bool {
	b.mu.RLock()
	sealed := b.mek == nil
	b.mu.RUnlock()
	return sealed
}
// resolveKeyID maps a barrier path to the ID of the key that protects it.
//
// Paths of the form engine/{type}/{mount}[/...] belong to the per-engine key
// "engine/{type}/{mount}". Every other path — including "engine/..." paths
// with fewer than three segments — belongs to the "system" key.
func resolveKeyID(path string) string {
	const (
		enginePrefix = "engine/"
		systemKeyID  = "system"
	)

	if !strings.HasPrefix(path, enginePrefix) {
		return systemKeyID
	}

	// Split into at most engine, {type}, {mount} and the untouched remainder.
	segments := strings.SplitN(path, "/", 4)
	if len(segments) < 3 {
		return systemKeyID
	}
	return strings.Join(segments[:3], "/")
}
// Get retrieves the entry stored at path and returns its decrypted plaintext.
//
// It returns ErrSealed while the barrier is sealed and ErrNotFound when no
// entry exists at path. Ciphertext whose first byte is
// crypto.BarrierVersionV2 is decrypted with the DEK named by its embedded key
// ID; if that DEK is not loaded the error wraps ErrKeyNotFound. Anything else
// is treated as v1 ciphertext and decrypted with the MEK. The path is passed
// to the decrypt call alongside the ciphertext.
//
// The read lock is held for the whole call. The key registry is a map that
// CreateKey, RotateKey and MigrateToV2 mutate under the write lock, so
// indexing it after releasing the lock would be a data race, and a concurrent
// Seal or RotateKey could zeroize the key while it is in use.
func (b *AESGCMBarrier) Get(ctx context.Context, path string) ([]byte, error) {
	b.mu.RLock()
	defer b.mu.RUnlock()
	if b.mek == nil {
		return nil, ErrSealed
	}

	var encrypted []byte
	err := b.db.QueryRowContext(ctx,
		"SELECT value FROM barrier_entries WHERE path = ?", path).Scan(&encrypted)
	if errors.Is(err, sql.ErrNoRows) {
		return nil, ErrNotFound
	}
	if err != nil {
		return nil, fmt.Errorf("barrier: get %q: %w", path, err)
	}

	// Check version byte to determine decryption strategy.
	if len(encrypted) > 0 && encrypted[0] == crypto.BarrierVersionV2 {
		keyID, err := crypto.ExtractKeyID(encrypted)
		if err != nil {
			return nil, fmt.Errorf("barrier: extract key ID %q: %w", path, err)
		}
		dek, ok := b.keys[keyID]
		if !ok {
			return nil, fmt.Errorf("barrier: %w: %q for path %q", ErrKeyNotFound, keyID, path)
		}
		pt, _, err := crypto.DecryptV2(dek, encrypted, []byte(path))
		if err != nil {
			return nil, fmt.Errorf("barrier: decrypt %q: %w", path, err)
		}
		return pt, nil
	}

	// v1 ciphertext — use MEK directly (backward compat).
	pt, err := crypto.Decrypt(b.mek, encrypted, []byte(path))
	if err != nil {
		return nil, fmt.Errorf("barrier: decrypt %q: %w", path, err)
	}
	return pt, nil
}
// Put encrypts value and stores it at path, replacing any existing entry.
//
// The key is chosen by resolveKeyID(path). If a DEK is registered under that
// ID the value is written in v2 format under the DEK; otherwise it falls back
// to v1 format under the MEK. It returns ErrSealed while the barrier is
// sealed.
//
// The read lock is held until the row is written. Releasing it earlier lets
// RotateKey commit between key selection and the write, so the entry would be
// persisted under the retired (and zeroized) DEK and become unreadable. It
// would also race with the map writes that key management performs under the
// write lock.
func (b *AESGCMBarrier) Put(ctx context.Context, path string, value []byte) error {
	b.mu.RLock()
	defer b.mu.RUnlock()
	if b.mek == nil {
		return ErrSealed
	}

	keyID := resolveKeyID(path)
	var (
		encrypted []byte
		err       error
	)
	if dek, ok := b.keys[keyID]; ok {
		// Use v2 format with the appropriate DEK.
		encrypted, err = crypto.EncryptV2(dek, keyID, value, []byte(path))
	} else {
		// No DEK registered for this key ID — fall back to MEK with v1 format.
		encrypted, err = crypto.Encrypt(b.mek, value, []byte(path))
	}
	if err != nil {
		return fmt.Errorf("barrier: encrypt %q: %w", path, err)
	}

	_, err = b.db.ExecContext(ctx, `
INSERT INTO barrier_entries (path, value) VALUES (?, ?)
ON CONFLICT(path) DO UPDATE SET value = excluded.value, updated_at = datetime('now')`,
		path, encrypted)
	if err != nil {
		return fmt.Errorf("barrier: put %q: %w", path, err)
	}
	return nil
}
// Delete removes the entry stored at path. Deleting a path that has no entry
// is not an error. It returns ErrSealed while the barrier is sealed.
func (b *AESGCMBarrier) Delete(ctx context.Context, path string) error {
	b.mu.RLock()
	sealed := b.mek == nil
	b.mu.RUnlock()
	if sealed {
		return ErrSealed
	}

	const stmt = "DELETE FROM barrier_entries WHERE path = ?"
	if _, execErr := b.db.ExecContext(ctx, stmt, path); execErr != nil {
		return fmt.Errorf("barrier: delete %q: %w", path, execErr)
	}
	return nil
}
// List returns the paths of all entries stored under prefix, each with the
// prefix stripped. It returns ErrSealed while the barrier is sealed. The
// result is nil when nothing matches.
//
// The prefix is matched literally and case-sensitively. SQL LIKE would
// otherwise treat '%' and '_' in the prefix as wildcards, and SQLite's LIKE
// ignores ASCII case, so a prefix such as "engine/ca/my_mount/" would also
// return entries of "engine/ca/myXmount/" with the prefix left unstripped.
func (b *AESGCMBarrier) List(ctx context.Context, prefix string) ([]string, error) {
	b.mu.RLock()
	sealed := b.mek == nil
	b.mu.RUnlock()
	if sealed {
		return nil, ErrSealed
	}

	// Escape the LIKE metacharacters (and the escape character itself) so
	// the prefix is matched verbatim.
	pattern := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(prefix) + "%"
	rows, err := b.db.QueryContext(ctx,
		`SELECT path FROM barrier_entries WHERE path LIKE ? ESCAPE '\'`,
		pattern)
	if err != nil {
		return nil, fmt.Errorf("barrier: list %q: %w", prefix, err)
	}
	defer func() { _ = rows.Close() }()

	var paths []string
	for rows.Next() {
		var p string
		if err := rows.Scan(&p); err != nil {
			return nil, fmt.Errorf("barrier: list scan: %w", err)
		}
		// LIKE is only a coarse, case-insensitive filter; enforce the
		// exact prefix here.
		if !strings.HasPrefix(p, prefix) {
			continue
		}
		paths = append(paths, p[len(prefix):])
	}
	return paths, rows.Err()
}
// CreateKey ensures a DEK is registered under keyID. If the key is already
// loaded it does nothing. Otherwise it generates a new DEK, wraps it with the
// MEK, and stores it in the barrier_keys table.
//
// If the table already holds a row for keyID that is not loaded in memory,
// the INSERT is skipped by ON CONFLICT DO NOTHING. In that case the persisted
// DEK is unwrapped and adopted instead of the freshly generated one;
// installing the fresh key would encrypt data under a DEK that exists only in
// memory and is lost at the next seal.
//
// It returns ErrSealed while the barrier is sealed.
func (b *AESGCMBarrier) CreateKey(ctx context.Context, keyID string) error {
	b.mu.Lock()
	defer b.mu.Unlock()
	if b.mek == nil {
		return ErrSealed
	}
	if _, exists := b.keys[keyID]; exists {
		return nil // Already exists.
	}

	dek, err := crypto.GenerateKey()
	if err != nil {
		return fmt.Errorf("barrier: generate DEK %q: %w", keyID, err)
	}
	encDEK, err := crypto.Encrypt(b.mek, dek, []byte(keyID))
	if err != nil {
		crypto.Zeroize(dek)
		return fmt.Errorf("barrier: wrap DEK %q: %w", keyID, err)
	}

	res, err := b.db.ExecContext(ctx, `
INSERT INTO barrier_keys (key_id, version, encrypted_dek)
VALUES (?, 1, ?)
ON CONFLICT(key_id) DO NOTHING`,
		keyID, encDEK)
	if err != nil {
		crypto.Zeroize(dek)
		return fmt.Errorf("barrier: store DEK %q: %w", keyID, err)
	}
	inserted, err := res.RowsAffected()
	if err != nil {
		crypto.Zeroize(dek)
		return fmt.Errorf("barrier: store DEK %q: %w", keyID, err)
	}

	if inserted == 0 {
		// A row for keyID already existed, so our key was not persisted.
		// Discard it and adopt the stored one.
		crypto.Zeroize(dek)
		var stored []byte
		err = b.db.QueryRowContext(ctx,
			"SELECT encrypted_dek FROM barrier_keys WHERE key_id = ?", keyID).Scan(&stored)
		if err != nil {
			return fmt.Errorf("barrier: load existing DEK %q: %w", keyID, err)
		}
		dek, err = crypto.Decrypt(b.mek, stored, []byte(keyID))
		if err != nil {
			return fmt.Errorf("barrier: unwrap existing DEK %q: %w", keyID, err)
		}
	}

	b.keys[keyID] = dek
	return nil
}
// RotateKey replaces the DEK registered under keyID with a freshly generated
// one and re-encrypts, in a single transaction, every barrier entry that
// belongs to that key. The in-memory key is swapped only after the
// transaction commits; on any failure the stored data and the in-memory key
// are left unchanged and the new DEK is zeroized.
//
// An entry belongs to keyID when
//   - it is v2 ciphertext whose embedded key ID equals keyID, or
//   - it is v1 (MEK-encrypted) ciphertext and resolveKeyID(path) equals
//     keyID; such entries are upgraded to v2 as a side effect.
//
// Ownership is decided per entry rather than by a path LIKE pattern. A path
// pattern disagrees with resolveKeyID for two-segment "engine/x" paths, for a
// path equal to the key ID itself, and for key IDs containing LIKE
// wildcards. Any v2 entry missed that way would become undecryptable once the
// old DEK is zeroized. The cost is a scan of the whole barrier_entries table.
//
// It returns ErrSealed while the barrier is sealed, and an error wrapping
// ErrKeyNotFound when keyID is not loaded or has no row in barrier_keys.
func (b *AESGCMBarrier) RotateKey(ctx context.Context, keyID string) error {
	b.mu.Lock()
	defer b.mu.Unlock()
	if b.mek == nil {
		return ErrSealed
	}
	oldDEK, ok := b.keys[keyID]
	if !ok {
		return fmt.Errorf("barrier: %w: %q", ErrKeyNotFound, keyID)
	}

	// Generate new DEK.
	newDEK, err := crypto.GenerateKey()
	if err != nil {
		return fmt.Errorf("barrier: generate DEK: %w", err)
	}
	// The new DEK must not outlive a failed rotation.
	installed := false
	defer func() {
		if !installed {
			crypto.Zeroize(newDEK)
		}
	}()

	// Wrap new DEK with MEK.
	encDEK, err := crypto.Encrypt(b.mek, newDEK, []byte(keyID))
	if err != nil {
		return fmt.Errorf("barrier: wrap DEK: %w", err)
	}

	tx, err := b.db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("barrier: begin tx: %w", err)
	}
	defer func() { _ = tx.Rollback() }()

	// Update the key in barrier_keys.
	res, err := tx.ExecContext(ctx, `
UPDATE barrier_keys SET encrypted_dek = ?, version = version + 1, rotated_at = datetime('now')
WHERE key_id = ?`, encDEK, keyID)
	if err != nil {
		return fmt.Errorf("barrier: update key: %w", err)
	}
	updated, err := res.RowsAffected()
	if err != nil {
		return fmt.Errorf("barrier: update key: %w", err)
	}
	if updated == 0 {
		// Without a persisted row the new DEK would exist only in memory
		// and everything re-encrypted below would be lost at the next seal.
		return fmt.Errorf("barrier: %w: %q has no row in barrier_keys", ErrKeyNotFound, keyID)
	}

	// Collect the entries owned by this key. They are buffered so the
	// cursor is closed before the transaction issues its updates.
	rows, err := tx.QueryContext(ctx, "SELECT path, value FROM barrier_entries")
	if err != nil {
		return fmt.Errorf("barrier: query entries: %w", err)
	}
	type entry struct {
		path  string
		value []byte
	}
	var entries []entry
	for rows.Next() {
		var e entry
		if err := rows.Scan(&e.path, &e.value); err != nil {
			_ = rows.Close()
			return fmt.Errorf("barrier: scan entry: %w", err)
		}
		owner := resolveKeyID(e.path)
		if len(e.value) > 0 && e.value[0] == crypto.BarrierVersionV2 {
			// For v2 the embedded key ID is authoritative. If it cannot be
			// read, fall back to the path so a corrupt entry under this
			// key still fails the rotation instead of being skipped.
			if id, idErr := crypto.ExtractKeyID(e.value); idErr == nil {
				owner = id
			}
		}
		if owner != keyID {
			continue
		}
		entries = append(entries, e)
	}
	_ = rows.Close()
	if err := rows.Err(); err != nil {
		return fmt.Errorf("barrier: rows error: %w", err)
	}

	for _, e := range entries {
		// Decrypt with old DEK (handle v1 or v2).
		var plaintext []byte
		if len(e.value) > 0 && e.value[0] == crypto.BarrierVersionV2 {
			pt, _, decErr := crypto.DecryptV2(oldDEK, e.value, []byte(e.path))
			if decErr != nil {
				return fmt.Errorf("barrier: decrypt %q during rotation: %w", e.path, decErr)
			}
			plaintext = pt
		} else {
			// v1: encrypted with MEK.
			pt, decErr := crypto.Decrypt(b.mek, e.value, []byte(e.path))
			if decErr != nil {
				return fmt.Errorf("barrier: decrypt v1 %q during rotation: %w", e.path, decErr)
			}
			plaintext = pt
		}

		// Re-encrypt with new DEK using v2 format.
		newCiphertext, encErr := crypto.EncryptV2(newDEK, keyID, plaintext, []byte(e.path))
		if encErr != nil {
			return fmt.Errorf("barrier: re-encrypt %q: %w", e.path, encErr)
		}
		_, err = tx.ExecContext(ctx,
			"UPDATE barrier_entries SET value = ?, updated_at = datetime('now') WHERE path = ?",
			newCiphertext, e.path)
		if err != nil {
			return fmt.Errorf("barrier: update %q: %w", e.path, err)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("barrier: commit rotation: %w", err)
	}

	// Swap the in-memory key.
	installed = true
	crypto.Zeroize(oldDEK)
	b.keys[keyID] = newDEK
	return nil
}
// ListKeys returns metadata about all registered barrier keys, ordered by key
// ID. Key material is never included. It returns ErrSealed while the barrier
// is sealed, and nil when no keys are registered.
//
// The timestamp columns are scanned through sql.NullString so that a NULL
// value yields an empty string instead of failing the whole listing.
// NOTE(review): whether rotated_at can be NULL depends on the schema, which
// is not visible here; the INSERT statements in this file never set it.
func (b *AESGCMBarrier) ListKeys(ctx context.Context) ([]KeyInfo, error) {
	b.mu.RLock()
	mek := b.mek
	b.mu.RUnlock()
	if mek == nil {
		return nil, ErrSealed
	}

	rows, err := b.db.QueryContext(ctx,
		"SELECT key_id, version, created_at, rotated_at FROM barrier_keys ORDER BY key_id")
	if err != nil {
		return nil, fmt.Errorf("barrier: list keys: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var keys []KeyInfo
	for rows.Next() {
		var (
			ki      KeyInfo
			created sql.NullString
			rotated sql.NullString
		)
		if err := rows.Scan(&ki.KeyID, &ki.Version, &created, &rotated); err != nil {
			return nil, fmt.Errorf("barrier: scan key info: %w", err)
		}
		// String is "" when the column was NULL.
		ki.CreatedAt = created.String
		ki.RotatedAt = rotated.String
		keys = append(keys, ki)
	}
	return keys, rows.Err()
}
// MigrateToV2 re-encrypts every v1 (MEK-encrypted) entry into v2
// (DEK-encrypted) format and returns the number of entries migrated.
//
// It first ensures a "system" DEK exists; like every other DEK it is a
// freshly generated key wrapped by the MEK. It then re-encrypts each v1 entry
// under the DEK chosen by resolveKeyID, creating per-engine DEKs on demand.
// All re-encryption and on-demand key creation happens in one transaction.
//
// On error the transaction is rolled back, the DEKs created inside it are
// removed from memory again (they were never persisted, so leaving them
// loaded would let Put encrypt data under keys that vanish at the next seal),
// and the returned count is 0 because nothing was migrated. It returns
// ErrSealed while the barrier is sealed.
func (b *AESGCMBarrier) MigrateToV2(ctx context.Context) (int, error) {
	b.mu.Lock()
	defer b.mu.Unlock()
	if b.mek == nil {
		return 0, ErrSealed
	}

	// Ensure the "system" key exists.
	if _, ok := b.keys["system"]; !ok {
		if err := b.createKeyLocked(ctx, "system"); err != nil {
			return 0, fmt.Errorf("barrier: create system DEK: %w", err)
		}
	}

	// Find all entries still in v1 format. They are buffered so the cursor
	// is closed before the transaction starts writing.
	rows, err := b.db.QueryContext(ctx, "SELECT path, value FROM barrier_entries")
	if err != nil {
		return 0, fmt.Errorf("barrier: query entries: %w", err)
	}
	type entry struct {
		path  string
		value []byte
	}
	var toMigrate []entry
	for rows.Next() {
		var e entry
		if err := rows.Scan(&e.path, &e.value); err != nil {
			_ = rows.Close()
			return 0, fmt.Errorf("barrier: scan: %w", err)
		}
		if len(e.value) > 0 && e.value[0] == crypto.BarrierVersionV1 {
			toMigrate = append(toMigrate, e)
		}
	}
	_ = rows.Close()
	if err := rows.Err(); err != nil {
		return 0, fmt.Errorf("barrier: rows error: %w", err)
	}
	if len(toMigrate) == 0 {
		return 0, nil
	}

	tx, err := b.db.BeginTx(ctx, nil)
	if err != nil {
		return 0, fmt.Errorf("barrier: begin tx: %w", err)
	}

	// IDs of DEKs registered in memory by this call; they only become real
	// once the transaction commits.
	var created []string
	committed := false
	defer func() {
		if committed {
			return
		}
		_ = tx.Rollback()
		for _, id := range created {
			crypto.Zeroize(b.keys[id])
			delete(b.keys, id)
		}
	}()

	migrated := 0
	for _, e := range toMigrate {
		// Decrypt with MEK (v1).
		plaintext, decErr := crypto.Decrypt(b.mek, e.value, []byte(e.path))
		if decErr != nil {
			return 0, fmt.Errorf("barrier: decrypt %q: %w", e.path, decErr)
		}

		keyID := resolveKeyID(e.path)
		// Ensure the DEK exists for this key ID.
		dek, ok := b.keys[keyID]
		if !ok {
			if err := b.createKeyLockedTx(ctx, tx, keyID); err != nil {
				return 0, fmt.Errorf("barrier: create DEK %q: %w", keyID, err)
			}
			created = append(created, keyID)
			dek = b.keys[keyID]
		}

		newCiphertext, encErr := crypto.EncryptV2(dek, keyID, plaintext, []byte(e.path))
		if encErr != nil {
			return 0, fmt.Errorf("barrier: encrypt v2 %q: %w", e.path, encErr)
		}
		_, err = tx.ExecContext(ctx,
			"UPDATE barrier_entries SET value = ?, updated_at = datetime('now') WHERE path = ?",
			newCiphertext, e.path)
		if err != nil {
			return 0, fmt.Errorf("barrier: update %q: %w", e.path, err)
		}
		migrated++
	}

	if err := tx.Commit(); err != nil {
		return 0, fmt.Errorf("barrier: commit migration: %w", err)
	}
	committed = true
	return migrated, nil
}
// createKeyLocked generates a new DEK for keyID, wraps it with the MEK,
// stores it in barrier_keys, and registers it in b.keys. Caller must hold
// b.mu for writing and must have checked that the barrier is unsealed.
//
// If barrier_keys already has a row for keyID, ON CONFLICT DO NOTHING skips
// the INSERT. The persisted DEK is then unwrapped and registered instead of
// the new one; registering the new key would encrypt data under a DEK that
// exists only in memory.
func (b *AESGCMBarrier) createKeyLocked(ctx context.Context, keyID string) error {
	dek, err := crypto.GenerateKey()
	if err != nil {
		return err
	}
	encDEK, err := crypto.Encrypt(b.mek, dek, []byte(keyID))
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}

	res, err := b.db.ExecContext(ctx, `
INSERT INTO barrier_keys (key_id, version, encrypted_dek)
VALUES (?, 1, ?) ON CONFLICT(key_id) DO NOTHING`, keyID, encDEK)
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}
	inserted, err := res.RowsAffected()
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}

	if inserted == 0 {
		// The row already existed: drop our key and adopt the stored one.
		crypto.Zeroize(dek)
		var stored []byte
		err = b.db.QueryRowContext(ctx,
			"SELECT encrypted_dek FROM barrier_keys WHERE key_id = ?", keyID).Scan(&stored)
		if err != nil {
			return fmt.Errorf("load existing DEK: %w", err)
		}
		dek, err = crypto.Decrypt(b.mek, stored, []byte(keyID))
		if err != nil {
			return fmt.Errorf("unwrap existing DEK: %w", err)
		}
	}

	b.keys[keyID] = dek
	return nil
}
// createKeyLockedTx is like createKeyLocked but writes through an existing
// transaction. Caller must hold b.mu for writing.
//
// The DEK is registered in b.keys immediately, before tx commits; if the
// caller rolls tx back it is responsible for removing the key from b.keys
// again.
//
// If barrier_keys already has a row for keyID, ON CONFLICT DO NOTHING skips
// the INSERT. The persisted DEK is then unwrapped and registered instead of
// the new one; registering the new key would encrypt data under a DEK that
// is never stored.
func (b *AESGCMBarrier) createKeyLockedTx(ctx context.Context, tx *sql.Tx, keyID string) error {
	dek, err := crypto.GenerateKey()
	if err != nil {
		return err
	}
	encDEK, err := crypto.Encrypt(b.mek, dek, []byte(keyID))
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}

	res, err := tx.ExecContext(ctx, `
INSERT INTO barrier_keys (key_id, version, encrypted_dek)
VALUES (?, 1, ?) ON CONFLICT(key_id) DO NOTHING`, keyID, encDEK)
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}
	inserted, err := res.RowsAffected()
	if err != nil {
		crypto.Zeroize(dek)
		return err
	}

	if inserted == 0 {
		// The row already existed: drop our key and adopt the stored one.
		crypto.Zeroize(dek)
		var stored []byte
		err = tx.QueryRowContext(ctx,
			"SELECT encrypted_dek FROM barrier_keys WHERE key_id = ?", keyID).Scan(&stored)
		if err != nil {
			return fmt.Errorf("load existing DEK: %w", err)
		}
		dek, err = crypto.Decrypt(b.mek, stored, []byte(keyID))
		if err != nil {
			return fmt.Errorf("unwrap existing DEK: %w", err)
		}
	}

	b.keys[keyID] = dek
	return nil
}
// ReWrapKeys re-encrypts every loaded DEK under newMEK, persists the new
// wrappings to barrier_keys in a single transaction, and then installs a copy
// of newMEK as the barrier's MEK, zeroizing the previous one. Called during
// MEK rotation. The caller must NOT set b.mek itself: the old MEK stays in
// place until the transaction has committed, so a failure leaves both the
// database and the in-memory state unchanged.
//
// Only DEKs are re-wrapped; entry data is not touched.
// NOTE(review): v1 entries are encrypted directly under the MEK (see Get), so
// any that remain become undecryptable once the MEK is replaced. Presumably
// the caller runs MigrateToV2 first — confirm.
//
// A loaded DEK that has no row in barrier_keys is reported as an error
// wrapping ErrKeyNotFound rather than silently skipped. It returns ErrSealed
// while the barrier is sealed.
func (b *AESGCMBarrier) ReWrapKeys(ctx context.Context, newMEK []byte) error {
	b.mu.Lock()
	defer b.mu.Unlock()
	if b.mek == nil {
		return ErrSealed
	}

	tx, err := b.db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("barrier: begin tx: %w", err)
	}
	defer func() { _ = tx.Rollback() }()

	for keyID, dek := range b.keys {
		encDEK, err := crypto.Encrypt(newMEK, dek, []byte(keyID))
		if err != nil {
			return fmt.Errorf("barrier: re-wrap key %q: %w", keyID, err)
		}
		res, err := tx.ExecContext(ctx,
			"UPDATE barrier_keys SET encrypted_dek = ?, rotated_at = datetime('now') WHERE key_id = ?",
			encDEK, keyID)
		if err != nil {
			return fmt.Errorf("barrier: update key %q: %w", keyID, err)
		}
		updated, err := res.RowsAffected()
		if err != nil {
			return fmt.Errorf("barrier: update key %q: %w", keyID, err)
		}
		if updated == 0 {
			// The DEK is in use in memory but was never persisted;
			// continuing would hide that its data cannot survive a seal.
			return fmt.Errorf("barrier: update key %q: %w", keyID, ErrKeyNotFound)
		}
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("barrier: commit re-wrap: %w", err)
	}

	// Update the MEK in memory.
	crypto.Zeroize(b.mek)
	k := make([]byte, len(newMEK))
	copy(k, newMEK)
	b.mek = k
	return nil
}