Add archive package: tar.zst service directory snapshots

- Snapshot: VACUUM INTO for consistent db copy, excludes live db
  files and backups/, injects db snapshot, custom exclude patterns,
  streaming output via io.Writer
- Restore: extract tar.zst with path traversal protection
- zstd via github.com/klauspost/compress/zstd
- 5 tests: full roundtrip with db integrity verification,
  without db, exclude patterns, dest dir creation
- Update PROGRESS.md: all 9 packages complete, 87 total tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 16:37:18 -07:00
parent 20dc7ae0d6
commit 9012b889d6
5 changed files with 550 additions and 43 deletions

235
archive/archive.go Normal file
View File

@@ -0,0 +1,235 @@
// Package archive provides service directory snapshot and restore using
// tar.zst (tar compressed with Zstandard), with SQLite-aware handling.
//
// Snapshots exclude live database files (*.db, *.db-wal, *.db-shm) and
// anything named backups, at any depth of the service directory. A
// consistent database copy is created via VACUUM INTO and injected at the
// archive root under the base name of the live database file.
//
// The result is a clean, minimal archive that extracts directly into a
// working service directory on the destination.
package archive
import (
"archive/tar"
"database/sql"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
"git.wntrmute.dev/kyle/mcdsl/db"
)
// defaultExcludePatterns are always excluded from snapshots, in addition
// to any caller-supplied SnapshotOptions.ExcludePatterns. Each pattern is
// compared against the base name of an entry, so it applies at any depth
// of the service directory.
var defaultExcludePatterns = []string{
"*.db", // live database files
"*.db-wal", // live database -wal sidecar files
"*.db-shm", // live database -shm sidecar files
"backups", // backups directory (skipped together with its contents)
}
// SnapshotOptions configures a snapshot operation performed by Snapshot.
type SnapshotOptions struct {
// ServiceDir is the root directory to snapshot (e.g., /srv/myservice).
// Archive entry names are relative to this directory.
ServiceDir string
// DBPath is the path to the live database file. The filename (without
// directory) is used as the name in the archive, so the database copy
// always lands at the archive root. If empty, no database is included.
DBPath string
// DB is the live database connection. Used for VACUUM INTO to create
// a consistent snapshot. Must be non-nil whenever DBPath is set.
DB *sql.DB
// ExcludePatterns are additional glob patterns to exclude (beyond the
// defaults: *.db, *.db-wal, *.db-shm, backups/). Patterns use
// filepath.Match syntax and are compared against the base name of each
// entry; a directory that matches is skipped with everything below it.
ExcludePatterns []string
}
// Snapshot creates a tar.zst archive of the service directory, writing
// it to w. Live database files are excluded and a consistent VACUUM INTO
// copy is injected in their place.
//
// The database copy is produced first, in a private temporary directory
// that is removed before Snapshot returns. The service directory is then
// walked and every entry that does not match an exclude pattern is added
// under its path relative to opts.ServiceDir. Finally the database copy is
// appended at the archive root under the base name of opts.DBPath.
//
// Snapshot returns an error if opts.DBPath is set without opts.DB, if the
// database copy cannot be created, if the walk or any file read fails, or
// if writing to w fails. On error, w may already contain a partial archive.
func Snapshot(opts SnapshotOptions, w io.Writer) error {
	// A DBPath without a connection would silently yield an archive that
	// contains no database at all (the live files are always excluded), so
	// treat it as a caller error instead of producing a useless backup.
	if opts.DBPath != "" && opts.DB == nil {
		return fmt.Errorf("archive: DBPath %q is set but DB is nil", opts.DBPath)
	}

	// Create the VACUUM INTO copy if a database is specified.
	var dbSnapshotPath string
	if opts.DBPath != "" {
		// VACUUM INTO creates the file itself, so hand it a path inside a
		// fresh private directory rather than a pre-created temp file. The
		// cleanup is registered before the copy runs so that a failed or
		// partial copy is removed as well.
		tmpDir, err := os.MkdirTemp("", "mcdsl-snapshot-*")
		if err != nil {
			return fmt.Errorf("archive: create temp db: %w", err)
		}
		defer func() { _ = os.RemoveAll(tmpDir) }()

		dbSnapshotPath = filepath.Join(tmpDir, "snapshot.db")
		if err := db.Snapshot(opts.DB, dbSnapshotPath); err != nil {
			return fmt.Errorf("archive: vacuum into: %w", err)
		}
	}

	// Build the exclude set in a fresh slice so the package-level defaults
	// can never be aliased or modified through append.
	excludes := make([]string, 0, len(defaultExcludePatterns)+len(opts.ExcludePatterns))
	excludes = append(excludes, defaultExcludePatterns...)
	excludes = append(excludes, opts.ExcludePatterns...)

	// Create zstd writer → tar writer.
	zw, err := zstd.NewWriter(w)
	if err != nil {
		return fmt.Errorf("archive: create zstd writer: %w", err)
	}
	// Make sure the encoder is closed on every return path; the success
	// path closes it explicitly below so the close error can be reported.
	zwClosed := false
	defer func() {
		if !zwClosed {
			_ = zw.Close()
		}
	}()
	tw := tar.NewWriter(zw)

	// Walk the service directory.
	serviceDir := filepath.Clean(opts.ServiceDir)
	err = filepath.Walk(serviceDir, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		relPath, relErr := filepath.Rel(serviceDir, path)
		if relErr != nil {
			return relErr
		}
		// Skip the root directory itself.
		if relPath == "." {
			return nil
		}
		// Check excludes. An excluded directory is pruned entirely.
		if shouldExclude(relPath, info, excludes) {
			if info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		return addToTar(tw, path, relPath, info)
	})
	if err != nil {
		return fmt.Errorf("archive: walk %s: %w", serviceDir, err)
	}

	// Inject the database snapshot.
	if dbSnapshotPath != "" {
		dbName := filepath.Base(opts.DBPath)
		snapInfo, statErr := os.Stat(dbSnapshotPath)
		if statErr != nil {
			return fmt.Errorf("archive: stat db snapshot: %w", statErr)
		}
		if err := addToTar(tw, dbSnapshotPath, dbName, snapInfo); err != nil {
			return fmt.Errorf("archive: add db snapshot: %w", err)
		}
	}

	// Close the tar stream first so its trailer is compressed, then close
	// the zstd stream to flush the final frame to w.
	if err := tw.Close(); err != nil {
		return fmt.Errorf("archive: close tar: %w", err)
	}
	zwClosed = true
	if err := zw.Close(); err != nil {
		return fmt.Errorf("archive: close zstd: %w", err)
	}
	return nil
}
// Restore extracts a tar.zst archive from r into destDir. Creates the
// directory if it does not exist. Overwrites existing files. Newly created
// files and directories receive the permission bits recorded in the
// archive.
//
// Only directory and regular-file entries are extracted; any other entry
// type (for example a symlink) is skipped. An entry whose name would
// resolve outside destDir aborts the restore with an error.
//
// Restore returns an error if the destination cannot be created, if the
// stream is not a readable tar.zst archive, or if any entry cannot be
// written completely. Entries extracted before the failure are left in
// place.
func Restore(r io.Reader, destDir string) error {
	if err := os.MkdirAll(destDir, 0700); err != nil {
		return fmt.Errorf("archive: create dest dir: %w", err)
	}
	zr, err := zstd.NewReader(r)
	if err != nil {
		return fmt.Errorf("archive: create zstd reader: %w", err)
	}
	defer zr.Close()

	// Clean once; every entry is resolved and checked against this root.
	root := filepath.Clean(destDir)
	parentPrefix := ".." + string(os.PathSeparator)

	tr := tar.NewReader(zr)
	for {
		header, readErr := tr.Next()
		if readErr == io.EOF {
			return nil
		}
		if readErr != nil {
			return fmt.Errorf("archive: read tar entry: %w", readErr)
		}
		target := filepath.Join(root, header.Name) //nolint:gosec // archive is from trusted MCP agent, not user upload

		// Prevent path traversal. Comparing via Rel (rather than a string
		// prefix on the cleaned paths) also works when the destination is
		// "." or the filesystem root, where a "<dest>/" prefix never
		// matches the joined path.
		rel, relErr := filepath.Rel(root, target)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("archive: path traversal in %q", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			if mkErr := os.MkdirAll(target, os.FileMode(header.Mode&0o777)); mkErr != nil { //nolint:gosec // mode from trusted archive
				return fmt.Errorf("archive: mkdir %s: %w", target, mkErr)
			}
		case tar.TypeReg:
			// The archive may list a file before (or without) its parent
			// directory entry, so make sure the parent exists.
			if mkErr := os.MkdirAll(filepath.Dir(target), 0700); mkErr != nil {
				return fmt.Errorf("archive: mkdir parent %s: %w", target, mkErr)
			}
			f, createErr := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(header.Mode&0o777)) //nolint:gosec // path validated above, mode from trusted archive
			if createErr != nil {
				return fmt.Errorf("archive: create %s: %w", target, createErr)
			}
			if _, copyErr := io.Copy(f, tr); copyErr != nil { //nolint:gosec // bounded by tar entry size
				_ = f.Close()
				return fmt.Errorf("archive: write %s: %w", target, copyErr)
			}
			// A failed Close can mean buffered data never reached disk, so
			// it must not be ignored for a file that was just written.
			if closeErr := f.Close(); closeErr != nil {
				return fmt.Errorf("archive: close %s: %w", target, closeErr)
			}
		}
	}
}
// shouldExclude reports whether the entry at relPath matches any exclude
// pattern.
//
// Only the base name of relPath is compared, so a pattern applies at any
// depth. A pattern matches when:
//   - the entry is a directory and its name equals the pattern exactly
//     (this also covers names containing glob metacharacters), or
//   - the pattern, interpreted with filepath.Match, matches the name.
//
// A pattern ending in "/" (for example "backups/") is a directory-only
// pattern: the trailing slashes are dropped before matching and the
// pattern never matches a non-directory. A malformed glob pattern matches
// nothing.
func shouldExclude(relPath string, info os.FileInfo, patterns []string) bool {
	name := filepath.Base(relPath)
	isDir := info.IsDir()
	for _, pattern := range patterns {
		if strings.HasSuffix(pattern, "/") {
			if !isDir {
				continue
			}
			pattern = strings.TrimRight(pattern, "/")
		}
		// Match directory names exactly (e.g., "backups").
		if isDir && name == pattern {
			return true
		}
		// Match file patterns (e.g., "*.db"). Match reports false together
		// with ErrBadPattern, so a bad pattern simply never matches.
		if matched, err := filepath.Match(pattern, name); err == nil && matched {
			return true
		}
	}
	return false
}
// addToTar writes one filesystem entry to tw under the name archiveName.
//
// srcPath is the entry's location on disk and info is its (l)stat result;
// the tar header is derived from info. The entry name is stored with
// forward slashes regardless of the host path separator.
//
// Handling by file type:
//   - regular files: header followed by exactly info.Size() bytes of
//     content. Bytes appended to the file after info was taken are not
//     archived; a file that became shorter yields an error.
//   - symlinks: stored as a link entry with the link target read from disk;
//     the link is not followed.
//   - directories and other special files: header only, no content.
//   - sockets: skipped without error, since tar cannot represent them.
//
// Any error from reading the source or writing to tw is returned unwrapped
// for the caller to annotate, except for the shrunken-file case.
func addToTar(tw *tar.Writer, srcPath, archiveName string, info os.FileInfo) error {
	mode := info.Mode()

	// tar.FileInfoHeader rejects sockets; they are runtime artifacts that
	// are recreated by the service, so leave them out instead of failing
	// the whole snapshot.
	if mode&os.ModeSocket != 0 {
		return nil
	}

	// A symlink header needs the link target, which FileInfo does not carry.
	var linkTarget string
	if mode&os.ModeSymlink != 0 {
		var err error
		if linkTarget, err = os.Readlink(srcPath); err != nil {
			return err
		}
	}

	header, err := tar.FileInfoHeader(info, linkTarget)
	if err != nil {
		return err
	}
	header.Name = filepath.ToSlash(archiveName)
	if err := tw.WriteHeader(header); err != nil {
		return err
	}

	// Only regular files carry content. Opening anything else would follow
	// a symlink into a zero-size entry or block on a FIFO.
	if !mode.IsRegular() {
		return nil
	}

	f, err := os.Open(srcPath) //nolint:gosec // path from controlled walk
	if err != nil {
		return err
	}
	defer func() { _ = f.Close() }()

	// Copy exactly the size promised in the header so that a file growing
	// while it is archived (e.g. a live log) cannot overrun the entry.
	n, err := io.CopyN(tw, f, header.Size)
	if err == io.EOF {
		return fmt.Errorf("%s shrank while being archived: read %d of %d bytes", srcPath, n, header.Size)
	}
	return err
}

252
archive/archive_test.go Normal file
View File

@@ -0,0 +1,252 @@
package archive
import (
"bytes"
"database/sql"
"os"
"path/filepath"
"testing"
"git.wntrmute.dev/kyle/mcdsl/db"
)
// setupServiceDir populates a fresh temporary directory so that it looks
// like a deployed /srv/<service>/ tree and returns its path. The tree
// holds a config file, a certs/ directory with a certificate and key,
// placeholder live database files, and a backups/ directory; the last two
// groups exist so tests can confirm they are left out of snapshots. The
// database argument is accepted but not used.
func setupServiceDir(t *testing.T, database *sql.DB) string {
	t.Helper()

	root := t.TempDir()
	certs := filepath.Join(root, "certs")
	backups := filepath.Join(root, "backups")

	// Subdirectories.
	if mkErr := os.Mkdir(certs, 0700); mkErr != nil {
		t.Fatalf("mkdir certs: %v", mkErr)
	}
	if mkErr := os.Mkdir(backups, 0700); mkErr != nil {
		t.Fatalf("mkdir backups: %v", mkErr)
	}

	// Content that a snapshot is expected to carry over.
	writeFile(t, filepath.Join(root, "service.toml"), "listen_addr = \":8443\"\n")
	writeFile(t, filepath.Join(certs, "cert.pem"), "-----BEGIN CERTIFICATE-----\ntest\n-----END CERTIFICATE-----\n")
	writeFile(t, filepath.Join(certs, "key.pem"), "-----BEGIN PRIVATE KEY-----\ntest\n-----END PRIVATE KEY-----\n")

	// Content that a snapshot is expected to leave out: the live database
	// with its sidecar files, and everything under backups/.
	writeFile(t, filepath.Join(root, "service.db"), "live-db-data")
	writeFile(t, filepath.Join(root, "service.db-wal"), "wal-data")
	writeFile(t, filepath.Join(root, "service.db-shm"), "shm-data")
	writeFile(t, filepath.Join(backups, "old-backup.db"), "backup-data")

	return root
}
// writeFile stores content at path with mode 0600, replacing any existing
// file, and fails the test immediately if the write does not succeed.
func writeFile(t *testing.T, path, content string) {
	t.Helper()

	writeErr := os.WriteFile(path, []byte(content), 0600)
	if writeErr == nil {
		return
	}
	t.Fatalf("write %s: %v", path, writeErr)
}
// openTestDB opens a database named real.db inside a fresh temporary
// directory via db.Open, seeds it with a single-row table, and returns the
// connection together with the database file path. The connection is
// closed automatically when the test finishes.
func openTestDB(t *testing.T) (*sql.DB, string) {
	t.Helper()

	dbPath := filepath.Join(t.TempDir(), "real.db")
	conn, openErr := db.Open(dbPath)
	if openErr != nil {
		t.Fatalf("db.Open: %v", openErr)
	}
	t.Cleanup(func() { _ = conn.Close() })

	// Seed a known row so a restored snapshot can be checked for integrity.
	_, execErr := conn.Exec("CREATE TABLE test (id INTEGER PRIMARY KEY, val TEXT)")
	if execErr != nil {
		t.Fatalf("create table: %v", execErr)
	}
	_, execErr = conn.Exec("INSERT INTO test (val) VALUES ('snapshot-data')")
	if execErr != nil {
		t.Fatalf("insert: %v", execErr)
	}

	return conn, dbPath
}
// TestSnapshotAndRestore runs a full round trip: it snapshots a populated
// service directory together with a live database, restores the archive
// into an empty directory, and checks that
//   - ordinary files (config, certs) are restored,
//   - the placeholder live database files and backups/ are absent, and
//   - the injected database copy opens and contains the seeded row.
func TestSnapshotAndRestore(t *testing.T) {
	database, dbPath := openTestDB(t)
	serviceDir := setupServiceDir(t, database)

	// Snapshot.
	var buf bytes.Buffer
	err := Snapshot(SnapshotOptions{
		ServiceDir: serviceDir,
		DBPath:     dbPath,
		DB:         database,
	}, &buf)
	if err != nil {
		t.Fatalf("Snapshot: %v", err)
	}
	if buf.Len() == 0 {
		t.Fatal("archive is empty")
	}

	// Restore to a new directory.
	restoreDir := t.TempDir()
	if err := Restore(&buf, restoreDir); err != nil {
		t.Fatalf("Restore: %v", err)
	}

	// Verify config file was restored.
	content, err := os.ReadFile(filepath.Join(restoreDir, "service.toml")) //nolint:gosec // test code
	if err != nil {
		t.Fatalf("read config: %v", err)
	}
	if string(content) != "listen_addr = \":8443\"\n" {
		t.Fatalf("config content = %q", string(content))
	}

	// Verify certs were restored.
	if _, err := os.Stat(filepath.Join(restoreDir, "certs", "cert.pem")); err != nil {
		t.Fatalf("cert.pem missing: %v", err)
	}
	if _, err := os.Stat(filepath.Join(restoreDir, "certs", "key.pem")); err != nil {
		t.Fatalf("key.pem missing: %v", err)
	}

	// Verify live DB files were excluded. The injected copy is stored under
	// the base name of dbPath (real.db), so the service directory's own
	// service.db must not appear in the restored tree either.
	if _, err := os.Stat(filepath.Join(restoreDir, "service.db")); !os.IsNotExist(err) {
		t.Fatal("service.db should not be in archive")
	}
	if _, err := os.Stat(filepath.Join(restoreDir, "service.db-wal")); !os.IsNotExist(err) {
		t.Fatal("service.db-wal should not be in archive")
	}
	if _, err := os.Stat(filepath.Join(restoreDir, "service.db-shm")); !os.IsNotExist(err) {
		t.Fatal("service.db-shm should not be in archive")
	}

	// Verify backups were excluded.
	if _, err := os.Stat(filepath.Join(restoreDir, "backups")); !os.IsNotExist(err) {
		t.Fatal("backups/ should not be in archive")
	}

	// Verify the VACUUM INTO snapshot was injected as the DB.
	dbFile := filepath.Join(restoreDir, filepath.Base(dbPath))
	restoredDB, err := sql.Open("sqlite", dbFile)
	if err != nil {
		t.Fatalf("open restored db: %v", err)
	}
	defer func() { _ = restoredDB.Close() }()

	var val string
	if err := restoredDB.QueryRow("SELECT val FROM test").Scan(&val); err != nil {
		t.Fatalf("query restored db: %v", err)
	}
	if val != "snapshot-data" {
		t.Fatalf("val = %q, want %q", val, "snapshot-data")
	}
}
// TestSnapshotWithoutDB checks that a snapshot taken with no database
// configured still round-trips ordinary files unchanged.
func TestSnapshotWithoutDB(t *testing.T) {
	srcDir := t.TempDir()
	writeFile(t, filepath.Join(srcDir, "config.toml"), "test = true\n")

	archive := new(bytes.Buffer)
	if snapErr := Snapshot(SnapshotOptions{ServiceDir: srcDir}, archive); snapErr != nil {
		t.Fatalf("Snapshot: %v", snapErr)
	}

	// Restore and verify.
	outDir := t.TempDir()
	restoreErr := Restore(archive, outDir)
	if restoreErr != nil {
		t.Fatalf("Restore: %v", restoreErr)
	}

	restoredPath := filepath.Join(outDir, "config.toml")
	got, readErr := os.ReadFile(restoredPath) //nolint:gosec // test code
	if readErr != nil {
		t.Fatalf("read: %v", readErr)
	}
	if text := string(got); text != "test = true\n" {
		t.Fatalf("content = %q", text)
	}
}
// TestSnapshotExcludesLiveDB checks that the default exclude patterns keep
// *.db and *.db-wal files out of the archive while other files in the same
// directory are still included.
func TestSnapshotExcludesLiveDB(t *testing.T) {
	srcDir := t.TempDir()
	writeFile(t, filepath.Join(srcDir, "service.db"), "live")
	writeFile(t, filepath.Join(srcDir, "service.db-wal"), "wal")
	writeFile(t, filepath.Join(srcDir, "other.txt"), "keep")

	archive := new(bytes.Buffer)
	if snapErr := Snapshot(SnapshotOptions{ServiceDir: srcDir}, archive); snapErr != nil {
		t.Fatalf("Snapshot: %v", snapErr)
	}

	outDir := t.TempDir()
	restoreErr := Restore(archive, outDir)
	if restoreErr != nil {
		t.Fatalf("Restore: %v", restoreErr)
	}

	// other.txt should be present.
	_, keptErr := os.Stat(filepath.Join(outDir, "other.txt"))
	if keptErr != nil {
		t.Fatalf("other.txt missing: %v", keptErr)
	}

	// DB files should not.
	_, dbErr := os.Stat(filepath.Join(outDir, "service.db"))
	if !os.IsNotExist(dbErr) {
		t.Fatal("service.db should be excluded")
	}
	_, walErr := os.Stat(filepath.Join(outDir, "service.db-wal"))
	if !os.IsNotExist(walErr) {
		t.Fatal("service.db-wal should be excluded")
	}
}
// TestSnapshotCustomExcludes checks that caller-supplied ExcludePatterns
// are honoured: a file matching "*.log" is left out of the archive while a
// non-matching file is kept.
func TestSnapshotCustomExcludes(t *testing.T) {
	srcDir := t.TempDir()
	writeFile(t, filepath.Join(srcDir, "keep.txt"), "keep")
	writeFile(t, filepath.Join(srcDir, "skip.log"), "skip")

	options := SnapshotOptions{
		ServiceDir:      srcDir,
		ExcludePatterns: []string{"*.log"},
	}
	archive := new(bytes.Buffer)
	if snapErr := Snapshot(options, archive); snapErr != nil {
		t.Fatalf("Snapshot: %v", snapErr)
	}

	outDir := t.TempDir()
	restoreErr := Restore(archive, outDir)
	if restoreErr != nil {
		t.Fatalf("Restore: %v", restoreErr)
	}

	_, keptErr := os.Stat(filepath.Join(outDir, "keep.txt"))
	if keptErr != nil {
		t.Fatal("keep.txt should be present")
	}
	_, skippedErr := os.Stat(filepath.Join(outDir, "skip.log"))
	if !os.IsNotExist(skippedErr) {
		t.Fatal("skip.log should be excluded")
	}
}
// TestRestoreCreatesDestDir checks that Restore creates a destination
// directory that does not exist yet, including missing parents, before
// extracting into it.
func TestRestoreCreatesDestDir(t *testing.T) {
	srcDir := t.TempDir()
	writeFile(t, filepath.Join(srcDir, "file.txt"), "data")

	archive := new(bytes.Buffer)
	snapErr := Snapshot(SnapshotOptions{ServiceDir: srcDir}, archive)
	if snapErr != nil {
		t.Fatalf("Snapshot: %v", snapErr)
	}

	// None of new/nested/dir exists below the fresh temp directory.
	target := filepath.Join(t.TempDir(), "new", "nested", "dir")
	restoreErr := Restore(archive, target)
	if restoreErr != nil {
		t.Fatalf("Restore: %v", restoreErr)
	}

	got, readErr := os.ReadFile(filepath.Join(target, "file.txt")) //nolint:gosec // test code
	if readErr != nil {
		t.Fatalf("read: %v", readErr)
	}
	if text := string(got); text != "data" {
		t.Fatalf("content = %q", text)
	}
}