Add archive package: tar.zst service directory snapshots

- Snapshot: VACUUM INTO for consistent db copy, excludes live db
  files and backups/, injects db snapshot, custom exclude patterns,
  streaming output via io.Writer
- Restore: extract tar.zst with path traversal protection
- zstd via github.com/klauspost/compress/zstd
- 5 tests: full roundtrip with db integrity verification,
  without db, exclude patterns, dest dir creation
- Update PROGRESS.md: all 9 packages complete, 87 total tests

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-25 16:37:18 -07:00
parent 20dc7ae0d6
commit 9012b889d6
5 changed files with 550 additions and 43 deletions

235
archive/archive.go Normal file
View File

@@ -0,0 +1,235 @@
// Package archive provides service directory snapshot and restore using
// tar.zst (tar compressed with Zstandard), with SQLite-aware handling.
//
// Snapshots exclude live database files (*.db, *.db-wal, *.db-shm) and
// any directory named backups. A consistent database copy is created via
// VACUUM INTO and injected at the archive root under the base name of
// the live database file.
//
// The result is a clean, minimal archive that extracts directly into a
// working service directory on the destination.
package archive
import (
"archive/tar"
"database/sql"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
"git.wntrmute.dev/kyle/mcdsl/db"
)
// defaultExcludePatterns lists the patterns every snapshot excludes,
// regardless of caller options: the live SQLite files (main database,
// write-ahead log, shared-memory file) and the backups directory.
var defaultExcludePatterns = []string{"*.db", "*.db-wal", "*.db-shm", "backups"}
// SnapshotOptions configures a snapshot operation. It is passed by value
// to Snapshot.
type SnapshotOptions struct {
	// ServiceDir is the root directory to snapshot (e.g., /srv/myservice).
	// Archive entry names are relative to this directory.
	ServiceDir string
	// DBPath is the path to the live database file. The filename (without
	// directory) is used as the name in the archive. If empty, no database
	// is included.
	DBPath string
	// DB is the live database connection. Used for VACUUM INTO to create
	// a consistent snapshot. Required if DBPath is set: when DB is nil,
	// Snapshot skips the database copy without reporting an error.
	DB *sql.DB
	// ExcludePatterns are additional glob patterns to exclude (beyond the
	// defaults: *.db, *.db-wal, *.db-shm, backups/). Each pattern is
	// compared against the base name of an entry, not its full path.
	ExcludePatterns []string
}
// Snapshot creates a tar.zst archive of the service directory, writing
// it to w. Live database files are excluded and a consistent VACUUM INTO
// copy is injected in their place.
//
// The database copy is only made when both opts.DBPath and opts.DB are
// set; it is stored at the archive root under filepath.Base(opts.DBPath).
// Entries whose base name matches a default or caller-supplied exclude
// pattern are skipped, and excluded directories are not descended into.
//
// Snapshot returns an error if a caller-supplied exclude pattern is
// malformed, if the database copy cannot be created, if walking or
// reading the service directory fails, or if writing to w fails. On
// error, w may already have received a partial, unusable archive.
func Snapshot(opts SnapshotOptions, w io.Writer) error {
	// Build the exclude set in a fresh slice so the package-level defaults
	// can never be aliased or modified through append.
	excludes := make([]string, 0, len(defaultExcludePatterns)+len(opts.ExcludePatterns))
	excludes = append(excludes, defaultExcludePatterns...)
	for _, pattern := range opts.ExcludePatterns {
		// shouldExclude discards match errors, so a malformed pattern would
		// silently exclude nothing. Reject it here instead.
		if _, err := filepath.Match(pattern, ""); err != nil {
			return fmt.Errorf("archive: exclude pattern %q: %w", pattern, err)
		}
		excludes = append(excludes, pattern)
	}

	// Create the VACUUM INTO copy if a database is specified.
	var dbSnapshotPath string
	if opts.DBPath != "" && opts.DB != nil {
		// VACUUM INTO creates the destination file itself, so hand it a path
		// inside a private temporary directory rather than a deleted temp
		// file whose name another process could re-create in the meantime.
		tmpDir, err := os.MkdirTemp("", "mcdsl-snapshot-*")
		if err != nil {
			return fmt.Errorf("archive: create temp dir: %w", err)
		}
		// Registered before the copy so a partially written snapshot is
		// cleaned up on failure as well.
		defer func() { _ = os.RemoveAll(tmpDir) }()

		tmpPath := filepath.Join(tmpDir, "snapshot.db")
		if err := db.Snapshot(opts.DB, tmpPath); err != nil {
			return fmt.Errorf("archive: vacuum into: %w", err)
		}
		dbSnapshotPath = tmpPath
	}

	// Create zstd writer → tar writer.
	zw, err := zstd.NewWriter(w)
	if err != nil {
		return fmt.Errorf("archive: create zstd writer: %w", err)
	}
	tw := tar.NewWriter(zw)

	// Walk the service directory.
	serviceDir := filepath.Clean(opts.ServiceDir)
	err = filepath.Walk(serviceDir, func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		relPath, relErr := filepath.Rel(serviceDir, path)
		if relErr != nil {
			return relErr
		}

		// Skip the root directory itself.
		if relPath == "." {
			return nil
		}

		// Check excludes.
		if shouldExclude(relPath, info, excludes) {
			if info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		return addToTar(tw, path, relPath, info)
	})
	if err != nil {
		return fmt.Errorf("archive: walk %s: %w", serviceDir, err)
	}

	// Inject the database snapshot.
	if dbSnapshotPath != "" {
		dbName := filepath.Base(opts.DBPath)
		snapInfo, statErr := os.Stat(dbSnapshotPath)
		if statErr != nil {
			return fmt.Errorf("archive: stat db snapshot: %w", statErr)
		}
		if err := addToTar(tw, dbSnapshotPath, dbName, snapInfo); err != nil {
			return fmt.Errorf("archive: add db snapshot: %w", err)
		}
	}

	// Close order matters: the tar trailer must be flushed into the zstd
	// stream before the zstd frame is finished.
	if err := tw.Close(); err != nil {
		return fmt.Errorf("archive: close tar: %w", err)
	}
	if err := zw.Close(); err != nil {
		return fmt.Errorf("archive: close zstd: %w", err)
	}
	return nil
}
// Restore extracts a tar.zst archive from r into destDir. Creates the
// directory if it does not exist. Overwrites existing files.
//
// Only directory and regular-file entries are extracted; every other
// entry type is ignored. Permission bits are taken from the archive
// header (masked to 0o777) and passed to the create/mkdir call, so they
// apply to newly created files and directories only.
//
// Restore returns an error if the stream cannot be decoded, if an entry
// name would resolve outside destDir, or if any filesystem operation
// fails. Entries extracted before the failure are left in place.
func Restore(r io.Reader, destDir string) error {
	if err := os.MkdirAll(destDir, 0700); err != nil {
		return fmt.Errorf("archive: create dest dir: %w", err)
	}

	zr, err := zstd.NewReader(r)
	if err != nil {
		return fmt.Errorf("archive: create zstd reader: %w", err)
	}
	defer zr.Close()

	root := filepath.Clean(destDir)
	parentPrefix := ".." + string(os.PathSeparator)

	tr := tar.NewReader(zr)
	for {
		header, readErr := tr.Next()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return fmt.Errorf("archive: read tar entry: %w", readErr)
		}

		target := filepath.Join(root, header.Name) //nolint:gosec // archive is from trusted MCP agent, not user upload

		// Prevent path traversal. Comparing the relative path (instead of a
		// string prefix of destDir) also works when destDir is "." or the
		// filesystem root, where a prefix check rejects every entry.
		rel, relErr := filepath.Rel(root, target)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("archive: path traversal in %q", header.Name)
		}

		mode := os.FileMode(header.Mode & 0o777) //nolint:gosec // mode from trusted archive

		switch header.Typeflag {
		case tar.TypeDir:
			if mkErr := os.MkdirAll(target, mode); mkErr != nil {
				return fmt.Errorf("archive: mkdir %s: %w", target, mkErr)
			}
		case tar.TypeReg:
			// Archives are not required to list a directory before its files.
			if mkErr := os.MkdirAll(filepath.Dir(target), 0700); mkErr != nil {
				return fmt.Errorf("archive: mkdir parent %s: %w", target, mkErr)
			}
			if err := restoreRegularFile(target, mode, tr); err != nil {
				return err
			}
		}
	}

	return nil
}

// restoreRegularFile writes the current tar entry's contents from src to
// target, creating or truncating the file. The Close error is reported
// because a failed close can mean the data never reached the disk.
func restoreRegularFile(target string, mode os.FileMode, src io.Reader) error {
	f, createErr := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, mode) //nolint:gosec // path validated by caller, mode from trusted archive
	if createErr != nil {
		return fmt.Errorf("archive: create %s: %w", target, createErr)
	}
	if _, copyErr := io.Copy(f, src); copyErr != nil { //nolint:gosec // bounded by tar entry size
		_ = f.Close() // the write error is the one worth reporting
		return fmt.Errorf("archive: write %s: %w", target, copyErr)
	}
	if closeErr := f.Close(); closeErr != nil {
		return fmt.Errorf("archive: close %s: %w", target, closeErr)
	}
	return nil
}
// shouldExclude reports whether the entry at relPath should be left out
// of the archive. Only the base name of relPath is examined. An entry is
// excluded when it is a directory whose name equals a pattern verbatim,
// or when its name matches a pattern as a filepath.Match glob. Patterns
// that fail to parse never match as globs.
func shouldExclude(relPath string, info os.FileInfo, patterns []string) bool {
	base := filepath.Base(relPath)
	for idx := range patterns {
		pat := patterns[idx]
		// Literal directory-name comparison (e.g., "backups").
		exactDir := info.IsDir() && pat == base
		// Glob comparison (e.g., "*.db"); a pattern error counts as no match.
		globHit, _ := filepath.Match(pat, base)
		if exactDir || globHit {
			return true
		}
	}
	return false
}
// addToTar writes one filesystem entry to tw under the name archiveName.
//
// srcPath is the on-disk location of the entry and info is its FileInfo
// (as returned by Lstat or a directory walk). The archive name is
// converted to forward slashes, the separator used inside tar archives.
//
// Handling by entry type:
//   - regular files: header plus exactly info.Size() bytes of content;
//   - directories, FIFOs, devices: header only;
//   - symbolic links: header carrying the link target, never followed;
//   - sockets: skipped, because the tar format cannot represent them.
//
// It returns any error from reading the source or writing to tw, and
// io.ErrUnexpectedEOF if a regular file turns out to be shorter than
// info.Size().
func addToTar(tw *tar.Writer, srcPath, archiveName string, info os.FileInfo) error {
	mode := info.Mode()

	// tar.FileInfoHeader rejects sockets; skip them so one socket file
	// does not abort the whole archive.
	if mode&os.ModeSocket != 0 {
		return nil
	}

	// A symlink header needs the link target, which FileInfo does not carry.
	var linkTarget string
	if mode&os.ModeSymlink != 0 {
		resolved, err := os.Readlink(srcPath)
		if err != nil {
			return err
		}
		linkTarget = resolved
	}

	header, err := tar.FileInfoHeader(info, linkTarget)
	if err != nil {
		return err
	}
	header.Name = filepath.ToSlash(archiveName)

	if err := tw.WriteHeader(header); err != nil {
		return err
	}

	// Only regular files have a payload. Opening anything else would
	// follow a symlink or block on a FIFO.
	if !mode.IsRegular() {
		return nil
	}

	f, err := os.Open(srcPath) //nolint:gosec // path from controlled walk
	if err != nil {
		return err
	}
	defer func() { _ = f.Close() }()

	// Copy exactly the size promised in the header: a file that grew after
	// it was stat'ed would otherwise overflow the tar entry.
	if _, err := io.CopyN(tw, f, header.Size); err != nil {
		if err == io.EOF {
			// The file shrank after it was stat'ed.
			return io.ErrUnexpectedEOF
		}
		return err
	}
	return nil
}