Add content-addressable blob store package.

Implement the store package with SHA-256 keyed blob storage using a
two-level directory layout (blobs/XX/YY/hash). Supports atomic writes
via temp file + rename, deduplication, and hash validation. Includes
comprehensive tests for round-trip, deduplication, existence checks,
deletion, and subdirectory creation.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-23 21:25:06 -07:00
parent db09939d38
commit ab57f6d01d
3 changed files with 379 additions and 0 deletions

3
go.mod Normal file
View File

@@ -0,0 +1,3 @@
module github.com/kisom/sgard
go 1.25.7

138
store/store.go Normal file
View File

@@ -0,0 +1,138 @@
// Package store implements a content-addressable blob store keyed by SHA-256 hash.
//
// Blobs are stored in a two-level directory structure under a blobs/ subdirectory:
//
// blobs/<first 2 hex chars>/<next 2 hex chars>/<full 64-char hash>
//
// The store only handles raw bytes. It does not know about files, paths, or
// permissions — that is the garden package's job.
package store
import (
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"os"
"path/filepath"
)
// validHash reports whether s has the shape of a hex-encoded SHA-256 digest:
// exactly 64 bytes long, every character a decimal digit or a lowercase
// letter in the range a-f. Uppercase hex digits are rejected.
func validHash(s string) bool {
	if len(s) != 64 {
		return false
	}
	for _, r := range s {
		switch {
		case '0' <= r && r <= '9':
			// decimal digit: acceptable
		case 'a' <= r && r <= 'f':
			// lowercase hex letter: acceptable
		default:
			return false
		}
	}
	return true
}
// Store is a content-addressable blob store rooted at a directory on disk.
// Use New to obtain one; New also makes sure the blobs/ subdirectory exists.
type Store struct {
	// root is the base directory of the store. Blob files live beneath
	// root/blobs in the two-level layout produced by blobPath.
	root string
}
// New returns a Store whose blobs live under root/blobs. The blobs directory,
// together with any missing parent directories, is created with mode 0o755 if
// it is not already present. An error is returned when the directory cannot
// be created.
func New(root string) (*Store, error) {
	err := os.MkdirAll(filepath.Join(root, "blobs"), 0o755)
	if err != nil {
		return nil, fmt.Errorf("store: create blobs directory: %w", err)
	}

	st := &Store{root: root}
	return st, nil
}
// Write computes the SHA-256 hash of data, stores the blob on disk, and
// returns the hex-encoded hash.
//
// If a file already exists at the blob's path the call is a no-op
// (deduplication); the existing contents are not re-verified. Otherwise the
// write is atomic: data is written to a temporary file in the destination
// directory, flushed to stable storage with Sync, and then renamed into
// place.
//
// On failure Write returns an empty hash and a wrapped error, and removes the
// temporary file on a best-effort basis.
func (s *Store) Write(data []byte) (string, error) {
	sum := sha256.Sum256(data)
	hash := hex.EncodeToString(sum[:])
	p := s.blobPath(hash)

	// Deduplication: if the blob already exists, nothing to do.
	if _, err := os.Stat(p); err == nil {
		return hash, nil
	}

	// Ensure the two-level parent directory exists.
	dir := filepath.Dir(p)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return "", fmt.Errorf("store: create blob directory: %w", err)
	}

	// The temp file lives in the destination directory so that the final
	// rename never has to cross a filesystem boundary.
	tmp, err := os.CreateTemp(dir, ".blob-*")
	if err != nil {
		return "", fmt.Errorf("store: create temp file: %w", err)
	}
	tmpName := tmp.Name()

	// discard throws the temp file away after a failure. Errors are ignored
	// deliberately: the caller is already getting the primary failure, and
	// there is nothing more useful to do with a secondary one.
	discard := func(closeFirst bool) {
		if closeFirst {
			_ = tmp.Close()
		}
		_ = os.Remove(tmpName)
	}

	if _, err := tmp.Write(data); err != nil {
		discard(true)
		return "", fmt.Errorf("store: write temp file: %w", err)
	}

	// Flush the contents to stable storage before the rename. The
	// deduplication check above trusts any file already present at p, so a
	// blob that reached its final name but lost its data in a crash would
	// never be rewritten by a later Write.
	if err := tmp.Sync(); err != nil {
		discard(true)
		return "", fmt.Errorf("store: sync temp file: %w", err)
	}

	if err := tmp.Close(); err != nil {
		discard(false)
		return "", fmt.Errorf("store: close temp file: %w", err)
	}

	if err := os.Rename(tmpName, p); err != nil {
		discard(false)
		return "", fmt.Errorf("store: rename blob into place: %w", err)
	}

	return hash, nil
}
// Read loads and returns the contents of the blob identified by hash. An
// error is returned when hash is not a well-formed 64-character lowercase hex
// string, or when the blob file cannot be read (for example because it does
// not exist); in the latter case the underlying error is wrapped.
func (s *Store) Read(hash string) ([]byte, error) {
	if ok := validHash(hash); !ok {
		return nil, fmt.Errorf("store: invalid hash %q", hash)
	}

	contents, readErr := os.ReadFile(s.blobPath(hash))
	if readErr == nil {
		return contents, nil
	}
	return nil, fmt.Errorf("store: read blob %s: %w", hash, readErr)
}
// Exists reports whether the store holds a blob for hash. The answer is true
// only when hash is well-formed and os.Stat succeeds on the blob's path; a
// malformed hash or any Stat error yields false.
func (s *Store) Exists(hash string) bool {
	if validHash(hash) {
		if _, statErr := os.Stat(s.blobPath(hash)); statErr == nil {
			return true
		}
	}
	return false
}
// Delete removes the blob file identified by hash. It fails when hash is
// malformed, when no such blob exists (the returned error wraps the
// underlying not-exist error, so errors.Is(err, os.ErrNotExist) holds), or
// when the removal itself fails for any other reason.
func (s *Store) Delete(hash string) error {
	if !validHash(hash) {
		return fmt.Errorf("store: invalid hash %q", hash)
	}

	rmErr := os.Remove(s.blobPath(hash))
	switch {
	case rmErr == nil:
		return nil
	case errors.Is(rmErr, os.ErrNotExist):
		return fmt.Errorf("store: blob %s does not exist: %w", hash, rmErr)
	default:
		return fmt.Errorf("store: delete blob %s: %w", hash, rmErr)
	}
}
// blobPath maps a hash to the on-disk location of its blob:
//
//	<root>/blobs/<hash[0:2]>/<hash[2:4]>/<hash>
//
// The hash is sliced without a length check, so it must be at least four
// bytes long; exported methods validate it with validHash or derive it from
// SHA-256 before calling here.
func (s *Store) blobPath(hash string) string {
	first, second := hash[:2], hash[2:4]
	return filepath.Join(s.root, "blobs", first, second, hash)
}

238
store/store_test.go Normal file
View File

@@ -0,0 +1,238 @@
package store
import (
"crypto/sha256"
"encoding/hex"
"os"
"path/filepath"
"testing"
)
// TestWriteAndReadRoundTrip checks that reading a blob back by the hash
// returned from Write yields the bytes that were written.
func TestWriteAndReadRoundTrip(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	want := []byte("hello, sgard")
	key, err := st.Write(want)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	have, err := st.Read(key)
	if err != nil {
		t.Fatalf("Read: %v", err)
	}
	if string(have) != string(want) {
		t.Errorf("Read returned %q, want %q", have, want)
	}
}
// TestHashCorrectness checks that the hash returned by Write is the
// hex-encoded SHA-256 digest of the input, computed independently here.
func TestHashCorrectness(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	payload := []byte("known test data")
	digest := sha256.Sum256(payload)
	expected := hex.EncodeToString(digest[:])

	actual, err := st.Write(payload)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}
	if actual != expected {
		t.Errorf("Write returned hash %q, want %q", actual, expected)
	}
}
// TestDeduplication writes the same payload twice and checks that both calls
// return the same hash and that the blob's leaf directory ends up holding
// exactly one regular entry (no duplicate blob, no leftover temp file).
func TestDeduplication(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	payload := []byte("duplicate me")
	first, err := st.Write(payload)
	if err != nil {
		t.Fatalf("first Write: %v", err)
	}
	second, err := st.Write(payload)
	if err != nil {
		t.Fatalf("second Write: %v", err)
	}
	if first != second {
		t.Errorf("hashes differ: %q vs %q", first, second)
	}

	// The blob must be a file at the expected path.
	blob := st.blobPath(first)
	fi, err := os.Stat(blob)
	if err != nil {
		t.Fatalf("Stat blob: %v", err)
	}
	if fi.IsDir() {
		t.Fatal("blob path is a directory, not a file")
	}

	// Count the non-directory entries in the leaf directory; only the blob
	// itself should be there.
	leaf := filepath.Dir(blob)
	entries, err := os.ReadDir(leaf)
	if err != nil {
		t.Fatalf("ReadDir: %v", err)
	}
	files := 0
	for i := range entries {
		if entries[i].IsDir() {
			continue
		}
		files++
	}
	if files != 1 {
		t.Errorf("expected 1 blob file in %s, found %d", leaf, files)
	}
}
// TestExists checks that Exists is true for a blob that was written and
// false for a well-formed hash that was never stored.
func TestExists(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	key, err := st.Write([]byte("existence check"))
	if err != nil {
		t.Fatalf("Write: %v", err)
	}
	if present := st.Exists(key); !present {
		t.Error("Exists returned false for written blob")
	}

	const absent = "0000000000000000000000000000000000000000000000000000000000000000"
	if present := st.Exists(absent); present {
		t.Error("Exists returned true for nonexistent hash")
	}
}
// TestExistsInvalidHash checks that Exists reports false for a string that
// is not a well-formed hash.
func TestExistsInvalidHash(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	found := st.Exists("not-a-valid-hash")
	if found {
		t.Error("Exists returned true for invalid hash")
	}
}
// TestReadNonexistent checks that Read fails for a well-formed hash that has
// no blob in the store.
func TestReadNonexistent(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	const absent = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	if _, readErr := st.Read(absent); readErr == nil {
		t.Error("Read of nonexistent blob should return an error")
	}
}
// TestReadInvalidHash checks that Read rejects a string that is not a
// well-formed hash.
func TestReadInvalidHash(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	if _, readErr := st.Read("bad"); readErr == nil {
		t.Error("Read with invalid hash should return an error")
	}
}
// TestDelete writes a blob, deletes it, and checks that it is afterwards
// neither reported by Exists nor readable.
func TestDelete(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	key, err := st.Write([]byte("delete me"))
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	err = st.Delete(key)
	if err != nil {
		t.Fatalf("Delete: %v", err)
	}

	if st.Exists(key) {
		t.Error("Exists returned true after Delete")
	}
	_, readErr := st.Read(key)
	if readErr == nil {
		t.Error("Read succeeded after Delete, expected error")
	}
}
// TestDeleteNonexistent checks that Delete fails for a well-formed hash that
// has no blob in the store.
func TestDeleteNonexistent(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	const absent = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
	delErr := st.Delete(absent)
	if delErr == nil {
		t.Error("Delete of nonexistent blob should return an error")
	}
}
// TestDeleteInvalidHash checks that Delete rejects a string that is not a
// well-formed hash.
func TestDeleteInvalidHash(t *testing.T) {
	st, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	delErr := st.Delete("xyz")
	if delErr == nil {
		t.Error("Delete with invalid hash should return an error")
	}
}
// TestWriteCreatesSubdirectories checks that Write lays the blob out as
// blobs/<hash[0:2]>/<hash[2:4]>/<hash> beneath the store root, creating both
// intermediate directories.
func TestWriteCreatesSubdirectories(t *testing.T) {
	root := t.TempDir()
	st, err := New(root)
	if err != nil {
		t.Fatalf("New: %v", err)
	}

	key, err := st.Write([]byte("subdir test"))
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	// Both levels of the fan-out must exist and be directories.
	level1 := filepath.Join(root, "blobs", key[:2])
	level2 := filepath.Join(level1, key[2:4])
	for _, dir := range []string{level1, level2} {
		info, statErr := os.Stat(dir)
		if statErr != nil || !info.IsDir() {
			t.Errorf("expected directory at %s", dir)
		}
	}

	// The blob itself sits in the second-level directory under its full hash.
	blobFile := filepath.Join(level2, key)
	if _, statErr := os.Stat(blobFile); statErr != nil {
		t.Errorf("expected blob file at %s: %v", blobFile, statErr)
	}
}