Add content-addressable blob store package.
Implement the store package with SHA-256 keyed blob storage using a two-level directory layout (blobs/XX/YY/hash). Supports atomic writes via temp file + rename, deduplication, and hash validation. Includes comprehensive tests for round-trip, deduplication, existence checks, deletion, and subdirectory creation. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
138
store/store.go
Normal file
138
store/store.go
Normal file
@@ -0,0 +1,138 @@
|
||||
// Package store implements a content-addressable blob store keyed by SHA-256 hash.
|
||||
//
|
||||
// Blobs are stored in a two-level directory structure under a blobs/ subdirectory:
|
||||
//
|
||||
// blobs/<first 2 hex chars>/<next 2 hex chars>/<full 64-char hash>
|
||||
//
|
||||
// The store only handles raw bytes. It does not know about files, paths, or
|
||||
// permissions — that is the garden package's job.
|
||||
package store
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// validHash reports whether s has the shape of a hex-encoded SHA-256 digest:
// exactly 64 bytes, each one a digit 0-9 or a lowercase letter a-f. Uppercase
// hex digits are rejected.
func validHash(s string) bool {
	const hexLen = 64
	if len(s) != hexLen {
		return false
	}
	for i := 0; i < hexLen; i++ {
		switch b := s[i]; {
		case '0' <= b && b <= '9':
			// decimal digit: accepted
		case 'a' <= b && b <= 'f':
			// lowercase hex letter: accepted
		default:
			return false
		}
	}
	return true
}
|
||||
|
||||
// Store is a content-addressable blob store backed by a directory on disk.
// Blob files are kept beneath the blobs/ subdirectory of root.
type Store struct {
	// root is the base directory of the store; blob paths are built from it.
	root string
}
|
||||
|
||||
// New creates a Store rooted at root. It ensures the blobs/ subdirectory
|
||||
// exists, creating it (and any parents) if needed.
|
||||
func New(root string) (*Store, error) {
|
||||
blobsDir := filepath.Join(root, "blobs")
|
||||
if err := os.MkdirAll(blobsDir, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("store: create blobs directory: %w", err)
|
||||
}
|
||||
return &Store{root: root}, nil
|
||||
}
|
||||
|
||||
// Write computes the SHA-256 hash of data, writes the blob to disk, and
|
||||
// returns the hex-encoded hash. If a blob with the same hash already exists,
|
||||
// this is a no-op (deduplication). Writes are atomic: data is written to a
|
||||
// temporary file first, then renamed into place.
|
||||
func (s *Store) Write(data []byte) (string, error) {
|
||||
sum := sha256.Sum256(data)
|
||||
hash := hex.EncodeToString(sum[:])
|
||||
|
||||
p := s.blobPath(hash)
|
||||
|
||||
// Deduplication: if the blob already exists, nothing to do.
|
||||
if _, err := os.Stat(p); err == nil {
|
||||
return hash, nil
|
||||
}
|
||||
|
||||
// Ensure the parent directory exists.
|
||||
dir := filepath.Dir(p)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return "", fmt.Errorf("store: create blob directory: %w", err)
|
||||
}
|
||||
|
||||
// Write to a temp file in the same directory, then rename for atomicity.
|
||||
tmp, err := os.CreateTemp(dir, ".blob-*")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("store: create temp file: %w", err)
|
||||
}
|
||||
tmpName := tmp.Name()
|
||||
|
||||
if _, err := tmp.Write(data); err != nil {
|
||||
tmp.Close()
|
||||
os.Remove(tmpName)
|
||||
return "", fmt.Errorf("store: write temp file: %w", err)
|
||||
}
|
||||
if err := tmp.Close(); err != nil {
|
||||
os.Remove(tmpName)
|
||||
return "", fmt.Errorf("store: close temp file: %w", err)
|
||||
}
|
||||
|
||||
if err := os.Rename(tmpName, p); err != nil {
|
||||
os.Remove(tmpName)
|
||||
return "", fmt.Errorf("store: rename blob into place: %w", err)
|
||||
}
|
||||
|
||||
return hash, nil
|
||||
}
|
||||
|
||||
// Read returns the blob contents for the given hash. It returns an error if
|
||||
// the hash is malformed or the blob does not exist.
|
||||
func (s *Store) Read(hash string) ([]byte, error) {
|
||||
if !validHash(hash) {
|
||||
return nil, fmt.Errorf("store: invalid hash %q", hash)
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(s.blobPath(hash))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("store: read blob %s: %w", hash, err)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
// Exists reports whether a blob with the given hash is present in the store.
|
||||
// It returns false for malformed hashes.
|
||||
func (s *Store) Exists(hash string) bool {
|
||||
if !validHash(hash) {
|
||||
return false
|
||||
}
|
||||
_, err := os.Stat(s.blobPath(hash))
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// Delete removes the blob file for the given hash. It returns an error if the
|
||||
// hash is malformed or the blob does not exist.
|
||||
func (s *Store) Delete(hash string) error {
|
||||
if !validHash(hash) {
|
||||
return fmt.Errorf("store: invalid hash %q", hash)
|
||||
}
|
||||
|
||||
if err := os.Remove(s.blobPath(hash)); err != nil {
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
return fmt.Errorf("store: blob %s does not exist: %w", hash, err)
|
||||
}
|
||||
return fmt.Errorf("store: delete blob %s: %w", hash, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// blobPath returns the filesystem path for a blob with the given hash.
|
||||
// Layout: blobs/<first 2 hex chars>/<next 2 hex chars>/<full 64-char hash>
|
||||
func (s *Store) blobPath(hash string) string {
|
||||
return filepath.Join(s.root, "blobs", hash[:2], hash[2:4], hash)
|
||||
}
|
||||
238
store/store_test.go
Normal file
238
store/store_test.go
Normal file
@@ -0,0 +1,238 @@
|
||||
package store
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWriteAndReadRoundTrip(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("hello, sgard")
|
||||
hash, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("Write: %v", err)
|
||||
}
|
||||
|
||||
got, err := s.Read(hash)
|
||||
if err != nil {
|
||||
t.Fatalf("Read: %v", err)
|
||||
}
|
||||
|
||||
if string(got) != string(data) {
|
||||
t.Errorf("Read returned %q, want %q", got, data)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHashCorrectness(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("known test data")
|
||||
sum := sha256.Sum256(data)
|
||||
want := hex.EncodeToString(sum[:])
|
||||
|
||||
got, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("Write: %v", err)
|
||||
}
|
||||
|
||||
if got != want {
|
||||
t.Errorf("Write returned hash %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeduplication(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
s, err := New(root)
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("duplicate me")
|
||||
hash1, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("first Write: %v", err)
|
||||
}
|
||||
|
||||
hash2, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("second Write: %v", err)
|
||||
}
|
||||
|
||||
if hash1 != hash2 {
|
||||
t.Errorf("hashes differ: %q vs %q", hash1, hash2)
|
||||
}
|
||||
|
||||
// Verify only one blob file exists on disk at the expected path.
|
||||
p := s.blobPath(hash1)
|
||||
info, err := os.Stat(p)
|
||||
if err != nil {
|
||||
t.Fatalf("Stat blob: %v", err)
|
||||
}
|
||||
if info.IsDir() {
|
||||
t.Fatal("blob path is a directory, not a file")
|
||||
}
|
||||
|
||||
// Count files in the leaf directory — should be exactly one.
|
||||
dir := filepath.Dir(p)
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadDir: %v", err)
|
||||
}
|
||||
|
||||
var count int
|
||||
for _, e := range entries {
|
||||
if !e.IsDir() {
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count != 1 {
|
||||
t.Errorf("expected 1 blob file in %s, found %d", dir, count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExists(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("existence check")
|
||||
hash, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("Write: %v", err)
|
||||
}
|
||||
|
||||
if !s.Exists(hash) {
|
||||
t.Error("Exists returned false for written blob")
|
||||
}
|
||||
|
||||
fake := "0000000000000000000000000000000000000000000000000000000000000000"
|
||||
if s.Exists(fake) {
|
||||
t.Error("Exists returned true for nonexistent hash")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExistsInvalidHash(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
if s.Exists("not-a-valid-hash") {
|
||||
t.Error("Exists returned true for invalid hash")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadNonexistent(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
fake := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
||||
_, err = s.Read(fake)
|
||||
if err == nil {
|
||||
t.Error("Read of nonexistent blob should return an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadInvalidHash(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
_, err = s.Read("bad")
|
||||
if err == nil {
|
||||
t.Error("Read with invalid hash should return an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDelete(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("delete me")
|
||||
hash, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("Write: %v", err)
|
||||
}
|
||||
|
||||
if err := s.Delete(hash); err != nil {
|
||||
t.Fatalf("Delete: %v", err)
|
||||
}
|
||||
|
||||
if s.Exists(hash) {
|
||||
t.Error("Exists returned true after Delete")
|
||||
}
|
||||
|
||||
if _, err := s.Read(hash); err == nil {
|
||||
t.Error("Read succeeded after Delete, expected error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteNonexistent(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
fake := "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
|
||||
if err := s.Delete(fake); err == nil {
|
||||
t.Error("Delete of nonexistent blob should return an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteInvalidHash(t *testing.T) {
|
||||
s, err := New(t.TempDir())
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
if err := s.Delete("xyz"); err == nil {
|
||||
t.Error("Delete with invalid hash should return an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWriteCreatesSubdirectories(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
s, err := New(root)
|
||||
if err != nil {
|
||||
t.Fatalf("New: %v", err)
|
||||
}
|
||||
|
||||
data := []byte("subdir test")
|
||||
hash, err := s.Write(data)
|
||||
if err != nil {
|
||||
t.Fatalf("Write: %v", err)
|
||||
}
|
||||
|
||||
// Verify the two-level directory structure exists.
|
||||
level1 := filepath.Join(root, "blobs", hash[:2])
|
||||
level2 := filepath.Join(level1, hash[2:4])
|
||||
|
||||
if info, err := os.Stat(level1); err != nil || !info.IsDir() {
|
||||
t.Errorf("expected directory at %s", level1)
|
||||
}
|
||||
if info, err := os.Stat(level2); err != nil || !info.IsDir() {
|
||||
t.Errorf("expected directory at %s", level2)
|
||||
}
|
||||
|
||||
// And the blob file itself exists in level2.
|
||||
blobFile := filepath.Join(level2, hash)
|
||||
if _, err := os.Stat(blobFile); err != nil {
|
||||
t.Errorf("expected blob file at %s: %v", blobFile, err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user