// ---- store/store.go (package store) ----
// Imports: crypto/sha256, encoding/hex, errors, fmt, os, path/filepath.

// Package store implements a content-addressable blob store keyed by SHA-256 hash.
//
// Blobs are stored in a two-level directory structure under a blobs/ subdirectory:
//
//	blobs/<hash[0:2]>/<hash[2:4]>/<hash>
//
// The store only handles raw bytes. It does not know about files, paths, or
// permissions — that is the garden package's job.

// validHash reports whether s is a 64-character lowercase hex string, i.e. the
// form produced by hex-encoding a SHA-256 digest. Uppercase digits are rejected.
func validHash(s string) bool {
	if len(s) != 64 {
		return false
	}
	for _, c := range s {
		if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f')) {
			return false
		}
	}
	return true
}

// Store is a content-addressable blob store rooted at a directory on disk.
type Store struct {
	root string // directory that contains the blobs/ subdirectory
}

// New creates a Store rooted at root. It ensures the blobs/ subdirectory
// exists, creating it (and any parents) if needed.
func New(root string) (*Store, error) {
	blobsDir := filepath.Join(root, "blobs")
	if err := os.MkdirAll(blobsDir, 0o755); err != nil {
		return nil, fmt.Errorf("store: create blobs directory: %w", err)
	}
	return &Store{root: root}, nil
}

// Write computes the SHA-256 hash of data, writes the blob to disk, and
// returns the hex-encoded hash. If a blob with the same hash already exists,
// this is a no-op (deduplication).
//
// Writes are atomic: data is written to a temporary file in the destination
// directory, flushed to stable storage, and only then renamed into place, so
// the final path never holds a partially written blob.
func (s *Store) Write(data []byte) (string, error) {
	sum := sha256.Sum256(data)
	hash := hex.EncodeToString(sum[:])

	p := s.blobPath(hash)

	// Deduplication: identical content always maps to the same path, so an
	// existing entry means there is nothing to do.
	if _, err := os.Stat(p); err == nil {
		return hash, nil
	}

	// Ensure the two-level parent directory exists.
	dir := filepath.Dir(p)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return "", fmt.Errorf("store: create blob directory: %w", err)
	}

	// The temp file lives in the same directory as the target so the rename
	// below never crosses a filesystem boundary.
	tmpName, err := writeTempBlob(dir, data)
	if err != nil {
		return "", err
	}

	if err := os.Rename(tmpName, p); err != nil {
		_ = os.Remove(tmpName) // best-effort cleanup; the rename error is what matters
		return "", fmt.Errorf("store: rename blob into place: %w", err)
	}

	return hash, nil
}

// writeTempBlob creates a temporary file in dir, writes data to it, syncs it
// to stable storage, and closes it. It returns the temp file's path. On any
// failure the temp file is removed and a wrapped error is returned.
func writeTempBlob(dir string, data []byte) (string, error) {
	tmp, err := os.CreateTemp(dir, ".blob-*")
	if err != nil {
		return "", fmt.Errorf("store: create temp file: %w", err)
	}
	name := tmp.Name()

	// Close/Remove results on the failure paths are deliberately discarded:
	// they are best-effort cleanup and must not mask the original error.
	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close()
		_ = os.Remove(name)
		return "", fmt.Errorf("store: write temp file: %w", err)
	}
	// Sync before the caller renames: without it a crash shortly after the
	// rename could leave an empty or truncated file at the final path, which
	// the deduplication check would then treat as a valid blob.
	if err := tmp.Sync(); err != nil {
		_ = tmp.Close()
		_ = os.Remove(name)
		return "", fmt.Errorf("store: sync temp file: %w", err)
	}
	if err := tmp.Close(); err != nil {
		_ = os.Remove(name)
		return "", fmt.Errorf("store: close temp file: %w", err)
	}
	return name, nil
}

// Read returns the blob contents for the given hash. It returns an error if
// the hash is malformed or the blob does not exist.
func (s *Store) Read(hash string) ([]byte, error) {
	if !validHash(hash) {
		return nil, fmt.Errorf("store: invalid hash %q", hash)
	}

	data, err := os.ReadFile(s.blobPath(hash))
	if err != nil {
		return nil, fmt.Errorf("store: read blob %s: %w", hash, err)
	}
	return data, nil
}

// Exists reports whether a blob with the given hash is present in the store.
// It returns false for malformed hashes and whenever the blob path cannot be
// stat'ed for any reason.
func (s *Store) Exists(hash string) bool {
	if !validHash(hash) {
		return false
	}
	_, err := os.Stat(s.blobPath(hash))
	return err == nil
}

// Delete removes the blob file for the given hash. It returns an error if the
// hash is malformed or the blob does not exist; the not-exist error wraps the
// underlying error, so errors.Is(err, os.ErrNotExist) holds for it.
func (s *Store) Delete(hash string) error {
	if !validHash(hash) {
		return fmt.Errorf("store: invalid hash %q", hash)
	}

	if err := os.Remove(s.blobPath(hash)); err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return fmt.Errorf("store: blob %s does not exist: %w", hash, err)
		}
		return fmt.Errorf("store: delete blob %s: %w", hash, err)
	}
	return nil
}

// blobPath returns the filesystem path for a blob with the given hash.
// Layout: blobs/<hash[0:2]>/<hash[2:4]>/<hash>
//
// hash must be at least four bytes long or the slicing panics; every caller
// in this file either validates it with validHash or passes a freshly
// computed digest.
func (s *Store) blobPath(hash string) string {
	return filepath.Join(s.root, "blobs", hash[:2], hash[2:4], hash)
}

// ---- store/store_test.go (package store) ----
// Imports: crypto/sha256, encoding/hex, os, path/filepath, testing.

// newTestStore returns a Store rooted in a fresh per-test temporary
// directory, failing the test immediately if the store cannot be created.
func newTestStore(t *testing.T) *Store {
	t.Helper()
	s, err := New(t.TempDir())
	if err != nil {
		t.Fatalf("New: %v", err)
	}
	return s
}

// TestWriteAndReadRoundTrip checks that Read returns exactly what Write stored.
func TestWriteAndReadRoundTrip(t *testing.T) {
	s := newTestStore(t)

	data := []byte("hello, sgard")
	hash, err := s.Write(data)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	got, err := s.Read(hash)
	if err != nil {
		t.Fatalf("Read: %v", err)
	}

	if string(got) != string(data) {
		t.Errorf("Read returned %q, want %q", got, data)
	}
}

// TestHashCorrectness checks that Write returns the hex SHA-256 of its input.
func TestHashCorrectness(t *testing.T) {
	s := newTestStore(t)

	data := []byte("known test data")
	sum := sha256.Sum256(data)
	want := hex.EncodeToString(sum[:])

	got, err := s.Write(data)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	if got != want {
		t.Errorf("Write returned hash %q, want %q", got, want)
	}
}

// TestDeduplication checks that writing the same data twice yields one hash
// and exactly one file (no leftover temp files) in the leaf directory.
func TestDeduplication(t *testing.T) {
	s := newTestStore(t)

	data := []byte("duplicate me")
	hash1, err := s.Write(data)
	if err != nil {
		t.Fatalf("first Write: %v", err)
	}

	hash2, err := s.Write(data)
	if err != nil {
		t.Fatalf("second Write: %v", err)
	}

	if hash1 != hash2 {
		t.Errorf("hashes differ: %q vs %q", hash1, hash2)
	}

	// Verify only one blob file exists on disk at the expected path.
	p := s.blobPath(hash1)
	info, err := os.Stat(p)
	if err != nil {
		t.Fatalf("Stat blob: %v", err)
	}
	if info.IsDir() {
		t.Fatal("blob path is a directory, not a file")
	}

	// Count files in the leaf directory — should be exactly one.
	dir := filepath.Dir(p)
	entries, err := os.ReadDir(dir)
	if err != nil {
		t.Fatalf("ReadDir: %v", err)
	}

	var count int
	for _, e := range entries {
		if !e.IsDir() {
			count++
		}
	}
	if count != 1 {
		t.Errorf("expected 1 blob file in %s, found %d", dir, count)
	}
}

// TestExists checks Exists for a written blob and for a well-formed but
// absent hash.
func TestExists(t *testing.T) {
	s := newTestStore(t)

	data := []byte("existence check")
	hash, err := s.Write(data)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	if !s.Exists(hash) {
		t.Error("Exists returned false for written blob")
	}

	fake := "0000000000000000000000000000000000000000000000000000000000000000"
	if s.Exists(fake) {
		t.Error("Exists returned true for nonexistent hash")
	}
}

// TestExistsInvalidHash checks that a malformed hash is reported as absent.
func TestExistsInvalidHash(t *testing.T) {
	s := newTestStore(t)

	if s.Exists("not-a-valid-hash") {
		t.Error("Exists returned true for invalid hash")
	}
}

// TestReadNonexistent checks that reading an absent blob fails.
func TestReadNonexistent(t *testing.T) {
	s := newTestStore(t)

	fake := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	if _, err := s.Read(fake); err == nil {
		t.Error("Read of nonexistent blob should return an error")
	}
}

// TestReadInvalidHash checks that reading with a malformed hash fails.
func TestReadInvalidHash(t *testing.T) {
	s := newTestStore(t)

	if _, err := s.Read("bad"); err == nil {
		t.Error("Read with invalid hash should return an error")
	}
}

// TestDelete checks that a deleted blob is no longer visible to Exists or Read.
func TestDelete(t *testing.T) {
	s := newTestStore(t)

	data := []byte("delete me")
	hash, err := s.Write(data)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	if err := s.Delete(hash); err != nil {
		t.Fatalf("Delete: %v", err)
	}

	if s.Exists(hash) {
		t.Error("Exists returned true after Delete")
	}

	if _, err := s.Read(hash); err == nil {
		t.Error("Read succeeded after Delete, expected error")
	}
}

// TestDeleteNonexistent checks that deleting an absent blob fails.
func TestDeleteNonexistent(t *testing.T) {
	s := newTestStore(t)

	fake := "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
	if err := s.Delete(fake); err == nil {
		t.Error("Delete of nonexistent blob should return an error")
	}
}

// TestDeleteInvalidHash checks that deleting with a malformed hash fails.
func TestDeleteInvalidHash(t *testing.T) {
	s := newTestStore(t)

	if err := s.Delete("xyz"); err == nil {
		t.Error("Delete with invalid hash should return an error")
	}
}

// TestWriteCreatesSubdirectories checks the on-disk two-level layout that
// Write produces under blobs/.
func TestWriteCreatesSubdirectories(t *testing.T) {
	s := newTestStore(t)

	data := []byte("subdir test")
	hash, err := s.Write(data)
	if err != nil {
		t.Fatalf("Write: %v", err)
	}

	// Verify the two-level directory structure exists.
	level1 := filepath.Join(s.root, "blobs", hash[:2])
	level2 := filepath.Join(level1, hash[2:4])

	if info, err := os.Stat(level1); err != nil || !info.IsDir() {
		t.Errorf("expected directory at %s", level1)
	}
	if info, err := os.Stat(level2); err != nil || !info.IsDir() {
		t.Errorf("expected directory at %s", level2)
	}

	// And the blob file itself exists in level2.
	blobFile := filepath.Join(level2, hash)
	if _, err := os.Stat(blobFile); err != nil {
		t.Errorf("expected blob file at %s: %v", blobFile, err)
	}
}