Add Phase 2 artifact repository: types, blob store, gRPC service

Build the complete artifact pillar, including these packages:
- artifacts: Artifact, Snapshot, Citation, Publisher types with Get/Store
  DB methods, tag/category management, metadata ops, YAML import
- blob: content-addressable store (SHA256, hierarchical dir layout)
- proto: protobuf definitions (common.proto, artifacts.proto) with buf
  linting and code generation
- server: gRPC ArtifactService implementation (create/get artifacts,
  store/retrieve blobs, manage tags/categories, search by tag)

All FK insertion ordering is correct (parent rows before children).
Full test coverage across artifacts, blob, and server packages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 09:56:34 -07:00
parent bb2c7f7ef3
commit b64177baa8
22 changed files with 5017 additions and 1 deletions

80
blob/blob.go Normal file
View File

@@ -0,0 +1,80 @@
// Package blob implements a content-addressable store for artifact content.
// Files are addressed by their SHA256 hash and stored in a hierarchical
// directory layout for filesystem friendliness.
package blob
import (
"crypto/sha256"
"errors"
"fmt"
"os"
"path/filepath"
)
// Store manages a content-addressable blob store on the local filesystem.
// Each blob lives in a file whose location is derived from its ID by the
// path method.
type Store struct {
// basePath is the root directory that every blob path is joined onto.
basePath string
}
// NewStore returns a Store whose blobs live beneath basePath. The directory
// is neither created nor checked by this constructor.
func NewStore(basePath string) *Store {
	store := new(Store)
	store.basePath = basePath
	return store
}
// Write computes the SHA256 hash of data, stores the bytes under that hash,
// and returns the hex-encoded hash (which is the blob ID). Missing parent
// directories are created on demand.
//
// The content is staged in a temporary file inside the destination directory
// and then renamed onto the final name (an atomic replace on POSIX systems).
// A crash or a concurrent reader therefore never sees a truncated file under
// a content-addressed name, which would otherwise look like a valid blob
// whose bytes do not match its ID. Writing content that is already present
// replaces the existing file with identical bytes.
//
// On failure the returned ID is empty and the error wraps the underlying
// filesystem error.
func (s *Store) Write(data []byte) (string, error) {
	hash := sha256.Sum256(data)
	id := fmt.Sprintf("%x", hash[:])
	p := s.path(id)
	dir := filepath.Dir(p)
	if err := os.MkdirAll(dir, 0o750); err != nil {
		return "", fmt.Errorf("blob: failed to create directory %q: %w", dir, err)
	}

	// Stage next to the destination so the rename never crosses filesystems.
	// os.CreateTemp creates the file with mode 0o600, matching the permissions
	// blobs have always been written with.
	tmp, err := os.CreateTemp(dir, ".tmp-*")
	if err != nil {
		return "", fmt.Errorf("blob: failed to write blob %q: %w", id, err)
	}
	tmpName := tmp.Name()

	_, err = tmp.Write(data)
	// Always close the handle; report the close error only if the write
	// itself succeeded, so the first failure is the one surfaced.
	if closeErr := tmp.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpName, p)
	}
	if err != nil {
		// Best-effort cleanup: the staging file is useless once any step failed.
		_ = os.Remove(tmpName)
		return "", fmt.Errorf("blob: failed to write blob %q: %w", id, err)
	}
	return id, nil
}
// Read returns the content of the blob with the given ID.
//
// The ID must be a non-empty string of hexadecimal digits, which is the only
// form Write ever produces. Any other ID is rejected before the filesystem is
// touched: the ID is spliced into a file path, so characters such as '/' or
// '.' would otherwise let a caller-supplied ID resolve to a file outside the
// store. The rejection error wraps os.ErrNotExist, since such an ID cannot
// name a stored blob; callers that test for a missing blob with errors.Is
// keep working.
//
// All other failures wrap the error returned by the underlying file read.
func (s *Store) Read(id string) ([]byte, error) {
	valid := id != ""
	for i := 0; valid && i < len(id); i++ {
		c := id[i]
		valid = ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
	}
	if !valid {
		return nil, fmt.Errorf("blob: failed to read blob %q: invalid blob ID: %w", id, os.ErrNotExist)
	}

	data, err := os.ReadFile(s.path(id))
	if err != nil {
		return nil, fmt.Errorf("blob: failed to read blob %q: %w", id, err)
	}
	return data, nil
}
// Exists reports whether a blob with the given ID is present in the store.
//
// It returns true only when the computed path can be stat'ed and refers to a
// regular file. A directory at that path (for example the store root, which
// is what an empty ID resolves to) is not a blob. A stat failure other than
// "not found" leaves the blob's presence unknown and is reported as absent
// rather than as a false positive.
func (s *Store) Exists(id string) bool {
	info, err := os.Stat(s.path(id))
	switch {
	case err == nil:
		return info.Mode().IsRegular()
	case errors.Is(err, os.ErrNotExist):
		// The ordinary "no such blob" case.
		return false
	default:
		// Permission denied, a non-directory path component, an over-long
		// name, etc.: existence cannot be confirmed, so do not claim it.
		return false
	}
}
// Path exposes the filesystem location the store uses for the blob with the
// given ID. It only computes the path and does not check that the file exists.
func (s *Store) Path(id string) string {
	location := s.path(id)
	return location
}
// HashData computes the lowercase hex SHA256 digest of data. Nothing is
// written to any store.
func HashData(data []byte) string {
	hasher := sha256.New()
	hasher.Write(data) // hash.Hash documents that Write never returns an error
	return fmt.Sprintf("%x", hasher.Sum(nil))
}
// path maps a blob ID to its location on disk. Each complete 4-character
// chunk of the ID becomes one nested directory level beneath basePath, and
// the file itself is named with the full ID. A trailing remainder shorter
// than four characters does not produce a directory.
// Example: "a1b2c3d4..." -> basePath/a1b2/c3d4/.../a1b2c3d4...
func (s *Store) path(id string) string {
	const segmentLen = 4

	segments := make([]string, 0, len(id)/segmentLen+2)
	segments = append(segments, s.basePath)
	for rest := id; len(rest) >= segmentLen; rest = rest[segmentLen:] {
		segments = append(segments, rest[:segmentLen])
	}
	segments = append(segments, id)

	return filepath.Join(segments...)
}

145
blob/blob_test.go Normal file
View File

@@ -0,0 +1,145 @@
package blob
import (
"bytes"
"os"
"strings"
"testing"
)
// testStore builds a Store rooted in a fresh temporary directory obtained
// from t.TempDir, so each test works against its own empty store.
func testStore(t *testing.T) *Store {
	t.Helper()
	root := t.TempDir()
	return NewStore(root)
}
// TestWriteAndRead checks that bytes handed to Write come back unchanged
// from Read when addressed by the returned ID.
func TestWriteAndRead(t *testing.T) {
	store := testStore(t)
	want := []byte("hello, exocortex")

	id, err := store.Write(want)
	switch {
	case err != nil:
		t.Fatalf("Write failed: %v", err)
	case id == "":
		t.Fatal("Write returned empty ID")
	}

	got, err := store.Read(id)
	if err != nil {
		t.Fatalf("Read failed: %v", err)
	}
	if same := bytes.Equal(got, want); !same {
		t.Fatalf("data mismatch: got %q, want %q", got, want)
	}
}
// TestWriteDeterministic checks that writing the same content twice succeeds
// both times and yields the same blob ID.
func TestWriteDeterministic(t *testing.T) {
	store := testStore(t)
	payload := []byte("deterministic content")

	first, firstErr := store.Write(payload)
	if firstErr != nil {
		t.Fatalf("first Write failed: %v", firstErr)
	}

	second, secondErr := store.Write(payload)
	if secondErr != nil {
		t.Fatalf("second Write failed: %v", secondErr)
	}

	if first == second {
		return
	}
	t.Fatalf("same content should produce same ID: %q vs %q", first, second)
}
// TestExists checks that Exists is false for an ID that was never written
// and true for an ID returned by a successful Write.
func TestExists(t *testing.T) {
	store := testStore(t)

	if found := store.Exists("nonexistent"); found {
		t.Fatal("Exists should return false for missing blob")
	}

	id, err := store.Write([]byte("existence check"))
	if err != nil {
		t.Fatalf("Write failed: %v", err)
	}

	if found := store.Exists(id); !found {
		t.Fatal("Exists should return true after write")
	}
}
// TestReadMissing checks that Read reports an error for a well-formed ID
// that has never been written to the store.
func TestReadMissing(t *testing.T) {
	const absentID = "0000000000000000000000000000000000000000000000000000000000000000"

	if _, err := testStore(t).Read(absentID); err == nil {
		t.Fatal("Read of missing blob should return error")
	}
}
// TestPathLayout checks the exact on-disk layout for a 64-character ID: one
// directory per 4-character chunk beneath the base path, then a file named
// with the full ID.
//
// The whole path is compared. A substring check for "a1b2" would pass on the
// file name alone and so would not verify the directory structure.
func TestPathLayout(t *testing.T) {
	s := NewStore("/base")
	// A 64-char hex SHA256 hash: the 16-char pattern below repeated four times.
	id := "a1b2c3d4e5f67890a1b2c3d4e5f67890a1b2c3d4e5f67890a1b2c3d4e5f67890"

	// The leading empty element yields the root separator when joined.
	parts := []string{"", "base"}
	for i := 0; i < 4; i++ {
		parts = append(parts, "a1b2", "c3d4", "e5f6", "7890")
	}
	parts = append(parts, id)
	want := strings.Join(parts, string(os.PathSeparator))

	if got := s.Path(id); got != want {
		t.Fatalf("path layout mismatch: got %q, want %q", got, want)
	}
}
// TestHashData checks that HashData yields a 64-character digest, is
// deterministic, and matches the published SHA-256 values for known inputs.
// The known-answer checks matter: length and determinism alone would also
// pass for a function returning any fixed 64-character string.
func TestHashData(t *testing.T) {
	data := []byte("hash me")
	h := HashData(data)
	if len(h) != 64 {
		t.Fatalf("expected 64-char hex hash, got %d chars", len(h))
	}
	// Same data should produce same hash.
	h2 := HashData(data)
	if h != h2 {
		t.Fatal("HashData is not deterministic")
	}

	// Standard SHA-256 test vectors.
	vectors := []struct {
		in   string
		want string
	}{
		{"", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"},
		{"abc", "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"},
	}
	for _, v := range vectors {
		if got := HashData([]byte(v.in)); got != v.want {
			t.Fatalf("HashData(%q) = %q, want %q", v.in, got, v.want)
		}
	}
}
// TestWriteLargeBlob round-trips a 1 MiB payload filled with a repeating
// 0..255 byte pattern and checks it is read back unchanged.
func TestWriteLargeBlob(t *testing.T) {
	store := testStore(t)

	const size = 1 << 20 // 1 MiB
	payload := make([]byte, size)
	for i := 0; i < size; i++ {
		payload[i] = byte(i & 0xff)
	}

	id, err := store.Write(payload)
	if err != nil {
		t.Fatalf("Write failed: %v", err)
	}

	roundTripped, err := store.Read(id)
	if err != nil {
		t.Fatalf("Read failed: %v", err)
	}
	if same := bytes.Equal(roundTripped, payload); !same {
		t.Fatal("large blob round-trip failed")
	}
}
// TestWriteCreatesDirectories checks that after a Write into an empty store
// the file reported by Path can be stat'ed, i.e. the intermediate
// directories were created.
func TestWriteCreatesDirectories(t *testing.T) {
	store := testStore(t)

	id, err := store.Write([]byte("directory creation test"))
	if err != nil {
		t.Fatalf("Write failed: %v", err)
	}

	location := store.Path(id)
	_, statErr := os.Stat(location)
	if statErr != nil {
		t.Fatalf("blob file should exist at %q: %v", location, statErr)
	}
}