diff --git a/PROGRESS.md b/PROGRESS.md index 2c9aae6..e58bf15 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -6,7 +6,7 @@ See `PROJECT_PLAN.md` for the implementation roadmap and ## Current State -**Phase:** 7 complete, ready for Phase 9 +**Phase:** 9 complete, ready for Phase 10 **Last updated:** 2026-03-19 ### Completed @@ -20,6 +20,7 @@ See `PROJECT_PLAN.md` for the implementation roadmap and - Phase 6: OCI push path (all 3 steps) - Phase 7: OCI delete path (all 2 steps) - Phase 8: Admin REST API (all 5 steps) +- Phase 9: Garbage collection (all 2 steps) - `ARCHITECTURE.md` — Full design specification (18 sections) - `CLAUDE.md` — AI development guidance - `PROJECT_PLAN.md` — Implementation plan (14 phases, 40+ steps) @@ -27,13 +28,55 @@ See `PROJECT_PLAN.md` for the implementation roadmap and ### Next Steps -1. Phase 9 (garbage collection) -2. Phase 10 (gRPC admin API) +1. Phase 10 (gRPC admin API) +2. Phase 11 (CLI tool) and Phase 12 (web UI) --- ## Log +### 2026-03-19 — Phase 9: Garbage collection + +**Task:** Implement the two-phase GC algorithm for removing unreferenced +blobs per ARCHITECTURE.md §9. 
+ +**Changes:** + +Step 9.1 — GC engine (`internal/gc/`): +- `gc.go`: `Collector` struct with `sync.Mutex` for registry-wide lock; + `New(db, storage)` constructor; `Run(ctx)` executes two-phase algorithm + (Phase 1: find unreferenced blobs + delete rows in transaction; + Phase 2: delete files from storage); `Reconcile(ctx)` scans filesystem + for orphaned files with no DB row (crash recovery); `TryLock()` for + concurrent GC rejection +- `errors.go`: `ErrGCRunning` sentinel +- `DB` interface: `FindAndDeleteUnreferencedBlobs()`, `BlobExistsByDigest()` +- `Storage` interface: `Delete()`, `ListBlobDigests()` +- `db/gc.go`: `FindAndDeleteUnreferencedBlobs()` — LEFT JOIN blobs to + manifest_blobs, finds unreferenced, deletes rows in single transaction; + `BlobExistsByDigest()` +- `storage/list.go`: `ListBlobDigests()` — scans sha256 prefix dirs + +Step 9.2 — Wire GC into server: +- `server/admin_gc.go`: updated `GCState` to hold `*gc.Collector` and + `AuditFunc`; `AdminTriggerGCHandler` now launches `collector.Run()` + in a goroutine, tracks result, writes `gc_started`/`gc_completed` + audit events + +**Verification:** +- `make all` passes: vet clean, lint 0 issues, all tests passing, + all 3 binaries built +- GC engine tests (6 new): removes unreferenced blobs (verify both DB + rows and storage files deleted, referenced blobs preserved), does not + remove referenced blobs, concurrent GC rejected (ErrGCRunning), empty + registry (no-op), reconcile cleans orphaned files, reconcile empty + storage +- DB GC tests (3 new): FindAndDeleteUnreferencedBlobs (unreferenced + removed, referenced preserved), no unreferenced returns nil, + BlobExistsByDigest (found + not found) + +--- + ### 2026-03-19 — Phase 7: OCI delete path **Task:** Implement manifest and blob deletion per OCI Distribution Spec. diff --git a/PROJECT_PLAN.md b/PROJECT_PLAN.md index c7921ea..b8541eb 100644 --- a/PROJECT_PLAN.md +++ b/PROJECT_PLAN.md @@ -18,7 +18,7 @@ design specification. 
| 6 | OCI API — push path | **Complete** | | 7 | OCI API — delete path | **Complete** | | 8 | Admin REST API | **Complete** | -| 9 | Garbage collection | Not started | +| 9 | Garbage collection | **Complete** | | 10 | gRPC admin API | Not started | | 11 | CLI tool (mcrctl) | Not started | | 12 | Web UI | Not started | diff --git a/internal/db/gc.go b/internal/db/gc.go new file mode 100644 index 0000000..f985465 --- /dev/null +++ b/internal/db/gc.go @@ -0,0 +1,77 @@ +package db + +import ( + "database/sql" + "errors" + "fmt" + + "git.wntrmute.dev/kyle/mcr/internal/gc" +) + +// FindAndDeleteUnreferencedBlobs finds all blob rows with no manifest_blobs +// entries, deletes them in a single transaction, and returns the digests +// and sizes of the deleted blobs. +func (d *DB) FindAndDeleteUnreferencedBlobs() ([]gc.UnreferencedBlob, error) { + tx, err := d.Begin() + if err != nil { + return nil, fmt.Errorf("db: begin gc transaction: %w", err) + } + + // Find unreferenced blobs. + rows, err := tx.Query( + `SELECT b.id, b.digest, b.size FROM blobs b + LEFT JOIN manifest_blobs mb ON mb.blob_id = b.id + WHERE mb.manifest_id IS NULL`, + ) + if err != nil { + _ = tx.Rollback() + return nil, fmt.Errorf("db: find unreferenced blobs: %w", err) + } + + var unreferenced []gc.UnreferencedBlob + var ids []int64 + for rows.Next() { + var id int64 + var blob gc.UnreferencedBlob + if err := rows.Scan(&id, &blob.Digest, &blob.Size); err != nil { + _ = rows.Close() + _ = tx.Rollback() + return nil, fmt.Errorf("db: scan unreferenced blob: %w", err) + } + unreferenced = append(unreferenced, blob) + ids = append(ids, id) + } + if err := rows.Err(); err != nil { + _ = rows.Close() + _ = tx.Rollback() + return nil, fmt.Errorf("db: iterate unreferenced blobs: %w", err) + } + _ = rows.Close() + + // Delete the unreferenced blob rows. 
+ for _, id := range ids { + if _, err := tx.Exec(`DELETE FROM blobs WHERE id = ?`, id); err != nil { + _ = tx.Rollback() + return nil, fmt.Errorf("db: delete blob %d: %w", id, err) + } + } + + if err := tx.Commit(); err != nil { + return nil, fmt.Errorf("db: commit gc transaction: %w", err) + } + + return unreferenced, nil +} + +// BlobExistsByDigest checks whether a blob row exists for the given digest. +func (d *DB) BlobExistsByDigest(digest string) (bool, error) { + var count int + err := d.QueryRow(`SELECT COUNT(*) FROM blobs WHERE digest = ?`, digest).Scan(&count) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return false, nil + } + return false, fmt.Errorf("db: blob exists by digest: %w", err) + } + return count > 0, nil +} diff --git a/internal/db/gc_test.go b/internal/db/gc_test.go new file mode 100644 index 0000000..03fb052 --- /dev/null +++ b/internal/db/gc_test.go @@ -0,0 +1,111 @@ +package db + +import "testing" + +func TestFindAndDeleteUnreferencedBlobs(t *testing.T) { + d := openTestDB(t) + if err := d.Migrate(); err != nil { + t.Fatalf("Migrate: %v", err) + } + + // Setup: repo, manifest, two blobs. One referenced, one not. + _, err := d.Exec(`INSERT INTO repositories (name) VALUES ('testrepo')`) + if err != nil { + t.Fatalf("insert repo: %v", err) + } + _, err = d.Exec(`INSERT INTO manifests (repository_id, digest, media_type, content, size) + VALUES (1, 'sha256:m1', 'application/vnd.oci.image.manifest.v1+json', '{}', 2)`) + if err != nil { + t.Fatalf("insert manifest: %v", err) + } + // Referenced blob. + _, err = d.Exec(`INSERT INTO blobs (digest, size) VALUES ('sha256:referenced', 100)`) + if err != nil { + t.Fatalf("insert referenced blob: %v", err) + } + _, err = d.Exec(`INSERT INTO manifest_blobs (manifest_id, blob_id) VALUES (1, 1)`) + if err != nil { + t.Fatalf("insert manifest_blob: %v", err) + } + // Unreferenced blob. 
+ _, err = d.Exec(`INSERT INTO blobs (digest, size) VALUES ('sha256:unreferenced', 500)`) + if err != nil { + t.Fatalf("insert unreferenced blob: %v", err) + } + + unreferenced, err := d.FindAndDeleteUnreferencedBlobs() + if err != nil { + t.Fatalf("FindAndDeleteUnreferencedBlobs: %v", err) + } + + if len(unreferenced) != 1 { + t.Fatalf("unreferenced count: got %d, want 1", len(unreferenced)) + } + if unreferenced[0].Digest != "sha256:unreferenced" { + t.Fatalf("unreferenced digest: got %q", unreferenced[0].Digest) + } + if unreferenced[0].Size != 500 { + t.Fatalf("unreferenced size: got %d, want 500", unreferenced[0].Size) + } + + // Verify unreferenced blob row was deleted. + exists, err := d.BlobExists("sha256:unreferenced") + if err != nil { + t.Fatalf("BlobExists: %v", err) + } + if exists { + t.Fatal("unreferenced blob should have been deleted from DB") + } + + // Verify referenced blob still exists. + exists, err = d.BlobExists("sha256:referenced") + if err != nil { + t.Fatalf("BlobExists: %v", err) + } + if !exists { + t.Fatal("referenced blob should still exist") + } +} + +func TestFindAndDeleteUnreferencedBlobsNone(t *testing.T) { + d := openTestDB(t) + if err := d.Migrate(); err != nil { + t.Fatalf("Migrate: %v", err) + } + + unreferenced, err := d.FindAndDeleteUnreferencedBlobs() + if err != nil { + t.Fatalf("FindAndDeleteUnreferencedBlobs: %v", err) + } + if unreferenced != nil { + t.Fatalf("expected nil, got %v", unreferenced) + } +} + +func TestBlobExistsByDigest(t *testing.T) { + d := openTestDB(t) + if err := d.Migrate(); err != nil { + t.Fatalf("Migrate: %v", err) + } + + _, err := d.Exec(`INSERT INTO blobs (digest, size) VALUES ('sha256:exists', 100)`) + if err != nil { + t.Fatalf("insert blob: %v", err) + } + + exists, err := d.BlobExistsByDigest("sha256:exists") + if err != nil { + t.Fatalf("BlobExistsByDigest: %v", err) + } + if !exists { + t.Fatal("expected blob to exist") + } + + exists, err = d.BlobExistsByDigest("sha256:nope") + if err != 
nil {
+		t.Fatalf("BlobExistsByDigest (nope): %v", err)
+	}
+	if exists {
+		t.Fatal("expected blob to not exist")
+	}
+}
diff --git a/internal/gc/errors.go b/internal/gc/errors.go
new file mode 100644
index 0000000..56928eb
--- /dev/null
+++ b/internal/gc/errors.go
@@ -0,0 +1,6 @@
+package gc
+
+import "errors"
+
+// ErrGCRunning indicates that a GC run is already in progress.
+var ErrGCRunning = errors.New("gc: already running")
diff --git a/internal/gc/gc.go b/internal/gc/gc.go
new file mode 100644
index 0000000..1adb052
--- /dev/null
+++ b/internal/gc/gc.go
@@ -0,0 +1,160 @@
+package gc
+
+import (
+	"context"
+	"fmt"
+	"sync"
+	"time"
+)
+
+// DB provides the database operations needed by GC.
+type DB interface {
+	// FindAndDeleteUnreferencedBlobs returns digests and sizes of blobs
+	// with no manifest_blobs entries, deletes those blob rows in a
+	// transaction, and returns the results.
+	FindAndDeleteUnreferencedBlobs() ([]UnreferencedBlob, error)
+	// BlobExistsByDigest checks whether a blob row exists for the given digest.
+	BlobExistsByDigest(digest string) (bool, error)
+}
+
+// Storage provides filesystem operations for blob cleanup.
+type Storage interface {
+	Delete(digest string) error
+	ListBlobDigests() ([]string, error)
+}
+
+// UnreferencedBlob is a blob that has no manifest references.
+type UnreferencedBlob struct {
+	Digest string
+	Size   int64
+}
+
+// Result records the outcome of a GC run.
+type Result struct {
+	BlobsRemoved int
+	BytesFreed   int64
+	Duration     time.Duration
+}
+
+// Collector performs garbage collection of unreferenced blobs.
+type Collector struct {
+	db      DB
+	storage Storage
+	mu      sync.Mutex // registry-wide GC lock
+}
+
+// New creates a new garbage collector.
+func New(db DB, storage Storage) *Collector {
+	return &Collector{db: db, storage: storage}
+}
+
+// Run executes the two-phase GC algorithm per ARCHITECTURE.md §9.
+// Phase 1 (DB): find unreferenced blobs, delete rows in a transaction.
+// Phase 2 (filesystem): delete blob files, clean up empty dirs.
+// Returns ErrGCRunning if another GC run is already in progress.
+func (c *Collector) Run(ctx context.Context) (*Result, error) {
+	if !c.mu.TryLock() {
+		return nil, ErrGCRunning
+	}
+	defer c.mu.Unlock()
+
+	start := time.Now()
+
+	// Check for cancellation.
+	if err := ctx.Err(); err != nil {
+		return nil, fmt.Errorf("gc: %w", err)
+	}
+
+	// Phase 1: Mark and sweep in DB.
+	unreferenced, err := c.db.FindAndDeleteUnreferencedBlobs()
+	if err != nil {
+		return nil, fmt.Errorf("gc: phase 1: %w", err)
+	}
+
+	// Phase 2: Delete files from storage.
+	var bytesFreed int64
+	for _, blob := range unreferenced {
+		if err := ctx.Err(); err != nil {
+			// Return partial result on cancellation.
+			return &Result{
+				BlobsRemoved: len(unreferenced),
+				BytesFreed:   bytesFreed,
+				Duration:     time.Since(start),
+			}, fmt.Errorf("gc: phase 2 interrupted: %w", err)
+		}
+		// Best-effort file deletion. If the file is already gone (e.g.,
+		// crash recovery), that's fine.
+		if err := c.storage.Delete(blob.Digest); err != nil {
+			// Skip and continue — orphaned files are harmless and will
+			// be caught by Reconcile.
+			continue
+		}
+		bytesFreed += blob.Size
+	}
+
+	return &Result{
+		BlobsRemoved: len(unreferenced),
+		BytesFreed:   bytesFreed,
+		Duration:     time.Since(start),
+	}, nil
+}
+
+// Reconcile scans the filesystem for blob files with no matching DB row
+// and deletes them. This handles crash recovery — files left behind when
+// the process crashed after Phase 1 (DB cleanup) but before Phase 2
+// (file cleanup) completed.
+func (c *Collector) Reconcile(ctx context.Context) (*Result, error) { + if !c.mu.TryLock() { + return nil, ErrGCRunning + } + defer c.mu.Unlock() + + start := time.Now() + + digests, err := c.storage.ListBlobDigests() + if err != nil { + return nil, fmt.Errorf("gc: list blob files: %w", err) + } + + var removed int + var bytesFreed int64 + for _, digest := range digests { + if err := ctx.Err(); err != nil { + return &Result{ + BlobsRemoved: removed, + BytesFreed: bytesFreed, + Duration: time.Since(start), + }, fmt.Errorf("gc: reconcile interrupted: %w", err) + } + + exists, err := c.db.BlobExistsByDigest(digest) + if err != nil { + continue + } + if !exists { + if err := c.storage.Delete(digest); err != nil { + continue + } + removed++ + } + } + + return &Result{ + BlobsRemoved: removed, + BytesFreed: bytesFreed, + Duration: time.Since(start), + }, nil +} + +// Lock acquires the GC lock, blocking new blob uploads. +// Returns a function to release the lock. +func (c *Collector) Lock() func() { + c.mu.Lock() + return c.mu.Unlock +} + +// TryLock attempts to acquire the GC lock without blocking. +// Returns true if the lock was acquired. +func (c *Collector) TryLock() bool { + return c.mu.TryLock() +} diff --git a/internal/gc/gc_test.go b/internal/gc/gc_test.go new file mode 100644 index 0000000..dc3019c --- /dev/null +++ b/internal/gc/gc_test.go @@ -0,0 +1,230 @@ +package gc + +import ( + "context" + "errors" + "sync" + "testing" +) + +// fakeDB implements gc.DB for tests. +type fakeDB struct { + mu sync.Mutex + unreferenced []UnreferencedBlob + blobsExist map[string]bool +} + +func newFakeDB() *fakeDB { + return &fakeDB{ + blobsExist: make(map[string]bool), + } +} + +func (f *fakeDB) FindAndDeleteUnreferencedBlobs() ([]UnreferencedBlob, error) { + f.mu.Lock() + defer f.mu.Unlock() + result := make([]UnreferencedBlob, len(f.unreferenced)) + copy(result, f.unreferenced) + // Simulate deletion by removing from blobsExist. 
+ for _, b := range f.unreferenced { + delete(f.blobsExist, b.Digest) + } + f.unreferenced = nil + return result, nil +} + +func (f *fakeDB) BlobExistsByDigest(digest string) (bool, error) { + f.mu.Lock() + defer f.mu.Unlock() + return f.blobsExist[digest], nil +} + +// fakeStorage implements gc.Storage for tests. +type fakeStorage struct { + mu sync.Mutex + blobs map[string]int64 // digest -> size + deleted []string +} + +func newFakeStorage() *fakeStorage { + return &fakeStorage{ + blobs: make(map[string]int64), + } +} + +func (f *fakeStorage) Delete(digest string) error { + f.mu.Lock() + defer f.mu.Unlock() + if _, ok := f.blobs[digest]; !ok { + return errors.New("not found") + } + delete(f.blobs, digest) + f.deleted = append(f.deleted, digest) + return nil +} + +func (f *fakeStorage) ListBlobDigests() ([]string, error) { + f.mu.Lock() + defer f.mu.Unlock() + var digests []string + for d := range f.blobs { + digests = append(digests, d) + } + return digests, nil +} + +func TestGCRemovesUnreferencedBlobs(t *testing.T) { + db := newFakeDB() + db.unreferenced = []UnreferencedBlob{ + {Digest: "sha256:dead1", Size: 100}, + {Digest: "sha256:dead2", Size: 200}, + } + db.blobsExist["sha256:dead1"] = true + db.blobsExist["sha256:dead2"] = true + db.blobsExist["sha256:alive"] = true // referenced, not in unreferenced list + + store := newFakeStorage() + store.blobs["sha256:dead1"] = 100 + store.blobs["sha256:dead2"] = 200 + store.blobs["sha256:alive"] = 300 + + c := New(db, store) + result, err := c.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + + if result.BlobsRemoved != 2 { + t.Fatalf("BlobsRemoved: got %d, want 2", result.BlobsRemoved) + } + if result.BytesFreed != 300 { + t.Fatalf("BytesFreed: got %d, want 300", result.BytesFreed) + } + + // Dead blobs should be deleted from storage. 
+ if _, ok := store.blobs["sha256:dead1"]; ok { + t.Fatal("sha256:dead1 should have been deleted from storage") + } + if _, ok := store.blobs["sha256:dead2"]; ok { + t.Fatal("sha256:dead2 should have been deleted from storage") + } + + // Alive blob should still exist. + if _, ok := store.blobs["sha256:alive"]; !ok { + t.Fatal("sha256:alive should still exist in storage") + } +} + +func TestGCDoesNotRemoveReferencedBlobs(t *testing.T) { + db := newFakeDB() + // No unreferenced blobs. + db.blobsExist["sha256:alive"] = true + + store := newFakeStorage() + store.blobs["sha256:alive"] = 500 + + c := New(db, store) + result, err := c.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + + if result.BlobsRemoved != 0 { + t.Fatalf("BlobsRemoved: got %d, want 0", result.BlobsRemoved) + } + + if _, ok := store.blobs["sha256:alive"]; !ok { + t.Fatal("referenced blob should not be deleted") + } +} + +func TestGCConcurrentRejected(t *testing.T) { + db := newFakeDB() + store := newFakeStorage() + c := New(db, store) + + // Acquire the lock manually. + c.mu.Lock() + + // Try to run GC — should fail. + _, err := c.Run(context.Background()) + if !errors.Is(err, ErrGCRunning) { + t.Fatalf("expected ErrGCRunning, got %v", err) + } + + c.mu.Unlock() + + // Now it should work. 
+ result, err := c.Run(context.Background()) + if err != nil { + t.Fatalf("Run after unlock: %v", err) + } + if result.BlobsRemoved != 0 { + t.Fatalf("BlobsRemoved: got %d, want 0", result.BlobsRemoved) + } +} + +func TestGCEmptyRegistry(t *testing.T) { + db := newFakeDB() + store := newFakeStorage() + c := New(db, store) + + result, err := c.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + if result.BlobsRemoved != 0 { + t.Fatalf("BlobsRemoved: got %d, want 0", result.BlobsRemoved) + } + if result.Duration <= 0 { + t.Fatal("Duration should be positive") + } +} + +func TestReconcileCleansOrphanedFiles(t *testing.T) { + db := newFakeDB() + // Only sha256:alive has a DB row. + db.blobsExist["sha256:alive"] = true + + store := newFakeStorage() + store.blobs["sha256:alive"] = 100 + store.blobs["sha256:orphan1"] = 200 + store.blobs["sha256:orphan2"] = 300 + + c := New(db, store) + result, err := c.Reconcile(context.Background()) + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + + if result.BlobsRemoved != 2 { + t.Fatalf("BlobsRemoved: got %d, want 2", result.BlobsRemoved) + } + + // Alive blob should still exist. + if _, ok := store.blobs["sha256:alive"]; !ok { + t.Fatal("sha256:alive should still exist") + } + + // Orphans should be gone. 
+ if _, ok := store.blobs["sha256:orphan1"]; ok { + t.Fatal("sha256:orphan1 should have been deleted") + } + if _, ok := store.blobs["sha256:orphan2"]; ok { + t.Fatal("sha256:orphan2 should have been deleted") + } +} + +func TestReconcileEmptyStorage(t *testing.T) { + db := newFakeDB() + store := newFakeStorage() + c := New(db, store) + + result, err := c.Reconcile(context.Background()) + if err != nil { + t.Fatalf("Reconcile: %v", err) + } + if result.BlobsRemoved != 0 { + t.Fatalf("BlobsRemoved: got %d, want 0", result.BlobsRemoved) + } +} diff --git a/internal/server/admin_gc.go b/internal/server/admin_gc.go index dd0e40f..7281670 100644 --- a/internal/server/admin_gc.go +++ b/internal/server/admin_gc.go @@ -1,10 +1,15 @@ package server import ( + "context" + "fmt" "net/http" "sync" + "time" "github.com/google/uuid" + + "git.wntrmute.dev/kyle/mcr/internal/gc" ) // GCLastRun records the result of the last GC run. @@ -17,9 +22,11 @@ type GCLastRun struct { // GCState tracks the current state of garbage collection. type GCState struct { - mu sync.Mutex - Running bool `json:"running"` - LastRun *GCLastRun `json:"last_run,omitempty"` + mu sync.Mutex + Running bool `json:"running"` + LastRun *GCLastRun `json:"last_run,omitempty"` + Collector *gc.Collector + AuditFn AuditFunc } type gcStatusResponse struct { @@ -43,10 +50,51 @@ func AdminTriggerGCHandler(state *GCState) http.HandlerFunc { state.Running = true state.mu.Unlock() - // GC engine is Phase 9 -- for now, just mark as running and return. - // The actual GC goroutine will be wired up in Phase 9. gcID := uuid.New().String() + // Run GC asynchronously. 
+ go func() { + startedAt := time.Now().UTC().Format(time.RFC3339) + + if state.AuditFn != nil { + state.AuditFn("gc_started", "", "", "", "", map[string]string{ + "gc_id": gcID, + }) + } + + var result *gc.Result + var gcErr error + if state.Collector != nil { + result, gcErr = state.Collector.Run(context.Background()) + } + + completedAt := time.Now().UTC().Format(time.RFC3339) + + state.mu.Lock() + state.Running = false + lastRun := &GCLastRun{ + StartedAt: startedAt, + CompletedAt: completedAt, + } + if result != nil { + lastRun.BlobsRemoved = result.BlobsRemoved + lastRun.BytesFreed = result.BytesFreed + } + state.LastRun = lastRun + state.mu.Unlock() + + if state.AuditFn != nil && gcErr == nil { + details := map[string]string{ + "gc_id": gcID, + } + if result != nil { + details["blobs_removed"] = fmt.Sprintf("%d", result.BlobsRemoved) + details["bytes_freed"] = fmt.Sprintf("%d", result.BytesFreed) + } + state.AuditFn("gc_completed", "", "", "", "", details) + } + }() + writeJSON(w, http.StatusAccepted, gcTriggerResponse{ID: gcID}) } } diff --git a/internal/storage/list.go b/internal/storage/list.go new file mode 100644 index 0000000..1072e5a --- /dev/null +++ b/internal/storage/list.go @@ -0,0 +1,43 @@ +package storage + +import ( + "fmt" + "os" + "path/filepath" +) + +// ListBlobDigests scans the layers directory and returns all blob digests +// found on disk. Used by GC reconciliation to find orphaned files. 
+func (s *Store) ListBlobDigests() ([]string, error) {
+	sha256Dir := filepath.Join(s.layersPath, "sha256")
+	prefixEntries, err := os.ReadDir(sha256Dir)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
+		return nil, fmt.Errorf("storage: list prefix dirs: %w", err)
+	}
+
+	var digests []string
+	for _, prefix := range prefixEntries {
+		if !prefix.IsDir() || len(prefix.Name()) != 2 {
+			continue
+		}
+		prefixPath := filepath.Join(sha256Dir, prefix.Name())
+		blobEntries, err := os.ReadDir(prefixPath)
+		if err != nil {
+			continue
+		}
+		for _, blob := range blobEntries {
+			if blob.IsDir() {
+				continue
+			}
+			digest := "sha256:" + blob.Name()
+			if validateDigest(digest) == nil {
+				digests = append(digests, digest)
+			}
+		}
+	}
+
+	return digests, nil
+}