Add Phase 2 artifact repository: types, blob store, gRPC service

Build the complete artifact pillar across the following packages:
- artifacts: Artifact, Snapshot, Citation, Publisher types with Get/Store
  DB methods, tag/category management, metadata ops, YAML import
- blob: content-addressable store (SHA256, hierarchical dir layout)
- proto: protobuf definitions (common.proto, artifacts.proto) with buf
  linting and code generation
- server: gRPC ArtifactService implementation (create/get artifacts,
  store/retrieve blobs, manage tags/categories, search by tag)

All FK insertion ordering is correct (parent rows before children).
Full test coverage across artifacts, blob, and server packages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 09:56:34 -07:00
parent bb2c7f7ef3
commit b64177baa8
22 changed files with 5017 additions and 1 deletions

212
artifacts/artifact.go Normal file
View File

@@ -0,0 +1,212 @@
// Package artifacts implements the artifact repository pillar — storing,
// retrieving, and managing source documents (PDFs, papers, webpages, etc.)
// with bibliographic metadata, versioned snapshots, and content-addressable
// blob storage.
package artifacts
import (
"context"
"database/sql"
"fmt"
"time"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
)
// ArtifactType enumerates the kinds of artifacts. It is a plain string type:
// Artifact.Store writes the value to the artifacts.type column as-is and
// Artifact.Get converts the stored string back without validating it.
type ArtifactType string
// Known ArtifactType values. The string on the right-hand side is the exact
// text that is persisted for that type.
const (
ArtifactTypeUnknown ArtifactType = "Unknown"
ArtifactTypeCustom ArtifactType = "Custom"
ArtifactTypeArticle ArtifactType = "Article"
ArtifactTypeBook ArtifactType = "Book"
ArtifactTypeURL ArtifactType = "URL"
ArtifactTypePaper ArtifactType = "Paper"
ArtifactTypeVideo ArtifactType = "Video"
ArtifactTypeImage ArtifactType = "Image"
)
// Artifact is the top-level container for a knowledge source.
//
// Store requires a non-nil Citation and requires every key of Tags and
// Categories to exist already (see CreateTag and CreateCategory). Get
// rebuilds every field from the database given only ID.
type Artifact struct {
// ID identifies the artifact; Get looks the artifact up by this value.
ID string
// Type is persisted in its string form.
Type ArtifactType
// Citation is the bibliographic record. Store rejects a nil Citation.
Citation *Citation
// Latest is a timestamp stored on the artifact row.
// NOTE(review): presumably the datetime of the newest snapshot — confirm.
Latest time.Time
History map[time.Time]string // datetime -> snapshot ID
// Tags is a set of tag names. Store iterates the keys only; the bool
// values are never consulted.
Tags map[string]bool
// Categories is a set of category names, handled the same way as Tags.
Categories map[string]bool
// Metadata is persisted through StoreMetadata, keyed by ID.
Metadata core.Metadata
}
// Store writes the artifact and everything attached to it inside tx: the
// citation, the artifact row, the metadata, the history entries, and the tag
// and category links.
//
// The citation is stored first and the artifact row second, so rows written
// afterwards can reference the artifact. Every tag and category named by the
// artifact must already exist; an unknown name aborts the store with an
// error. A nil Citation is rejected.
func (art *Artifact) Store(ctx context.Context, tx *sql.Tx) error {
	if art.Citation == nil {
		return fmt.Errorf("artifacts: artifact missing citation")
	}

	citeErr := art.Citation.Store(ctx, tx)
	if citeErr != nil {
		return fmt.Errorf("artifacts: failed to store artifact citation: %w", citeErr)
	}

	// The artifact row goes in ahead of every row that points back at it.
	const insertArtifact = `INSERT INTO artifacts (id, type, citation_id, latest) VALUES (?, ?, ?, ?)`
	if _, rowErr := tx.ExecContext(ctx, insertArtifact,
		art.ID, string(art.Type), art.Citation.ID, db.ToDBTime(art.Latest)); rowErr != nil {
		return fmt.Errorf("artifacts: failed to store artifact: %w", rowErr)
	}

	if metaErr := StoreMetadata(ctx, tx, art.ID, art.Metadata); metaErr != nil {
		return fmt.Errorf("artifacts: failed to store artifact metadata: %w", metaErr)
	}

	// One history row per (datetime, snapshot ID) pair.
	const insertHistory = `INSERT INTO artifacts_history (artifact_id, snapshot_id, datetime) VALUES (?, ?, ?)`
	for when, snapshotID := range art.History {
		if _, histErr := tx.ExecContext(ctx, insertHistory,
			art.ID, snapshotID, db.ToDBTime(when)); histErr != nil {
			return fmt.Errorf("artifacts: failed to store artifact history: %w", histErr)
		}
	}

	// Tags are referenced by ID, so each name is resolved before linking.
	const linkTag = `INSERT INTO artifact_tags (artifact_id, tag_id) VALUES (?, ?)`
	for name := range art.Tags {
		tagID, lookupErr := GetTag(ctx, tx, name)
		switch {
		case lookupErr != nil:
			return fmt.Errorf("artifacts: failed to resolve tag %q: %w", name, lookupErr)
		case tagID == "":
			return fmt.Errorf("artifacts: unknown tag %q (create it first)", name)
		}
		if _, linkErr := tx.ExecContext(ctx, linkTag, art.ID, tagID); linkErr != nil {
			return fmt.Errorf("artifacts: failed to link tag: %w", linkErr)
		}
	}

	// Categories follow the same resolve-then-link pattern as tags.
	const linkCategory = `INSERT INTO artifact_categories (artifact_id, category_id) VALUES (?, ?)`
	for name := range art.Categories {
		categoryID, lookupErr := GetCategory(ctx, tx, name)
		switch {
		case lookupErr != nil:
			return fmt.Errorf("artifacts: failed to resolve category %q: %w", name, lookupErr)
		case categoryID == "":
			return fmt.Errorf("artifacts: unknown category %q (create it first)", name)
		}
		if _, linkErr := tx.ExecContext(ctx, linkCategory, art.ID, categoryID); linkErr != nil {
			return fmt.Errorf("artifacts: failed to link category: %w", linkErr)
		}
	}

	return nil
}
// Get loads the artifact identified by art.ID from tx, populating its type,
// latest timestamp, citation, history, tags, categories, and metadata.
//
// It returns an error wrapping core.ErrNoID when art.ID is empty. When no
// artifact row matches, the returned error wraps sql.ErrNoRows. Every
// failure is wrapped with an "artifacts:" message naming the step that
// failed. On error the receiver may be left partially populated.
func (art *Artifact) Get(ctx context.Context, tx *sql.Tx) error {
	if art.ID == "" {
		return fmt.Errorf("artifacts: artifact missing ID: %w", core.ErrNoID)
	}

	art.Citation = &Citation{}
	var latest, artType string
	row := tx.QueryRowContext(ctx,
		`SELECT type, citation_id, latest FROM artifacts WHERE id=?`, art.ID)
	if err := row.Scan(&artType, &art.Citation.ID, &latest); err != nil {
		return fmt.Errorf("artifacts: failed to retrieve artifact: %w", err)
	}
	art.Type = ArtifactType(artType)

	var err error
	art.Latest, err = db.FromDBTime(latest, nil)
	if err != nil {
		return fmt.Errorf("artifacts: failed to parse artifact latest time: %w", err)
	}

	if err := art.Citation.Get(ctx, tx); err != nil {
		return fmt.Errorf("artifacts: failed to load artifact citation: %w", err)
	}

	history, err := loadArtifactHistory(ctx, tx, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact history: %w", err)
	}
	art.History = history

	// Tags and categories are linked by ID; load the IDs, then resolve them
	// to their names.
	tagIDs, err := loadArtifactLinkIDs(ctx, tx,
		`SELECT tag_id FROM artifact_tags WHERE artifact_id=?`, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact tags: %w", err)
	}
	art.Tags, err = tagsFromTagIDs(ctx, tx, tagIDs)
	if err != nil {
		return fmt.Errorf("artifacts: failed to resolve artifact tags: %w", err)
	}

	catIDs, err := loadArtifactLinkIDs(ctx, tx,
		`SELECT category_id FROM artifact_categories WHERE artifact_id=?`, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact categories: %w", err)
	}
	art.Categories, err = categoriesFromCategoryIDs(ctx, tx, catIDs)
	if err != nil {
		return fmt.Errorf("artifacts: failed to resolve artifact categories: %w", err)
	}

	art.Metadata, err = GetMetadata(ctx, tx, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact metadata: %w", err)
	}
	return nil
}

// loadArtifactHistory reads the datetime -> snapshot ID history rows for the
// given artifact. The result set is closed before the function returns.
func loadArtifactHistory(ctx context.Context, tx *sql.Tx, artifactID string) (map[time.Time]string, error) {
	rows, err := tx.QueryContext(ctx,
		`SELECT snapshot_id, datetime FROM artifacts_history WHERE artifact_id=?`, artifactID)
	if err != nil {
		return nil, err
	}
	defer func() { _ = rows.Close() }()

	history := map[time.Time]string{}
	for rows.Next() {
		var snapshotID, datetime string
		if err := rows.Scan(&snapshotID, &datetime); err != nil {
			return nil, fmt.Errorf("scanning history row: %w", err)
		}
		when, err := db.FromDBTime(datetime, nil)
		if err != nil {
			return nil, fmt.Errorf("parsing history time %q: %w", datetime, err)
		}
		history[when] = snapshotID
	}
	// rows.Next also stops on an iteration failure; surface it.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return history, nil
}

// loadArtifactLinkIDs runs a single-column query parameterised by an
// artifact ID and returns the scanned strings. It returns a nil slice when
// the query yields no rows. The result set is closed before returning.
func loadArtifactLinkIDs(ctx context.Context, tx *sql.Tx, query, artifactID string) ([]string, error) {
	rows, err := tx.QueryContext(ctx, query, artifactID)
	if err != nil {
		return nil, err
	}
	defer func() { _ = rows.Close() }()

	var ids []string
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}

503
artifacts/artifacts_test.go Normal file
View File

@@ -0,0 +1,503 @@
package artifacts
import (
"context"
"database/sql"
"path/filepath"
"testing"
"time"
"git.wntrmute.dev/kyle/exo/blob"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
)
// mustOpenAndMigrate opens a fresh database file under the test's temporary
// directory, applies db.Migrate to it, and returns the handle. The handle is
// closed automatically when the test finishes; any failure aborts the test.
func mustOpenAndMigrate(t *testing.T) *sql.DB {
	t.Helper()

	database, openErr := db.Open(filepath.Join(t.TempDir(), "test.db"))
	if openErr != nil {
		t.Fatalf("Open failed: %v", openErr)
	}
	t.Cleanup(func() { _ = database.Close() })

	migrateErr := db.Migrate(database)
	if migrateErr != nil {
		t.Fatalf("Migrate failed: %v", migrateErr)
	}

	return database
}
// mustTX starts a transaction on database with a background context and
// returns both. A failure to start the transaction aborts the test.
//
// A rollback is registered as test cleanup so that a test which bails out
// through t.Fatalf does not leave its transaction open. When the test has
// already ended the transaction itself, that rollback fails harmlessly and
// the error is ignored on purpose.
func mustTX(t *testing.T, database *sql.DB) (*sql.Tx, context.Context) {
	t.Helper()
	ctx := context.Background()
	tx, err := db.StartTX(ctx, database)
	if err != nil {
		t.Fatalf("StartTX failed: %v", err)
	}
	// Cleanups run last-in first-out, so this fires before the database
	// handle registered by mustOpenAndMigrate is closed.
	t.Cleanup(func() { _ = tx.Rollback() })
	return tx, ctx
}
// --- Tag tests ---
// TestCreateAndGetTag checks that a tag can be looked up by name once it has
// been created in the same transaction.
func TestCreateAndGetTag(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	const name = "golang"
	if createErr := CreateTag(ctx, tx, name); createErr != nil {
		t.Fatalf("CreateTag failed: %v", createErr)
	}

	tagID, lookupErr := GetTag(ctx, tx, name)
	switch {
	case lookupErr != nil:
		t.Fatalf("GetTag failed: %v", lookupErr)
	case tagID == "":
		t.Fatal("tag should exist after creation")
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestCreateTagIdempotent creates the same tag twice in one transaction and
// expects both calls to succeed.
func TestCreateTagIdempotent(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	// One failure message per attempt, in call order.
	attempts := []string{
		"first CreateTag failed: %v",
		"second CreateTag should be idempotent: %v",
	}
	for _, failFmt := range attempts {
		if createErr := CreateTag(ctx, tx, "dup"); createErr != nil {
			t.Fatalf(failFmt, createErr)
		}
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestGetAllTags inserts three tags out of order and expects GetAllTags to
// return exactly those three in sorted order.
func TestGetAllTags(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	inserted := []string{"zebra", "alpha", "mid"}
	for i := range inserted {
		if createErr := CreateTag(ctx, tx, inserted[i]); createErr != nil {
			t.Fatalf("CreateTag %q failed: %v", inserted[i], createErr)
		}
	}

	tags, listErr := GetAllTags(ctx, tx)
	if listErr != nil {
		t.Fatalf("GetAllTags failed: %v", listErr)
	}
	if count := len(tags); count != 3 {
		t.Fatalf("expected 3 tags, got %d", count)
	}

	// Should be sorted.
	sorted := [3]string{"alpha", "mid", "zebra"}
	for i, want := range sorted {
		if tags[i] != want {
			t.Fatalf("tags not sorted: %v", tags)
		}
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestGetTagMissing checks that looking up a tag that was never created
// yields an empty ID and no error.
func TestGetTagMissing(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	tagID, lookupErr := GetTag(ctx, tx, "nonexistent")
	switch {
	case lookupErr != nil:
		t.Fatalf("GetTag should not error for missing tag: %v", lookupErr)
	case tagID != "":
		t.Fatalf("missing tag should return empty ID, got %q", tagID)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// --- Category tests ---
// TestCreateAndGetCategory checks that a category can be looked up by name
// once it has been created in the same transaction.
func TestCreateAndGetCategory(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	const name = "cs/systems"
	if createErr := CreateCategory(ctx, tx, name); createErr != nil {
		t.Fatalf("CreateCategory failed: %v", createErr)
	}

	categoryID, lookupErr := GetCategory(ctx, tx, name)
	switch {
	case lookupErr != nil:
		t.Fatalf("GetCategory failed: %v", lookupErr)
	case categoryID == "":
		t.Fatal("category should exist after creation")
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestGetAllCategories inserts three categories out of order and expects
// GetAllCategories to return exactly those three in sorted order. Every
// position is checked, not only the first, so a partially sorted result
// cannot slip through.
func TestGetAllCategories(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	for _, cat := range []string{"z/last", "a/first", "m/mid"} {
		if err := CreateCategory(ctx, tx, cat); err != nil {
			t.Fatalf("CreateCategory %q failed: %v", cat, err)
		}
	}

	cats, err := GetAllCategories(ctx, tx)
	if err != nil {
		t.Fatalf("GetAllCategories failed: %v", err)
	}
	if len(cats) != 3 {
		t.Fatalf("expected 3 categories, got %d", len(cats))
	}

	want := []string{"a/first", "m/mid", "z/last"}
	for i := range want {
		if cats[i] != want[i] {
			t.Fatalf("categories not sorted: %v", cats)
		}
	}

	if err := db.EndTX(tx, nil); err != nil {
		t.Fatalf("EndTX failed: %v", err)
	}
}
// --- Publisher tests ---
// TestPublisherStoreAndGet stores a publisher that has no ID, expects Store
// to assign one, and then reads the record back by that ID.
func TestPublisherStoreAndGet(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	const (
		wantName    = "MIT Press"
		wantAddress = "Cambridge, MA"
	)

	stored := &Publisher{Name: wantName, Address: wantAddress}
	if storeErr := stored.Store(ctx, tx); storeErr != nil {
		t.Fatalf("Publisher.Store failed: %v", storeErr)
	}
	if stored.ID == "" {
		t.Fatal("publisher ID should be set after store")
	}

	loaded := &Publisher{ID: stored.ID}
	found, getErr := loaded.Get(ctx, tx)
	switch {
	case getErr != nil:
		t.Fatalf("Publisher.Get failed: %v", getErr)
	case !found:
		t.Fatal("publisher should be found")
	case !(loaded.Name == wantName && loaded.Address == wantAddress):
		t.Fatalf("publisher data mismatch: %+v", loaded)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestPublisherStoreDedup stores two publishers with identical name and
// address and expects the second to reuse the first one's ID.
func TestPublisherStoreDedup(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	original := &Publisher{Name: "ACM", Address: "New York"}
	if storeErr := original.Store(ctx, tx); storeErr != nil {
		t.Fatalf("first Store failed: %v", storeErr)
	}

	duplicate := &Publisher{Name: "ACM", Address: "New York"}
	if storeErr := duplicate.Store(ctx, tx); storeErr != nil {
		t.Fatalf("second Store failed: %v", storeErr)
	}

	if firstID, secondID := original.ID, duplicate.ID; firstID != secondID {
		t.Fatalf("duplicate publisher should reuse ID: %q vs %q", firstID, secondID)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// --- Citation tests ---
// TestCitationStoreAndGet stores a fully populated citation and reads it
// back by ID, comparing title, DOI, year, authors, publisher name, and one
// metadata entry.
func TestCitationStoreAndGet(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	want := &Citation{
		Title:     "The Art of Computer Programming",
		DOI:       "10.1234/test",
		Year:      1968,
		Published: time.Date(1968, 1, 1, 0, 0, 0, 0, time.UTC),
		Authors:   []string{"Donald Knuth"},
		Publisher: &Publisher{Name: "Addison-Wesley", Address: "Reading, MA"},
		Source:    "https://example.com",
		Abstract:  "A comprehensive monograph on algorithms.",
		Metadata:  core.Metadata{"edition": core.Val("3rd")},
	}
	if storeErr := want.Store(ctx, tx); storeErr != nil {
		t.Fatalf("Citation.Store failed: %v", storeErr)
	}
	if want.ID == "" {
		t.Fatal("citation ID should be set after store")
	}

	have := &Citation{ID: want.ID}
	if getErr := have.Get(ctx, tx); getErr != nil {
		t.Fatalf("Citation.Get failed: %v", getErr)
	}

	switch {
	case have.Title != want.Title:
		t.Fatalf("title mismatch: got %q, want %q", have.Title, want.Title)
	case have.DOI != want.DOI:
		t.Fatalf("DOI mismatch: got %q, want %q", have.DOI, want.DOI)
	case have.Year != want.Year:
		t.Fatalf("year mismatch: got %d, want %d", have.Year, want.Year)
	case len(have.Authors) != 1 || have.Authors[0] != "Donald Knuth":
		t.Fatalf("authors mismatch: %v", have.Authors)
	case have.Publisher.Name != "Addison-Wesley":
		t.Fatalf("publisher mismatch: %+v", have.Publisher)
	case have.Metadata["edition"].Contents != "3rd":
		t.Fatalf("metadata mismatch: %v", have.Metadata)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// --- Metadata tests ---
// TestMetadataStoreAndGet stores two metadata entries under a fresh owner ID
// and reads them back, checking the entry count, one entry's contents, and
// the other entry's type.
func TestMetadataStoreAndGet(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	ownerID := core.NewUUID()
	entries := core.Metadata{
		"key1": core.Val("value1"),
		"key2": core.Vals("value2"),
	}
	if storeErr := StoreMetadata(ctx, tx, ownerID, entries); storeErr != nil {
		t.Fatalf("StoreMetadata failed: %v", storeErr)
	}

	loaded, getErr := GetMetadata(ctx, tx, ownerID)
	if getErr != nil {
		t.Fatalf("GetMetadata failed: %v", getErr)
	}
	if count := len(loaded); count != 2 {
		t.Fatalf("expected 2 metadata entries, got %d", count)
	}
	if first := loaded["key1"]; first.Contents != "value1" {
		t.Fatalf("key1 mismatch: %+v", first)
	}
	if second := loaded["key2"]; second.Type != core.ValueTypeString {
		t.Fatalf("key2 type mismatch: %+v", second)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// --- Full artifact round-trip ---
// TestArtifactStoreAndGet runs a full round trip: it creates the tags and
// category an artifact needs, stores the artifact and one snapshot carrying a
// PDF blob, then loads both back and checks the artifact's fields, the
// snapshot's blob reference, and the blob bytes held by the blob store.
func TestArtifactStoreAndGet(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)
	blobs := blob.NewStore(t.TempDir())

	// Tags and categories have to exist before an artifact can use them.
	tagNames := []string{"algorithms", "textbook"}
	for i := range tagNames {
		if createErr := CreateTag(ctx, tx, tagNames[i]); createErr != nil {
			t.Fatalf("CreateTag failed: %v", createErr)
		}
	}
	if createErr := CreateCategory(ctx, tx, "cs/fundamentals"); createErr != nil {
		t.Fatalf("CreateCategory failed: %v", createErr)
	}

	var (
		snapshotID = core.NewUUID()
		artifactID = core.NewUUID()
		stamp      = time.Now().UTC().Truncate(time.Second)
	)

	stored := &Artifact{
		ID:   artifactID,
		Type: ArtifactTypeBook,
		Citation: &Citation{
			Title:     "TAOCP",
			Year:      1968,
			Published: time.Date(1968, 1, 1, 0, 0, 0, 0, time.UTC),
			Authors:   []string{"Donald Knuth"},
			Publisher: &Publisher{Name: "Addison-Wesley", Address: "Boston"},
			Source:    "https://example.com/taocp",
		},
		Latest:     stamp,
		History:    map[time.Time]string{stamp: snapshotID},
		Tags:       map[string]bool{"algorithms": true, "textbook": true},
		Categories: map[string]bool{"cs/fundamentals": true},
		Metadata:   core.Metadata{"volume": core.Val("1")},
	}
	if storeErr := stored.Store(ctx, tx); storeErr != nil {
		t.Fatalf("Artifact.Store failed: %v", storeErr)
	}

	// Store a snapshot that shares the artifact's citation.
	const pdf MIME = "application/pdf"
	snapshot := &Snapshot{
		ArtifactID: artifactID,
		ID:         snapshotID,
		StoreDate:  stamp,
		Datetime:   stamp,
		Citation:   stored.Citation,
		Source:     "local import",
		Blobs: map[MIME]*BlobRef{
			pdf: {
				Format: pdf,
				Data:   []byte("fake PDF content"),
			},
		},
		Metadata: core.Metadata{},
	}
	if storeErr := snapshot.Store(ctx, tx, blobs); storeErr != nil {
		t.Fatalf("Snapshot.Store failed: %v", storeErr)
	}

	// Retrieve and verify the artifact.
	loaded := &Artifact{ID: artifactID}
	if getErr := loaded.Get(ctx, tx); getErr != nil {
		t.Fatalf("Artifact.Get failed: %v", getErr)
	}
	switch {
	case loaded.Type != ArtifactTypeBook:
		t.Fatalf("type mismatch: got %q, want %q", loaded.Type, ArtifactTypeBook)
	case loaded.Citation.Title != "TAOCP":
		t.Fatalf("citation title mismatch: %q", loaded.Citation.Title)
	case !loaded.Tags["algorithms"] || !loaded.Tags["textbook"]:
		t.Fatalf("tags mismatch: %v", loaded.Tags)
	case !loaded.Categories["cs/fundamentals"]:
		t.Fatalf("categories mismatch: %v", loaded.Categories)
	case len(loaded.History) != 1:
		t.Fatalf("expected 1 history entry, got %d", len(loaded.History))
	}

	// Retrieve and verify the snapshot.
	loadedSnap := &Snapshot{ID: snapshotID}
	if getErr := loadedSnap.Get(ctx, tx); getErr != nil {
		t.Fatalf("Snapshot.Get failed: %v", getErr)
	}
	if loadedSnap.ArtifactID != artifactID {
		t.Fatalf("snapshot artifact ID mismatch: %q", loadedSnap.ArtifactID)
	}
	if count := len(loadedSnap.Blobs); count != 1 {
		t.Fatalf("expected 1 blob, got %d", count)
	}
	ref, present := loadedSnap.Blobs[pdf]
	if !present {
		t.Fatal("missing PDF blob reference")
	}
	if ref.ID == "" {
		t.Fatal("blob ID should be set")
	}

	// Verify blob content via store.
	data, readErr := blobs.Read(ref.ID)
	if readErr != nil {
		t.Fatalf("blob store Read failed: %v", readErr)
	}
	if string(data) != "fake PDF content" {
		t.Fatalf("blob content mismatch: %q", data)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestGetArtifactIDsForTag stores one artifact carrying a single tag and
// expects a search for that tag to return exactly that artifact's ID.
func TestGetArtifactIDsForTag(t *testing.T) {
	database := mustOpenAndMigrate(t)
	tx, ctx := mustTX(t, database)

	const tagName = "search-tag"
	if createErr := CreateTag(ctx, tx, tagName); createErr != nil {
		t.Fatalf("CreateTag failed: %v", createErr)
	}

	artifactID := core.NewUUID()
	article := &Artifact{
		ID:   artifactID,
		Type: ArtifactTypeArticle,
		Citation: &Citation{
			Title:     "Test Article",
			Year:      2024,
			Published: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC),
			Publisher: &Publisher{Name: "Test", Address: ""},
			Source:    "test",
		},
		Latest:     time.Now().UTC().Truncate(time.Second),
		History:    map[time.Time]string{},
		Tags:       map[string]bool{tagName: true},
		Categories: map[string]bool{},
		Metadata:   core.Metadata{},
	}
	if storeErr := article.Store(ctx, tx); storeErr != nil {
		t.Fatalf("Artifact.Store failed: %v", storeErr)
	}

	matches, searchErr := GetArtifactIDsForTag(ctx, tx, tagName)
	if searchErr != nil {
		t.Fatalf("GetArtifactIDsForTag failed: %v", searchErr)
	}
	if !(len(matches) == 1 && matches[0] == artifactID) {
		t.Fatalf("expected [%s], got %v", artifactID, matches)
	}

	if endErr := db.EndTX(tx, nil); endErr != nil {
		t.Fatalf("EndTX failed: %v", endErr)
	}
}
// TestCitationUpdate checks that Update copies fields the receiver leaves
// empty (DOI, year, metadata) from the base citation while keeping a field
// the receiver already sets (title).
func TestCitationUpdate(t *testing.T) {
	parent := &Citation{
		DOI:      "10.1234/base",
		Title:    "Base Title",
		Year:     2020,
		Authors:  []string{"Author A"},
		Metadata: core.Metadata{"key": core.Val("base-val")},
	}
	child := &Citation{
		Title:    "Override Title",
		Metadata: core.Metadata{},
	}

	child.Update(parent)

	switch {
	case child.DOI != "10.1234/base":
		t.Fatalf("DOI should inherit from base: %q", child.DOI)
	case child.Title != "Override Title":
		t.Fatalf("Title should not be overridden: %q", child.Title)
	case child.Year != 2020:
		t.Fatalf("Year should inherit from base: %d", child.Year)
	case child.Metadata["key"].Contents != "base-val":
		t.Fatalf("Metadata should inherit from base: %v", child.Metadata)
	}
}

197
artifacts/citation.go Normal file
View File

@@ -0,0 +1,197 @@
package artifacts
import (
"context"
"database/sql"
"errors"
"fmt"
"time"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
)
// Citation holds bibliographic information for an artifact.
//
// Store writes one row to the citations table plus one authors row per
// author and the metadata entries; Get reloads all of them given only ID.
type Citation struct {
// ID identifies the citation. Store generates one when it is empty; when
// it is set and a citation with that ID already exists, Store does nothing.
ID string
DOI string
Title string
Year int
// Published is persisted through db.ToDBTime.
Published time.Time
// Authors are stored one row per name.
Authors []string
// Publisher is optional for Store: a nil Publisher is recorded as an empty
// publisher reference. Get always leaves this field non-nil.
Publisher *Publisher
Source string
Abstract string
// Metadata is persisted through StoreMetadata, keyed by ID.
Metadata core.Metadata
}
// Update fills c's empty fields from base and leaves every field that c
// already sets untouched.
//
// A field counts as empty when it holds its zero value: an empty string, a
// zero Year, a zero Published time, no Authors, or a nil Publisher. The
// publisher's Name and Address are inherited individually; the publisher ID
// is never copied. When c has no Publisher at all, one is allocated only if
// base's publisher has a name or address to contribute. Metadata keys present
// in base but absent from c are copied; keys c already has keep c's value.
//
// A nil base is a no-op. Authors is shared with base rather than copied.
func (c *Citation) Update(base *Citation) {
	if base == nil {
		return
	}

	if c.DOI == "" {
		c.DOI = base.DOI
	}
	if c.Title == "" {
		c.Title = base.Title
	}
	if c.Year == 0 {
		c.Year = base.Year
	}
	if c.Published.IsZero() {
		c.Published = base.Published
	}
	if len(c.Authors) == 0 {
		c.Authors = base.Authors
	}

	// base.Publisher may be nil (Citation.Store allows it), so it must be
	// checked before its fields are read.
	if bp := base.Publisher; bp != nil {
		if c.Publisher == nil && (bp.Name != "" || bp.Address != "") {
			c.Publisher = &Publisher{}
		}
		if c.Publisher != nil {
			if c.Publisher.Name == "" {
				c.Publisher.Name = bp.Name
			}
			if c.Publisher.Address == "" {
				c.Publisher.Address = bp.Address
			}
		}
	}

	if c.Source == "" {
		c.Source = base.Source
	}
	if c.Abstract == "" {
		c.Abstract = base.Abstract
	}

	// Writing to a nil map panics, so allocate one before inheriting keys.
	if c.Metadata == nil && len(base.Metadata) > 0 {
		c.Metadata = core.Metadata{}
	}
	for key, value := range base.Metadata {
		if _, ok := c.Metadata[key]; !ok {
			c.Metadata[key] = value
		}
	}
}
// present reports whether a row with c.ID already exists in the citations
// table. A missing row is not an error; any other lookup failure is returned
// wrapped.
func (c *Citation) present(ctx context.Context, tx *sql.Tx) (bool, error) {
	var found string
	lookupErr := tx.QueryRowContext(ctx, `SELECT id FROM citations WHERE id=?`, c.ID).Scan(&found)
	switch {
	case lookupErr == nil:
		return true, nil
	case errors.Is(lookupErr, sql.ErrNoRows):
		return false, nil
	default:
		return false, fmt.Errorf("artifacts: failed to look up citation: %w", lookupErr)
	}
}
// Store writes the citation together with its publisher, authors, and
// metadata inside tx.
//
// A citation without an ID is given a fresh one. A citation whose ID already
// exists in the database is left untouched and Store returns nil. The
// publisher, when present, is stored before the citation row; authors and
// metadata are stored after it.
func (c *Citation) Store(ctx context.Context, tx *sql.Tx) error {
	if c.ID != "" {
		exists, lookupErr := c.present(ctx, tx)
		if lookupErr != nil {
			return fmt.Errorf("artifacts: couldn't store citation: %w", lookupErr)
		}
		if exists {
			return nil
		}
	} else {
		c.ID = core.NewUUID()
	}

	// A citation without a publisher records an empty publisher reference.
	var publisherID string
	if pub := c.Publisher; pub != nil {
		if pubErr := pub.Store(ctx, tx); pubErr != nil {
			return fmt.Errorf("artifacts: failed to store citation publisher: %w", pubErr)
		}
		publisherID = pub.ID
	}

	// The citation row precedes the author and metadata rows that refer to it.
	const insertCitation = `INSERT INTO citations (id, doi, title, year, published, publisher, source, abstract) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
	if _, rowErr := tx.ExecContext(ctx, insertCitation,
		c.ID, c.DOI, c.Title, c.Year, db.ToDBTime(c.Published), publisherID, c.Source, c.Abstract); rowErr != nil {
		return fmt.Errorf("artifacts: failed to store citation: %w", rowErr)
	}

	if authorsErr := storeAuthors(ctx, tx, c.ID, c.Authors); authorsErr != nil {
		return fmt.Errorf("artifacts: failed to store citation authors: %w", authorsErr)
	}

	if metaErr := StoreMetadata(ctx, tx, c.ID, c.Metadata); metaErr != nil {
		return fmt.Errorf("artifacts: failed to store citation metadata: %w", metaErr)
	}

	return nil
}
// Get loads the citation identified by c.ID from tx, including its authors,
// publisher, and metadata.
//
// It returns an error wrapping core.ErrNoID when c.ID is empty. When no
// citation row matches, the returned error wraps sql.ErrNoRows. After a
// successful call Publisher is never nil; it is an empty Publisher when the
// citation has no publisher reference. A publisher reference that points at
// a missing publisher row is reported as an error. Every failure is wrapped
// with an "artifacts:" message naming the step that failed. On error the
// receiver may be left partially populated.
func (c *Citation) Get(ctx context.Context, tx *sql.Tx) error {
	if c.ID == "" {
		return fmt.Errorf("artifacts: citation missing ID: %w", core.ErrNoID)
	}

	// Get authors.
	rows, err := tx.QueryContext(ctx,
		`SELECT author_name FROM authors WHERE citation_id=?`, c.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to retrieve citation authors: %w", err)
	}
	defer func() { _ = rows.Close() }()

	c.Authors = nil
	for rows.Next() {
		var name string
		if err := rows.Scan(&name); err != nil {
			return fmt.Errorf("artifacts: failed to scan author: %w", err)
		}
		c.Authors = append(c.Authors, name)
	}
	// rows.Next also stops on an iteration failure; surface it.
	if err := rows.Err(); err != nil {
		return fmt.Errorf("artifacts: failed to read citation authors: %w", err)
	}

	// Get citation fields.
	c.Publisher = &Publisher{}
	var published string
	row := tx.QueryRowContext(ctx,
		`SELECT doi, title, year, published, publisher, source, abstract FROM citations WHERE id=?`, c.ID)
	if err := row.Scan(&c.DOI, &c.Title, &c.Year, &published, &c.Publisher.ID, &c.Source, &c.Abstract); err != nil {
		return fmt.Errorf("artifacts: failed to retrieve citation: %w", err)
	}

	c.Published, err = db.FromDBTime(published, nil)
	if err != nil {
		return fmt.Errorf("artifacts: failed to parse citation published time: %w", err)
	}

	// An empty publisher ID means the citation was stored without one.
	if c.Publisher.ID != "" {
		ok, err := c.Publisher.Get(ctx, tx)
		if err != nil {
			return fmt.Errorf("artifacts: failed to retrieve citation publisher: %w", err)
		}
		if !ok {
			return fmt.Errorf("artifacts: citation references missing publisher %s", c.Publisher.ID)
		}
	}

	c.Metadata, err = GetMetadata(ctx, tx, c.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to retrieve citation metadata: %w", err)
	}

	return nil
}
// storeAuthors records each name in authors against citationID, skipping
// names that already have a row for that citation.
//
// Only a "no rows" result from the existence check counts as "author not
// stored yet". Any other lookup failure is returned wrapped instead of being
// mistaken for a missing author and followed by an insert. A nil or empty
// authors slice writes nothing and returns nil.
func storeAuthors(ctx context.Context, tx *sql.Tx, citationID string, authors []string) error {
	for _, name := range authors {
		// Check if this author already exists for this citation.
		var existing string
		err := tx.QueryRowContext(ctx,
			`SELECT author_name FROM authors WHERE citation_id=? AND author_name=?`,
			citationID, name).Scan(&existing)
		if err == nil {
			continue // already exists
		}
		if !errors.Is(err, sql.ErrNoRows) {
			return fmt.Errorf("artifacts: failed to look up author %q: %w", name, err)
		}

		if _, err := tx.ExecContext(ctx,
			`INSERT INTO authors (citation_id, author_name) VALUES (?, ?)`,
			citationID, name); err != nil {
			return fmt.Errorf("artifacts: failed to store author %q: %w", name, err)
		}
	}
	return nil
}

42
artifacts/metadata.go Normal file
View File

@@ -0,0 +1,42 @@
package artifacts
import (
"context"
"database/sql"
"fmt"
"git.wntrmute.dev/kyle/exo/core"
)
// StoreMetadata writes every entry of metadata to the metadata table under
// the owner id, one row per key. The statement is INSERT OR REPLACE, so a
// conflicting existing row is overwritten rather than causing an error. A
// nil or empty map writes nothing and returns nil.
func StoreMetadata(ctx context.Context, tx *sql.Tx, id string, metadata core.Metadata) error {
	const upsert = `INSERT OR REPLACE INTO metadata (id, mkey, contents, type) VALUES (?, ?, ?, ?)`

	for mkey, entry := range metadata {
		if _, execErr := tx.ExecContext(ctx, upsert, id, mkey, entry.Contents, entry.Type); execErr != nil {
			return fmt.Errorf("artifacts: failed to store metadata for %s: %w", id, execErr)
		}
	}

	return nil
}
// GetMetadata retrieves all metadata key-value pairs for the given owner ID.
//
// An owner with no metadata yields an empty, non-nil map. On any failure —
// running the query, scanning a row, or an iteration error reported while
// reading rows — it returns a nil map and a wrapped error, so a caller never
// receives a partially filled map.
func GetMetadata(ctx context.Context, tx *sql.Tx, id string) (core.Metadata, error) {
	rows, err := tx.QueryContext(ctx,
		`SELECT mkey, contents, type FROM metadata WHERE id=?`, id)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to retrieve metadata for %s: %w", id, err)
	}
	defer func() { _ = rows.Close() }()

	metadata := core.Metadata{}
	for rows.Next() {
		var key, contents, ctype string
		if err := rows.Scan(&key, &contents, &ctype); err != nil {
			return nil, fmt.Errorf("artifacts: failed to scan metadata row: %w", err)
		}
		metadata[key] = core.Value{Contents: contents, Type: ctype}
	}
	// rows.Next also stops on an iteration failure; surface it.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("artifacts: failed to read metadata for %s: %w", id, err)
	}
	return metadata, nil
}

70
artifacts/publisher.go Normal file
View File

@@ -0,0 +1,70 @@
package artifacts
import (
"context"
"database/sql"
"errors"
"fmt"
"git.wntrmute.dev/kyle/exo/core"
)
// Publisher represents a publishing entity.
//
// Store deduplicates on the (Name, Address) pair: when ID is empty and a row
// with the same name and address already exists, that row's ID is adopted
// instead of inserting a new record.
type Publisher struct {
// ID identifies the publisher row. Store fills it in when it is empty.
ID string
Name string
Address string
}
// findPublisher returns the ID of the publisher whose name and address both
// match exactly. The scan error is passed through unwrapped, so a caller can
// detect "no such publisher" by testing for sql.ErrNoRows.
func findPublisher(ctx context.Context, tx *sql.Tx, name, address string) (string, error) {
	const lookup = `SELECT id FROM publishers WHERE name=? AND address=?`

	var publisherID string
	if scanErr := tx.QueryRowContext(ctx, lookup, name, address).Scan(&publisherID); scanErr != nil {
		return "", scanErr
	}
	return publisherID, nil
}
// Store writes the publisher inside tx. When the publisher has no ID, Store
// first looks for an existing record with the same name and address and, if
// one is found, adopts its ID and returns without inserting. Otherwise a
// fresh ID is generated and a new row is inserted. A publisher that already
// carries an ID is inserted as-is.
func (p *Publisher) Store(ctx context.Context, tx *sql.Tx) error {
	if p.ID == "" {
		existingID, lookupErr := findPublisher(ctx, tx, p.Name, p.Address)
		switch {
		case lookupErr == nil:
			p.ID = existingID
			return nil
		case !errors.Is(lookupErr, sql.ErrNoRows):
			return fmt.Errorf("artifacts: failed to look up publisher: %w", lookupErr)
		}
		// No match: this is a new publisher.
		p.ID = core.NewUUID()
	}

	const insertPublisher = `INSERT INTO publishers (id, name, address) VALUES (?, ?, ?)`
	if _, insertErr := tx.ExecContext(ctx, insertPublisher, p.ID, p.Name, p.Address); insertErr != nil {
		return fmt.Errorf("artifacts: failed to store publisher: %w", insertErr)
	}
	return nil
}
// Get loads the publisher's name and address by p.ID. The boolean reports
// whether a matching row was found; a missing row yields (false, nil). An
// empty ID yields an error wrapping core.ErrNoID, and any other lookup
// failure is returned wrapped.
func (p *Publisher) Get(ctx context.Context, tx *sql.Tx) (bool, error) {
	if p.ID == "" {
		return false, fmt.Errorf("artifacts: publisher missing ID: %w", core.ErrNoID)
	}

	const byID = `SELECT name, address FROM publishers WHERE id=?`
	scanErr := tx.QueryRowContext(ctx, byID, p.ID).Scan(&p.Name, &p.Address)
	switch {
	case scanErr == nil:
		return true, nil
	case errors.Is(scanErr, sql.ErrNoRows):
		return false, nil
	default:
		return false, fmt.Errorf("artifacts: failed to look up publisher: %w", scanErr)
	}
}

145
artifacts/snapshot.go Normal file
View File

@@ -0,0 +1,145 @@
package artifacts
import (
"context"
"database/sql"
"fmt"
"time"
"git.wntrmute.dev/kyle/exo/blob"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
)
// MIME makes explicit where a MIME type is expected. It is a plain string
// type; the value is written to and read from the blobs.format column
// unchanged.
type MIME string
// BlobRef is a reference to a blob in the content-addressable store.
type BlobRef struct {
// SnapshotID is the owning snapshot. Snapshot.Store overwrites it with the
// snapshot's own ID before the reference is stored.
SnapshotID string
// ID is set by BlobRef.Store to the value the blob store returns when Data
// is written.
ID string // SHA256 hash
Format MIME
Data []byte // in-memory content (nil when loaded from DB)
}
// Store writes the blob's content to the blob store and records the
// reference in the blobs table.
//
// The content is written only when both Data and store are non-nil; in that
// case ID is replaced with the identifier the store returns. The database
// row is inserted either way, using whatever ID the reference then holds.
func (b *BlobRef) Store(ctx context.Context, tx *sql.Tx, store *blob.Store) error {
	if haveData, haveStore := b.Data != nil, store != nil; haveData && haveStore {
		contentID, writeErr := store.Write(b.Data)
		if writeErr != nil {
			return fmt.Errorf("artifacts: failed to write blob to store: %w", writeErr)
		}
		b.ID = contentID
	}

	const insertBlob = `INSERT INTO blobs (snapshot_id, id, format) VALUES (?, ?, ?)`
	if _, insertErr := tx.ExecContext(ctx, insertBlob, b.SnapshotID, b.ID, string(b.Format)); insertErr != nil {
		return fmt.Errorf("artifacts: failed to store blob ref: %w", insertErr)
	}
	return nil
}
// Snapshot represents content at a specific point in time or format.
type Snapshot struct {
// ArtifactID is the ID of the artifact this snapshot belongs to.
ArtifactID string
// ID identifies the snapshot; Get looks the snapshot up by this value.
ID string
// StoreDate is persisted as Unix seconds, so sub-second precision is not
// kept across a Store/Get round trip.
StoreDate time.Time
// Datetime is persisted through db.ToDBTime.
Datetime time.Time
// Citation is optional for Store: a nil Citation is recorded as an empty
// citation ID.
Citation *Citation
Source string
// Blobs maps a MIME type to the blob reference holding that format.
Blobs map[MIME]*BlobRef
// Metadata is persisted through StoreMetadata, keyed by ID.
Metadata core.Metadata
}
// Store writes the snapshot, its metadata, and its blobs inside tx, sending
// blob content to store.
//
// The citation, when present, is stored first; a snapshot without a citation
// records an empty citation ID. The snapshot row is inserted before the
// metadata and blob rows that refer to it. Each blob reference has its
// SnapshotID set to this snapshot's ID before being stored.
func (snap *Snapshot) Store(ctx context.Context, tx *sql.Tx, store *blob.Store) error {
	var citationID string
	if cite := snap.Citation; cite != nil {
		if citeErr := cite.Store(ctx, tx); citeErr != nil {
			return fmt.Errorf("artifacts: failed to store snapshot citation: %w", citeErr)
		}
		citationID = cite.ID
	}

	// The snapshot row precedes the blob and metadata rows that refer to it.
	const insertSnapshot = `INSERT INTO artifact_snapshots (artifact_id, id, stored_at, datetime, citation_id, source) VALUES (?, ?, ?, ?, ?, ?)`
	if _, rowErr := tx.ExecContext(ctx, insertSnapshot,
		snap.ArtifactID, snap.ID, snap.StoreDate.Unix(), db.ToDBTime(snap.Datetime), citationID, snap.Source); rowErr != nil {
		return fmt.Errorf("artifacts: failed to store snapshot: %w", rowErr)
	}

	if metaErr := StoreMetadata(ctx, tx, snap.ID, snap.Metadata); metaErr != nil {
		return fmt.Errorf("artifacts: failed to store snapshot metadata: %w", metaErr)
	}

	for _, ref := range snap.Blobs {
		ref.SnapshotID = snap.ID
		if blobErr := ref.Store(ctx, tx, store); blobErr != nil {
			return fmt.Errorf("artifacts: failed to store snapshot blob: %w", blobErr)
		}
	}

	return nil
}
// Get loads the snapshot identified by snap.ID from tx, including its
// citation, metadata, and blob references. Blob content is not loaded; each
// BlobRef comes back with a nil Data field.
//
// It returns an error wrapping core.ErrNoID when snap.ID is empty. When no
// snapshot row matches, the returned error wraps sql.ErrNoRows.
//
// Snapshot.Store records an empty citation ID for a snapshot that has no
// citation. Get mirrors that: when the stored citation ID is empty the
// citation lookup is skipped and Citation is left as an empty, non-nil
// Citation, rather than failing on the missing ID.
//
// Every failure is wrapped with an "artifacts:" message naming the step that
// failed. On error the receiver may be left partially populated.
func (snap *Snapshot) Get(ctx context.Context, tx *sql.Tx) error {
	if snap.ID == "" {
		return fmt.Errorf("artifacts: snapshot missing ID: %w", core.ErrNoID)
	}

	snap.Citation = &Citation{}
	var datetime string
	var stored int64
	row := tx.QueryRowContext(ctx,
		`SELECT artifact_id, stored_at, datetime, citation_id, source FROM artifact_snapshots WHERE id=?`,
		snap.ID)
	err := row.Scan(&snap.ArtifactID, &stored, &datetime, &snap.Citation.ID, &snap.Source)
	if err != nil {
		return fmt.Errorf("artifacts: failed to retrieve snapshot: %w", err)
	}

	// stored_at holds whole Unix seconds.
	snap.StoreDate = time.Unix(stored, 0)
	snap.Datetime, err = db.FromDBTime(datetime, nil)
	if err != nil {
		return fmt.Errorf("artifacts: failed to parse snapshot datetime: %w", err)
	}

	// An empty citation ID means the snapshot was stored without a citation;
	// Citation.Get would reject it, so there is nothing to load.
	if snap.Citation.ID != "" {
		if err := snap.Citation.Get(ctx, tx); err != nil {
			return fmt.Errorf("artifacts: failed to retrieve snapshot citation: %w", err)
		}
	}

	snap.Metadata, err = GetMetadata(ctx, tx, snap.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to retrieve snapshot metadata: %w", err)
	}

	// Load blob references.
	snap.Blobs = map[MIME]*BlobRef{}
	rows, err := tx.QueryContext(ctx,
		`SELECT id, format FROM blobs WHERE snapshot_id=?`, snap.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to retrieve snapshot blobs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	for rows.Next() {
		var id, format string
		if err := rows.Scan(&id, &format); err != nil {
			return fmt.Errorf("artifacts: failed to scan blob: %w", err)
		}
		snap.Blobs[MIME(format)] = &BlobRef{
			SnapshotID: snap.ID,
			ID:         id,
			Format:     MIME(format),
		}
	}
	// rows.Next also stops on an iteration failure; surface it.
	if err := rows.Err(); err != nil {
		return fmt.Errorf("artifacts: failed to read snapshot blobs: %w", err)
	}
	return nil
}

205
artifacts/tagcat.go Normal file
View File

@@ -0,0 +1,205 @@
package artifacts
import (
"context"
"database/sql"
"errors"
"fmt"
"sort"
"strings"
"git.wntrmute.dev/kyle/exo/core"
)
// GetTag looks up the ID stored for the given tag string. A tag that does
// not exist yields an empty ID and a nil error; any other lookup failure is
// returned wrapped.
func GetTag(ctx context.Context, tx *sql.Tx, tag string) (string, error) {
	var tagID string
	scanErr := tx.QueryRowContext(ctx, `SELECT id FROM tags WHERE tag=?`, tag).Scan(&tagID)
	switch {
	case scanErr == nil:
		return tagID, nil
	case errors.Is(scanErr, sql.ErrNoRows):
		return "", nil
	default:
		return "", fmt.Errorf("artifacts: failed to look up tag %q: %w", tag, scanErr)
	}
}
// CreateTag makes sure a tag with the given string exists. When the tag is
// already present nothing is written; otherwise a row with a freshly
// generated ID is inserted.
func CreateTag(ctx context.Context, tx *sql.Tx, tag string) error {
	existingID, lookupErr := GetTag(ctx, tx, tag)
	switch {
	case lookupErr != nil:
		return fmt.Errorf("artifacts: creating tag failed: %w", lookupErr)
	case existingID != "":
		// Already there: nothing to do.
		return nil
	}

	if _, insertErr := tx.ExecContext(ctx,
		`INSERT INTO tags (id, tag) VALUES (?, ?)`, core.NewUUID(), tag); insertErr != nil {
		return fmt.Errorf("artifacts: creating tag %q failed: %w", tag, insertErr)
	}
	return nil
}
// GetAllTags returns every tag string in the tags table, sorted in ascending
// order with sort.Strings.
//
// On any failure — running the query, scanning a row, or an iteration error
// reported while reading rows — it returns a nil slice and a wrapped error,
// so a caller never receives a partial list. When there are no tags it
// returns a nil slice and a nil error.
func GetAllTags(ctx context.Context, tx *sql.Tx) ([]string, error) {
	rows, err := tx.QueryContext(ctx, `SELECT tag FROM tags`)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to get all tags: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var tags []string
	for rows.Next() {
		var tag string
		if err := rows.Scan(&tag); err != nil {
			return nil, fmt.Errorf("artifacts: failed to scan tag: %w", err)
		}
		tags = append(tags, tag)
	}
	// rows.Next also stops on an iteration failure; check before sorting so
	// an incomplete list is never handed back.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("artifacts: failed to read tags: %w", err)
	}

	sort.Strings(tags)
	return tags, nil
}
// GetCategory looks up the ID of the category whose text is category.
//
// A category that is not present in the categories table is not treated as
// an error: the returned ID is the empty string and the error is nil. Any
// other failure is wrapped and returned.
func GetCategory(ctx context.Context, tx *sql.Tx, category string) (string, error) {
	var categoryID string
	err := tx.QueryRowContext(ctx, `SELECT id FROM categories WHERE category=?`, category).Scan(&categoryID)
	switch {
	case err == nil:
		return categoryID, nil
	case errors.Is(err, sql.ErrNoRows):
		// Absence is reported through the empty ID, not through an error.
		return "", nil
	default:
		return "", fmt.Errorf("artifacts: failed to look up category %q: %w", category, err)
	}
}
// CreateCategory ensures a row for category exists in the categories table.
//
// The category is first looked up with GetCategory; when it is already
// present nothing is written. Otherwise a new row is inserted under an ID
// obtained from core.NewUUID.
func CreateCategory(ctx context.Context, tx *sql.Tx, category string) error {
	existing, err := GetCategory(ctx, tx, category)
	if err != nil {
		return fmt.Errorf("artifacts: creating category failed: %w", err)
	}
	if existing != "" {
		// Already present: creating it again is a no-op.
		return nil
	}

	newID := core.NewUUID()
	if _, err = tx.ExecContext(ctx, `INSERT INTO categories (id, category) VALUES (?, ?)`, newID, category); err != nil {
		return fmt.Errorf("artifacts: creating category %q failed: %w", category, err)
	}
	return nil
}
// GetAllCategories lists every category string stored in the categories
// table, sorted in increasing order with sort.Strings semantics. When the
// table is empty the returned slice is nil.
func GetAllCategories(ctx context.Context, tx *sql.Tx) ([]string, error) {
	rows, err := tx.QueryContext(ctx, `SELECT category FROM categories`)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to get all categories: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var names []string
	for rows.Next() {
		var name string
		if scanErr := rows.Scan(&name); scanErr != nil {
			return nil, fmt.Errorf("artifacts: failed to scan category: %w", scanErr)
		}
		names = append(names, name)
	}

	// Sorting happens in Go rather than in SQL, so ordering is byte-wise.
	sort.Sort(sort.StringSlice(names))
	return names, rows.Err()
}
// tagsFromTagIDs resolves a list of tag UUIDs to their string values.
//
// The result is a set (every value is true) holding the tag string of each
// row in the tags table whose id appears in idList. IDs that match no row
// contribute nothing, so the set may be smaller than idList. An empty idList
// yields an empty, non-nil map without touching the database.
//
// Query, scan, and iteration failures are returned as errors; query and scan
// errors are wrapped with an "artifacts:" prefix.
func tagsFromTagIDs(ctx context.Context, tx *sql.Tx, idList []string) (map[string]bool, error) {
	if len(idList) == 0 {
		return map[string]bool{}, nil
	}

	// One "?" placeholder per ID; the IDs themselves are always bound as
	// arguments and never spliced into the SQL text.
	placeholders := make([]string, len(idList))
	args := make([]any, len(idList))
	for i, id := range idList {
		placeholders[i] = "?"
		args[i] = id
	}

	query := `SELECT tag FROM tags WHERE id IN (` + strings.Join(placeholders, ",") + `)` //nolint:gosec // placeholders are literal "?" strings, not user input
	rows, err := tx.QueryContext(ctx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to resolve tag IDs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	tags := map[string]bool{}
	for rows.Next() {
		var tag string
		if err := rows.Scan(&tag); err != nil {
			// Wrapped so the failure is attributable to this package, as the
			// other query helpers in this file do for their scan errors.
			return nil, fmt.Errorf("artifacts: failed to scan tag: %w", err)
		}
		tags[tag] = true
	}
	return tags, rows.Err()
}
// categoriesFromCategoryIDs resolves a list of category UUIDs to their
// string values.
//
// The result is a set (every value is true) holding the category string of
// each row in the categories table whose id appears in idList. IDs that match
// no row contribute nothing, so the set may be smaller than idList. An empty
// idList yields an empty, non-nil map without touching the database.
//
// Query, scan, and iteration failures are returned as errors; query and scan
// errors are wrapped with an "artifacts:" prefix.
func categoriesFromCategoryIDs(ctx context.Context, tx *sql.Tx, idList []string) (map[string]bool, error) {
	if len(idList) == 0 {
		return map[string]bool{}, nil
	}

	// One "?" placeholder per ID; the IDs themselves are always bound as
	// arguments and never spliced into the SQL text.
	placeholders := make([]string, len(idList))
	args := make([]any, len(idList))
	for i, id := range idList {
		placeholders[i] = "?"
		args[i] = id
	}

	query := `SELECT category FROM categories WHERE id IN (` + strings.Join(placeholders, ",") + `)` //nolint:gosec // placeholders are literal "?" strings, not user input
	rows, err := tx.QueryContext(ctx, query, args...)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to resolve category IDs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	categories := map[string]bool{}
	for rows.Next() {
		var category string
		if err := rows.Scan(&category); err != nil {
			// Wrapped so the failure is attributable to this package, as the
			// other query helpers in this file do for their scan errors.
			return nil, fmt.Errorf("artifacts: failed to scan category: %w", err)
		}
		categories[category] = true
	}
	return categories, rows.Err()
}
// GetArtifactIDsForTag lists the IDs of the artifacts linked to tag through
// the artifact_tags table, in the order the database returns them.
//
// The tag text is first resolved to its ID with GetTag. An unknown tag is not
// an error: both the slice and the error are nil. A known tag with no linked
// artifacts likewise yields a nil slice.
func GetArtifactIDsForTag(ctx context.Context, tx *sql.Tx, tag string) ([]string, error) {
	tagID, err := GetTag(ctx, tx, tag)
	switch {
	case err != nil:
		return nil, fmt.Errorf("artifacts: failed to look up tag ID: %w", err)
	case tagID == "":
		// Unknown tag: nothing can reference it.
		return nil, nil
	}

	rows, err := tx.QueryContext(ctx, `SELECT artifact_id FROM artifact_tags WHERE tag_id=?`, tagID)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to get artifact IDs for tag: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var artifactIDs []string
	for rows.Next() {
		var artifactID string
		if scanErr := rows.Scan(&artifactID); scanErr != nil {
			return nil, fmt.Errorf("artifacts: failed to scan artifact ID: %w", scanErr)
		}
		artifactIDs = append(artifactIDs, artifactID)
	}
	return artifactIDs, rows.Err()
}

209
artifacts/yaml.go Normal file
View File

@@ -0,0 +1,209 @@
package artifacts
import (
"fmt"
"os"
"time"
"git.wntrmute.dev/kyle/exo/blob"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
"gopkg.in/yaml.v3"
)
// MetadataYAML is the YAML representation of metadata entries: a sequence of
// mappings, one per entry. ToStd turns it into a core.Metadata keyed by each
// entry's Key.
type MetadataYAML []struct {
// Key names the entry; it becomes the map key in core.Metadata.
Key string `yaml:"key"`
// Contents is copied into core.Value.Contents.
Contents string `yaml:"contents"`
// Type is copied into core.Value.Type.
Type string `yaml:"type"`
}
// ToStd builds a core.Metadata from the YAML entries, keyed by each entry's
// Key. When two entries share a key the later one wins. A nil or empty
// receiver produces an empty, non-nil core.Metadata.
func (my MetadataYAML) ToStd() core.Metadata {
	converted := make(core.Metadata, len(my))
	for i := range my {
		converted[my[i].Key] = core.Value{
			Contents: my[i].Contents,
			Type:     my[i].Type,
		}
	}
	return converted
}
// CitationYAML is the YAML representation of a citation. CitationYAML.ToStd
// converts it into a Citation.
type CitationYAML struct {
// ID, DOI, Title, and Year are copied unchanged into the Citation.
ID string `yaml:"id"`
DOI string `yaml:"doi"`
Title string `yaml:"title"`
Year int `yaml:"year"`
// Published is a timestamp string parsed with db.FromDBTime; an empty
// string leaves Citation.Published at its zero value.
Published string `yaml:"published"`
// Authors is copied unchanged into the Citation.
Authors []string `yaml:"authors"`
// Publisher is decoded directly into the package's Publisher type and the
// pointer is passed through as-is.
Publisher *Publisher `yaml:"publisher"`
// Source and Abstract are copied unchanged into the Citation.
Source string `yaml:"source"`
Abstract string `yaml:"abstract"`
// Metadata is converted with MetadataYAML.ToStd.
Metadata MetadataYAML `yaml:"metadata"`
}
// ToStd turns the YAML form into a Citation.
//
// A nil receiver yields (nil, nil), which lets callers convert an absent
// citation without a separate check. The Published string is parsed with
// db.FromDBTime only when it is non-empty; a parse failure is returned as a
// wrapped error and no Citation is returned.
func (cy *CitationYAML) ToStd() (*Citation, error) {
	if cy == nil {
		return nil, nil
	}

	cite := new(Citation)
	cite.ID = cy.ID
	cite.DOI = cy.DOI
	cite.Title = cy.Title
	cite.Year = cy.Year
	cite.Authors = cy.Authors
	cite.Publisher = cy.Publisher
	cite.Source = cy.Source
	cite.Abstract = cy.Abstract
	cite.Metadata = cy.Metadata.ToStd()

	// No publication date supplied: leave Published at its zero value.
	if cy.Published == "" {
		return cite, nil
	}

	published, err := db.FromDBTime(cy.Published, nil)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to parse citation published date: %w", err)
	}
	cite.Published = published
	return cite, nil
}
// BlobHeaderYAML is the YAML representation of a blob reference: it names a
// file on disk rather than embedding the blob's bytes.
type BlobHeaderYAML struct {
// Format is converted to a MIME value and used as the key in Snapshot.Blobs.
Format string `yaml:"format"`
// Path is the file that SnapshotYAML.ToStd reads with os.ReadFile to obtain
// the blob's data.
Path string `yaml:"path"`
}
// SnapshotYAML is the YAML representation of a snapshot. SnapshotYAML.ToStd
// converts it into a Snapshot.
type SnapshotYAML struct {
// ID is the snapshot ID; it is also recorded as SnapshotID on each BlobRef.
ID string `yaml:"id"`
// StoreDate is read from the "stored" key and interpreted as Unix seconds
// (ToStd converts it with time.Unix(StoreDate, 0)).
StoreDate int64 `yaml:"stored"`
// Datetime is a timestamp string parsed with db.FromDBTime. Unlike
// CitationYAML.Published it is parsed unconditionally.
Datetime string `yaml:"datetime"`
// Citation is optional; when nil the snapshot takes the parent citation
// passed to ToStd.
Citation *CitationYAML `yaml:"citation"`
// Source is copied unchanged into the Snapshot.
Source string `yaml:"source"`
// Blobs lists the files whose contents become the snapshot's blobs.
Blobs []BlobHeaderYAML `yaml:"blobs"`
// Metadata is converted with MetadataYAML.ToStd.
Metadata MetadataYAML `yaml:"metadata"`
}
// ToStd converts a SnapshotYAML to a Snapshot, reading blob data from files.
//
// artifactID is recorded as the snapshot's ArtifactID. parentCitation is the
// owning artifact's citation: when the snapshot has no citation of its own
// the parent pointer is used directly (the two then share one Citation
// value); otherwise the snapshot's citation is passed the parent through
// Citation.Update.
//
// Every entry in Blobs is read from disk with os.ReadFile and stored under
// its format, with an ID computed by blob.HashData over the file contents.
// If two entries share a format the later one replaces the earlier.
//
// An error is returned when the citation or the snapshot datetime cannot be
// parsed, or when a blob file cannot be read; no Snapshot is returned then.
func (syml SnapshotYAML) ToStd(artifactID string, parentCitation *Citation) (*Snapshot, error) {
	cite, err := syml.Citation.ToStd()
	if err != nil {
		return nil, err
	}

	snap := &Snapshot{
		ArtifactID: artifactID,
		ID:         syml.ID,
		StoreDate:  time.Unix(syml.StoreDate, 0),
		Citation:   cite,
		Source:     syml.Source,
		Blobs:      map[MIME]*BlobRef{},
		Metadata:   syml.Metadata.ToStd(),
	}

	snap.Datetime, err = db.FromDBTime(syml.Datetime, nil)
	if err != nil {
		// Name the snapshot so a bad timestamp in a multi-snapshot import can
		// be located.
		return nil, fmt.Errorf("artifacts: failed to parse datetime for snapshot %q: %w", syml.ID, err)
	}

	// Inherit from parent citation if snapshot citation is nil or partial.
	if snap.Citation == nil {
		snap.Citation = parentCitation
	} else if parentCitation != nil {
		snap.Citation.Update(parentCitation)
	}

	for _, bh := range syml.Blobs {
		data, err := os.ReadFile(bh.Path)
		if err != nil {
			return nil, fmt.Errorf("artifacts: failed to read blob file %q: %w", bh.Path, err)
		}

		format := MIME(bh.Format)
		snap.Blobs[format] = &BlobRef{
			SnapshotID: syml.ID,
			ID:         blob.HashData(data),
			Format:     format,
			Data:       data,
		}
	}

	return snap, nil
}
// ArtifactYAML is the YAML representation of a complete artifact with
// snapshots. ArtifactYAML.ToStd converts it into an Artifact plus its
// Snapshots.
type ArtifactYAML struct {
// ID is the artifact ID; it is also passed to every snapshot as its
// ArtifactID.
ID string `yaml:"id"`
// Type is converted to an ArtifactType without validation.
Type string `yaml:"type"`
// Citation is the artifact-level citation, also handed to each snapshot as
// its parent citation.
Citation *CitationYAML `yaml:"citation"`
// Latest is a timestamp string parsed with db.FromDBTime; an empty string
// leaves Artifact.Latest at its zero value.
Latest string `yaml:"latest"`
// History maps a timestamp string (parsed with db.FromDBTime) to a
// snapshot ID.
History map[string]string `yaml:"history"`
// Tags and Categories are lists that ToStd converts with core.MapFromList.
Tags []string `yaml:"tags"`
Categories []string `yaml:"categories"`
// Metadata is converted with MetadataYAML.ToStd.
Metadata MetadataYAML `yaml:"metadata"`
// Snapshots are converted one by one with SnapshotYAML.ToStd.
Snapshots []SnapshotYAML `yaml:"snapshots"`
}
// ToStd converts an ArtifactYAML to an Artifact and its Snapshots.
//
// The artifact's citation, tags, categories, and metadata are converted with
// their respective helpers. Latest is parsed only when non-empty. Every
// History key is parsed as a timestamp with db.FromDBTime and mapped to its
// snapshot ID. Each entry in Snapshots is converted with SnapshotYAML.ToStd,
// receiving the artifact's ID and citation; the snapshots are returned in
// the order they appear in the YAML, and the slice is nil when there are
// none.
//
// The first conversion failure aborts the whole conversion and is returned
// as the error, with nil for both other results. Timestamp parse failures
// are wrapped to say which field (and, for history, which key) was bad.
func (ayml *ArtifactYAML) ToStd() (*Artifact, []*Snapshot, error) {
	cite, err := ayml.Citation.ToStd()
	if err != nil {
		return nil, nil, err
	}

	art := &Artifact{
		ID:         ayml.ID,
		Type:       ArtifactType(ayml.Type),
		Citation:   cite,
		History:    map[time.Time]string{},
		Tags:       core.MapFromList(ayml.Tags),
		Categories: core.MapFromList(ayml.Categories),
		Metadata:   ayml.Metadata.ToStd(),
	}

	if ayml.Latest != "" {
		art.Latest, err = db.FromDBTime(ayml.Latest, nil)
		if err != nil {
			return nil, nil, fmt.Errorf("artifacts: failed to parse artifact latest timestamp: %w", err)
		}
	}

	for timestamp, id := range ayml.History {
		datetime, err := db.FromDBTime(timestamp, nil)
		if err != nil {
			return nil, nil, fmt.Errorf("artifacts: failed to parse history timestamp %q: %w", timestamp, err)
		}
		art.History[datetime] = id
	}

	var snaps []*Snapshot
	for _, syml := range ayml.Snapshots {
		snap, err := syml.ToStd(ayml.ID, art.Citation)
		if err != nil {
			return nil, nil, err
		}
		snaps = append(snaps, snap)
	}

	return art, snaps, nil
}
// LoadArtifactFromYAML reads the file at path and decodes its contents as
// YAML into an ArtifactYAML.
//
// Read and decode failures are returned as wrapped errors that include the
// path; no ArtifactYAML is returned in either case.
func LoadArtifactFromYAML(path string) (*ArtifactYAML, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // path is a user-provided file for import
	if readErr != nil {
		return nil, fmt.Errorf("artifacts: failed to read YAML file %q: %w", path, readErr)
	}

	var parsed ArtifactYAML
	if parseErr := yaml.Unmarshal(raw, &parsed); parseErr != nil {
		return nil, fmt.Errorf("artifacts: failed to parse YAML file %q: %w", path, parseErr)
	}
	return &parsed, nil
}