Add Phase 2 artifact repository: types, blob store, gRPC service

Build the complete artifact pillar across the following packages:
- artifacts: Artifact, Snapshot, Citation, Publisher types with Get/Store
  DB methods, tag/category management, metadata ops, YAML import
- blob: content-addressable store (SHA256, hierarchical dir layout)
- proto: protobuf definitions (common.proto, artifacts.proto) with buf
  linting and code generation
- server: gRPC ArtifactService implementation (create/get artifacts,
  store/retrieve blobs, manage tags/categories, search by tag)

All FK insertion ordering is correct (parent rows before children).
Full test coverage across artifacts, blob, and server packages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 09:56:34 -07:00
parent bb2c7f7ef3
commit b64177baa8
22 changed files with 5017 additions and 1 deletions

212
artifacts/artifact.go Normal file
View File

@@ -0,0 +1,212 @@
// Package artifacts implements the artifact repository pillar — storing,
// retrieving, and managing source documents (PDFs, papers, webpages, etc.)
// with bibliographic metadata, versioned snapshots, and content-addressable
// blob storage.
package artifacts
import (
"context"
"database/sql"
"fmt"
"time"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
)
// ArtifactType names the kind of source an Artifact represents. It is a
// plain string type: Store writes the value to the artifacts.type column
// unchanged, and Get converts the stored string back without validating it
// against the constants below.
type ArtifactType string

// Predefined ArtifactType values. Each constant's string value is exactly
// what is persisted.
const (
	ArtifactTypeUnknown = ArtifactType("Unknown")
	ArtifactTypeCustom  = ArtifactType("Custom")
	ArtifactTypeArticle = ArtifactType("Article")
	ArtifactTypeBook    = ArtifactType("Book")
	ArtifactTypeURL     = ArtifactType("URL")
	ArtifactTypePaper   = ArtifactType("Paper")
	ArtifactTypeVideo   = ArtifactType("Video")
	ArtifactTypeImage   = ArtifactType("Image")
)
// Artifact is the top-level container for a knowledge source.
//
// Store persists every field below; Get rehydrates them starting from ID.
type Artifact struct {
// ID identifies the artifact row. Get requires it to be non-empty.
ID string
// Type is the kind of source; it is persisted as its string value.
Type ArtifactType
// Citation is the associated citation record. Store rejects a nil
// Citation, and Get always replaces it with a freshly loaded one.
Citation *Citation
// Latest is a timestamp persisted in the artifacts.latest column.
// NOTE(review): presumably the time of the most recent snapshot — confirm.
Latest time.Time
// History has one entry per artifacts_history row for this artifact.
History map[time.Time]string // datetime -> snapshot ID
// Tags is keyed by tag name. Store resolves each key to a tag ID and
// links it; it ranges over keys only, so the bool value is not consulted.
Tags map[string]bool
// Categories is keyed by category name and is handled the same way as
// Tags: keys are resolved to category IDs, values are not consulted.
Categories map[string]bool
// Metadata is written with StoreMetadata and read with GetMetadata,
// both keyed by the artifact ID.
Metadata core.Metadata
}
// Store persists an Artifact and all its associations (citation, tags,
// categories, history, metadata) using the supplied transaction.
//
// The artifact must have a non-nil Citation and a non-empty ID; both are
// checked before anything is written. An empty ID yields an error wrapping
// core.ErrNoID, matching Get. Every tag and category named in the maps must
// already exist, otherwise Store fails with an "unknown tag/category" error.
//
// Rows are written in this order: citation, artifact, metadata, history,
// tag links, category links. Store neither commits nor rolls back tx; on
// error the caller decides what to do with the transaction.
func (art *Artifact) Store(ctx context.Context, tx *sql.Tx) error {
	if art.Citation == nil {
		return fmt.Errorf("artifacts: artifact missing citation")
	}
	// Get refuses an empty ID, so a row stored without one could never be
	// loaded back. Reject it here before touching the database.
	if art.ID == "" {
		return fmt.Errorf("artifacts: artifact missing ID: %w", core.ErrNoID)
	}

	if err := art.Citation.Store(ctx, tx); err != nil {
		return fmt.Errorf("artifacts: failed to store artifact citation: %w", err)
	}

	// Insert the artifact row first so FK-dependent rows can reference it.
	_, err := tx.ExecContext(ctx,
		`INSERT INTO artifacts (id, type, citation_id, latest) VALUES (?, ?, ?, ?)`,
		art.ID, string(art.Type), art.Citation.ID, db.ToDBTime(art.Latest))
	if err != nil {
		return fmt.Errorf("artifacts: failed to store artifact: %w", err)
	}

	if err := StoreMetadata(ctx, tx, art.ID, art.Metadata); err != nil {
		return fmt.Errorf("artifacts: failed to store artifact metadata: %w", err)
	}

	// Store history entries.
	for t, id := range art.History {
		_, err := tx.ExecContext(ctx,
			`INSERT INTO artifacts_history (artifact_id, snapshot_id, datetime) VALUES (?, ?, ?)`,
			art.ID, id, db.ToDBTime(t))
		if err != nil {
			return fmt.Errorf("artifacts: failed to store artifact history: %w", err)
		}
	}

	// Resolve and link tags.
	// NOTE(review): every key is linked regardless of its bool value —
	// confirm that a false entry is not meant to be skipped.
	for tag := range art.Tags {
		tagID, err := GetTag(ctx, tx, tag)
		if err != nil {
			return fmt.Errorf("artifacts: failed to resolve tag %q: %w", tag, err)
		}
		// An empty ID is treated as "tag does not exist".
		if tagID == "" {
			return fmt.Errorf("artifacts: unknown tag %q (create it first)", tag)
		}
		_, err = tx.ExecContext(ctx,
			`INSERT INTO artifact_tags (artifact_id, tag_id) VALUES (?, ?)`,
			art.ID, tagID)
		if err != nil {
			return fmt.Errorf("artifacts: failed to link tag: %w", err)
		}
	}

	// Resolve and link categories (same rules as tags).
	for cat := range art.Categories {
		catID, err := GetCategory(ctx, tx, cat)
		if err != nil {
			return fmt.Errorf("artifacts: failed to resolve category %q: %w", cat, err)
		}
		// An empty ID is treated as "category does not exist".
		if catID == "" {
			return fmt.Errorf("artifacts: unknown category %q (create it first)", cat)
		}
		_, err = tx.ExecContext(ctx,
			`INSERT INTO artifact_categories (artifact_id, category_id) VALUES (?, ?)`,
			art.ID, catID)
		if err != nil {
			return fmt.Errorf("artifacts: failed to link category: %w", err)
		}
	}

	return nil
}
// Get retrieves an Artifact by its ID, hydrating citation, history, tags,
// categories, and metadata.
//
// art.ID must be set; otherwise an error wrapping core.ErrNoID is returned.
// Every failure is wrapped with an "artifacts:" context message using %w, so
// callers can still match the underlying cause with errors.Is / errors.As.
// Fields are filled in as each query succeeds, so art may be partially
// populated when an error is returned.
func (art *Artifact) Get(ctx context.Context, tx *sql.Tx) error {
	if art.ID == "" {
		return fmt.Errorf("artifacts: artifact missing ID: %w", core.ErrNoID)
	}

	// The citation ID is scanned straight into a fresh Citation, which is
	// then loaded in full below.
	art.Citation = &Citation{}
	var latest, artType string
	row := tx.QueryRowContext(ctx,
		`SELECT type, citation_id, latest FROM artifacts WHERE id=?`, art.ID)
	if err := row.Scan(&artType, &art.Citation.ID, &latest); err != nil {
		return fmt.Errorf("artifacts: failed to retrieve artifact: %w", err)
	}
	art.Type = ArtifactType(artType)

	var err error
	art.Latest, err = db.FromDBTime(latest, nil)
	if err != nil {
		return fmt.Errorf("artifacts: failed to parse artifact latest time: %w", err)
	}

	if err := art.Citation.Get(ctx, tx); err != nil {
		return fmt.Errorf("artifacts: failed to load artifact citation: %w", err)
	}

	// Load history.
	art.History = map[time.Time]string{}
	rows, err := tx.QueryContext(ctx,
		`SELECT snapshot_id, datetime FROM artifacts_history WHERE artifact_id=?`, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact history: %w", err)
	}
	// Close error is ignored: iteration failures are reported by rows.Err.
	defer func() { _ = rows.Close() }()
	for rows.Next() {
		var id, datetime string
		if err := rows.Scan(&id, &datetime); err != nil {
			return fmt.Errorf("artifacts: failed to load artifact history: %w", err)
		}
		t, err := db.FromDBTime(datetime, nil)
		if err != nil {
			return fmt.Errorf("artifacts: failed to parse artifact history time: %w", err)
		}
		art.History[t] = id
	}
	if err := rows.Err(); err != nil {
		return fmt.Errorf("artifacts: failed to load artifact history: %w", err)
	}

	// Load tag IDs, then resolve.
	tagIDs, err := artifactLinkIDs(ctx, tx,
		`SELECT tag_id FROM artifact_tags WHERE artifact_id=?`, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact tags: %w", err)
	}
	art.Tags, err = tagsFromTagIDs(ctx, tx, tagIDs)
	if err != nil {
		return fmt.Errorf("artifacts: failed to resolve artifact tags: %w", err)
	}

	// Load category IDs, then resolve.
	catIDs, err := artifactLinkIDs(ctx, tx,
		`SELECT category_id FROM artifact_categories WHERE artifact_id=?`, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact categories: %w", err)
	}
	art.Categories, err = categoriesFromCategoryIDs(ctx, tx, catIDs)
	if err != nil {
		return fmt.Errorf("artifacts: failed to resolve artifact categories: %w", err)
	}

	// Load metadata.
	art.Metadata, err = GetMetadata(ctx, tx, art.ID)
	if err != nil {
		return fmt.Errorf("artifacts: failed to load artifact metadata: %w", err)
	}
	return nil
}

// artifactLinkIDs runs query, which must select a single string column and
// take the artifact ID as its only placeholder, and returns the column values
// in row order. The result set is closed before returning, so the caller can
// issue further queries on tx right away. Errors are returned unwrapped for
// the caller to annotate.
func artifactLinkIDs(ctx context.Context, tx *sql.Tx, query, artifactID string) ([]string, error) {
	rows, err := tx.QueryContext(ctx, query, artifactID)
	if err != nil {
		return nil, err
	}
	// Close error is ignored: iteration failures are reported by rows.Err.
	defer func() { _ = rows.Close() }()

	var ids []string
	for rows.Next() {
		var id string
		if err := rows.Scan(&id); err != nil {
			return nil, err
		}
		ids = append(ids, id)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}