Add Phase 2 artifact repository: types, blob store, gRPC service

Build the complete artifact pillar with five packages:
- artifacts: Artifact, Snapshot, Citation, Publisher types with Get/Store
  DB methods, tag/category management, metadata ops, YAML import
- blob: content-addressable store (SHA256, hierarchical dir layout)
- proto: protobuf definitions (common.proto, artifacts.proto) with buf
  linting and code generation
- server: gRPC ArtifactService implementation (create/get artifacts,
  store/retrieve blobs, manage tags/categories, search by tag)

All FK insertion ordering is correct (parent rows before children).
Full test coverage across artifacts, blob, and server packages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 09:56:34 -07:00
parent bb2c7f7ef3
commit b64177baa8
22 changed files with 5017 additions and 1 deletions

209
artifacts/yaml.go Normal file
View File

@@ -0,0 +1,209 @@
package artifacts
import (
"fmt"
"os"
"time"
"git.wntrmute.dev/kyle/exo/blob"
"git.wntrmute.dev/kyle/exo/core"
"git.wntrmute.dev/kyle/exo/db"
"gopkg.in/yaml.v3"
)
// MetadataYAML is the YAML representation of metadata entries: a list of
// key/contents/type triples. ToStd turns the list into a core.Metadata
// indexed by each entry's Key.
type MetadataYAML []struct {
// Key identifies the entry; ToStd uses it as the index into core.Metadata.
Key string `yaml:"key"`
// Contents is copied unchanged into core.Value.Contents.
Contents string `yaml:"contents"`
// Type is copied unchanged into core.Value.Type.
Type string `yaml:"type"`
}
// ToStd builds a core.Metadata from the YAML entries, indexed by each
// entry's Key. A nil or empty receiver yields an empty core.Metadata. When
// the same key appears more than once, the later entry replaces the earlier.
func (my MetadataYAML) ToStd() core.Metadata {
	out := core.Metadata{}
	// Ranging over a nil slice performs zero iterations, so no explicit nil
	// guard is needed to produce the empty result.
	for i := range my {
		out[my[i].Key] = core.Value{
			Contents: my[i].Contents,
			Type:     my[i].Type,
		}
	}
	return out
}
// CitationYAML is the YAML representation of a citation. ToStd converts it
// into a Citation, copying every field directly except Published (parsed
// into a timestamp) and Metadata (converted via MetadataYAML.ToStd).
type CitationYAML struct {
ID string `yaml:"id"`
DOI string `yaml:"doi"`
Title string `yaml:"title"`
Year int `yaml:"year"`
// Published is a timestamp string parsed with db.FromDBTime by ToStd.
// An empty string is skipped and leaves Citation.Published at its zero value.
Published string `yaml:"published"`
Authors []string `yaml:"authors"`
// Publisher is decoded straight into the package's Publisher type; there
// is no YAML-specific intermediate for it.
Publisher *Publisher `yaml:"publisher"`
Source string `yaml:"source"`
Abstract string `yaml:"abstract"`
Metadata MetadataYAML `yaml:"metadata"`
}
// ToStd converts the YAML form into a Citation.
//
// A nil receiver returns (nil, nil), which lets callers invoke it on an
// optional citation field without checking for nil first. The Published
// string is parsed with db.FromDBTime only when it is non-empty; a parse
// failure is returned wrapped, and no Citation is returned in that case.
func (cy *CitationYAML) ToStd() (*Citation, error) {
	if cy == nil {
		return nil, nil
	}

	result := new(Citation)
	result.ID = cy.ID
	result.DOI = cy.DOI
	result.Title = cy.Title
	result.Year = cy.Year
	result.Authors = cy.Authors
	result.Publisher = cy.Publisher
	result.Source = cy.Source
	result.Abstract = cy.Abstract
	result.Metadata = cy.Metadata.ToStd()

	// No publication date supplied: leave Published at its zero value.
	if cy.Published == "" {
		return result, nil
	}

	published, err := db.FromDBTime(cy.Published, nil)
	if err != nil {
		return nil, fmt.Errorf("artifacts: failed to parse citation published date: %w", err)
	}
	result.Published = published

	return result, nil
}
// BlobHeaderYAML is the YAML representation of a blob reference: a format
// plus the path of the file holding the blob's contents.
type BlobHeaderYAML struct {
// Format is converted to a MIME value by SnapshotYAML.ToStd and used as the
// key into Snapshot.Blobs.
Format string `yaml:"format"`
// Path is the file that SnapshotYAML.ToStd reads with os.ReadFile to obtain
// the blob data.
Path string `yaml:"path"`
}
// SnapshotYAML is the YAML representation of a snapshot. ToStd converts it
// into a Snapshot and loads the data for every listed blob from disk.
type SnapshotYAML struct {
ID string `yaml:"id"`
// StoreDate appears under the YAML key "stored". ToStd passes it to
// time.Unix as the seconds argument, so it is treated as Unix seconds.
StoreDate int64 `yaml:"stored"`
// Datetime is a timestamp string that ToStd always parses with
// db.FromDBTime (there is no empty-string shortcut for this field).
Datetime string `yaml:"datetime"`
// Citation is optional. When absent, ToStd uses the parent citation it is
// given; when present, ToStd calls Update on it with the parent citation.
Citation *CitationYAML `yaml:"citation"`
Source string `yaml:"source"`
Blobs []BlobHeaderYAML `yaml:"blobs"`
Metadata MetadataYAML `yaml:"metadata"`
}
// ToStd converts a SnapshotYAML to a Snapshot owned by artifactID, reading
// the contents of every listed blob from the file named by its Path.
//
// Citation handling:
//   - If the snapshot has no citation of its own, the returned Snapshot
//     points at parentCitation itself (the pointer is shared, not copied).
//   - If it has one and parentCitation is non-nil, Update is called on the
//     snapshot's citation with parentCitation. Update is defined elsewhere;
//     presumably it fills in fields the snapshot citation leaves blank —
//     confirm against Citation.Update.
//
// Each blob's ID is blob.HashData of the file contents. Blobs are keyed by
// format, so if two entries share a format the later one replaces the
// earlier.
//
// An error is returned if the citation or the snapshot datetime cannot be
// parsed, or if any blob file cannot be read; no Snapshot is returned then.
func (syml SnapshotYAML) ToStd(artifactID string, parentCitation *Citation) (*Snapshot, error) {
	// Safe even when syml.Citation is nil: CitationYAML.ToStd returns
	// (nil, nil) for a nil receiver.
	cite, err := syml.Citation.ToStd()
	if err != nil {
		return nil, err
	}

	snap := &Snapshot{
		ArtifactID: artifactID,
		ID:         syml.ID,
		StoreDate:  time.Unix(syml.StoreDate, 0),
		Citation:   cite,
		Source:     syml.Source,
		Blobs:      map[MIME]*BlobRef{},
		Metadata:   syml.Metadata.ToStd(),
	}

	datetime, err := db.FromDBTime(syml.Datetime, nil)
	if err != nil {
		// Wrap with context (like the other errors in this file) so the
		// failing snapshot can be identified from the message; %w keeps the
		// underlying error available to errors.Is / errors.As.
		return nil, fmt.Errorf("artifacts: failed to parse datetime for snapshot %q: %w", syml.ID, err)
	}
	snap.Datetime = datetime

	// Inherit from parent citation if snapshot citation is nil or partial.
	if snap.Citation == nil {
		snap.Citation = parentCitation
	} else if parentCitation != nil {
		snap.Citation.Update(parentCitation)
	}

	for _, bh := range syml.Blobs {
		data, err := os.ReadFile(bh.Path)
		if err != nil {
			return nil, fmt.Errorf("artifacts: failed to read blob file %q: %w", bh.Path, err)
		}

		format := MIME(bh.Format)
		snap.Blobs[format] = &BlobRef{
			SnapshotID: syml.ID,
			ID:         blob.HashData(data),
			Format:     format,
			Data:       data,
		}
	}

	return snap, nil
}
// ArtifactYAML is the YAML representation of a complete artifact with
// snapshots. ToStd converts it into an Artifact plus one Snapshot per entry
// in Snapshots.
type ArtifactYAML struct {
ID string `yaml:"id"`
// Type is converted to an ArtifactType by ToStd without validation here.
Type string `yaml:"type"`
// Citation is optional; it is also handed to every snapshot as the parent
// citation to inherit from.
Citation *CitationYAML `yaml:"citation"`
// Latest is a timestamp string parsed with db.FromDBTime by ToStd; an empty
// string is skipped.
Latest string `yaml:"latest"`
// History maps a timestamp string (parsed with db.FromDBTime) to an ID
// string. NOTE(review): the IDs are presumably snapshot IDs — confirm
// against Artifact.History.
History map[string]string `yaml:"history"`
// Tags and Categories are converted with core.MapFromList by ToStd.
Tags []string `yaml:"tags"`
Categories []string `yaml:"categories"`
Metadata MetadataYAML `yaml:"metadata"`
Snapshots []SnapshotYAML `yaml:"snapshots"`
}
// ToStd converts an ArtifactYAML to an Artifact and its Snapshots.
//
// The artifact's citation (possibly nil) is passed to every snapshot as the
// parent citation. Latest is parsed only when non-empty. Each History key is
// parsed as a timestamp and becomes a key of Artifact.History; if two keys
// parse to the same time.Time value, one silently replaces the other.
// Snapshots are returned in the order they appear in the YAML; the slice is
// nil when the YAML lists none.
//
// On any failure (citation, latest/history timestamp, or snapshot
// conversion, which includes reading blob files) the error is returned and
// both other results are nil. Timestamp failures are wrapped with the
// offending value so the bad entry can be located in the input file.
func (ayml *ArtifactYAML) ToStd() (*Artifact, []*Snapshot, error) {
	// Safe for a nil Citation: CitationYAML.ToStd returns (nil, nil) then.
	cite, err := ayml.Citation.ToStd()
	if err != nil {
		return nil, nil, err
	}

	art := &Artifact{
		ID:         ayml.ID,
		Type:       ArtifactType(ayml.Type),
		Citation:   cite,
		History:    make(map[time.Time]string, len(ayml.History)),
		Tags:       core.MapFromList(ayml.Tags),
		Categories: core.MapFromList(ayml.Categories),
		Metadata:   ayml.Metadata.ToStd(),
	}

	if ayml.Latest != "" {
		art.Latest, err = db.FromDBTime(ayml.Latest, nil)
		if err != nil {
			return nil, nil, fmt.Errorf("artifacts: failed to parse latest timestamp %q: %w", ayml.Latest, err)
		}
	}

	for timestamp, id := range ayml.History {
		datetime, err := db.FromDBTime(timestamp, nil)
		if err != nil {
			return nil, nil, fmt.Errorf("artifacts: failed to parse history timestamp %q: %w", timestamp, err)
		}
		art.History[datetime] = id
	}

	var snaps []*Snapshot
	for _, syml := range ayml.Snapshots {
		snap, err := syml.ToStd(ayml.ID, art.Citation)
		if err != nil {
			return nil, nil, err
		}
		snaps = append(snaps, snap)
	}

	return art, snaps, nil
}
// LoadArtifactFromYAML reads the file at path and decodes its contents as
// YAML into an ArtifactYAML. Read and decode failures are each returned
// wrapped with the path; no ArtifactYAML is returned in either case.
func LoadArtifactFromYAML(path string) (*ArtifactYAML, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // path is a user-provided file for import
	if readErr != nil {
		return nil, fmt.Errorf("artifacts: failed to read YAML file %q: %w", path, readErr)
	}

	var parsed ArtifactYAML
	if parseErr := yaml.Unmarshal(raw, &parsed); parseErr != nil {
		return nil, fmt.Errorf("artifacts: failed to parse YAML file %q: %w", path, parseErr)
	}

	return &parsed, nil
}