Add Phase 1 foundation: Go module, core types, DB infrastructure, config

Establish the project foundation with three packages:
- core: shared types (Header, Metadata, Value, ObjectType, UUID generation)
- db: SQLite migration framework, connection management (WAL, FK, busy
  timeout), transaction helpers (StartTX/EndTX), time conversion
- config: runtime configuration (DB path, blob store, Minio, gRPC addr)

Includes initial schema migration (001_initial.sql) with 13 tables covering
shared infrastructure, bibliographic data, and artifact repository. Full test
coverage for all packages, strict linting (.golangci.yaml), and Makefile.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-21 09:46:08 -07:00
parent 98990c6d76
commit bb2c7f7ef3
13 changed files with 1038 additions and 4 deletions

159
db/db.go Normal file
View File

@@ -0,0 +1,159 @@
// Package db provides SQLite database management, migration support,
// and transaction helpers for the exo system.
package db
import (
"context"
"database/sql"
"embed"
"fmt"
"log"
"sort"
"strings"
"time"
_ "github.com/mattn/go-sqlite3"
)
//go:embed migrations/*.sql
var migrationsFS embed.FS
const iso8601 = "2006-01-02 15:04:05"
// ToDBTime formats a time.Time as an ISO 8601 UTC string for storage.
func ToDBTime(t time.Time) string {
return t.UTC().Format(iso8601)
}
// FromDBTime parses an ISO 8601 UTC string back to a time.Time.
// If loc is non-nil, the result is converted to that location.
func FromDBTime(datetime string, loc *time.Location) (time.Time, error) {
t, err := time.Parse(iso8601, datetime)
if err != nil {
return t, fmt.Errorf("db: failed to parse time %q: %w", datetime, err)
}
if loc != nil {
t = t.In(loc)
}
return t, nil
}
// Open opens a SQLite database at the given path with standard pragmas.
func Open(path string) (*sql.DB, error) {
db, err := sql.Open("sqlite3", path+"?_journal_mode=WAL&_foreign_keys=ON&_busy_timeout=5000")
if err != nil {
return nil, fmt.Errorf("db: failed to open database %q: %w", path, err)
}
// Verify the connection works.
if err := db.Ping(); err != nil {
_ = db.Close()
return nil, fmt.Errorf("db: failed to ping database %q: %w", path, err)
}
return db, nil
}
// StartTX begins a new database transaction.
func StartTX(ctx context.Context, db *sql.DB) (*sql.Tx, error) {
return db.BeginTx(ctx, nil)
}
// EndTX commits or rolls back a transaction based on the error value.
// If err is non-nil, the transaction is rolled back. Otherwise it is committed.
func EndTX(tx *sql.Tx, err error) error {
if err != nil {
rbErr := tx.Rollback()
if rbErr != nil {
return fmt.Errorf("db: rollback failed (%w) after error: %w", rbErr, err)
}
return err
}
return tx.Commit()
}
// Migrate runs all pending migrations against the database.
// Migrations are embedded SQL files in the migrations/ directory,
// named with a numeric prefix (e.g., 001_initial.sql).
func Migrate(database *sql.DB) error {
// Ensure schema_version table exists for tracking.
_, err := database.Exec(`CREATE TABLE IF NOT EXISTS schema_version (
version INTEGER NOT NULL,
applied TEXT NOT NULL
)`)
if err != nil {
return fmt.Errorf("db: failed to ensure schema_version table: %w", err)
}
currentVersion, err := getCurrentVersion(database)
if err != nil {
return fmt.Errorf("db: failed to get current schema version: %w", err)
}
entries, err := migrationsFS.ReadDir("migrations")
if err != nil {
return fmt.Errorf("db: failed to read migrations directory: %w", err)
}
// Sort migration files by name to ensure order.
sort.Slice(entries, func(i, j int) bool {
return entries[i].Name() < entries[j].Name()
})
for _, entry := range entries {
if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".sql") {
continue
}
var version int
if _, err := fmt.Sscanf(entry.Name(), "%d_", &version); err != nil {
return fmt.Errorf("db: failed to parse migration version from %q: %w", entry.Name(), err)
}
if version <= currentVersion {
continue
}
sqlBytes, err := migrationsFS.ReadFile("migrations/" + entry.Name())
if err != nil {
return fmt.Errorf("db: failed to read migration %q: %w", entry.Name(), err)
}
log.Printf("db: applying migration %d (%s)", version, entry.Name())
tx, err := database.Begin()
if err != nil {
return fmt.Errorf("db: failed to begin migration transaction: %w", err)
}
if _, err := tx.Exec(string(sqlBytes)); err != nil {
_ = tx.Rollback()
return fmt.Errorf("db: migration %d failed: %w", version, err)
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("db: failed to commit migration %d: %w", version, err)
}
log.Printf("db: migration %d applied successfully", version)
}
return nil
}
func getCurrentVersion(database *sql.DB) (int, error) {
var version int
row := database.QueryRow(`SELECT COALESCE(MAX(version), 0) FROM schema_version`)
if err := row.Scan(&version); err != nil {
// Table might not have any rows yet — that's version 0.
return 0, nil
}
return version, nil
}
// DBObject is the interface for types that can be stored in and retrieved from
// the database within a transaction.
type DBObject interface {
Get(ctx context.Context, tx *sql.Tx) error
Store(ctx context.Context, tx *sql.Tx) error
}

203
db/db_test.go Normal file
View File

@@ -0,0 +1,203 @@
package db
import (
"context"
"database/sql"
"errors"
"os"
"path/filepath"
"testing"
"time"
)
func tempDB(t *testing.T) string {
t.Helper()
dir := t.TempDir()
return filepath.Join(dir, "test.db")
}
func mustOpen(t *testing.T) *sql.DB {
t.Helper()
path := tempDB(t)
database, err := Open(path)
if err != nil {
t.Fatalf("Open failed: %v", err)
}
t.Cleanup(func() { _ = database.Close() })
return database
}
func mustOpenAndMigrate(t *testing.T) *sql.DB {
t.Helper()
database := mustOpen(t)
if err := Migrate(database); err != nil {
t.Fatalf("Migrate failed: %v", err)
}
return database
}
func TestOpenAndPing(t *testing.T) {
database := mustOpen(t)
if err := database.Ping(); err != nil {
t.Fatalf("Ping failed: %v", err)
}
}
func TestOpenCreatesFile(t *testing.T) {
path := tempDB(t)
database, err := Open(path)
if err != nil {
t.Fatalf("Open failed: %v", err)
}
t.Cleanup(func() { _ = database.Close() })
if _, err := os.Stat(path); os.IsNotExist(err) {
t.Fatal("database file was not created")
}
}
func TestMigrate(t *testing.T) {
database := mustOpenAndMigrate(t)
tables := []string{
"metadata", "tags", "categories", "publishers", "citations",
"authors", "artifacts", "artifact_tags", "artifact_categories",
"artifacts_history", "artifact_snapshots", "blobs", "schema_version",
}
for _, table := range tables {
var name string
row := database.QueryRow(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`, table)
if err := row.Scan(&name); err != nil {
t.Errorf("table %q not found after migration: %v", table, err)
}
}
}
func TestMigrateIdempotent(t *testing.T) {
database := mustOpenAndMigrate(t)
if err := Migrate(database); err != nil {
t.Fatalf("second Migrate failed: %v", err)
}
}
func TestStartTXAndEndTX(t *testing.T) {
database := mustOpenAndMigrate(t)
ctx := context.Background()
tx, err := StartTX(ctx, database)
if err != nil {
t.Fatalf("StartTX failed: %v", err)
}
_, err = tx.ExecContext(ctx, `INSERT INTO tags (id, tag) VALUES ('test-id', 'test-tag')`)
if err != nil {
t.Fatalf("INSERT failed: %v", err)
}
if err := EndTX(tx, nil); err != nil {
t.Fatalf("EndTX (commit) failed: %v", err)
}
var tag string
row := database.QueryRow(`SELECT tag FROM tags WHERE id='test-id'`)
if err := row.Scan(&tag); err != nil {
t.Fatalf("committed row not found: %v", err)
}
if tag != "test-tag" {
t.Fatalf("expected 'test-tag', got %q", tag)
}
}
func TestEndTXRollback(t *testing.T) {
database := mustOpenAndMigrate(t)
ctx := context.Background()
tx, err := StartTX(ctx, database)
if err != nil {
t.Fatalf("StartTX failed: %v", err)
}
_, err = tx.ExecContext(ctx, `INSERT INTO tags (id, tag) VALUES ('rollback-id', 'rollback-tag')`)
if err != nil {
t.Fatalf("INSERT failed: %v", err)
}
simErr := context.DeadlineExceeded
if err := EndTX(tx, simErr); !errors.Is(err, simErr) {
t.Fatalf("EndTX should return the original error, got: %v", err)
}
var tag string
row := database.QueryRow(`SELECT tag FROM tags WHERE id='rollback-id'`)
if err := row.Scan(&tag); err == nil {
t.Fatal("rolled-back row should not be found")
}
}
func TestToDBTimeAndFromDBTime(t *testing.T) {
original := time.Date(2024, 6, 15, 14, 30, 0, 0, time.UTC)
s := ToDBTime(original)
if s != "2024-06-15 14:30:00" {
t.Fatalf("unexpected time string: %q", s)
}
parsed, err := FromDBTime(s, nil)
if err != nil {
t.Fatalf("FromDBTime failed: %v", err)
}
if !parsed.Equal(original) {
t.Fatalf("round-trip failed: got %v, want %v", parsed, original)
}
}
func TestFromDBTimeWithLocation(t *testing.T) {
s := "2024-06-15 14:30:00"
loc, err := time.LoadLocation("America/New_York")
if err != nil {
t.Skipf("timezone not available: %v", err)
}
parsed, err := FromDBTime(s, loc)
if err != nil {
t.Fatalf("FromDBTime failed: %v", err)
}
if parsed.Location() != loc {
t.Fatalf("expected location %v, got %v", loc, parsed.Location())
}
}
func TestFromDBTimeInvalid(t *testing.T) {
_, err := FromDBTime("not-a-date", nil)
if err == nil {
t.Fatal("expected error for invalid time string")
}
}
func TestForeignKeysEnabled(t *testing.T) {
database := mustOpen(t)
var fk int
row := database.QueryRow(`PRAGMA foreign_keys`)
if err := row.Scan(&fk); err != nil {
t.Fatalf("PRAGMA foreign_keys failed: %v", err)
}
if fk != 1 {
t.Fatalf("foreign keys should be enabled, got %d", fk)
}
}
func TestSchemaVersion(t *testing.T) {
database := mustOpenAndMigrate(t)
version, err := getCurrentVersion(database)
if err != nil {
t.Fatalf("getCurrentVersion failed: %v", err)
}
if version != 1 {
t.Fatalf("expected schema version 1, got %d", version)
}
}

View File

@@ -0,0 +1,128 @@
-- Migration 001: Initial schema
-- Shared infrastructure, bibliographic tables, and artifact repository.
-- Polymorphic key-value metadata. The id column references any object's UUID.
CREATE TABLE IF NOT EXISTS metadata
(
id TEXT NOT NULL,
mkey TEXT NOT NULL,
contents TEXT NOT NULL,
type TEXT NOT NULL,
PRIMARY KEY (mkey, contents, type),
UNIQUE (id, mkey)
);
CREATE INDEX IF NOT EXISTS idx_metadata_id ON metadata (id);
-- Shared tag pool (used by both artifacts and knowledge graph nodes).
CREATE TABLE IF NOT EXISTS tags
(
id TEXT NOT NULL PRIMARY KEY,
tag TEXT NOT NULL UNIQUE
);
-- Shared category pool.
CREATE TABLE IF NOT EXISTS categories
(
id TEXT NOT NULL PRIMARY KEY,
category TEXT NOT NULL UNIQUE
);
-- Publishers for bibliographic citations.
CREATE TABLE IF NOT EXISTS publishers
(
id TEXT UNIQUE NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
address TEXT,
UNIQUE (name, address)
);
-- Bibliographic citations.
CREATE TABLE IF NOT EXISTS citations
(
id TEXT PRIMARY KEY,
doi TEXT,
title TEXT NOT NULL,
year INTEGER NOT NULL,
published TEXT NOT NULL,
publisher TEXT NOT NULL,
source TEXT NOT NULL,
abstract TEXT,
FOREIGN KEY (publisher) REFERENCES publishers (id)
);
CREATE INDEX IF NOT EXISTS idx_citations_doi ON citations (id, doi);
-- Many-to-one: multiple authors per citation.
CREATE TABLE IF NOT EXISTS authors
(
citation_id TEXT NOT NULL,
author_name TEXT NOT NULL,
FOREIGN KEY (citation_id) REFERENCES citations (id)
);
-- Artifact repository.
CREATE TABLE IF NOT EXISTS artifacts
(
id TEXT PRIMARY KEY,
type TEXT NOT NULL,
citation_id TEXT NOT NULL,
latest TEXT NOT NULL,
FOREIGN KEY (citation_id) REFERENCES citations (id)
);
-- Many-to-many junction: artifacts <-> tags.
CREATE TABLE IF NOT EXISTS artifact_tags
(
artifact_id TEXT NOT NULL,
tag_id TEXT NOT NULL,
FOREIGN KEY (artifact_id) REFERENCES artifacts (id),
FOREIGN KEY (tag_id) REFERENCES tags (id)
);
-- Many-to-many junction: artifacts <-> categories.
CREATE TABLE IF NOT EXISTS artifact_categories
(
artifact_id TEXT NOT NULL,
category_id TEXT NOT NULL,
FOREIGN KEY (artifact_id) REFERENCES artifacts (id),
FOREIGN KEY (category_id) REFERENCES categories (id)
);
-- Temporal index linking artifacts to snapshots by datetime.
CREATE TABLE IF NOT EXISTS artifacts_history
(
artifact_id TEXT NOT NULL,
snapshot_id TEXT NOT NULL UNIQUE,
datetime TEXT NOT NULL,
PRIMARY KEY (artifact_id, datetime),
FOREIGN KEY (artifact_id) REFERENCES artifacts (id)
);
-- Snapshot records with storage and content timestamps.
CREATE TABLE IF NOT EXISTS artifact_snapshots
(
artifact_id TEXT NOT NULL,
id TEXT UNIQUE PRIMARY KEY,
stored_at INTEGER NOT NULL,
datetime TEXT NOT NULL,
citation_id TEXT NOT NULL,
source TEXT NOT NULL,
FOREIGN KEY (artifact_id) REFERENCES artifacts (id),
FOREIGN KEY (id) REFERENCES artifacts_history (snapshot_id)
);
-- Blob registry. Actual content lives in the CAS on disk.
CREATE TABLE IF NOT EXISTS blobs
(
snapshot_id TEXT NOT NULL,
id TEXT NOT NULL UNIQUE PRIMARY KEY,
format TEXT NOT NULL,
FOREIGN KEY (snapshot_id) REFERENCES artifact_snapshots (id)
);
-- Schema version tracking.
CREATE TABLE IF NOT EXISTS schema_version
(
version INTEGER NOT NULL,
applied TEXT NOT NULL
);
INSERT INTO schema_version (version, applied) VALUES (1, datetime('now'));