Files
mcdoc/internal/server/fetch.go
Kyle Isom 28afaa2c56 Implement mcdoc v0.1.0: public documentation server
Single-binary Go server that fetches markdown from Gitea (mc org),
renders to HTML with goldmark (GFM, chroma syntax highlighting,
heading anchors), and serves a navigable read-only documentation site.

Features:
- Boot fetch with retry, webhook refresh, 15-minute poll fallback
- In-memory cache with atomic per-repo swap
- chi router with htmx partial responses for SPA-like navigation
- HMAC-SHA256 webhook validation
- Responsive CSS, TOC generation, priority doc ordering
- $PORT env var support for MCP agent port assignment

33 tests across config, cache, render, and server packages.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-27 13:04:15 -07:00

219 lines
4.8 KiB
Go

package server
import (
"context"
"log/slog"
"path/filepath"
"strings"
"sync"
"time"
"git.wntrmute.dev/mc/mcdoc/internal/cache"
"git.wntrmute.dev/mc/mcdoc/internal/gitea"
"git.wntrmute.dev/mc/mcdoc/internal/render"
)
// Fetcher coordinates fetching content from Gitea and populating the cache.
// It bundles the Gitea client, the renderer, and the exclusion and
// concurrency settings used by the fetch and poll routines in this file.
type Fetcher struct {
// client issues the Gitea calls: listing repos and markdown files, fetching
// file content, and looking up the latest commit on a branch.
client *gitea.Client
// renderer turns fetched file content into HTML plus its headings.
renderer *render.Renderer
// excludePaths holds substrings; a file whose path contains any one of them
// is skipped.
excludePaths []string
// excludeRepos is the set of repo names that are skipped entirely.
excludeRepos map[string]bool
// concurrency bounds how many repos are fetched at the same time.
concurrency int
// log receives warnings about files that could not be fetched or rendered.
log *slog.Logger
}
// FetcherConfig holds fetcher configuration. It is the input to NewFetcher.
type FetcherConfig struct {
// Client is the Gitea client the fetcher will use for all API calls.
Client *gitea.Client
// Renderer is the renderer applied to every fetched file.
Renderer *render.Renderer
// ExcludePaths lists substrings; files whose path contains one are skipped.
ExcludePaths []string
// ExcludeRepos lists the names of repos to skip.
ExcludeRepos []string
// Concurrency is the maximum number of repos fetched in parallel. Values
// below 1 are replaced with 4 by NewFetcher.
Concurrency int
// Log is the logger to use. When nil, NewFetcher falls back to
// slog.Default().
Log *slog.Logger
}
// NewFetcher builds a Fetcher from cfg.
//
// The ExcludeRepos list is converted into a set for constant-time lookup.
// A Concurrency below 1 falls back to 4, and a nil Log falls back to
// slog.Default().
func NewFetcher(cfg FetcherConfig) *Fetcher {
	workers := cfg.Concurrency
	if workers < 1 {
		workers = 4
	}

	logger := cfg.Log
	if logger == nil {
		logger = slog.Default()
	}

	skipRepos := make(map[string]bool, len(cfg.ExcludeRepos))
	for i := range cfg.ExcludeRepos {
		skipRepos[cfg.ExcludeRepos[i]] = true
	}

	f := new(Fetcher)
	f.client = cfg.Client
	f.renderer = cfg.Renderer
	f.excludePaths = cfg.ExcludePaths
	f.excludeRepos = skipRepos
	f.concurrency = workers
	f.log = logger
	return f
}
// FetchRepo fetches and renders all markdown files for a single repo.
//
// It lists the markdown files on repo.DefaultBranch, skips every path matched
// by the exclude patterns, and fetches and renders the rest. A file that fails
// to fetch or render is logged and skipped, so one bad file does not fail the
// whole repo. A failure to look up the latest commit is likewise only logged;
// the result then carries whatever SHA and commit date the client returned
// alongside that error.
//
// An error is returned when the file listing fails, or when ctx is done
// before all files have been processed. The latter matters because callers
// store the result in the cache: without it, a cancelled or timed-out context
// would make every remaining file fetch fail, and the truncated (possibly
// empty) document list would be returned as if it were complete.
func (f *Fetcher) FetchRepo(ctx context.Context, repo gitea.Repo) (*cache.RepoInfo, error) {
	files, err := f.client.ListMarkdownFiles(ctx, repo.Name, repo.DefaultBranch)
	if err != nil {
		return nil, err
	}

	sha, commitDate, err := f.client.LatestCommitSHA(ctx, repo.Name, repo.DefaultBranch)
	if err != nil {
		f.log.Warn("could not get latest commit", "repo", repo.Name, "error", err)
	}

	var docs []*cache.Document
	for _, filePath := range files {
		// Stop before issuing requests that can no longer succeed.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}
		if f.isExcluded(filePath) {
			continue
		}

		content, err := f.client.FetchFileContent(ctx, repo.Name, repo.DefaultBranch, filePath)
		if err != nil {
			// Distinguish "this file is broken" from "the whole fetch was
			// aborted": only the former may be skipped.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return nil, ctxErr
			}
			f.log.Warn("skip file", "repo", repo.Name, "file", filePath, "error", err)
			continue
		}

		result, err := f.renderer.Render(content)
		if err != nil {
			f.log.Warn("render failed", "repo", repo.Name, "file", filePath, "error", err)
			continue
		}

		// The URL path is the file path without its extension.
		urlPath := strings.TrimSuffix(filePath, filepath.Ext(filePath))

		// Prefer a title taken from the document's headings; fall back to
		// the file name when the document has none.
		title := titleFromHeadings(result.Headings)
		if title == "" {
			title = titleFromPath(filePath)
		}

		docs = append(docs, &cache.Document{
			Repo:        repo.Name,
			FilePath:    filePath,
			URLPath:     urlPath,
			Title:       title,
			HTML:        result.HTML,
			Headings:    result.Headings,
			LastUpdated: commitDate,
		})
	}

	return &cache.RepoInfo{
		Name:        repo.Name,
		Description: repo.Description,
		Docs:        docs,
		CommitSHA:   sha,
		FetchedAt:   time.Now(),
	}, nil
}
// isExcluded reports whether filePath contains any of the fetcher's exclude
// patterns as a plain substring.
func (f *Fetcher) isExcluded(filePath string) bool {
	patterns := f.excludePaths
	for i := 0; i < len(patterns); i++ {
		if strings.Contains(filePath, patterns[i]) {
			return true
		}
	}
	return false
}
// titleFromHeadings picks a document title from its headings: the text of the
// first level-1 heading if there is one, otherwise the text of the first
// heading of any level. It returns "" when headings is empty.
func titleFromHeadings(headings []render.Heading) string {
	if len(headings) == 0 {
		return ""
	}
	for i := range headings {
		if headings[i].Level == 1 {
			return headings[i].Text
		}
	}
	// No level-1 heading exists, so settle for whatever comes first.
	return headings[0].Text
}
// titleFromPath derives a fallback title from a file path: the final path
// element with its extension (the part from the last dot onward) removed.
func titleFromPath(filePath string) string {
	name := filepath.Base(filePath)
	ext := filepath.Ext(name)
	if ext == "" {
		return name
	}
	return strings.TrimSuffix(name, ext)
}
// fetchAllRepos lists the repos available to the fetcher's client and fetches
// every non-excluded one concurrently. A repo whose fetch yields at least one
// document is stored in cfg.Cache; a repo with no documents is left out.
//
// At most cfg.Fetcher.concurrency fetches run at once, and the call blocks
// until all of them have finished. Only a failure to list the repos is
// returned. A failure to fetch an individual repo is logged as a warning and
// does not affect the return value, so one broken repo cannot fail the whole
// pass.
func fetchAllRepos(ctx context.Context, cfg BackgroundConfig) error {
	repos, err := cfg.Fetcher.client.ListRepos(ctx)
	if err != nil {
		return err
	}

	// sem is a counting semaphore: a goroutine must place a token in it
	// before fetching and removes the token when it is done.
	sem := make(chan struct{}, cfg.Fetcher.concurrency)
	var wg sync.WaitGroup

	for _, repo := range repos {
		if cfg.Fetcher.excludeRepos[repo.Name] {
			continue
		}

		wg.Add(1)
		go func(r gitea.Repo) {
			defer wg.Done()
			sem <- struct{}{}
			defer func() { <-sem }()

			info, err := cfg.Fetcher.FetchRepo(ctx, r)
			if err != nil {
				cfg.Log.Warn("fetch repo failed", "repo", r.Name, "error", err)
				return
			}
			if len(info.Docs) > 0 {
				cfg.Cache.SetRepo(info)
			}
			cfg.Log.Info("fetched repo", "repo", r.Name, "docs", len(info.Docs))
		}(repo)
	}

	wg.Wait()
	return nil
}
// pollForChanges walks the non-excluded repos one at a time and compares each
// repo's latest commit SHA on its default branch with the SHA recorded in
// cfg.Cache. When the two differ, the repo is re-fetched and the result is
// stored in the cache.
//
// Only a failure to list the repos is returned. A failed commit lookup or
// re-fetch is logged as a warning and the loop moves on to the next repo.
func pollForChanges(ctx context.Context, cfg BackgroundConfig) error {
	fetcher := cfg.Fetcher

	repos, err := fetcher.client.ListRepos(ctx)
	if err != nil {
		return err
	}

	for _, r := range repos {
		name := r.Name
		if fetcher.excludeRepos[name] {
			continue
		}

		latest, _, shaErr := fetcher.client.LatestCommitSHA(ctx, name, r.DefaultBranch)
		if shaErr != nil {
			cfg.Log.Warn("poll: could not check commit", "repo", name, "error", shaErr)
			continue
		}

		known := cfg.Cache.GetCommitSHA(name)
		if latest != known {
			cfg.Log.Info("repo changed, re-fetching", "repo", name, "old_sha", known, "new_sha", latest)

			info, fetchErr := fetcher.FetchRepo(ctx, r)
			if fetchErr != nil {
				cfg.Log.Warn("poll: re-fetch failed", "repo", name, "error", fetchErr)
				continue
			}
			cfg.Cache.SetRepo(info)
		}
	}

	return nil
}