package server

import (
	"context"
	"log/slog"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"git.wntrmute.dev/mc/mcdoc/internal/cache"
	"git.wntrmute.dev/mc/mcdoc/internal/gitea"
	"git.wntrmute.dev/mc/mcdoc/internal/render"
)

// Fetcher coordinates fetching content from Gitea and populating the cache.
type Fetcher struct {
	// client is the Gitea API client used to list repos and files and to
	// fetch file content and commit information.
	client *gitea.Client
	// renderer turns fetched file content into HTML plus headings.
	renderer *render.Renderer
	// excludePaths holds substrings; a file whose path contains any of them
	// is skipped.
	excludePaths []string
	// excludeRepos is the set of repository names that are never fetched.
	excludeRepos map[string]bool
	// concurrency caps how many repositories are fetched at the same time.
	concurrency int
	// log receives warnings about skipped files and failed lookups.
	log *slog.Logger
}

// FetcherConfig holds fetcher configuration.
type FetcherConfig struct {
	Client   *gitea.Client
	Renderer *render.Renderer
	// ExcludePaths lists path substrings; matching files are skipped.
	ExcludePaths []string
	// ExcludeRepos lists repository names to skip entirely.
	ExcludeRepos []string
	// Concurrency is the number of parallel repo fetches; values below 1
	// select the default of 4.
	Concurrency int
	// Log is the logger to use; nil selects slog.Default().
	Log *slog.Logger
}

// NewFetcher creates a Fetcher.
//
// A Concurrency below 1 falls back to 4 and a nil Log falls back to
// slog.Default(). ExcludeRepos is converted into a set for constant-time
// lookups; ExcludePaths is stored as given (the slice is not copied).
func NewFetcher(cfg FetcherConfig) *Fetcher {
	const defaultConcurrency = 4

	workers := cfg.Concurrency
	if workers < 1 {
		workers = defaultConcurrency
	}

	logger := cfg.Log
	if logger == nil {
		logger = slog.Default()
	}

	skipRepos := make(map[string]bool, len(cfg.ExcludeRepos))
	for i := range cfg.ExcludeRepos {
		skipRepos[cfg.ExcludeRepos[i]] = true
	}

	return &Fetcher{
		client:       cfg.Client,
		renderer:     cfg.Renderer,
		excludePaths: cfg.ExcludePaths,
		excludeRepos: skipRepos,
		concurrency:  workers,
		log:          logger,
	}
}

// FetchRepo fetches and renders all markdown files for a single repo.
func (f *Fetcher) FetchRepo(ctx context.Context, repo gitea.Repo) (*cache.RepoInfo, error) { files, err := f.client.ListMarkdownFiles(ctx, repo.Name, repo.DefaultBranch) if err != nil { return nil, err } sha, commitDate, err := f.client.LatestCommitSHA(ctx, repo.Name, repo.DefaultBranch) if err != nil { f.log.Warn("could not get latest commit", "repo", repo.Name, "error", err) } var docs []*cache.Document for _, filePath := range files { if f.isExcluded(filePath) { continue } content, err := f.client.FetchFileContent(ctx, repo.Name, repo.DefaultBranch, filePath) if err != nil { f.log.Warn("skip file", "repo", repo.Name, "file", filePath, "error", err) continue } result, err := f.renderer.Render(content) if err != nil { f.log.Warn("render failed", "repo", repo.Name, "file", filePath, "error", err) continue } urlPath := strings.TrimSuffix(filePath, filepath.Ext(filePath)) title := titleFromHeadings(result.Headings) if title == "" { title = titleFromPath(filePath) } docs = append(docs, &cache.Document{ Repo: repo.Name, FilePath: filePath, URLPath: urlPath, Title: title, HTML: result.HTML, Headings: result.Headings, LastUpdated: commitDate, }) } return &cache.RepoInfo{ Name: repo.Name, Description: repo.Description, Docs: docs, CommitSHA: sha, FetchedAt: time.Now(), }, nil } func (f *Fetcher) isExcluded(filePath string) bool { for _, pattern := range f.excludePaths { if strings.Contains(filePath, pattern) { return true } } return false } func titleFromHeadings(headings []render.Heading) string { for _, h := range headings { if h.Level == 1 { return h.Text } } if len(headings) > 0 { return headings[0].Text } return "" } func titleFromPath(filePath string) string { base := filepath.Base(filePath) return strings.TrimSuffix(base, filepath.Ext(base)) } func fetchAllRepos(ctx context.Context, cfg BackgroundConfig) error { repos, err := cfg.Fetcher.client.ListRepos(ctx) if err != nil { return err } sem := make(chan struct{}, cfg.Fetcher.concurrency) var wg sync.WaitGroup var 
mu sync.Mutex var firstErr error for _, repo := range repos { if cfg.Fetcher.excludeRepos[repo.Name] { continue } wg.Add(1) go func(r gitea.Repo) { defer wg.Done() sem <- struct{}{} defer func() { <-sem }() info, err := cfg.Fetcher.FetchRepo(ctx, r) if err != nil { cfg.Log.Warn("fetch repo failed", "repo", r.Name, "error", err) mu.Lock() if firstErr == nil { firstErr = err } mu.Unlock() return } if len(info.Docs) > 0 { cfg.Cache.SetRepo(info) } cfg.Log.Info("fetched repo", "repo", r.Name, "docs", len(info.Docs)) }(repo) } wg.Wait() return nil } func pollForChanges(ctx context.Context, cfg BackgroundConfig) error { repos, err := cfg.Fetcher.client.ListRepos(ctx) if err != nil { return err } for _, repo := range repos { if cfg.Fetcher.excludeRepos[repo.Name] { continue } sha, _, err := cfg.Fetcher.client.LatestCommitSHA(ctx, repo.Name, repo.DefaultBranch) if err != nil { cfg.Log.Warn("poll: could not check commit", "repo", repo.Name, "error", err) continue } cached := cfg.Cache.GetCommitSHA(repo.Name) if sha == cached { continue } cfg.Log.Info("repo changed, re-fetching", "repo", repo.Name, "old_sha", cached, "new_sha", sha) info, err := cfg.Fetcher.FetchRepo(ctx, repo) if err != nil { cfg.Log.Warn("poll: re-fetch failed", "repo", repo.Name, "error", err) continue } cfg.Cache.SetRepo(info) } return nil }