Files
kte/Buffer.h
Kyle Isom 8ec0d6ac41 Add benchmarks, migration tests, and dev guide
Add benchmarks for core operations, migration edge case tests, improved
buffer I/O tests, and developer guide

- Introduced `test_benchmarks.cc` for performance benchmarking of key
  operations in `PieceTable` and `Buffer`, including syntax highlighting
  and iteration patterns.
- Added `test_migration_coverage.cc` to provide comprehensive tests for
  migration of `Buffer::Rows()` to `PieceTable` APIs, with edge cases,
  boundary handling, and consistency checks.
- Enhanced `test_buffer_io.cc` with additional cases for save/load
  workflows, file handling, and better integration with the core API.
- Documented architectural details and core concepts in a new
  `DEVELOPER_GUIDE.md`. Highlighted design principles, code
  organization, and contribution workflows.
2026-02-17 16:08:23 -08:00

626 lines
13 KiB
C++

/*
* Buffer.h - editor buffer representing an open document
*
* Buffer is the central document model in kte. Each Buffer represents one open file
* or scratch document and manages:
*
* - Content storage: Uses PieceTable for efficient text operations
* - Cursor state: Current position (curx_, cury_), rendered column (rx_)
* - Viewport: Scroll offsets (rowoffs_, coloffs_) for display
* - File backing: Optional association with a file on disk
* - Undo/Redo: Integrated UndoSystem for operation history
* - Syntax highlighting: Optional HighlighterEngine for language-aware coloring
* - Swap/crash recovery: Integration with SwapRecorder for journaling
* - Dirty tracking: Modification state for save prompts
*
* Key concepts:
*
* 1. Cursor coordinates:
* - (curx_, cury_): Logical character position in the document
* - rx_: Rendered column accounting for tab expansion
*
* 2. File backing:
* - Buffers can be file-backed (associated with a path) or scratch (unnamed)
* - File identity tracking detects external modifications
*
* 3. Legacy Line wrapper:
* - Buffer::Line provides a string-like interface for legacy command code
* - New code should prefer direct PieceTable operations
* - See DEVELOPER_GUIDE.md for migration guidance
*
* 4. Content access:
* - Rows(): Materialized line cache (legacy, being phased out)
* - GetLineView(): Zero-copy line access via string_view (preferred)
* - Direct PieceTable access for new editing operations
*/
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <string_view>
#include <utility>
#include <vector>

#include "PieceTable.h"
#include "UndoSystem.h"
#include "syntax/HighlighterEngine.h"
#include "Highlight.h"
// Forward declaration for swap journal integration.
// Only a declaration is needed: this header stores and returns a
// kte::SwapRecorder pointer and never uses the complete type.
namespace kte { class SwapRecorder; }
class Buffer {
public:
// Special member functions. All are only declared in this header; no bodies
// are visible here.
// Default constructor.
Buffer();
// Copy operations.
// NOTE(review): the class holds std::unique_ptr members and a std::mutex,
// neither of which is copyable, so these presumably rebuild that state rather
// than copy it -- confirm against the definitions.
Buffer(const Buffer &other);
Buffer &operator=(const Buffer &other);
// Move operations; both are declared noexcept.
Buffer(Buffer &&other) noexcept;
Buffer &operator=(Buffer &&other) noexcept;
// Construct from a path. `explicit` rules out implicit std::string -> Buffer
// conversions.
// NOTE(review): presumably loads `path`; confirm in the definition.
explicit Buffer(const std::string &path);
// File operations
// The three I/O entry points return bool and take `err` by non-const reference.
// NOTE(review): presumably true means success and `err` receives a message on
// failure -- confirm against the definitions.
bool OpenFromFile(const std::string &path, std::string &err);
// Save() is a const member function.
bool Save(std::string &err) const; // saves to existing filename; returns false if not file-backed
bool SaveAs(const std::string &path, std::string &err); // saves to path and makes buffer file-backed
// External modification detection.
// Returns true if the file on disk differs from the last observed identity recorded
// on open/save.
[[nodiscard]] bool ExternallyModifiedOnDisk() const;
// Refresh the stored on-disk identity to match current stat (used after open/save).
void RefreshOnDiskIdentity();
// Accessors: read-only views of cursor, size, and viewport state.

// Cursor x (curx_).
[[nodiscard]] std::size_t Curx() const { return curx_; }

// Cursor y (cury_).
[[nodiscard]] std::size_t Cury() const { return cury_; }

// Render x (rx_).
[[nodiscard]] std::size_t Rx() const { return rx_; }

// Row count, obtained from content_LineCount_() rather than a stored field.
[[nodiscard]] std::size_t Nrows() const { return content_LineCount_(); }

// Viewport row offset (rowoffs_).
[[nodiscard]] std::size_t Rowoffs() const { return rowoffs_; }

// Viewport column offset (coloffs_).
[[nodiscard]] std::size_t Coloffs() const { return coloffs_; }
// Thin string wrapper kept for legacy command paths.
// Intentionally lightweight: it owns the materialized bytes of one line only.
class Line {
public:
    Line() = default;

    // Build from a C string; a null pointer produces an empty line.
    explicit Line(const char *s) : s_(s ? s : "") {}

    // Build from a copy of `s`.
    explicit Line(const std::string &s) : s_(s) {}

    Line(const Line &other) = default;
    Line &operator=(const Line &other) = default;
    Line(Line &&other) noexcept = default;
    Line &operator=(Line &&other) noexcept = default;

    // Replace the contents with a copy of `s`.
    Line &operator=(const std::string &s)
    {
        s_ = s;
        return *this;
    }

    // Remove every byte from the line.
    void Clear() { s_.clear(); }

    // Byte count; size() and Size() report the same value.
    [[nodiscard]] std::size_t size() const { return s_.size(); }
    [[nodiscard]] std::size_t Size() const { return s_.size(); }

    // True when the line holds no bytes.
    [[nodiscard]] bool empty() const { return s_.empty(); }

    // Read-only pointer to the stored bytes.
    [[nodiscard]] const char *Data() const { return s_.data(); }

    // Read-only element access; an out-of-range index yields '\0'.
    [[nodiscard]] char operator[](std::size_t i) const
    {
        if (i >= s_.size())
            return '\0';
        return s_[i];
    }

    // Explicit conversion to a std::string copy of the contents.
    explicit operator std::string() const { return s_; }

    // Copy of the tail starting at `pos`; empty when `pos` is at or past the end.
    [[nodiscard]] std::string substr(std::size_t pos) const
    {
        if (pos >= s_.size())
            return std::string();
        return s_.substr(pos);
    }

    // Copy of up to `len` bytes starting at `pos`; empty when `pos` is at or
    // past the end.
    [[nodiscard]] std::string substr(std::size_t pos, std::size_t len) const
    {
        if (pos >= s_.size())
            return std::string();
        return s_.substr(pos, len);
    }

    // Minimal search within the line; forwards to std::string::find.
    [[nodiscard]] std::size_t find(const std::string &needle, const std::size_t pos = 0) const
    {
        return s_.find(needle, pos);
    }

    // Erase from `pos` to the end. A `pos` at or past the end is ignored.
    void erase(std::size_t pos)
    {
        if (pos >= s_.size())
            return;
        s_.erase(pos);
    }

    // Erase up to `len` bytes starting at `pos`. A `pos` at or past the end is
    // ignored.
    void erase(std::size_t pos, std::size_t len)
    {
        if (pos >= s_.size())
            return;
        s_.erase(pos, len);
    }

    // Insert `seg` at `pos`; a `pos` beyond the end is clamped so the segment
    // is appended.
    void insert(std::size_t pos, const std::string &seg)
    {
        const std::size_t at = (pos > s_.size()) ? s_.size() : pos;
        s_.insert(at, seg);
    }

    // Append the bytes of another line.
    Line &operator+=(const Line &other)
    {
        s_.append(other.s_);
        return *this;
    }

    // Append the bytes of a string.
    Line &operator+=(const std::string &s)
    {
        s_.append(s);
        return *this;
    }

private:
    std::string s_; // materialized bytes of this line
};
// Materialized row cache (legacy). Both overloads call ensure_rows_cache()
// first and then return a reference to rows_.

// Read-only view of the cached rows.
[[nodiscard]] const std::vector<Line> &Rows() const { ensure_rows_cache(); return rows_; }

// Mutable view of the cached rows.
// NOTE(review): this hands out a mutable reference to the cache; whether edits
// made through it reach content_ is not visible in this header.
[[nodiscard]] std::vector<Line> &Rows() { ensure_rows_cache(); return rows_; }
// Per-line accessors that go straight to content_ instead of building the
// full rows_ cache. Prefer these over Rows() in hot paths to keep memory
// overhead down on large files.

// The text of `row` as an owned string, as returned by content_.GetLine(row).
[[nodiscard]] std::string GetLineString(std::size_t row) const { return content_.GetLine(row); }

// The pair returned by content_.GetLineRange(row).
// NOTE(review): presumably a (start, end) or (start, length) offset pair --
// confirm against PieceTable.
[[nodiscard]] std::pair<std::size_t, std::size_t> GetLineRange(std::size_t row) const { return content_.GetLineRange(row); }
// Zero-copy view of a line. Points into the materialized backing store; becomes
// invalid after subsequent edits. Use immediately.
// VisualLineSpotSelected() in this header treats the returned view as possibly
// ending in '\n' and strips it before measuring the line.
[[nodiscard]] std::string_view GetLineView(std::size_t row) const;
// Name currently stored for this buffer (filename_).
[[nodiscard]] const std::string &Filename() const { return filename_; }

// Set a virtual (non file-backed) display name for this buffer, e.g. "+HELP+".
// The buffer is explicitly flagged as not file-backed afterwards.
void SetVirtualName(const std::string &name)
{
    filename_ = name;
    is_file_backed_ = false;
}

// Whether the buffer is currently flagged as file-backed.
[[nodiscard]] bool IsFileBacked() const { return is_file_backed_; }
// Current value of the dirty flag.
[[nodiscard]] bool Dirty() const { return dirty_; }

// Read-only flag: query, set, and flip.
[[nodiscard]] bool IsReadOnly() const { return read_only_; }

void SetReadOnly(bool ro) { read_only_ = ro; }

void ToggleReadOnly() { SetReadOnly(!read_only_); }
// Store the cursor position. Values are stored as given; no clamping happens here.
void SetCursor(const std::size_t x, const std::size_t y) { curx_ = x; cury_ = y; }

// Store the render x value.
void SetRenderX(const std::size_t rx) { rx_ = rx; }

// Store the viewport row and column offsets.
void SetOffsets(const std::size_t row, const std::size_t col) { rowoffs_ = row; coloffs_ = col; }
// Set the dirty flag. Marking the buffer dirty (d == true) also increments
// version_ and, if a highlighter exists, calls InvalidateFrom(0) on it.
// Clearing the flag does neither.
void SetDirty(bool d)
{
    dirty_ = d;
    if (!d)
        return;
    ++version_;
    if (highlighter_)
        highlighter_->InvalidateFrom(0);
}
// Mark support: a set/unset flag plus a stored (x, y) position.

// Unset the mark. The stored coordinates are left as they were.
void ClearMark() { mark_set_ = false; }

// Set the mark at (x, y).
void SetMark(const std::size_t x, const std::size_t y)
{
    mark_set_ = true;
    mark_curx_ = x;
    mark_cury_ = y;
}

// Whether a mark is currently set.
[[nodiscard]] bool MarkSet() const { return mark_set_; }

// Stored mark coordinates. These return the last stored values whether or
// not the mark is currently set.
[[nodiscard]] std::size_t MarkCurx() const { return mark_curx_; }
[[nodiscard]] std::size_t MarkCury() const { return mark_cury_; }
// Visual-line selection support (multicursor/visual mode)
// State is an active flag plus two row indices: the anchor row, fixed when the
// selection starts, and the active row, updated through VisualLineSetActiveY().

// Deactivate the selection. The stored rows are left untouched.
void VisualLineClear()
{
    visual_line_active_ = false;
}

// Activate the selection with both the anchor and the active row at cury_.
void VisualLineStart()
{
    visual_line_active_ = true;
    visual_line_anchor_y_ = cury_;
    visual_line_active_y_ = cury_;
}

// Flip between active and inactive.
void VisualLineToggle()
{
    if (visual_line_active_)
        VisualLineClear();
    else
        VisualLineStart();
}

// Whether a visual-line selection is active.
[[nodiscard]] bool VisualLineActive() const
{
    return visual_line_active_;
}

// Move the active end of the selection to row `y`. The anchor is unchanged.
void VisualLineSetActiveY(std::size_t y)
{
    visual_line_active_y_ = y;
}

// First row of the selection: the smaller of the anchor and active rows.
[[nodiscard]] std::size_t VisualLineStartY() const
{
    return std::min(visual_line_anchor_y_, visual_line_active_y_);
}

// Last row of the selection: the larger of the anchor and active rows.
[[nodiscard]] std::size_t VisualLineEndY() const
{
    return std::max(visual_line_anchor_y_, visual_line_active_y_);
}
// In visual-line (multi-cursor) mode the UI highlights only the per-line
// cursor "spot" (Curx clamped to each line's length), not the whole line.
// Returns true when column `sx` on row `y` is that spot; always false when no
// visual-line selection is active or `y` lies outside the selected rows.
[[nodiscard]] bool VisualLineSpotSelected(std::size_t y, std::size_t sx) const
{
    const bool row_selected = visual_line_active_
                              && y >= VisualLineStartY()
                              && y <= VisualLineEndY();
    if (!row_selected)
        return false;

    std::string_view text = GetLineView(y);
    // `GetLineView()` returns the raw range, which may include a trailing '\n';
    // drop it so the clamp uses the visible line length.
    const bool ends_with_newline = !text.empty() && text.back() == '\n';
    if (ends_with_newline)
        text.remove_suffix(1);

    return sx == std::min(Curx(), text.size());
}
// Returns a std::string by value; only declared in this header.
// NOTE(review): presumably the full document text -- confirm in the definition.
[[nodiscard]] std::string AsString() const;
// Syntax highlighting integration (per-buffer)

// Version counter; SetDirty(true) increments it.
[[nodiscard]] std::uint64_t Version() const { return version_; }

// Syntax-enabled flag: set and query.
void SetSyntaxEnabled(bool on) { syntax_enabled_ = on; }
[[nodiscard]] bool SyntaxEnabled() const { return syntax_enabled_; }

// Filetype string: set and query.
void SetFiletype(const std::string &ft) { filetype_ = ft; }
[[nodiscard]] const std::string &Filetype() const { return filetype_; }

// Raw pointer to the buffer-owned highlighter, or nullptr when none has been
// created. Ownership stays with the buffer.
[[nodiscard]] kte::HighlighterEngine *Highlighter() { return highlighter_.get(); }
[[nodiscard]] const kte::HighlighterEngine *Highlighter() const { return highlighter_.get(); }

// Create the highlighter on first use; does nothing when one already exists.
void EnsureHighlighter()
{
    if (highlighter_)
        return;
    highlighter_ = std::make_unique<kte::HighlighterEngine>();
}
// Swap journal integration (set by Editor).
// The buffer only stores the pointer it is given; a null pointer is accepted.
void SetSwapRecorder(kte::SwapRecorder *rec) { swap_rec_ = rec; }

// The stored recorder pointer; nullptr until one has been set.
[[nodiscard]] kte::SwapRecorder *SwapRecorder() const { return swap_rec_; }
// Raw, low-level editing APIs used by UndoSystem apply().
// These must NOT trigger undo recording. They also do not move the cursor.
// Note the mixed index types: row/col are signed int while lengths are
// std::size_t, unlike the std::size_t cursor accessors elsewhere in this class.
// NOTE(review): handling of negative or out-of-range row/col is not visible
// in this header -- confirm in the definitions.
void insert_text(int row, int col, std::string_view text);
void delete_text(int row, int col, std::size_t len);
// NOTE(review): presumably splits `row` at `col` into two lines -- confirm.
void split_line(int row, int col);
// NOTE(review): presumably joins `row` with the following line -- confirm.
void join_lines(int row);
void insert_row(int row, std::string_view text);
void delete_row(int row);
// Replace the entire buffer content with raw bytes.
// Intended for crash recovery (swap replay) and test harnesses.
// This does not trigger swap or undo recording.
void replace_all_bytes(std::string_view bytes);
// Undo system accessors (created per-buffer)
// Const and non-const overloads returning a raw UndoSystem pointer.
// NOTE(review): presumably a non-owning view of undo_sys_, possibly null --
// confirm in the definitions.
[[nodiscard]] UndoSystem *Undo();
[[nodiscard]] const UndoSystem *Undo() const;
#if defined(KTE_TESTS)
// Test-only: return the raw buffer bytes (including newlines) as a string.
// Only declared when KTE_TESTS is defined.
[[nodiscard]] std::string BytesForTests() const;
#endif
private:
// Snapshot of a file's on-disk identity. A default-constructed value has
// valid == false and every numeric field zero.
// NOTE(review): field names suggest stat(2) data (mtime in nanoseconds, size,
// device id, inode number) -- confirm against stat_identity().
struct FileIdentity {
    bool valid{false};
    std::uint64_t mtime_ns{0};
    std::uint64_t size{0};
    std::uint64_t dev{0};
    std::uint64_t ino{0};
};
// Fill `out` with the identity of the file at `path`. Static: needs no Buffer.
// NOTE(review): presumably returns false when the file cannot be stat'ed --
// confirm in the definition.
[[nodiscard]] static bool stat_identity(const std::string &path, FileIdentity &out);
// Fill `out` with an on-disk identity for this buffer.
// NOTE(review): presumably stats filename_ -- confirm in the definition.
[[nodiscard]] bool current_disk_identity(FileIdentity &out) const;
// Last recorded on-disk identity. Declared mutable, so const member functions
// may update it.
mutable FileIdentity on_disk_identity_{};
// Data members. Declaration order is significant: non-static members are
// initialized in this order and destroyed in reverse, so do not reorder
// without checking the out-of-line constructors.
// State mirroring original C struct (without undo_tree)
std::size_t curx_ = 0, cury_ = 0; // cursor position in characters
std::size_t rx_ = 0; // render x (tabs expanded)
// NOTE(review): Nrows() returns content_LineCount_() and never reads nrows_;
// confirm whether this member is still needed.
std::size_t nrows_ = 0; // number of rows
std::size_t rowoffs_ = 0, coloffs_ = 0; // viewport offsets
// Declared mutable so the const Rows() overload can rebuild it through
// ensure_rows_cache().
mutable std::vector<Line> rows_; // materialized cache of rows (without trailing newlines)
// PieceTable is the source of truth.
PieceTable content_{};
// Starts true, so the first Rows() call finds the cache marked stale.
mutable bool rows_cache_dirty_ = true; // invalidate on edits / I/O
// Helper to rebuild rows_ from content_
void ensure_rows_cache() const;
// Helper to query content_.LineCount() while keeping header minimal
std::size_t content_LineCount_() const;
// Display name or path; SetVirtualName() also stores virtual names here.
std::string filename_;
bool is_file_backed_ = false;
bool dirty_ = false;
bool read_only_ = false;
// Mark state: flag plus the stored position.
bool mark_set_ = false;
std::size_t mark_curx_ = 0, mark_cury_ = 0;
// Visual-line selection state: flag, anchor row, and active row.
bool visual_line_active_ = false;
std::size_t visual_line_anchor_y_ = 0;
std::size_t visual_line_active_y_ = 0;
// Per-buffer undo state
std::unique_ptr<struct UndoTree> undo_tree_;
std::unique_ptr<UndoSystem> undo_sys_;
// Syntax/highlighting state
std::uint64_t version_ = 0; // increment on edits
bool syntax_enabled_ = true;
std::string filetype_;
// Null until EnsureHighlighter() creates it.
std::unique_ptr<kte::HighlighterEngine> highlighter_;
// Non-owning pointer to swap recorder managed by Editor/SwapManager
kte::SwapRecorder *swap_rec_ = nullptr;
// Declared mutable so const member functions can lock it.
// NOTE(review): nothing in this header locks it; confirm which operations it
// guards in the implementation.
mutable std::mutex buffer_mutex_;
};