Introduce PieceTable-based buffer backend (Phase 1)

- Added `PieceTable` class for efficient text manipulation and implemented core editing APIs (`Insert`, `Delete`, `Find`, etc.).
- Integrated `PieceTable` into `Buffer` class with an adapter for rows caching.
- Enabled seamless switching between legacy row-based and new PieceTable-backed editing via `KTE_USE_BUFFER_PIECE_TABLE`.
- Updated file I/O, line-based queries, and cursor operations to support PieceTable-based storage.
- Added lazy rebuilding of the line index and improved management of edit state for performance.
This commit is contained in:
2025-12-05 15:29:35 -08:00
parent 222f73252b
commit afb6888c31
6 changed files with 722 additions and 73 deletions

262
Buffer.cc
View File

@@ -2,6 +2,7 @@
#include <sstream>
#include <filesystem>
#include <cstdlib>
#include <limits>
#include "Buffer.h"
#include "UndoSystem.h"
@@ -29,13 +30,17 @@ Buffer::Buffer(const std::string &path)
// Copy constructor/assignment: perform a deep copy of core fields; reinitialize undo for the new buffer.
Buffer::Buffer(const Buffer &other)
{
curx_ = other.curx_;
cury_ = other.cury_;
rx_ = other.rx_;
nrows_ = other.nrows_;
rowoffs_ = other.rowoffs_;
coloffs_ = other.coloffs_;
rows_ = other.rows_;
curx_ = other.curx_;
cury_ = other.cury_;
rx_ = other.rx_;
nrows_ = other.nrows_;
rowoffs_ = other.rowoffs_;
coloffs_ = other.coloffs_;
rows_ = other.rows_;
#ifdef KTE_USE_BUFFER_PIECE_TABLE
content_ = other.content_;
rows_cache_dirty_ = other.rows_cache_dirty_;
#endif
filename_ = other.filename_;
is_file_backed_ = other.is_file_backed_;
dirty_ = other.dirty_;
@@ -77,13 +82,17 @@ Buffer::operator=(const Buffer &other)
{
if (this == &other)
return *this;
curx_ = other.curx_;
cury_ = other.cury_;
rx_ = other.rx_;
nrows_ = other.nrows_;
rowoffs_ = other.rowoffs_;
coloffs_ = other.coloffs_;
rows_ = other.rows_;
curx_ = other.curx_;
cury_ = other.cury_;
rx_ = other.rx_;
nrows_ = other.nrows_;
rowoffs_ = other.rowoffs_;
coloffs_ = other.coloffs_;
rows_ = other.rows_;
#ifdef KTE_USE_BUFFER_PIECE_TABLE
content_ = other.content_;
rows_cache_dirty_ = other.rows_cache_dirty_;
#endif
filename_ = other.filename_;
is_file_backed_ = other.is_file_backed_;
dirty_ = other.dirty_;
@@ -141,6 +150,10 @@ Buffer::Buffer(Buffer &&other) noexcept
syntax_enabled_ = other.syntax_enabled_;
filetype_ = std::move(other.filetype_);
highlighter_ = std::move(other.highlighter_);
#ifdef KTE_USE_BUFFER_PIECE_TABLE
content_ = std::move(other.content_);
rows_cache_dirty_ = other.rows_cache_dirty_;
#endif
// Update UndoSystem's buffer reference to point to this object
if (undo_sys_) {
undo_sys_->UpdateBufferReference(*this);
@@ -178,6 +191,10 @@ Buffer::operator=(Buffer &&other) noexcept
filetype_ = std::move(other.filetype_);
highlighter_ = std::move(other.highlighter_);
#ifdef KTE_USE_BUFFER_PIECE_TABLE
content_ = std::move(other.content_);
rows_cache_dirty_ = other.rows_cache_dirty_;
#endif
// Update UndoSystem's buffer reference to point to this object
if (undo_sys_) {
undo_sys_->UpdateBufferReference(*this);
@@ -229,6 +246,12 @@ Buffer::OpenFromFile(const std::string &path, std::string &err)
mark_set_ = false;
mark_curx_ = mark_cury_ = 0;
#ifdef KTE_USE_BUFFER_PIECE_TABLE
// Empty PieceTable
content_.Clear();
rows_cache_dirty_ = true;
#endif
return true;
}
@@ -238,6 +261,22 @@ Buffer::OpenFromFile(const std::string &path, std::string &err)
return false;
}
#ifdef KTE_USE_BUFFER_PIECE_TABLE
// Read entire file into PieceTable as-is
std::string data;
in.seekg(0, std::ios::end);
auto sz = in.tellg();
if (sz > 0) {
data.resize(static_cast<std::size_t>(sz));
in.seekg(0, std::ios::beg);
in.read(data.data(), static_cast<std::streamsize>(data.size()));
}
content_.Clear();
if (!data.empty())
content_.Append(data.data(), data.size());
rows_cache_dirty_ = true;
nrows_ = 0; // not used under adapter
#else
// Detect if file ends with a newline so we can preserve a final empty line
// in our in-memory representation (mg-style semantics).
bool ends_with_nl = false;
@@ -278,7 +317,8 @@ Buffer::OpenFromFile(const std::string &path, std::string &err)
}
}
nrows_ = rows_.size();
nrows_ = rows_.size();
#endif
filename_ = norm;
is_file_backed_ = true;
dirty_ = false;
@@ -313,6 +353,12 @@ Buffer::Save(std::string &err) const
err = "Failed to open for write: " + filename_;
return false;
}
#ifdef KTE_USE_BUFFER_PIECE_TABLE
const char *d = content_.Data();
std::size_t n = content_.Size();
if (d && n)
out.write(d, static_cast<std::streamsize>(n));
#else
for (std::size_t i = 0; i < rows_.size(); ++i) {
const char *d = rows_[i].Data();
std::size_t n = rows_[i].Size();
@@ -322,6 +368,7 @@ Buffer::Save(std::string &err) const
out.put('\n');
}
}
#endif
if (!out.good()) {
err = "Write error";
return false;
@@ -360,6 +407,14 @@ Buffer::SaveAs(const std::string &path, std::string &err)
err = "Failed to open for write: " + out_path;
return false;
}
#ifdef KTE_USE_BUFFER_PIECE_TABLE
{
const char *d = content_.Data();
std::size_t n = content_.Size();
if (d && n)
out.write(d, static_cast<std::streamsize>(n));
}
#else
for (std::size_t i = 0; i < rows_.size(); ++i) {
const char *d = rows_[i].Data();
std::size_t n = rows_[i].Size();
@@ -369,6 +424,7 @@ Buffer::SaveAs(const std::string &path, std::string &err)
out.put('\n');
}
}
#endif
if (!out.good()) {
err = "Write error";
return false;
@@ -389,7 +445,11 @@ Buffer::AsString() const
if (this->Dirty()) {
ss << "*";
}
#ifdef KTE_USE_BUFFER_PIECE_TABLE
ss << ">: " << content_.LineCount() << " lines";
#else
ss << ">: " << rows_.size() << " lines";
#endif
return ss.str();
}
@@ -398,6 +458,19 @@ Buffer::AsString() const
void
Buffer::insert_text(int row, int col, std::string_view text)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
if (row < 0)
row = 0;
if (col < 0)
col = 0;
const std::size_t off = content_.LineColToByteOffset(static_cast<std::size_t>(row),
static_cast<std::size_t>(col));
if (!text.empty()) {
content_.Insert(off, text.data(), text.size());
rows_cache_dirty_ = true;
}
return;
#else
if (row < 0)
row = 0;
if (static_cast<std::size_t>(row) > rows_.size())
@@ -409,8 +482,9 @@ Buffer::insert_text(int row, int col, std::string_view text)
auto y = static_cast<std::size_t>(row);
auto x = static_cast<std::size_t>(col);
if (x > rows_[y].size())
if (x > rows_[y].size()) {
x = rows_[y].size();
}
std::string remain(text);
while (true) {
@@ -432,12 +506,110 @@ Buffer::insert_text(int row, int col, std::string_view text)
remain.erase(0, pos + 1);
}
// Do not set dirty here; UndoSystem will manage state/dirty externally
#endif
}
#ifdef KTE_USE_BUFFER_PIECE_TABLE
// ===== Adapter helpers for PieceTable-backed Buffer =====
// Refresh the materialized rows_ cache from content_ if it has been marked
// stale. Does nothing when the cache is already current.
//
// Each cached row comes from content_.GetLine(i), and nrows_ is updated to
// match the rebuilt cache for legacy code that still reads it.
void
Buffer::ensure_rows_cache() const
{
	if (rows_cache_dirty_) {
		rows_.clear();
		const std::size_t line_total = content_.LineCount();
		rows_.reserve(line_total);
		for (std::size_t ln = 0; ln != line_total; ++ln)
			rows_.emplace_back(content_.GetLine(ln));

		// nrows_ is not declared mutable, so this const method has to cast
		// away constness to update it.
		// NOTE(review): this is only well-defined when the Buffer object itself
		// was not created const -- confirm against callers.
		const_cast<Buffer *>(this)->nrows_ = rows_.size();
		rows_cache_dirty_ = false;
	}
}
// Returns the number of lines reported by content_.LineCount().
// Defined out of line so the inline Nrows() accessor in the header can call
// it without spelling out any PieceTable member calls itself.
std::size_t
Buffer::content_LineCount_() const
{
return content_.LineCount();
}
#endif
void
Buffer::delete_text(int row, int col, std::size_t len)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
if (len == 0)
return;
if (row < 0)
row = 0;
if (col < 0)
col = 0;
std::size_t start = content_.LineColToByteOffset(static_cast<std::size_t>(row), static_cast<std::size_t>(col));
// Walk len logical characters across lines to compute end offset
std::size_t r = static_cast<std::size_t>(row);
std::size_t c = static_cast<std::size_t>(col);
std::size_t remaining = len;
const std::size_t line_count = content_.LineCount();
while (remaining > 0 && r < line_count) {
auto range = content_.GetLineRange(r); // [start,end)
// Compute end of line excluding trailing '\n'
std::size_t line_end = range.second;
if (line_end > range.first) {
// If last char is '\n', don't count in-column span
std::string last = content_.GetRange(line_end - 1, 1);
if (!last.empty() && last[0] == '\n') {
line_end -= 1;
}
}
std::size_t cur_off = content_.LineColToByteOffset(r, c);
std::size_t in_line = (cur_off < line_end) ? (line_end - cur_off) : 0;
if (remaining <= in_line) {
// All within current line
std::size_t end = cur_off + remaining;
content_.Delete(start, end - start);
rows_cache_dirty_ = true;
return;
}
// Consume rest of line
remaining -= in_line;
std::size_t end = cur_off + in_line;
// If there is a next line and remaining > 0, consider consuming the newline as 1
if (r + 1 < line_count) {
if (remaining > 0) {
// newline
end += 1;
remaining -= 1;
}
// Move to next line
r += 1;
c = 0;
// Update start deletion length so far by postponing until we know final end; we keep start fixed
if (remaining == 0) {
content_.Delete(start, end - start);
rows_cache_dirty_ = true;
return;
}
// Continue loop with updated r/c; but also keep track of 'end' as current consumed position
// Rather than tracking incrementally, we will recompute cur_off at top of loop.
// However, we need to carry forward the consumed part; we can temporarily store 'end' in start_of_next
// To simplify, after loop finishes we will compute final end using current r/c using remaining.
} else {
// No next line; delete to file end
std::size_t total = content_.Size();
content_.Delete(start, total - start);
rows_cache_dirty_ = true;
return;
}
}
// If loop ended because remaining==0 at a line boundary
if (remaining == 0) {
std::size_t end = content_.LineColToByteOffset(r, c);
content_.Delete(start, end - start);
rows_cache_dirty_ = true;
}
return;
#else
if (rows_.empty() || len == 0)
return;
if (row < 0)
@@ -470,12 +642,25 @@ Buffer::delete_text(int row, int col, std::size_t len)
break;
}
}
#endif
}
// Split line `row` at column `col`, moving everything from `col` onward onto a
// new following line.
//
// Piece-table backend: inserts a single '\n' at the byte offset of (row, col).
// Negative `row` and negative `col` are each clamped to 0. A column past the
// end of the line is clamped to the line end by LineColToByteOffset, and a row
// past the last line maps to the end of the buffer.
void
Buffer::split_line(int row, const int col)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
	if (row < 0)
		row = 0;
	// `col` is a const parameter, so clamp it into a local. The previous code
	// reset `row` here instead, which split at the end of line 0 whenever a
	// negative column was passed.
	const std::size_t split_col = (col < 0) ? 0 : static_cast<std::size_t>(col);
	const std::size_t off = content_.LineColToByteOffset(static_cast<std::size_t>(row), split_col);
	const char nl = '\n';
	content_.Insert(off, &nl, 1);
	rows_cache_dirty_ = true;
	return;
#else
if (row < 0) {
row = 0;
}
@@ -488,12 +673,26 @@ Buffer::split_line(int row, const int col)
const auto tail = rows_[y].substr(x);
rows_[y].erase(x);
rows_.insert(rows_.begin() + static_cast<std::ptrdiff_t>(y + 1), Line(tail));
#endif
}
void
Buffer::join_lines(int row)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
if (row < 0)
row = 0;
std::size_t r = static_cast<std::size_t>(row);
if (r + 1 >= content_.LineCount())
return;
// Delete the newline between line r and r+1
std::size_t end_of_line = content_.LineColToByteOffset(r, std::numeric_limits<std::size_t>::max());
// end_of_line now equals line end (clamped before newline). The newline should be exactly at this position.
content_.Delete(end_of_line, 1);
rows_cache_dirty_ = true;
return;
#else
if (row < 0) {
row = 0;
}
@@ -505,28 +704,57 @@ Buffer::join_lines(int row)
rows_[y] += rows_[y + 1];
rows_.erase(rows_.begin() + static_cast<std::ptrdiff_t>(y + 1));
#endif
}
// Insert `text` as a new row at index `row`; rows at and after `row` move down.
//
// A negative `row` is clamped to 0. A `row` at or beyond the current row count
// appends a new last row.
//
// Piece-table backend:
//  - Inserting before an existing line writes `text` followed by '\n' at that
//    line's start offset.
//  - Appending writes '\n' followed by `text` at the end of the buffer. Writing
//    `text` + '\n' there (the previous behavior) glued the text onto the
//    existing last line instead of creating a new row.
void
Buffer::insert_row(int row, const std::string_view text)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
	if (row < 0)
		row = 0;
	const auto r = static_cast<std::size_t>(row);
	const bool appending = r >= content_.LineCount();
	// For a row past the last line LineColToByteOffset returns the total size,
	// i.e. the end-of-buffer offset.
	const std::size_t off = content_.LineColToByteOffset(r, 0);
	const char nl = '\n';
	if (appending) {
		// Separator first, so the new text starts its own line.
		content_.Insert(off, &nl, 1);
		if (!text.empty())
			content_.Insert(off + 1, text.data(), text.size());
	} else {
		if (!text.empty())
			content_.Insert(off, text.data(), text.size());
		content_.Insert(off + text.size(), &nl, 1);
	}
	rows_cache_dirty_ = true;
	return;
#else
	if (row < 0)
		row = 0;
	if (static_cast<std::size_t>(row) > rows_.size())
		row = static_cast<int>(rows_.size());
	rows_.insert(rows_.begin() + row, Line(std::string(text)));
#endif
}
// Remove row `row` from the buffer. A negative `row` is clamped to 0; a row at
// or beyond the current row count is ignored.
//
// Piece-table backend:
//  - For a non-final line, GetLineRange's end is the start of the next line,
//    so deleting [start, end) also removes the line's trailing '\n'.
//  - The final line has no trailing '\n'. To make the row actually disappear,
//    the '\n' that precedes it is removed as well. Previously only the line's
//    bytes were removed, leaving an empty last row and an unchanged row count.
//  - When the buffer has a single line, only its content is removed.
void
Buffer::delete_row(int row)
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
	if (row < 0)
		row = 0;
	const auto r = static_cast<std::size_t>(row);
	const std::size_t line_count = content_.LineCount();
	if (r >= line_count)
		return;
	const auto range = content_.GetLineRange(r); // [start,end)
	std::size_t start = range.first;
	const std::size_t end = range.second;
	// Every line after the first starts right after a '\n', so start - 1 is
	// the separator between this line and the previous one.
	if (r + 1 == line_count && start > 0)
		start -= 1;
	content_.Delete(start, end - start);
	rows_cache_dirty_ = true;
	return;
#else
	if (row < 0)
		row = 0;
	if (static_cast<std::size_t>(row) >= rows_.size())
		return;
	rows_.erase(rows_.begin() + row);
#endif
}
@@ -542,4 +770,4 @@ const UndoSystem *
Buffer::Undo() const
{
return undo_sys_.get();
}
}

View File

@@ -10,6 +10,9 @@
#include <string_view>
#include "AppendBuffer.h"
#ifdef KTE_USE_BUFFER_PIECE_TABLE
#include "PieceTable.h"
#endif
#include "UndoSystem.h"
#include <cstdint>
#include <memory>
@@ -63,7 +66,11 @@ public:
// Number of rows in the buffer.
// With KTE_USE_BUFFER_PIECE_TABLE defined this is the value returned by
// content_LineCount_(); otherwise it is the stored nrows_ counter.
[[nodiscard]] std::size_t Nrows() const
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
return content_LineCount_();
#else
return nrows_;
#endif
}
@@ -255,13 +262,23 @@ public:
// Read-only view of the buffer rows.
// Under KTE_USE_BUFFER_PIECE_TABLE the rows cache is refreshed first via
// ensure_rows_cache(); in both configurations rows_ is returned.
[[nodiscard]] const std::vector<Line> &Rows() const
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
	ensure_rows_cache();
#endif
	return rows_;
}
// Mutable access to the buffer rows.
// Under KTE_USE_BUFFER_PIECE_TABLE the rows cache is refreshed first via
// ensure_rows_cache(); in both configurations rows_ is returned.
// NOTE(review): with the piece-table backend rows_ is rebuilt from content_
// whenever the cache is marked dirty, so edits made through this reference do
// not appear to reach content_ -- confirm that callers only read through it.
[[nodiscard]] std::vector<Line> &Rows()
{
#ifdef KTE_USE_BUFFER_PIECE_TABLE
	ensure_rows_cache();
#endif
	return rows_;
}
@@ -460,7 +477,22 @@ private:
std::size_t rx_ = 0; // render x (tabs expanded)
std::size_t nrows_ = 0; // number of rows
std::size_t rowoffs_ = 0, coloffs_ = 0; // viewport offsets
#ifdef KTE_USE_BUFFER_PIECE_TABLE
mutable std::vector<Line> rows_; // materialized cache of rows (without trailing newlines)
#else
std::vector<Line> rows_; // buffer rows (without trailing newlines)
#endif
#ifdef KTE_USE_BUFFER_PIECE_TABLE
// When using the adapter, PieceTable is the source of truth.
PieceTable content_{};
mutable bool rows_cache_dirty_ = true; // invalidate on edits / I/O
// Helper to rebuild rows_ from content_
void ensure_rows_cache() const;
// Helper to query content_.LineCount() while keeping header minimal
std::size_t content_LineCount_() const;
#endif
std::string filename_;
bool is_file_backed_ = false;
bool dirty_ = false;

View File

@@ -12,6 +12,7 @@ set(BUILD_GUI ON CACHE BOOL "Enable building the graphical version.")
set(KTE_USE_QT OFF CACHE BOOL "Build the QT frontend instead of ImGui.")
set(BUILD_TESTS OFF CACHE BOOL "Enable building test programs.")
option(KTE_USE_PIECE_TABLE "Use PieceTable instead of GapBuffer implementation" ON)
option(KTE_USE_BUFFER_PIECE_TABLE "Use PieceTable inside Buffer adapter (Phase 2)" OFF)
set(KTE_FONT_SIZE "18.0" CACHE STRING "Default font size for GUI")
option(KTE_UNDO_DEBUG "Enable undo instrumentation logs" OFF)
option(KTE_ENABLE_TREESITTER "Enable optional Tree-sitter highlighter adapter" OFF)
@@ -273,6 +274,9 @@ add_executable(kte
if (KTE_USE_PIECE_TABLE)
target_compile_definitions(kte PRIVATE KTE_USE_PIECE_TABLE=1)
endif ()
if (KTE_USE_BUFFER_PIECE_TABLE)
target_compile_definitions(kte PRIVATE KTE_USE_BUFFER_PIECE_TABLE=1)
endif ()
if (KTE_UNDO_DEBUG)
target_compile_definitions(kte PRIVATE KTE_UNDO_DEBUG=1)
endif ()
@@ -309,6 +313,9 @@ if (BUILD_TESTS)
if (KTE_USE_PIECE_TABLE)
target_compile_definitions(test_undo PRIVATE KTE_USE_PIECE_TABLE=1)
endif ()
if (KTE_USE_BUFFER_PIECE_TABLE)
target_compile_definitions(test_undo PRIVATE KTE_USE_BUFFER_PIECE_TABLE=1)
endif ()
if (KTE_UNDO_DEBUG)
target_compile_definitions(test_undo PRIVATE KTE_UNDO_DEBUG=1)
@@ -357,6 +364,9 @@ if (${BUILD_GUI})
if (KTE_UNDO_DEBUG)
target_compile_definitions(kge PRIVATE KTE_UNDO_DEBUG=1)
endif ()
if (KTE_USE_BUFFER_PIECE_TABLE)
target_compile_definitions(kge PRIVATE KTE_USE_BUFFER_PIECE_TABLE=1)
endif ()
if (KTE_USE_QT)
target_link_libraries(kge ${CURSES_LIBRARIES} Qt6::Widgets)
else ()

View File

@@ -83,7 +83,7 @@ ensure_cursor_visible(const Editor &ed, Buffer &buf)
}
// Clamp vertical offset to available content
const auto total_rows = buf.Rows().size();
const auto total_rows = buf.Nrows();
if (content_rows < total_rows) {
std::size_t max_rowoffs = total_rows - content_rows;
if (rowoffs > max_rowoffs)
@@ -115,8 +115,7 @@ cmd_center_on_cursor(CommandContext &ctx)
Buffer *buf = ctx.editor.CurrentBuffer();
if (!buf)
return false;
const auto &rows = buf->Rows();
std::size_t total = rows.size();
std::size_t total = buf->Nrows();
std::size_t content = ctx.editor.ContentRows();
if (content == 0)
content = 1;
@@ -139,8 +138,8 @@ cmd_center_on_cursor(CommandContext &ctx)
static void
ensure_at_least_one_line(Buffer &buf)
{
if (buf.Rows().empty()) {
buf.Rows().emplace_back("");
if (buf.Nrows() == 0) {
buf.insert_row(0, "");
buf.SetDirty(true);
}
}
@@ -254,33 +253,57 @@ extract_region_text(const Buffer &buf, std::size_t sx, std::size_t sy, std::size
static void
delete_region(Buffer &buf, std::size_t sx, std::size_t sy, std::size_t ex, std::size_t ey)
{
auto &rows = buf.Rows();
if (rows.empty())
std::size_t nrows = buf.Nrows();
if (nrows == 0)
return;
if (sy >= rows.size())
if (sy >= nrows)
return;
if (ey >= rows.size())
ey = rows.size() - 1;
if (ey >= nrows)
ey = nrows - 1;
if (sy == ey) {
auto &line = rows[sy];
std::size_t xs = std::min(sx, line.size());
std::size_t xe = std::min(ex, line.size());
// Single line: delete text from xs to xe
const auto &rows = buf.Rows();
const auto &line = rows[sy];
std::size_t xs = std::min(sx, line.size());
std::size_t xe = std::min(ex, line.size());
if (xe < xs)
std::swap(xs, xe);
line.erase(xs, xe - xs);
buf.delete_text(static_cast<int>(sy), static_cast<int>(xs), xe - xs);
} else {
// Keep prefix of first and suffix of last then join
std::string prefix = rows[sy].substr(0, std::min(sx, rows[sy].size()));
std::string suffix;
{
const auto &last = rows[ey];
std::size_t xe = std::min(ex, last.size());
suffix = last.substr(xe);
// Multi-line: delete from (sx,sy) to (ex,ey)
// Strategy:
// 1. Save suffix of last line (from ex to end)
// 2. Delete tail of first line (from sx to end)
// 3. Delete all lines from sy+1 to ey (inclusive)
// 4. Insert saved suffix at end of first line
// 5. Join if needed (no, suffix is appended directly)
const auto &rows = buf.Rows();
std::size_t first_line_len = rows[sy].size();
std::size_t last_line_len = rows[ey].size();
std::size_t xs = std::min(sx, first_line_len);
std::size_t xe = std::min(ex, last_line_len);
// Save suffix of last line before any modifications
std::string suffix = rows[ey].substr(xe);
// Delete tail of first line (from xs to end)
if (xs < first_line_len) {
buf.delete_text(static_cast<int>(sy), static_cast<int>(xs), first_line_len - xs);
}
// Delete lines from ey down to sy+1 (reverse order to preserve indices)
for (std::size_t i = ey; i > sy; --i) {
buf.delete_row(static_cast<int>(i));
}
// Append saved suffix to first line
if (!suffix.empty()) {
// Get current length of line sy after deletions
const auto &rows_after = buf.Rows();
std::size_t line_len = rows_after[sy].size();
buf.insert_text(static_cast<int>(sy), static_cast<int>(line_len), suffix);
}
rows[sy] = prefix + suffix;
// erase middle lines and the last line
rows.erase(rows.begin() + static_cast<std::ptrdiff_t>(sy + 1),
rows.begin() + static_cast<std::ptrdiff_t>(ey + 1));
}
buf.SetCursor(sx, sy);
buf.SetDirty(true);
@@ -291,15 +314,19 @@ delete_region(Buffer &buf, std::size_t sx, std::size_t sy, std::size_t ex, std::
static void
insert_text_at_cursor(Buffer &buf, const std::string &text)
{
auto &rows = buf.Rows();
std::size_t y = buf.Cury();
std::size_t x = buf.Curx();
if (y > rows.size())
y = rows.size();
if (rows.empty())
rows.emplace_back("");
if (y >= rows.size())
rows.emplace_back("");
std::size_t nrows = buf.Nrows();
std::size_t y = buf.Cury();
std::size_t x = buf.Curx();
if (y > nrows)
y = nrows;
if (nrows == 0) {
buf.insert_row(0, "");
nrows = 1;
}
if (y >= nrows) {
buf.insert_row(static_cast<int>(nrows), "");
nrows = buf.Nrows();
}
std::size_t cur_y = y;
std::size_t cur_x = x;
@@ -309,25 +336,28 @@ insert_text_at_cursor(Buffer &buf, const std::string &text)
auto pos = remain.find('\n');
if (pos == std::string::npos) {
// insert remaining into current line
if (cur_y >= rows.size())
rows.emplace_back("");
nrows = buf.Nrows();
if (cur_y >= nrows) {
buf.insert_row(static_cast<int>(nrows), "");
}
const auto &rows = buf.Rows();
if (cur_x > rows[cur_y].size())
cur_x = rows[cur_y].size();
rows[cur_y].insert(cur_x, remain);
buf.insert_text(static_cast<int>(cur_y), static_cast<int>(cur_x), remain);
cur_x += remain.size();
break;
}
// insert segment before newline
std::string seg = remain.substr(0, pos);
if (cur_x > rows[cur_y].size())
cur_x = rows[cur_y].size();
rows[cur_y].insert(cur_x, seg);
{
const auto &rows = buf.Rows();
if (cur_x > rows[cur_y].size())
cur_x = rows[cur_y].size();
}
buf.insert_text(static_cast<int>(cur_y), static_cast<int>(cur_x), seg);
// split line at cur_x + seg.size()
cur_x += seg.size();
std::string after = rows[cur_y].substr(cur_x);
rows[cur_y].erase(cur_x);
// create new line after current with the 'after' tail
rows.insert(rows.begin() + static_cast<std::ptrdiff_t>(cur_y + 1), Buffer::Line(after));
buf.split_line(static_cast<int>(cur_y), static_cast<int>(cur_x));
// move to start of next line
cur_y += 1;
cur_x = 0;
@@ -410,10 +440,8 @@ cmd_move_cursor_to(CommandContext &ctx)
std::size_t bco = buf->Coloffs();
std::size_t by = bro + vy;
// Clamp by to existing lines later
auto &lines2 = buf->Rows();
if (lines2.empty()) {
lines2.emplace_back("");
}
ensure_at_least_one_line(*buf);
const auto &lines2 = buf->Rows();
if (by >= lines2.size())
by = lines2.size() - 1;
std::string line2 = static_cast<std::string>(lines2[by]);
@@ -430,10 +458,8 @@ cmd_move_cursor_to(CommandContext &ctx)
}
}
}
auto &lines = buf->Rows();
if (lines.empty()) {
lines.emplace_back("");
}
ensure_at_least_one_line(*buf);
const auto &lines = buf->Rows();
if (row >= lines.size())
row = lines.size() - 1;
std::string line = static_cast<std::string>(lines[row]);
@@ -2122,20 +2148,24 @@ cmd_show_help(CommandContext &ctx)
};
auto populate_from_text = [](Buffer &b, const std::string &text) {
auto &rows = b.Rows();
rows.clear();
// Clear existing rows
while (b.Nrows() > 0) {
b.delete_row(0);
}
// Parse text and insert rows
std::string line;
line.reserve(128);
int row_idx = 0;
for (char ch: text) {
if (ch == '\n') {
rows.emplace_back(line);
b.insert_row(row_idx++, line);
line.clear();
} else if (ch != '\r') {
line.push_back(ch);
}
}
// Add last line (even if empty)
rows.emplace_back(line);
b.insert_row(row_idx, line);
b.SetDirty(false);
b.SetCursor(0, 0);
b.SetOffsets(0, 0);

View File

@@ -1,5 +1,6 @@
#include <algorithm>
#include <utility>
#include <limits>
#include "PieceTable.h"
@@ -151,6 +152,8 @@ PieceTable::Clear()
materialized_.clear();
total_size_ = 0;
dirty_ = true;
line_index_.clear();
line_index_dirty_ = true;
}
@@ -179,6 +182,7 @@ PieceTable::addPieceBack(const Source src, const std::size_t start, const std::s
pieces_.push_back(Piece{src, start, len});
total_size_ += len;
dirty_ = true;
InvalidateLineIndex();
}
@@ -203,6 +207,7 @@ PieceTable::addPieceFront(Source src, std::size_t start, std::size_t len)
pieces_.insert(pieces_.begin(), Piece{src, start, len});
total_size_ += len;
dirty_ = true;
InvalidateLineIndex();
}
@@ -225,3 +230,308 @@ PieceTable::materialize() const
// Ensure there is a null terminator present via std::string invariants
dirty_ = false;
}
// ===== New Phase 1 implementation =====
std::pair<std::size_t, std::size_t>
PieceTable::locate(const std::size_t byte_offset) const
{
if (byte_offset >= total_size_) {
return {pieces_.size(), 0};
}
std::size_t off = byte_offset;
for (std::size_t i = 0; i < pieces_.size(); ++i) {
const auto &p = pieces_[i];
if (off < p.len) {
return {i, off};
}
off -= p.len;
}
// Should not reach here unless inconsistency; return end
return {pieces_.size(), 0};
}
void
PieceTable::coalesceNeighbors(std::size_t index)
{
if (pieces_.empty())
return;
if (index >= pieces_.size())
index = pieces_.size() - 1;
// Try merge with previous
if (index > 0) {
auto &prev = pieces_[index - 1];
auto &curr = pieces_[index];
if (prev.src == curr.src && prev.start + prev.len == curr.start) {
prev.len += curr.len;
pieces_.erase(pieces_.begin() + static_cast<std::ptrdiff_t>(index));
if (index > 0)
index -= 1;
}
}
// Try merge with next (index may have shifted)
if (index + 1 < pieces_.size()) {
auto &curr = pieces_[index];
auto &next = pieces_[index + 1];
if (curr.src == next.src && curr.start + curr.len == next.start) {
curr.len += next.len;
pieces_.erase(pieces_.begin() + static_cast<std::ptrdiff_t>(index + 1));
}
}
}
// Mark the cached line index as stale. The index itself is left untouched;
// RebuildLineIndex() recomputes it the next time it runs with the flag set.
void
PieceTable::InvalidateLineIndex() const
{
line_index_dirty_ = true;
}
void
PieceTable::RebuildLineIndex() const
{
if (!line_index_dirty_)
return;
line_index_.clear();
line_index_.push_back(0);
std::size_t pos = 0;
for (const auto &pc: pieces_) {
const std::string &src = pc.src == Source::Original ? original_ : add_;
const char *base = src.data() + static_cast<std::ptrdiff_t>(pc.start);
for (std::size_t j = 0; j < pc.len; ++j) {
if (base[j] == '\n') {
// next line starts after the newline
line_index_.push_back(pos + j + 1);
}
}
pos += pc.len;
}
line_index_dirty_ = false;
}
void
PieceTable::Insert(std::size_t byte_offset, const char *text, std::size_t len)
{
if (len == 0)
return;
if (byte_offset > total_size_)
byte_offset = total_size_;
const std::size_t add_start = add_.size();
add_.append(text, len);
if (pieces_.empty()) {
pieces_.push_back(Piece{Source::Add, add_start, len});
total_size_ += len;
dirty_ = true;
InvalidateLineIndex();
return;
}
auto [idx, inner] = locate(byte_offset);
if (idx == pieces_.size()) {
// insert at end
pieces_.push_back(Piece{Source::Add, add_start, len});
total_size_ += len;
dirty_ = true;
InvalidateLineIndex();
coalesceNeighbors(pieces_.size() - 1);
return;
}
Piece target = pieces_[idx];
// Build replacement sequence: left, inserted, right
std::vector<Piece> repl;
repl.reserve(3);
if (inner > 0) {
repl.push_back(Piece{target.src, target.start, inner});
}
repl.push_back(Piece{Source::Add, add_start, len});
const std::size_t right_len = target.len - inner;
if (right_len > 0) {
repl.push_back(Piece{target.src, target.start + inner, right_len});
}
// Replace target with repl
pieces_.erase(pieces_.begin() + static_cast<std::ptrdiff_t>(idx));
pieces_.insert(pieces_.begin() + static_cast<std::ptrdiff_t>(idx), repl.begin(), repl.end());
total_size_ += len;
dirty_ = true;
InvalidateLineIndex();
// Try coalescing around the inserted position (the inserted piece is at idx + (inner>0 ? 1 : 0))
std::size_t ins_index = idx + (inner > 0 ? 1 : 0);
coalesceNeighbors(ins_index);
}
// Delete up to `len` bytes starting at `byte_offset`.
// A zero `len` or an offset at/past the end is a no-op. `len` is clamped so
// the deleted range never extends past the end of the content.
// Pieces fully inside the range are erased; pieces partially covered are
// trimmed or split into a left and right remnant. Afterwards the pieces
// around the deletion point are coalesced where possible.
void
PieceTable::Delete(std::size_t byte_offset, std::size_t len)
{
	if (len == 0)
		return;
	if (byte_offset >= total_size_)
		return;
	// Clamp without computing byte_offset + len: that sum can wrap for a very
	// large len, which would skip the clamp and corrupt total_size_ below.
	// byte_offset < total_size_ here, so the subtraction cannot underflow.
	if (len > total_size_ - byte_offset)
		len = total_size_ - byte_offset;

	auto [idx, inner] = locate(byte_offset);
	std::size_t remaining = len;

	while (remaining > 0 && idx < pieces_.size()) {
		Piece &pc = pieces_[idx];
		std::size_t available = pc.len - inner; // bytes we can remove from this piece starting at inner
		std::size_t take = std::min(available, remaining);

		// Compute lengths for left and right remnants
		std::size_t left_len = inner;
		std::size_t right_len = pc.len - inner - take;
		// Copy these before any insert below can invalidate the `pc` reference.
		Source src = pc.src;
		std::size_t start = pc.start;

		// Replace current piece with up to two remnants
		if (left_len > 0 && right_len > 0) {
			pc.len = left_len; // keep left in place
			Piece right{src, start + inner + take, right_len};
			pieces_.insert(pieces_.begin() + static_cast<std::ptrdiff_t>(idx + 1), right);
			idx += 1; // idx now refers to the right remnant
		} else if (left_len > 0) {
			// Tail of the piece removed; only the left remnant stays.
			pc.len = left_len;
		} else if (right_len > 0) {
			// Head of the piece removed; only the right remnant stays.
			pc.start = start + inner + take;
			pc.len = right_len;
		} else {
			// Entire piece removed; the next piece slides into this index.
			pieces_.erase(pieces_.begin() + static_cast<std::ptrdiff_t>(idx));
			inner = 0;
			remaining -= take;
			continue;
		}

		// Any further deletion starts at the beginning of the next piece.
		inner = 0;
		remaining -= take;
		if (remaining == 0)
			break;
		idx += 1;
	}

	total_size_ -= len;
	dirty_ = true;
	InvalidateLineIndex();
	// Try to merge the pieces on both sides of the deletion point.
	if (idx < pieces_.size())
		coalesceNeighbors(idx);
	if (idx > 0)
		coalesceNeighbors(idx - 1);
}
// Number of logical lines: the number of entries in the line index after
// bringing it up to date. A rebuilt index always holds at least the entry for
// offset 0, so empty content still reports one line after a rebuild.
std::size_t
PieceTable::LineCount() const
{
	RebuildLineIndex();
	return line_index_.size();
}
std::pair<std::size_t, std::size_t>
PieceTable::GetLineRange(std::size_t line_num) const
{
RebuildLineIndex();
if (line_index_.empty())
return {0, 0};
if (line_num >= line_index_.size())
return {0, 0};
std::size_t start = line_index_[line_num];
std::size_t end = (line_num + 1 < line_index_.size()) ? line_index_[line_num + 1] : total_size_;
return {start, end};
}
// Text of line `line_num` without its trailing '\n'.
// Returns an empty string for an out-of-range line (GetLineRange yields
// {0, 0} in that case).
std::string
PieceTable::GetLine(std::size_t line_num) const
{
	const auto span = GetLineRange(line_num);
	const std::size_t first = span.first;
	std::size_t last = span.second;
	if (last < first)
		return std::string();

	// Drop the newline terminator if the range ends with one. GetRange(…, 1)
	// yields at most one character, so comparing against "\n" checks exactly
	// that character.
	if (last > first && GetRange(last - 1, 1) == "\n")
		last -= 1;
	return GetRange(first, last - first);
}
std::pair<std::size_t, std::size_t>
PieceTable::ByteOffsetToLineCol(std::size_t byte_offset) const
{
if (byte_offset > total_size_)
byte_offset = total_size_;
RebuildLineIndex();
if (line_index_.empty())
return {0, 0};
auto it = std::upper_bound(line_index_.begin(), line_index_.end(), byte_offset);
std::size_t row = (it == line_index_.begin()) ? 0 : static_cast<std::size_t>((it - line_index_.begin()) - 1);
std::size_t col = byte_offset - line_index_[row];
return {row, col};
}
// Convert {row, col} to a global byte offset.
// A row past the last line maps to total_size_. The column is clamped to the
// line's length excluding its trailing '\n', so the result never points past
// the line's newline.
std::size_t
PieceTable::LineColToByteOffset(std::size_t row, std::size_t col) const
{
	RebuildLineIndex();
	if (line_index_.empty())
		return 0;
	if (row >= line_index_.size())
		return total_size_;

	const std::size_t line_start = line_index_[row];
	const bool has_next = row + 1 < line_index_.size();
	std::size_t content_end = has_next ? line_index_[row + 1] : total_size_;

	// Exclude the line's newline terminator from the addressable columns.
	if (content_end > line_start && GetRange(content_end - 1, 1) == "\n")
		content_end -= 1;

	return line_start + std::min(col, content_end - line_start);
}
// Copy of up to `len` bytes starting at `byte_offset`.
// Returns an empty string when `len` is 0 or the offset is at/past the end;
// `len` is clamped so the range stops at the end of the content.
// Materializes the full content before extracting the substring.
std::string
PieceTable::GetRange(std::size_t byte_offset, std::size_t len) const
{
	if (len == 0 || byte_offset >= total_size_)
		return std::string();
	// Overflow-safe clamp: byte_offset < total_size_ here, so the subtraction
	// cannot underflow, whereas byte_offset + len could wrap for a large len.
	len = std::min(len, total_size_ - byte_offset);
	materialize();
	return materialized_.substr(byte_offset, len);
}
// Search for `needle` at or after byte offset `start`.
// Returns the byte offset of the first match, or
// std::numeric_limits<std::size_t>::max() when there is no match or `start`
// is past the end. An empty needle matches at `start` itself.
std::size_t
PieceTable::Find(const std::string &needle, std::size_t start) const
{
	constexpr std::size_t not_found = std::numeric_limits<std::size_t>::max();
	if (start > total_size_)
		return not_found;
	if (needle.empty())
		return start;

	materialize();
	const auto hit = materialized_.find(needle, start);
	return (hit == std::string::npos) ? not_found : hit;
}

View File

@@ -68,6 +68,30 @@ public:
return materialized_.capacity();
}
// ===== New buffer-wide API (Phase 1) =====
// Byte-based editing operations
void Insert(std::size_t byte_offset, const char *text, std::size_t len);
void Delete(std::size_t byte_offset, std::size_t len);
// Line-based queries
[[nodiscard]] std::size_t LineCount() const; // number of logical lines
[[nodiscard]] std::string GetLine(std::size_t line_num) const;
[[nodiscard]] std::pair<std::size_t, std::size_t> GetLineRange(std::size_t line_num) const; // [start,end)
// Position conversion
[[nodiscard]] std::pair<std::size_t, std::size_t> ByteOffsetToLineCol(std::size_t byte_offset) const;
[[nodiscard]] std::size_t LineColToByteOffset(std::size_t row, std::size_t col) const;
// Substring extraction
[[nodiscard]] std::string GetRange(std::size_t byte_offset, std::size_t len) const;
// Simple search utility; returns the byte offset of the first match, or std::numeric_limits<std::size_t>::max() when not found
[[nodiscard]] std::size_t Find(const std::string &needle, std::size_t start = 0) const;
private:
enum class Source : unsigned char { Original, Add };
@@ -83,6 +107,17 @@ private:
void materialize() const;
// Helper: locate piece index and inner offset for a global byte offset
[[nodiscard]] std::pair<std::size_t, std::size_t> locate(std::size_t byte_offset) const;
// Helper: try to coalesce neighboring pieces around index
void coalesceNeighbors(std::size_t index);
// Line index support (rebuilt lazily on demand)
void InvalidateLineIndex() const;
void RebuildLineIndex() const;
// Underlying storages
std::string original_; // unused for builder use-case, but kept for API symmetry
std::string add_;
@@ -91,4 +126,8 @@ private:
mutable std::string materialized_;
mutable bool dirty_ = true;
std::size_t total_size_ = 0;
// Cached line index: starting byte offset of each line (once rebuilt, always contains at least 1 entry: 0)
mutable std::vector<std::size_t> line_index_;
mutable bool line_index_dirty_ = true;
};