From afb6888c31e17fce7555d73173120387500b25a0 Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Fri, 5 Dec 2025 15:29:35 -0800 Subject: [PATCH] Introduce PieceTable-based buffer backend (Phase 1) - Added `PieceTable` class for efficient text manipulation and implemented core editing APIs (`Insert`, `Delete`, `Find`, etc.). - Integrated `PieceTable` into `Buffer` class with an adapter for rows caching. - Enabled seamless switching between legacy row-based and new PieceTable-backed editing via `KTE_USE_BUFFER_PIECE_TABLE`. - Updated file I/O, line-based queries, and cursor operations to support PieceTable-based storage. - Lazy rebuilding of line index and improved management of edit state for performance. --- Buffer.cc | 262 ++++++++++++++++++++++++++++++++++++++--- Buffer.h | 32 +++++ CMakeLists.txt | 10 ++ Command.cc | 142 +++++++++++++--------- PieceTable.cc | 310 +++++++++++++++++++++++++++++++++++++++++++++++++ PieceTable.h | 39 +++++++ 6 files changed, 722 insertions(+), 73 deletions(-) diff --git a/Buffer.cc b/Buffer.cc index e021313..db3f98f 100644 --- a/Buffer.cc +++ b/Buffer.cc @@ -2,6 +2,7 @@ #include #include #include +#include #include "Buffer.h" #include "UndoSystem.h" @@ -29,13 +30,17 @@ Buffer::Buffer(const std::string &path) // Copy constructor/assignment: perform a deep copy of core fields; reinitialize undo for the new buffer. 
Buffer::Buffer(const Buffer &other) { - curx_ = other.curx_; - cury_ = other.cury_; - rx_ = other.rx_; - nrows_ = other.nrows_; - rowoffs_ = other.rowoffs_; - coloffs_ = other.coloffs_; - rows_ = other.rows_; + curx_ = other.curx_; + cury_ = other.cury_; + rx_ = other.rx_; + nrows_ = other.nrows_; + rowoffs_ = other.rowoffs_; + coloffs_ = other.coloffs_; + rows_ = other.rows_; +#ifdef KTE_USE_BUFFER_PIECE_TABLE + content_ = other.content_; + rows_cache_dirty_ = other.rows_cache_dirty_; +#endif filename_ = other.filename_; is_file_backed_ = other.is_file_backed_; dirty_ = other.dirty_; @@ -77,13 +82,17 @@ Buffer::operator=(const Buffer &other) { if (this == &other) return *this; - curx_ = other.curx_; - cury_ = other.cury_; - rx_ = other.rx_; - nrows_ = other.nrows_; - rowoffs_ = other.rowoffs_; - coloffs_ = other.coloffs_; - rows_ = other.rows_; + curx_ = other.curx_; + cury_ = other.cury_; + rx_ = other.rx_; + nrows_ = other.nrows_; + rowoffs_ = other.rowoffs_; + coloffs_ = other.coloffs_; + rows_ = other.rows_; +#ifdef KTE_USE_BUFFER_PIECE_TABLE + content_ = other.content_; + rows_cache_dirty_ = other.rows_cache_dirty_; +#endif filename_ = other.filename_; is_file_backed_ = other.is_file_backed_; dirty_ = other.dirty_; @@ -141,6 +150,10 @@ Buffer::Buffer(Buffer &&other) noexcept syntax_enabled_ = other.syntax_enabled_; filetype_ = std::move(other.filetype_); highlighter_ = std::move(other.highlighter_); +#ifdef KTE_USE_BUFFER_PIECE_TABLE + content_ = std::move(other.content_); + rows_cache_dirty_ = other.rows_cache_dirty_; +#endif // Update UndoSystem's buffer reference to point to this object if (undo_sys_) { undo_sys_->UpdateBufferReference(*this); @@ -178,6 +191,10 @@ Buffer::operator=(Buffer &&other) noexcept filetype_ = std::move(other.filetype_); highlighter_ = std::move(other.highlighter_); +#ifdef KTE_USE_BUFFER_PIECE_TABLE + content_ = std::move(other.content_); + rows_cache_dirty_ = other.rows_cache_dirty_; +#endif // Update UndoSystem's buffer 
reference to point to this object if (undo_sys_) { undo_sys_->UpdateBufferReference(*this); @@ -229,6 +246,12 @@ Buffer::OpenFromFile(const std::string &path, std::string &err) mark_set_ = false; mark_curx_ = mark_cury_ = 0; +#ifdef KTE_USE_BUFFER_PIECE_TABLE + // Empty PieceTable + content_.Clear(); + rows_cache_dirty_ = true; +#endif + return true; } @@ -238,6 +261,22 @@ Buffer::OpenFromFile(const std::string &path, std::string &err) return false; } +#ifdef KTE_USE_BUFFER_PIECE_TABLE + // Read entire file into PieceTable as-is + std::string data; + in.seekg(0, std::ios::end); + auto sz = in.tellg(); + if (sz > 0) { + data.resize(static_cast(sz)); + in.seekg(0, std::ios::beg); + in.read(data.data(), static_cast(data.size())); + } + content_.Clear(); + if (!data.empty()) + content_.Append(data.data(), data.size()); + rows_cache_dirty_ = true; + nrows_ = 0; // not used under adapter +#else // Detect if file ends with a newline so we can preserve a final empty line // in our in-memory representation (mg-style semantics). 
bool ends_with_nl = false; @@ -278,7 +317,8 @@ Buffer::OpenFromFile(const std::string &path, std::string &err) } } - nrows_ = rows_.size(); + nrows_ = rows_.size(); +#endif filename_ = norm; is_file_backed_ = true; dirty_ = false; @@ -313,6 +353,12 @@ Buffer::Save(std::string &err) const err = "Failed to open for write: " + filename_; return false; } +#ifdef KTE_USE_BUFFER_PIECE_TABLE + const char *d = content_.Data(); + std::size_t n = content_.Size(); + if (d && n) + out.write(d, static_cast(n)); +#else for (std::size_t i = 0; i < rows_.size(); ++i) { const char *d = rows_[i].Data(); std::size_t n = rows_[i].Size(); @@ -322,6 +368,7 @@ Buffer::Save(std::string &err) const out.put('\n'); } } +#endif if (!out.good()) { err = "Write error"; return false; @@ -360,6 +407,14 @@ Buffer::SaveAs(const std::string &path, std::string &err) err = "Failed to open for write: " + out_path; return false; } +#ifdef KTE_USE_BUFFER_PIECE_TABLE + { + const char *d = content_.Data(); + std::size_t n = content_.Size(); + if (d && n) + out.write(d, static_cast(n)); + } +#else for (std::size_t i = 0; i < rows_.size(); ++i) { const char *d = rows_[i].Data(); std::size_t n = rows_[i].Size(); @@ -369,6 +424,7 @@ Buffer::SaveAs(const std::string &path, std::string &err) out.put('\n'); } } +#endif if (!out.good()) { err = "Write error"; return false; @@ -389,7 +445,11 @@ Buffer::AsString() const if (this->Dirty()) { ss << "*"; } +#ifdef KTE_USE_BUFFER_PIECE_TABLE + ss << ">: " << content_.LineCount() << " lines"; +#else ss << ">: " << rows_.size() << " lines"; +#endif return ss.str(); } @@ -398,6 +458,19 @@ Buffer::AsString() const void Buffer::insert_text(int row, int col, std::string_view text) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (row < 0) + row = 0; + if (col < 0) + col = 0; + const std::size_t off = content_.LineColToByteOffset(static_cast(row), + static_cast(col)); + if (!text.empty()) { + content_.Insert(off, text.data(), text.size()); + rows_cache_dirty_ = true; + } + return; 
+#else if (row < 0) row = 0; if (static_cast(row) > rows_.size()) @@ -409,8 +482,9 @@ Buffer::insert_text(int row, int col, std::string_view text) auto y = static_cast(row); auto x = static_cast(col); - if (x > rows_[y].size()) + if (x > rows_[y].size()) { x = rows_[y].size(); + } std::string remain(text); while (true) { @@ -432,12 +506,110 @@ Buffer::insert_text(int row, int col, std::string_view text) remain.erase(0, pos + 1); } // Do not set dirty here; UndoSystem will manage state/dirty externally +#endif } +#ifdef KTE_USE_BUFFER_PIECE_TABLE +// ===== Adapter helpers for PieceTable-backed Buffer ===== +void +Buffer::ensure_rows_cache() const +{ + if (!rows_cache_dirty_) + return; + rows_.clear(); + const std::size_t lc = content_.LineCount(); + rows_.reserve(lc); + for (std::size_t i = 0; i < lc; ++i) { + rows_.emplace_back(content_.GetLine(i)); + } + // Keep nrows_ in sync for any legacy code that still reads it + const_cast(this)->nrows_ = rows_.size(); + rows_cache_dirty_ = false; +} + +std::size_t +Buffer::content_LineCount_() const +{ + return content_.LineCount(); +} +#endif + void Buffer::delete_text(int row, int col, std::size_t len) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (len == 0) + return; + if (row < 0) + row = 0; + if (col < 0) + col = 0; + std::size_t start = content_.LineColToByteOffset(static_cast(row), static_cast(col)); + // Walk len logical characters across lines to compute end offset + std::size_t r = static_cast(row); + std::size_t c = static_cast(col); + std::size_t remaining = len; + const std::size_t line_count = content_.LineCount(); + while (remaining > 0 && r < line_count) { + auto range = content_.GetLineRange(r); // [start,end) + // Compute end of line excluding trailing '\n' + std::size_t line_end = range.second; + if (line_end > range.first) { + // If last char is '\n', don't count in-column span + std::string last = content_.GetRange(line_end - 1, 1); + if (!last.empty() && last[0] == '\n') { + line_end -= 1; + } + } + 
std::size_t cur_off = content_.LineColToByteOffset(r, c); + std::size_t in_line = (cur_off < line_end) ? (line_end - cur_off) : 0; + if (remaining <= in_line) { + // All within current line + std::size_t end = cur_off + remaining; + content_.Delete(start, end - start); + rows_cache_dirty_ = true; + return; + } + // Consume rest of line + remaining -= in_line; + std::size_t end = cur_off + in_line; + // If there is a next line and remaining > 0, consider consuming the newline as 1 + if (r + 1 < line_count) { + if (remaining > 0) { + // newline + end += 1; + remaining -= 1; + } + // Move to next line + r += 1; + c = 0; + // Update start deletion length so far by postponing until we know final end; we keep start fixed + if (remaining == 0) { + content_.Delete(start, end - start); + rows_cache_dirty_ = true; + return; + } + // Continue loop with updated r/c; but also keep track of 'end' as current consumed position + // Rather than tracking incrementally, we will recompute cur_off at top of loop. + // However, we need to carry forward the consumed part; we can temporarily store 'end' in start_of_next + // To simplify, after loop finishes we will compute final end using current r/c using remaining. 
+ } else { + // No next line; delete to file end + std::size_t total = content_.Size(); + content_.Delete(start, total - start); + rows_cache_dirty_ = true; + return; + } + } + // If loop ended because remaining==0 at a line boundary + if (remaining == 0) { + std::size_t end = content_.LineColToByteOffset(r, c); + content_.Delete(start, end - start); + rows_cache_dirty_ = true; + } + return; +#else if (rows_.empty() || len == 0) return; if (row < 0) @@ -470,12 +642,25 @@ Buffer::delete_text(int row, int col, std::size_t len) break; } } +#endif } void Buffer::split_line(int row, const int col) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (row < 0) + row = 0; + if (col < 0) + return split_line(row, 0); // col is const: re-enter with the column clamped to 0 (was wrongly resetting row) + const std::size_t off = content_.LineColToByteOffset(static_cast(row), + static_cast(col)); + const char nl = '\n'; + content_.Insert(off, &nl, 1); + rows_cache_dirty_ = true; + return; +#else if (row < 0) { row = 0; } @@ -488,12 +673,26 @@ Buffer::split_line(int row, const int col) const auto tail = rows_[y].substr(x); rows_[y].erase(x); rows_.insert(rows_.begin() + static_cast(y + 1), Line(tail)); +#endif } void Buffer::join_lines(int row) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (row < 0) + row = 0; + std::size_t r = static_cast(row); + if (r + 1 >= content_.LineCount()) + return; + // Delete the newline between line r and r+1 + std::size_t end_of_line = content_.LineColToByteOffset(r, std::numeric_limits::max()); + // end_of_line now equals line end (clamped before newline). The newline should be exactly at this position. 
+ content_.Delete(end_of_line, 1); + rows_cache_dirty_ = true; + return; +#else if (row < 0) { row = 0; } @@ -505,28 +704,57 @@ Buffer::join_lines(int row) rows_[y] += rows_[y + 1]; rows_.erase(rows_.begin() + static_cast(y + 1)); +#endif } void Buffer::insert_row(int row, const std::string_view text) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (row < 0) + row = 0; + std::size_t off = content_.LineColToByteOffset(static_cast(row), 0); + if (!text.empty()) + content_.Insert(off, text.data(), text.size()); + const char nl = '\n'; + content_.Insert(off + text.size(), &nl, 1); + rows_cache_dirty_ = true; + return; +#else if (row < 0) row = 0; if (static_cast(row) > rows_.size()) row = static_cast(rows_.size()); rows_.insert(rows_.begin() + row, Line(std::string(text))); +#endif } void Buffer::delete_row(int row) { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + if (row < 0) + row = 0; + std::size_t r = static_cast(row); + if (r >= content_.LineCount()) + return; + auto range = content_.GetLineRange(r); // [start,end) + // If not last line, ensure we include the separating newline by using end as-is (which points to next line start) + // If last line, end may equal total_size_. We still delete [start,end) which removes the last line content. 
+ std::size_t start = range.first; + std::size_t end = range.second; + content_.Delete(start, end - start); + rows_cache_dirty_ = true; + return; +#else if (row < 0) row = 0; if (static_cast(row) >= rows_.size()) return; rows_.erase(rows_.begin() + row); +#endif } @@ -542,4 +770,4 @@ const UndoSystem * Buffer::Undo() const { return undo_sys_.get(); -} +} \ No newline at end of file diff --git a/Buffer.h b/Buffer.h index 0c5fc36..af4c242 100644 --- a/Buffer.h +++ b/Buffer.h @@ -10,6 +10,9 @@ #include #include "AppendBuffer.h" +#ifdef KTE_USE_BUFFER_PIECE_TABLE +#include "PieceTable.h" +#endif #include "UndoSystem.h" #include #include @@ -63,7 +66,11 @@ public: [[nodiscard]] std::size_t Nrows() const { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + return content_LineCount_(); +#else return nrows_; +#endif } @@ -255,13 +262,23 @@ public: [[nodiscard]] const std::vector &Rows() const { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + ensure_rows_cache(); return rows_; +#else + return rows_; +#endif } [[nodiscard]] std::vector &Rows() { +#ifdef KTE_USE_BUFFER_PIECE_TABLE + ensure_rows_cache(); return rows_; +#else + return rows_; +#endif } @@ -460,7 +477,22 @@ private: std::size_t rx_ = 0; // render x (tabs expanded) std::size_t nrows_ = 0; // number of rows std::size_t rowoffs_ = 0, coloffs_ = 0; // viewport offsets +#ifdef KTE_USE_BUFFER_PIECE_TABLE + mutable std::vector rows_; // materialized cache of rows (without trailing newlines) +#else std::vector rows_; // buffer rows (without trailing newlines) +#endif +#ifdef KTE_USE_BUFFER_PIECE_TABLE + // When using the adapter, PieceTable is the source of truth. 
+ PieceTable content_{}; + mutable bool rows_cache_dirty_ = true; // invalidate on edits / I/O + + // Helper to rebuild rows_ from content_ + void ensure_rows_cache() const; + + // Helper to query content_.LineCount() while keeping header minimal + std::size_t content_LineCount_() const; +#endif std::string filename_; bool is_file_backed_ = false; bool dirty_ = false; diff --git a/CMakeLists.txt b/CMakeLists.txt index ca489b8..a9b130a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,7 @@ set(BUILD_GUI ON CACHE BOOL "Enable building the graphical version.") set(KTE_USE_QT OFF CACHE BOOL "Build the QT frontend instead of ImGui.") set(BUILD_TESTS OFF CACHE BOOL "Enable building test programs.") option(KTE_USE_PIECE_TABLE "Use PieceTable instead of GapBuffer implementation" ON) +option(KTE_USE_BUFFER_PIECE_TABLE "Use PieceTable inside Buffer adapter (Phase 2)" OFF) set(KTE_FONT_SIZE "18.0" CACHE STRING "Default font size for GUI") option(KTE_UNDO_DEBUG "Enable undo instrumentation logs" OFF) option(KTE_ENABLE_TREESITTER "Enable optional Tree-sitter highlighter adapter" OFF) @@ -273,6 +274,9 @@ add_executable(kte if (KTE_USE_PIECE_TABLE) target_compile_definitions(kte PRIVATE KTE_USE_PIECE_TABLE=1) endif () +if (KTE_USE_BUFFER_PIECE_TABLE) + target_compile_definitions(kte PRIVATE KTE_USE_BUFFER_PIECE_TABLE=1) +endif () if (KTE_UNDO_DEBUG) target_compile_definitions(kte PRIVATE KTE_UNDO_DEBUG=1) endif () @@ -309,6 +313,9 @@ if (BUILD_TESTS) if (KTE_USE_PIECE_TABLE) target_compile_definitions(test_undo PRIVATE KTE_USE_PIECE_TABLE=1) endif () + if (KTE_USE_BUFFER_PIECE_TABLE) + target_compile_definitions(test_undo PRIVATE KTE_USE_BUFFER_PIECE_TABLE=1) + endif () if (KTE_UNDO_DEBUG) target_compile_definitions(test_undo PRIVATE KTE_UNDO_DEBUG=1) @@ -357,6 +364,9 @@ if (${BUILD_GUI}) if (KTE_UNDO_DEBUG) target_compile_definitions(kge PRIVATE KTE_UNDO_DEBUG=1) endif () + if (KTE_USE_BUFFER_PIECE_TABLE) + target_compile_definitions(kge PRIVATE 
KTE_USE_BUFFER_PIECE_TABLE=1) + endif () if (KTE_USE_QT) target_link_libraries(kge ${CURSES_LIBRARIES} Qt6::Widgets) else () diff --git a/Command.cc b/Command.cc index a50d378..2f76ac2 100644 --- a/Command.cc +++ b/Command.cc @@ -83,7 +83,7 @@ ensure_cursor_visible(const Editor &ed, Buffer &buf) } // Clamp vertical offset to available content - const auto total_rows = buf.Rows().size(); + const auto total_rows = buf.Nrows(); if (content_rows < total_rows) { std::size_t max_rowoffs = total_rows - content_rows; if (rowoffs > max_rowoffs) @@ -115,8 +115,7 @@ cmd_center_on_cursor(CommandContext &ctx) Buffer *buf = ctx.editor.CurrentBuffer(); if (!buf) return false; - const auto &rows = buf->Rows(); - std::size_t total = rows.size(); + std::size_t total = buf->Nrows(); std::size_t content = ctx.editor.ContentRows(); if (content == 0) content = 1; @@ -139,8 +138,8 @@ cmd_center_on_cursor(CommandContext &ctx) static void ensure_at_least_one_line(Buffer &buf) { - if (buf.Rows().empty()) { - buf.Rows().emplace_back(""); + if (buf.Nrows() == 0) { + buf.insert_row(0, ""); buf.SetDirty(true); } } @@ -254,33 +253,57 @@ extract_region_text(const Buffer &buf, std::size_t sx, std::size_t sy, std::size static void delete_region(Buffer &buf, std::size_t sx, std::size_t sy, std::size_t ex, std::size_t ey) { - auto &rows = buf.Rows(); - if (rows.empty()) + std::size_t nrows = buf.Nrows(); + if (nrows == 0) return; - if (sy >= rows.size()) + if (sy >= nrows) return; - if (ey >= rows.size()) - ey = rows.size() - 1; + if (ey >= nrows) + ey = nrows - 1; if (sy == ey) { - auto &line = rows[sy]; - std::size_t xs = std::min(sx, line.size()); - std::size_t xe = std::min(ex, line.size()); + // Single line: delete text from xs to xe + const auto &rows = buf.Rows(); + const auto &line = rows[sy]; + std::size_t xs = std::min(sx, line.size()); + std::size_t xe = std::min(ex, line.size()); if (xe < xs) std::swap(xs, xe); - line.erase(xs, xe - xs); + buf.delete_text(static_cast(sy), static_cast(xs), 
xe - xs); } else { - // Keep prefix of first and suffix of last then join - std::string prefix = rows[sy].substr(0, std::min(sx, rows[sy].size())); - std::string suffix; - { - const auto &last = rows[ey]; - std::size_t xe = std::min(ex, last.size()); - suffix = last.substr(xe); + // Multi-line: delete from (sx,sy) to (ex,ey) + // Strategy: + // 1. Save suffix of last line (from ex to end) + // 2. Delete tail of first line (from sx to end) + // 3. Delete all lines from sy+1 to ey (inclusive) + // 4. Insert saved suffix at end of first line + // 5. Join if needed (no, suffix is appended directly) + + const auto &rows = buf.Rows(); + std::size_t first_line_len = rows[sy].size(); + std::size_t last_line_len = rows[ey].size(); + std::size_t xs = std::min(sx, first_line_len); + std::size_t xe = std::min(ex, last_line_len); + + // Save suffix of last line before any modifications + std::string suffix = rows[ey].substr(xe); + + // Delete tail of first line (from xs to end) + if (xs < first_line_len) { + buf.delete_text(static_cast(sy), static_cast(xs), first_line_len - xs); + } + + // Delete lines from ey down to sy+1 (reverse order to preserve indices) + for (std::size_t i = ey; i > sy; --i) { + buf.delete_row(static_cast(i)); + } + + // Append saved suffix to first line + if (!suffix.empty()) { + // Get current length of line sy after deletions + const auto &rows_after = buf.Rows(); + std::size_t line_len = rows_after[sy].size(); + buf.insert_text(static_cast(sy), static_cast(line_len), suffix); } - rows[sy] = prefix + suffix; - // erase middle lines and the last line - rows.erase(rows.begin() + static_cast(sy + 1), - rows.begin() + static_cast(ey + 1)); } buf.SetCursor(sx, sy); buf.SetDirty(true); @@ -291,15 +314,19 @@ delete_region(Buffer &buf, std::size_t sx, std::size_t sy, std::size_t ex, std:: static void insert_text_at_cursor(Buffer &buf, const std::string &text) { - auto &rows = buf.Rows(); - std::size_t y = buf.Cury(); - std::size_t x = buf.Curx(); - if (y > 
rows.size()) - y = rows.size(); - if (rows.empty()) - rows.emplace_back(""); - if (y >= rows.size()) - rows.emplace_back(""); + std::size_t nrows = buf.Nrows(); + std::size_t y = buf.Cury(); + std::size_t x = buf.Curx(); + if (y > nrows) + y = nrows; + if (nrows == 0) { + buf.insert_row(0, ""); + nrows = 1; + } + if (y >= nrows) { + buf.insert_row(static_cast(nrows), ""); + nrows = buf.Nrows(); + } std::size_t cur_y = y; std::size_t cur_x = x; @@ -309,25 +336,28 @@ insert_text_at_cursor(Buffer &buf, const std::string &text) auto pos = remain.find('\n'); if (pos == std::string::npos) { // insert remaining into current line - if (cur_y >= rows.size()) - rows.emplace_back(""); + nrows = buf.Nrows(); + if (cur_y >= nrows) { + buf.insert_row(static_cast(nrows), ""); + } + const auto &rows = buf.Rows(); if (cur_x > rows[cur_y].size()) cur_x = rows[cur_y].size(); - rows[cur_y].insert(cur_x, remain); + buf.insert_text(static_cast(cur_y), static_cast(cur_x), remain); cur_x += remain.size(); break; } // insert segment before newline std::string seg = remain.substr(0, pos); - if (cur_x > rows[cur_y].size()) - cur_x = rows[cur_y].size(); - rows[cur_y].insert(cur_x, seg); + { + const auto &rows = buf.Rows(); + if (cur_x > rows[cur_y].size()) + cur_x = rows[cur_y].size(); + } + buf.insert_text(static_cast(cur_y), static_cast(cur_x), seg); // split line at cur_x + seg.size() cur_x += seg.size(); - std::string after = rows[cur_y].substr(cur_x); - rows[cur_y].erase(cur_x); - // create new line after current with the 'after' tail - rows.insert(rows.begin() + static_cast(cur_y + 1), Buffer::Line(after)); + buf.split_line(static_cast(cur_y), static_cast(cur_x)); // move to start of next line cur_y += 1; cur_x = 0; @@ -410,10 +440,8 @@ cmd_move_cursor_to(CommandContext &ctx) std::size_t bco = buf->Coloffs(); std::size_t by = bro + vy; // Clamp by to existing lines later - auto &lines2 = buf->Rows(); - if (lines2.empty()) { - lines2.emplace_back(""); - } + 
ensure_at_least_one_line(*buf); + const auto &lines2 = buf->Rows(); if (by >= lines2.size()) by = lines2.size() - 1; std::string line2 = static_cast(lines2[by]); @@ -430,10 +458,8 @@ cmd_move_cursor_to(CommandContext &ctx) } } } - auto &lines = buf->Rows(); - if (lines.empty()) { - lines.emplace_back(""); - } + ensure_at_least_one_line(*buf); + const auto &lines = buf->Rows(); if (row >= lines.size()) row = lines.size() - 1; std::string line = static_cast(lines[row]); @@ -2122,20 +2148,24 @@ cmd_show_help(CommandContext &ctx) }; auto populate_from_text = [](Buffer &b, const std::string &text) { - auto &rows = b.Rows(); - rows.clear(); + // Clear existing rows; bounded by the initial count because Nrows() may never drop to 0 + for (std::size_t left = b.Nrows(); left > 0; --left) { + b.delete_row(0); + } + // Parse text and insert rows std::string line; line.reserve(128); + int row_idx = 0; for (char ch: text) { if (ch == '\n') { - rows.emplace_back(line); + b.insert_row(row_idx++, line); line.clear(); } else if (ch != '\r') { line.push_back(ch); } } // Add last line (even if empty) - rows.emplace_back(line); + b.insert_row(row_idx, line); b.SetDirty(false); b.SetCursor(0, 0); b.SetOffsets(0, 0); diff --git a/PieceTable.cc b/PieceTable.cc index a58f6fd..b8f8f7a 100644 --- a/PieceTable.cc +++ b/PieceTable.cc @@ -1,5 +1,6 @@ #include #include +#include #include "PieceTable.h" @@ -151,6 +152,8 @@ PieceTable::Clear() materialized_.clear(); total_size_ = 0; dirty_ = true; + line_index_.clear(); + line_index_dirty_ = true; } @@ -179,6 +182,7 @@ PieceTable::addPieceBack(const Source src, const std::size_t start, const std::s pieces_.push_back(Piece{src, start, len}); total_size_ += len; dirty_ = true; + InvalidateLineIndex(); } @@ -203,6 +207,7 @@ PieceTable::addPieceFront(Source src, std::size_t start, std::size_t len) pieces_.insert(pieces_.begin(), Piece{src, start, len}); total_size_ += len; dirty_ = true; + InvalidateLineIndex(); } @@ -225,3 +230,308 @@ PieceTable::materialize() const // Ensure there is a null terminator present via std::string invariants dirty_ = 
false; } + + +// ===== New Phase 1 implementation ===== + +std::pair +PieceTable::locate(const std::size_t byte_offset) const +{ + if (byte_offset >= total_size_) { + return {pieces_.size(), 0}; + } + std::size_t off = byte_offset; + for (std::size_t i = 0; i < pieces_.size(); ++i) { + const auto &p = pieces_[i]; + if (off < p.len) { + return {i, off}; + } + off -= p.len; + } + // Should not reach here unless inconsistency; return end + return {pieces_.size(), 0}; +} + + +void +PieceTable::coalesceNeighbors(std::size_t index) +{ + if (pieces_.empty()) + return; + if (index >= pieces_.size()) + index = pieces_.size() - 1; + // Try merge with previous + if (index > 0) { + auto &prev = pieces_[index - 1]; + auto &curr = pieces_[index]; + if (prev.src == curr.src && prev.start + prev.len == curr.start) { + prev.len += curr.len; + pieces_.erase(pieces_.begin() + static_cast(index)); + if (index > 0) + index -= 1; + } + } + // Try merge with next (index may have shifted) + if (index + 1 < pieces_.size()) { + auto &curr = pieces_[index]; + auto &next = pieces_[index + 1]; + if (curr.src == next.src && curr.start + curr.len == next.start) { + curr.len += next.len; + pieces_.erase(pieces_.begin() + static_cast(index + 1)); + } + } +} + + +void +PieceTable::InvalidateLineIndex() const +{ + line_index_dirty_ = true; +} + + +void +PieceTable::RebuildLineIndex() const +{ + if (!line_index_dirty_) + return; + line_index_.clear(); + line_index_.push_back(0); + std::size_t pos = 0; + for (const auto &pc: pieces_) { + const std::string &src = pc.src == Source::Original ? 
original_ : add_; + const char *base = src.data() + static_cast(pc.start); + for (std::size_t j = 0; j < pc.len; ++j) { + if (base[j] == '\n') { + // next line starts after the newline + line_index_.push_back(pos + j + 1); + } + } + pos += pc.len; + } + line_index_dirty_ = false; +} + + +void +PieceTable::Insert(std::size_t byte_offset, const char *text, std::size_t len) +{ + if (len == 0) + return; + if (byte_offset > total_size_) + byte_offset = total_size_; + + const std::size_t add_start = add_.size(); + add_.append(text, len); + + if (pieces_.empty()) { + pieces_.push_back(Piece{Source::Add, add_start, len}); + total_size_ += len; + dirty_ = true; + InvalidateLineIndex(); + return; + } + + auto [idx, inner] = locate(byte_offset); + if (idx == pieces_.size()) { + // insert at end + pieces_.push_back(Piece{Source::Add, add_start, len}); + total_size_ += len; + dirty_ = true; + InvalidateLineIndex(); + coalesceNeighbors(pieces_.size() - 1); + return; + } + + Piece target = pieces_[idx]; + // Build replacement sequence: left, inserted, right + std::vector repl; + repl.reserve(3); + if (inner > 0) { + repl.push_back(Piece{target.src, target.start, inner}); + } + repl.push_back(Piece{Source::Add, add_start, len}); + const std::size_t right_len = target.len - inner; + if (right_len > 0) { + repl.push_back(Piece{target.src, target.start + inner, right_len}); + } + + // Replace target with repl + pieces_.erase(pieces_.begin() + static_cast(idx)); + pieces_.insert(pieces_.begin() + static_cast(idx), repl.begin(), repl.end()); + + total_size_ += len; + dirty_ = true; + InvalidateLineIndex(); + // Try coalescing around the inserted position (the inserted piece is at idx + (inner>0 ? 1 : 0)) + std::size_t ins_index = idx + (inner > 0 ? 
1 : 0); + coalesceNeighbors(ins_index); +} + + +void +PieceTable::Delete(std::size_t byte_offset, std::size_t len) +{ + if (len == 0) + return; + if (byte_offset >= total_size_) + return; + if (byte_offset + len > total_size_) + len = total_size_ - byte_offset; + + auto [idx, inner] = locate(byte_offset); + std::size_t remaining = len; + + while (remaining > 0 && idx < pieces_.size()) { + Piece &pc = pieces_[idx]; + std::size_t available = pc.len - inner; // bytes we can remove from this piece starting at inner + std::size_t take = std::min(available, remaining); + + // Compute lengths for left and right remnants + std::size_t left_len = inner; + std::size_t right_len = pc.len - inner - take; + Source src = pc.src; + std::size_t start = pc.start; + + // Replace current piece with up to two remnants + if (left_len > 0 && right_len > 0) { + pc.len = left_len; // keep left in place + Piece right{src, start + inner + take, right_len}; + pieces_.insert(pieces_.begin() + static_cast(idx + 1), right); + idx += 1; // move to right for next iteration decision + } else if (left_len > 0) { + pc.len = left_len; + // no insertion; idx now points to left; move to next piece + } else if (right_len > 0) { + pc.start = start + inner + take; + pc.len = right_len; + } else { + // entire piece removed + pieces_.erase(pieces_.begin() + static_cast(idx)); + // stay at same idx for next piece + inner = 0; + remaining -= take; + continue; + } + + // After modifying current idx, next deletion continues at beginning of the next logical region + inner = 0; + remaining -= take; + if (remaining == 0) + break; + // Move to next piece + idx += 1; + } + + total_size_ -= len; + dirty_ = true; + InvalidateLineIndex(); + if (idx < pieces_.size()) + coalesceNeighbors(idx); + if (idx > 0) + coalesceNeighbors(idx - 1); +} + + +std::size_t +PieceTable::LineCount() const +{ + RebuildLineIndex(); + return line_index_.empty() ? 
0 : line_index_.size(); +} + + +std::pair +PieceTable::GetLineRange(std::size_t line_num) const +{ + RebuildLineIndex(); + if (line_index_.empty()) + return {0, 0}; + if (line_num >= line_index_.size()) + return {0, 0}; + std::size_t start = line_index_[line_num]; + std::size_t end = (line_num + 1 < line_index_.size()) ? line_index_[line_num + 1] : total_size_; + return {start, end}; +} + + +std::string +PieceTable::GetLine(std::size_t line_num) const +{ + auto [start, end] = GetLineRange(line_num); + if (end < start) + return std::string(); + // Trim trailing '\n' + if (end > start) { + // To check last char, we can get it via GetRange of len 1 at end-1 without materializing whole + std::string last = GetRange(end - 1, 1); + if (!last.empty() && last[0] == '\n') { + end -= 1; + } + } + return GetRange(start, end - start); +} + + +std::pair +PieceTable::ByteOffsetToLineCol(std::size_t byte_offset) const +{ + if (byte_offset > total_size_) + byte_offset = total_size_; + RebuildLineIndex(); + if (line_index_.empty()) + return {0, 0}; + auto it = std::upper_bound(line_index_.begin(), line_index_.end(), byte_offset); + std::size_t row = (it == line_index_.begin()) ? 0 : static_cast((it - line_index_.begin()) - 1); + std::size_t col = byte_offset - line_index_[row]; + return {row, col}; +} + + +std::size_t +PieceTable::LineColToByteOffset(std::size_t row, std::size_t col) const +{ + RebuildLineIndex(); + if (line_index_.empty()) + return 0; + if (row >= line_index_.size()) + return total_size_; + std::size_t start = line_index_[row]; + std::size_t end = (row + 1 < line_index_.size()) ? 
line_index_[row + 1] : total_size_; + // Clamp col to line length excluding trailing newline + if (end > start) { + std::string last = GetRange(end - 1, 1); + if (!last.empty() && last[0] == '\n') { + end -= 1; + } + } + std::size_t target = start + std::min(col, end - start); + return target; +} + + +std::string +PieceTable::GetRange(std::size_t byte_offset, std::size_t len) const +{ + if (byte_offset >= total_size_ || len == 0) + return std::string(); + if (byte_offset + len > total_size_) + len = total_size_ - byte_offset; + materialize(); + return materialized_.substr(byte_offset, len); +} + + +std::size_t +PieceTable::Find(const std::string &needle, std::size_t start) const +{ + if (needle.empty()) + return start <= total_size_ ? start : std::numeric_limits::max(); + if (start > total_size_) + return std::numeric_limits::max(); + materialize(); + auto pos = materialized_.find(needle, start); + if (pos == std::string::npos) + return std::numeric_limits::max(); + return pos; +} \ No newline at end of file diff --git a/PieceTable.h b/PieceTable.h index 07022d4..2295b73 100644 --- a/PieceTable.h +++ b/PieceTable.h @@ -68,6 +68,30 @@ public: return materialized_.capacity(); } + + // ===== New buffer-wide API (Phase 1) ===== + // Byte-based editing operations + void Insert(std::size_t byte_offset, const char *text, std::size_t len); + + void Delete(std::size_t byte_offset, std::size_t len); + + // Line-based queries + [[nodiscard]] std::size_t LineCount() const; // number of logical lines + [[nodiscard]] std::string GetLine(std::size_t line_num) const; + + [[nodiscard]] std::pair GetLineRange(std::size_t line_num) const; // [start,end) + + // Position conversion + [[nodiscard]] std::pair ByteOffsetToLineCol(std::size_t byte_offset) const; + + [[nodiscard]] std::size_t LineColToByteOffset(std::size_t row, std::size_t col) const; + + // Substring extraction + [[nodiscard]] std::string GetRange(std::size_t byte_offset, std::size_t len) const; + + // Simple search 
utility; returns byte offset or npos + [[nodiscard]] std::size_t Find(const std::string &needle, std::size_t start = 0) const; + private: enum class Source : unsigned char { Original, Add }; @@ -83,6 +107,17 @@ private: void materialize() const; + // Helper: locate piece index and inner offset for a global byte offset + [[nodiscard]] std::pair locate(std::size_t byte_offset) const; + + // Helper: try to coalesce neighboring pieces around index + void coalesceNeighbors(std::size_t index); + + // Line index support (rebuilt lazily on demand) + void InvalidateLineIndex() const; + + void RebuildLineIndex() const; + // Underlying storages std::string original_; // unused for builder use-case, but kept for API symmetry std::string add_; @@ -91,4 +126,8 @@ private: mutable std::string materialized_; mutable bool dirty_ = true; std::size_t total_size_ = 0; + + // Cached line index: starting byte offset of each line (always contains at least 1 entry: 0) + mutable std::vector line_index_; + mutable bool line_index_dirty_ = true; }; \ No newline at end of file