Files
kte/Buffer.cc
Kyle Isom a428b204a0 Improve exception robustness.
- Introduced `test_swap_edge_cases.cc` with extensive tests for minimum payload sizes, truncated payloads, data overflows, unsupported encoding versions, CRC mismatches, and mixed valid/invalid records to ensure reliability under complex scenarios.
- Enhanced `main.cc` with a top-level exception handler to prevent data loss and ensure cleanup during unexpected failures.
2026-02-17 20:12:09 -08:00

786 lines
21 KiB
C++

#include <fstream>
#include <sstream>
#include <filesystem>
#include <cstdlib>
#include <limits>
#include <cerrno>
#include <cstring>
#include <string_view>
#include <system_error>
#include <vector>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "Buffer.h"
#include "SwapRecorder.h"
#include "UndoSystem.h"
#include "UndoTree.h"
// For reconstructing highlighter state on copies
#include "syntax/HighlighterRegistry.h"
#include "syntax/NullHighlighter.h"
Buffer::Buffer()
{
	// Every buffer gets its own undo tree plus an undo system bound to this
	// buffer and that tree.
	auto tree = std::make_unique<UndoTree>();
	undo_sys_ = std::make_unique<UndoSystem>(*this, *tree);
	undo_tree_ = std::move(tree);
}
// Fill `out` with the identity (mtime in nanoseconds, size, device, inode) of
// the file at `path`. Returns false and marks `out` invalid when stat() fails;
// the remaining fields of `out` are left untouched in that case.
bool
Buffer::stat_identity(const std::string &path, FileIdentity &out)
{
	struct stat info{};
	const bool found = (::stat(path.c_str(), &info) == 0);
	out.valid = found;
	if (!found)
		return false;

	// The nanosecond-resolution mtime field is named differently on Apple platforms.
#if defined(__APPLE__)
	const struct timespec &mtime = info.st_mtimespec;
#else
	const struct timespec &mtime = info.st_mtim;
#endif
	const std::uint64_t nanos = static_cast<std::uint64_t>(mtime.tv_sec) * 1000000000ull
	                            + static_cast<std::uint64_t>(mtime.tv_nsec);

	out.mtime_ns = nanos;
	out.size = static_cast<std::uint64_t>(info.st_size);
	out.dev = static_cast<std::uint64_t>(info.st_dev);
	out.ino = static_cast<std::uint64_t>(info.st_ino);
	return true;
}
// Stat the buffer's backing file into `out`. A buffer that is not file-backed
// or has no filename yields an invalid identity and a false return.
bool
Buffer::current_disk_identity(FileIdentity &out) const
{
	const bool has_backing_file = is_file_backed_ && !filename_.empty();
	if (has_backing_file)
		return stat_identity(filename_, out);

	out.valid = false;
	return false;
}
bool
Buffer::ExternallyModifiedOnDisk() const
{
if (!is_file_backed_ || filename_.empty())
return false;
FileIdentity now{};
if (!current_disk_identity(now)) {
// If the file vanished, treat as modified when we previously had an identity.
return on_disk_identity_.valid;
}
if (!on_disk_identity_.valid)
return false;
return now.mtime_ns != on_disk_identity_.mtime_ns
|| now.size != on_disk_identity_.size
|| now.dev != on_disk_identity_.dev
|| now.ino != on_disk_identity_.ino;
}
void
Buffer::RefreshOnDiskIdentity()
{
FileIdentity id{};
if (current_disk_identity(id))
on_disk_identity_ = id;
}
// Write exactly `len` bytes from `data` to `fd`, retrying short writes and
// EINTR interruptions.
//
// Returns true once every byte has been written. On failure returns false and
// stores a "Write failed: ..." message in `err`; `err` is untouched on success.
static bool
write_all_fd(int fd, const char *data, std::size_t len, std::string &err)
{
	std::size_t off = 0;
	while (off < len) {
		const ssize_t n = ::write(fd, data + off, len - off);
		if (n < 0) {
			// Capture errno immediately: building the message below may
			// allocate, and that could overwrite it.
			const int saved_errno = errno;
			if (saved_errno == EINTR)
				continue;
			err = std::string("Write failed: ") + std::strerror(saved_errno);
			return false;
		}
		if (n == 0) {
			// No progress with bytes still outstanding: bail out instead of
			// spinning forever on the same call.
			err = "Write failed: no progress (write returned 0)";
			return false;
		}
		off += static_cast<std::size_t>(n);
	}
	return true;
}
// fsync the directory that contains `path`. Every failure (no parent
// component, open() error, fsync() error, exceptions from path handling) is
// deliberately ignored.
static void
best_effort_fsync_dir(const std::string &path)
{
	try {
		const std::filesystem::path parent = std::filesystem::path(path).parent_path();
		if (!parent.empty()) {
			const int dir_fd = ::open(parent.c_str(), O_RDONLY);
			if (dir_fd >= 0) {
				(void) ::fsync(dir_fd);
				(void) ::close(dir_fd);
			}
		}
	} catch (...) {
		// best-effort
	}
}
// Replace the file at `path` with `len` bytes from `data` by writing a temp
// file in the same directory, fsync'ing it, and rename()-ing it over the
// destination. On failure the temp file is unlinked, `err` describes the
// failing step, and false is returned.
static bool
atomic_write_file(const std::string &path, const char *data, std::size_t len, std::string &err)
{
	// The temp file is a hidden sibling of the destination so that rename()
	// is atomic.
	const std::filesystem::path target(path);
	const std::filesystem::path parent = target.parent_path();
	const std::string leaf = target.filename().string();
	std::string tmp_path = (parent / ("." + leaf + ".kte.tmp.XXXXXX")).string();

	// mkstemp rewrites the XXXXXX suffix in place, so hand it the string's
	// own mutable, NUL-terminated storage.
	const int fd = ::mkstemp(tmp_path.data());
	if (fd < 0) {
		err = std::string("Failed to create temp file for save: ") + std::strerror(errno);
		return false;
	}

	// An existing destination donates its permission bits to the new file.
	struct stat existing{};
	if (::stat(path.c_str(), &existing) == 0)
		(void) ::fchmod(fd, existing.st_mode);

	bool succeeded = write_all_fd(fd, data, len, err);
	if (succeeded && ::fsync(fd) != 0) {
		err = std::string("fsync failed: ") + std::strerror(errno);
		succeeded = false;
	}
	(void) ::close(fd);

	if (succeeded && ::rename(tmp_path.c_str(), path.c_str()) != 0) {
		err = std::string("rename failed: ") + std::strerror(errno);
		succeeded = false;
	}

	if (!succeeded) {
		(void) ::unlink(tmp_path.c_str());
		return false;
	}

	best_effort_fsync_dir(path);
	return true;
}
// Construct a buffer and try to load `path` into it.
//
// A constructor cannot report failure, so any error message from
// OpenFromFile() is discarded here; callers that need the message should
// default-construct and call OpenFromFile() themselves.
Buffer::Buffer(const std::string &path)
{
	// Create the undo system up front, as the default constructor does.
	// OpenFromFile() only creates it on the successful existing-file path,
	// which would otherwise leave Undo() returning nullptr for a new or
	// unreadable file.
	undo_tree_ = std::make_unique<UndoTree>();
	undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
	std::string err;
	OpenFromFile(path, err);
}
// Copy constructor: deep-copies text, cursor/viewport, mark, file association
// and syntax settings. The copy gets a fresh, empty undo history and its own
// highlighter engine rebuilt from the copied filetype/syntax flags.
Buffer::Buffer(const Buffer &other)
{
	curx_ = other.curx_;
	cury_ = other.cury_;
	rx_ = other.rx_;
	nrows_ = other.nrows_;
	rowoffs_ = other.rowoffs_;
	coloffs_ = other.coloffs_;
	rows_ = other.rows_;
	content_ = other.content_;
	rows_cache_dirty_ = other.rows_cache_dirty_;
	filename_ = other.filename_;
	is_file_backed_ = other.is_file_backed_;
	// The copy refers to the same file, so it must carry the same observed
	// on-disk identity; otherwise ExternallyModifiedOnDisk() has nothing to
	// compare against.
	on_disk_identity_ = other.on_disk_identity_;
	dirty_ = other.dirty_;
	read_only_ = other.read_only_;
	mark_set_ = other.mark_set_;
	mark_curx_ = other.mark_curx_;
	mark_cury_ = other.mark_cury_;
	// Copy syntax/highlighting flags
	version_ = other.version_;
	syntax_enabled_ = other.syntax_enabled_;
	filetype_ = other.filetype_;
	// Fresh undo system for the copy
	undo_tree_ = std::make_unique<UndoTree>();
	undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
	// Recreate a highlighter engine for this copy based on filetype/syntax state
	if (syntax_enabled_) {
		// Allocate engine and install an appropriate highlighter
		highlighter_ = std::make_unique<kte::HighlighterEngine>();
		if (!filetype_.empty()) {
			auto hl = kte::HighlighterRegistry::CreateFor(filetype_);
			if (hl) {
				highlighter_->SetHighlighter(std::move(hl));
			} else {
				// Unsupported filetype -> NullHighlighter keeps syntax pipeline active
				highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
			}
		} else {
			// No filetype -> keep syntax enabled but use NullHighlighter
			highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
		}
		// Fresh engine has empty caches; nothing to invalidate
	}
}
// Copy assignment: replaces this buffer's text, cursor/viewport, mark, file
// association and syntax settings with those of `other`. The previous undo
// history is dropped and the highlighter engine is rebuilt from the copied
// filetype/syntax flags.
Buffer &
Buffer::operator=(const Buffer &other)
{
	if (this == &other)
		return *this;
	curx_ = other.curx_;
	cury_ = other.cury_;
	rx_ = other.rx_;
	nrows_ = other.nrows_;
	rowoffs_ = other.rowoffs_;
	coloffs_ = other.coloffs_;
	rows_ = other.rows_;
	content_ = other.content_;
	rows_cache_dirty_ = other.rows_cache_dirty_;
	filename_ = other.filename_;
	is_file_backed_ = other.is_file_backed_;
	// filename_ now names other's file, so the remembered on-disk identity
	// must follow it; keeping the old one would compare against the wrong file.
	on_disk_identity_ = other.on_disk_identity_;
	dirty_ = other.dirty_;
	read_only_ = other.read_only_;
	mark_set_ = other.mark_set_;
	mark_curx_ = other.mark_curx_;
	mark_cury_ = other.mark_cury_;
	version_ = other.version_;
	syntax_enabled_ = other.syntax_enabled_;
	filetype_ = other.filetype_;
	// Recreate undo system for this instance. The old UndoSystem was built
	// against the old tree, so destroy it first: it must never outlive the
	// tree it references.
	undo_sys_.reset();
	undo_tree_ = std::make_unique<UndoTree>();
	undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
	// Recreate highlighter engine consistent with syntax settings
	highlighter_.reset();
	if (syntax_enabled_) {
		highlighter_ = std::make_unique<kte::HighlighterEngine>();
		if (!filetype_.empty()) {
			auto hl = kte::HighlighterRegistry::CreateFor(filetype_);
			if (hl) {
				highlighter_->SetHighlighter(std::move(hl));
			} else {
				// Unsupported filetype: fall back to the NullHighlighter
				highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
			}
		} else {
			// No filetype: syntax stays enabled with a NullHighlighter
			highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
		}
	}
	return *this;
}
// Move constructor: takes over other's text, file association, undo history
// and highlighter, then re-points the moved UndoSystem at this object.
// NOTE(review): swap_rec_ is not transferred here — confirm that is intended.
Buffer::Buffer(Buffer &&other) noexcept
	: curx_(other.curx_),
	  cury_(other.cury_),
	  rx_(other.rx_),
	  nrows_(other.nrows_),
	  rowoffs_(other.rowoffs_),
	  coloffs_(other.coloffs_),
	  rows_(std::move(other.rows_)),
	  filename_(std::move(other.filename_)),
	  is_file_backed_(other.is_file_backed_),
	  dirty_(other.dirty_),
	  read_only_(other.read_only_),
	  mark_set_(other.mark_set_),
	  mark_curx_(other.mark_curx_),
	  mark_cury_(other.mark_cury_),
	  undo_tree_(std::move(other.undo_tree_)),
	  undo_sys_(std::move(other.undo_sys_))
{
	// Move syntax/highlighting state
	version_ = other.version_;
	syntax_enabled_ = other.syntax_enabled_;
	filetype_ = std::move(other.filetype_);
	highlighter_ = std::move(other.highlighter_);
	content_ = std::move(other.content_);
	rows_cache_dirty_ = other.rows_cache_dirty_;
	// Carry the observed on-disk identity along with the filename; without
	// it a moved buffer (e.g. after container reallocation) could no longer
	// detect external modification of its file.
	on_disk_identity_ = other.on_disk_identity_;
	// Update UndoSystem's buffer reference to point to this object
	if (undo_sys_) {
		undo_sys_->UpdateBufferReference(*this);
	}
}
// Move assignment: takes over other's text, file association, undo history
// and highlighter, then re-points the moved UndoSystem at this object.
// NOTE(review): swap_rec_ is not transferred here — confirm that is intended.
Buffer &
Buffer::operator=(Buffer &&other) noexcept
{
	if (this == &other)
		return *this;
	curx_ = other.curx_;
	cury_ = other.cury_;
	rx_ = other.rx_;
	nrows_ = other.nrows_;
	rowoffs_ = other.rowoffs_;
	coloffs_ = other.coloffs_;
	rows_ = std::move(other.rows_);
	filename_ = std::move(other.filename_);
	is_file_backed_ = other.is_file_backed_;
	// The remembered on-disk identity belongs to the filename being taken over.
	on_disk_identity_ = other.on_disk_identity_;
	dirty_ = other.dirty_;
	read_only_ = other.read_only_;
	mark_set_ = other.mark_set_;
	mark_curx_ = other.mark_curx_;
	mark_cury_ = other.mark_cury_;
	// Destroy our old UndoSystem before its tree goes away: it was built
	// against the old tree and must never outlive it.
	undo_sys_.reset();
	undo_tree_ = std::move(other.undo_tree_);
	undo_sys_ = std::move(other.undo_sys_);
	// Move syntax/highlighting state
	version_ = other.version_;
	syntax_enabled_ = other.syntax_enabled_;
	filetype_ = std::move(other.filetype_);
	highlighter_ = std::move(other.highlighter_);
	content_ = std::move(other.content_);
	rows_cache_dirty_ = other.rows_cache_dirty_;
	// Update UndoSystem's buffer reference to point to this object
	if (undo_sys_) {
		undo_sys_->UpdateBufferReference(*this);
	}
	return *this;
}
bool
Buffer::OpenFromFile(const std::string &path, std::string &err)
{
auto normalize_path = [](const std::string &in) -> std::string {
std::string expanded = in;
// Expand leading '~' to HOME
if (!expanded.empty() && expanded[0] == '~') {
const char *home = std::getenv("HOME");
if (home && expanded.size() >= 2 && (expanded[1] == '/' || expanded[1] == '\\')) {
expanded = std::string(home) + expanded.substr(1);
} else if (home && expanded.size() == 1) {
expanded = std::string(home);
}
}
try {
std::filesystem::path p(expanded);
if (std::filesystem::exists(p)) {
return std::filesystem::canonical(p).string();
}
return std::filesystem::absolute(p).string();
} catch (...) {
// On any error, fall back to input
return expanded;
}
};
const std::string norm = normalize_path(path);
// If the file doesn't exist, initialize an empty, non-file-backed buffer
// with the provided filename. Do not touch the filesystem until Save/SaveAs.
if (!std::filesystem::exists(norm)) {
rows_.clear();
nrows_ = 0;
filename_ = norm;
is_file_backed_ = false;
dirty_ = false;
// Reset cursor/viewport state
curx_ = cury_ = rx_ = 0;
rowoffs_ = coloffs_ = 0;
mark_set_ = false;
mark_curx_ = mark_cury_ = 0;
// Empty PieceTable
content_.Clear();
rows_cache_dirty_ = true;
return true;
}
std::ifstream in(norm, std::ios::in | std::ios::binary);
if (!in) {
err = "Failed to open file: " + norm;
return false;
}
// Read entire file into PieceTable as-is
std::string data;
in.seekg(0, std::ios::end);
if (!in) {
err = "Failed to seek to end of file: " + norm;
return false;
}
auto sz = in.tellg();
if (sz < 0) {
err = "Failed to get file size: " + norm;
return false;
}
if (sz > 0) {
data.resize(static_cast<std::size_t>(sz));
in.seekg(0, std::ios::beg);
if (!in) {
err = "Failed to seek to beginning of file: " + norm;
return false;
}
in.read(data.data(), static_cast<std::streamsize>(data.size()));
if (!in && !in.eof()) {
err = "Failed to read file: " + norm;
return false;
}
// Validate we read the expected number of bytes
const std::streamsize bytes_read = in.gcount();
if (bytes_read != static_cast<std::streamsize>(data.size())) {
err = "Partial read of file (expected " + std::to_string(data.size()) +
" bytes, got " + std::to_string(bytes_read) + "): " + norm;
return false;
}
}
content_.Clear();
if (!data.empty())
content_.Append(data.data(), data.size());
rows_cache_dirty_ = true;
nrows_ = 0; // not used under PieceTable
filename_ = norm;
is_file_backed_ = true;
dirty_ = false;
RefreshOnDiskIdentity();
// Reset/initialize undo system for this loaded file
if (!undo_tree_)
undo_tree_ = std::make_unique<UndoTree>();
if (!undo_sys_)
undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
// Clear any existing history for a fresh load
undo_sys_->clear();
// Reset cursor/viewport state
curx_ = cury_ = rx_ = 0;
rowoffs_ = coloffs_ = 0;
mark_set_ = false;
mark_curx_ = mark_cury_ = 0;
return true;
}
// Write the buffer's bytes back to its backing file through
// atomic_write_file(). Fails with a message in `err` when the buffer has no
// backing file, when the content cannot be materialised, or when the write
// itself fails. The dirty flag is not touched: this method is const and
// leaves that decision to the caller.
bool
Buffer::Save(std::string &err) const
{
	const bool can_save_in_place = is_file_backed_ && !filename_.empty();
	if (!can_save_in_place) {
		err = "Buffer is not file-backed; use SaveAs()";
		return false;
	}

	const std::size_t nbytes = content_.Size();
	const char *bytes = nullptr;
	if (nbytes != 0) {
		bytes = content_.Data();
		if (bytes == nullptr) {
			err = "Internal error: buffer materialization failed";
			return false;
		}
	}

	// An empty buffer is written from an empty literal rather than a null pointer.
	const char *payload = (bytes != nullptr) ? bytes : "";
	if (!atomic_write_file(filename_, payload, nbytes, err))
		return false;

	// Remember what the file looks like on disk right after our own write.
	const_cast<Buffer *>(this)->RefreshOnDiskIdentity();
	return true;
}
// Write the buffer to `path` and, on success, make that file the buffer's
// backing file (filename updated, dirty flag cleared, on-disk identity
// refreshed).
//
// The destination is normalised the same way OpenFromFile() normalises its
// input: a leading '~' is expanded from $HOME, an existing destination is
// resolved to its canonical path, and anything else is made absolute.
// Returns false with a message in `err` when materialising the content or
// writing the file fails; the buffer's file association is unchanged then.
bool
Buffer::SaveAs(const std::string &path, std::string &err)
{
	// Normalize output path first
	std::string out_path;
	try {
		// Do a light expansion of '~'
		std::string expanded = path;
		if (!expanded.empty() && expanded[0] == '~') {
			const char *home = std::getenv("HOME");
			if (home && expanded.size() >= 2 && (expanded[1] == '/' || expanded[1] == '\\'))
				expanded = std::string(home) + expanded.substr(1);
			else if (home && expanded.size() == 1)
				expanded = std::string(home);
		}
		const std::filesystem::path ep(expanded);
		std::error_code ec;
		std::filesystem::path resolved;
		if (std::filesystem::exists(ep, ec) && !ec) {
			// Resolve symlinks for an existing destination: the atomic write
			// ends in rename(), which would otherwise replace the link
			// itself rather than the file it points to.
			resolved = std::filesystem::canonical(ep, ec);
		}
		if (resolved.empty() || ec)
			resolved = std::filesystem::absolute(ep);
		out_path = resolved.string();
	} catch (...) {
		out_path = path;
	}
	const std::size_t sz = content_.Size();
	const char *data = sz ? content_.Data() : nullptr;
	if (sz && !data) {
		err = "Internal error: buffer materialization failed";
		return false;
	}
	if (!atomic_write_file(out_path, data ? data : "", sz, err))
		return false;
	filename_ = out_path;
	is_file_backed_ = true;
	dirty_ = false;
	RefreshOnDiskIdentity();
	return true;
}
std::string
Buffer::AsString() const
{
std::stringstream ss;
ss << "Buffer<" << this->filename_;
if (this->Dirty()) {
ss << "*";
}
ss << ">: " << content_.LineCount() << " lines";
return ss.str();
}
// --- Raw editing APIs (no undo recording, cursor untouched) ---
// Insert `text` at (row, col) without recording undo or moving the cursor.
// Negative coordinates are clamped to 0; empty text changes nothing. A swap
// recorder, when attached, is told about the insertion.
void
Buffer::insert_text(int row, int col, std::string_view text)
{
	row = (row < 0) ? 0 : row;
	col = (col < 0) ? 0 : col;

	const auto line_idx = static_cast<std::size_t>(row);
	const auto col_idx = static_cast<std::size_t>(col);
	const std::size_t pos = content_.LineColToByteOffset(line_idx, col_idx);

	if (text.empty())
		return;

	content_.Insert(pos, text.data(), text.size());
	rows_cache_dirty_ = true;
	if (swap_rec_)
		swap_rec_->OnInsert(row, col, text);
}
// ===== Adapter helpers for PieceTable-backed Buffer =====
// Return a view of logical line `row` inside the content's materialised byte
// storage. An empty view is returned when no storage is available.
std::string_view
Buffer::GetLineView(std::size_t row) const
{
	// Byte range [first, second) of the line, fetched before materialising.
	const auto span = content_.GetLineRange(row);
	const char *bytes = content_.Data(); // materializes if needed
	if (bytes == nullptr)
		return std::string_view();

	const std::size_t begin = span.first;
	std::size_t count = 0;
	if (span.second > span.first)
		count = span.second - span.first;
	return std::string_view(bytes + begin, count);
}
// Rebuild rows_ from content_ when the cache is marked dirty; otherwise do
// nothing. The whole check-and-rebuild runs under buffer_mutex_.
void
Buffer::ensure_rows_cache() const
{
	std::lock_guard<std::mutex> guard(buffer_mutex_);
	if (rows_cache_dirty_) {
		rows_.clear();
		const std::size_t total = content_.LineCount();
		rows_.reserve(total);
		std::size_t idx = 0;
		while (idx < total) {
			rows_.emplace_back(content_.GetLine(idx));
			++idx;
		}
		// nrows_ mirrors the cache size for legacy readers of that field.
		const_cast<Buffer *>(this)->nrows_ = rows_.size();
		rows_cache_dirty_ = false;
	}
}
// Number of logical lines as reported by the underlying content store.
std::size_t
Buffer::content_LineCount_() const
{
	const std::size_t line_total = content_.LineCount();
	return line_total;
}
#if defined(KTE_TESTS)
// Test-only accessor: a copy of the buffer's raw bytes, or an empty string
// when the buffer is empty or its storage cannot be materialised.
std::string
Buffer::BytesForTests() const
{
	std::string out;
	const std::size_t total = content_.Size();
	if (total != 0) {
		if (const char *bytes = content_.Data())
			out.assign(bytes, total);
	}
	return out;
}
#endif
// Delete up to `len` units starting at (row, col), without recording undo.
//
// Units are counted as the bytes of each logical line plus one unit for every
// newline crossed, so a deletion may span several lines. Negative row/col are
// clamped to 0 and a zero `len` is a no-op. If `len` runs past the last line,
// everything from the start position to the end of the content is removed.
// When a swap recorder is attached it is told the (clamped) start position and
// the number of bytes actually removed.
void
Buffer::delete_text(int row, int col, std::size_t len)
{
if (len == 0)
return;
if (row < 0)
row = 0;
if (col < 0)
col = 0;
// Byte offset where the deletion begins.
const std::size_t start = content_.LineColToByteOffset(static_cast<std::size_t>(row),
static_cast<std::size_t>(col));
// (r, c) is a cursor that walks forward until `remaining` units are consumed;
// it ends up at the position just past the deleted range.
std::size_t r = static_cast<std::size_t>(row);
std::size_t c = static_cast<std::size_t>(col);
std::size_t remaining = len;
const std::size_t lc = content_.LineCount();
while (remaining > 0 && r < lc) {
const std::string line = content_.GetLine(r); // logical line (without trailing '\n')
const std::size_t L = line.size();
// Consume what is left of the current line; a column at or past the line
// end consumes nothing here.
if (c < L) {
const std::size_t take = std::min(remaining, L - c);
c += take;
remaining -= take;
}
if (remaining == 0)
break;
// Consume newline between lines as one char, if there is a next line
if (r + 1 < lc) {
remaining -= 1; // the newline
r += 1;
c = 0;
} else {
// At last line and still remaining: delete to EOF
const std::size_t total = content_.Size();
const std::size_t actual = (total > start) ? (total - start) : 0;
if (actual == 0)
return;
content_.Delete(start, actual);
rows_cache_dirty_ = true;
if (swap_rec_)
swap_rec_->OnDelete(row, col, actual);
return;
}
}
// Compute end offset at (r,c)
std::size_t end = content_.LineColToByteOffset(r, c);
// Delete only when the walk actually advanced past the start offset.
if (end > start) {
const std::size_t actual = end - start;
content_.Delete(start, actual);
rows_cache_dirty_ = true;
if (swap_rec_)
swap_rec_->OnDelete(row, col, actual);
}
}
// Insert a single '\n' at (row, col), without recording undo. Negative
// coordinates are clamped to 0. A swap recorder, when attached, sees the
// newline as a one-byte insertion at the clamped position.
void
Buffer::split_line(int row, const int col)
{
	const int safe_col = (col < 0) ? 0 : col;
	row = (row < 0) ? 0 : row;

	const std::size_t pos = content_.LineColToByteOffset(static_cast<std::size_t>(row),
	                                                     static_cast<std::size_t>(safe_col));
	const char newline = '\n';
	content_.Insert(pos, &newline, 1);
	rows_cache_dirty_ = true;

	if (swap_rec_)
		swap_rec_->OnInsert(row, safe_col, std::string_view("\n", 1));
}
// Merge line `row` with the line after it by deleting one byte at the end of
// line `row`, without recording undo. A negative row is clamped to 0; nothing
// happens when there is no following line.
void
Buffer::join_lines(int row)
{
	row = (row < 0) ? 0 : row;
	const std::size_t line_idx = static_cast<std::size_t>(row);

	const bool has_next_line = line_idx + 1 < content_.LineCount();
	if (!has_next_line)
		return;

	// Column reported to the swap recorder: the length of the line being extended.
	const int join_col = static_cast<int>(content_.GetLine(line_idx).size());

	// Asking for the largest possible column yields the offset of the line's
	// end; the separating newline is expected to sit exactly there.
	const std::size_t newline_pos =
		content_.LineColToByteOffset(line_idx, std::numeric_limits<std::size_t>::max());
	content_.Delete(newline_pos, 1);
	rows_cache_dirty_ = true;

	if (swap_rec_)
		swap_rec_->OnDelete(row, join_col, 1);
}
// Insert `text` followed by '\n' at the start of line `row`, without recording
// undo. A negative row is clamped to 0. The swap recorder, when attached,
// receives the text (if any) and the newline as two separate insertions.
void
Buffer::insert_row(int row, const std::string_view text)
{
	row = (row < 0) ? 0 : row;

	const std::size_t line_start = content_.LineColToByteOffset(static_cast<std::size_t>(row), 0);
	const std::size_t text_len = text.size();
	const bool has_text = text_len != 0;

	if (has_text)
		content_.Insert(line_start, text.data(), text_len);
	const char newline = '\n';
	content_.Insert(line_start + text_len, &newline, 1);
	rows_cache_dirty_ = true;

	if (!swap_rec_)
		return;
	// Two events avoid building a temporary "text + newline" string.
	if (has_text)
		swap_rec_->OnInsert(row, 0, text);
	swap_rec_->OnInsert(row, static_cast<int>(text_len), std::string_view("\n", 1));
}
// Delete the byte range that GetLineRange() reports for line `row`, without
// recording undo. A negative row is clamped to 0; an out-of-range row or an
// empty range is a no-op.
void
Buffer::delete_row(int row)
{
	row = (row < 0) ? 0 : row;
	const std::size_t line_idx = static_cast<std::size_t>(row);
	if (line_idx >= content_.LineCount())
		return;

	// [first, second): for a non-final line the end is used as-is (start of
	// the next line, so the separating newline is included); for the final
	// line the end may equal the total size.
	const auto span = content_.GetLineRange(line_idx);
	const std::size_t begin = span.first;
	const std::size_t finish = span.second;
	if (finish <= begin)
		return;

	const std::size_t removed = finish - begin;
	content_.Delete(begin, removed);
	rows_cache_dirty_ = true;
	if (swap_rec_)
		swap_rec_->OnDelete(row, 0, removed);
}
// Discard the current content and replace it with `bytes`, marking the row
// cache dirty. This function does not notify the swap recorder or the undo
// system.
void
Buffer::replace_all_bytes(const std::string_view bytes)
{
	content_.Clear();
	if (const std::size_t count = bytes.size(); count != 0)
		content_.Append(bytes.data(), count);
	rows_cache_dirty_ = true;
}
// Undo system accessors
// Non-owning pointer to this buffer's undo system; null when none is set.
UndoSystem *
Buffer::Undo()
{
	UndoSystem *const sys = undo_sys_.get();
	return sys;
}
// Read-only, non-owning pointer to this buffer's undo system; null when none is set.
const UndoSystem *
Buffer::Undo() const
{
	const UndoSystem *const sys = undo_sys_.get();
	return sys;
}