MacOS: remove static linking. Bump minor version.

Introduce error recovery mechanisms with retry logic and circuit breaker integration.
- Added `ErrorRecovery.cc` and `ErrorRecovery.h` for retry and circuit breaker implementations. - Enhanced swap file handling with transient error retries and exponential backoff (e.g., ENOSPC, EDQUOT). - Integrated circuit breaker into SwapManager to gracefully handle repeated failures, prevent system overload, and enable automatic recovery. - Updated `DEVELOPER_GUIDE.md` with comprehensive documentation on error recovery patterns and graceful degradation strategies. - Refined fsync, temp file creation, and swap file logic with retry-on-failure mechanisms for improved resilience.
2026-02-19 21:00:29 -08:00 · 2026-02-17 21:38:40 -08:00 · 2026-02-17 21:25:19 -08:00 · 2026-02-17 20:12:09 -08:00 · 2026-02-17 17:17:55 -08:00
14 changed files with 3297 additions and 231 deletions
--- a/Buffer.cc
+++ b/Buffer.cc
@@ -18,6 +18,9 @@
 #include "SwapRecorder.h"
 #include "UndoSystem.h"
 #include "UndoTree.h"
+#include "ErrorHandler.h"
+#include "SyscallWrappers.h"
+#include "ErrorRecovery.h"
 // For reconstructing highlighter state on copies
 #include "syntax/HighlighterRegistry.h"
 #include "syntax/NullHighlighter.h"
@@ -122,11 +125,11 @@ best_effort_fsync_dir(const std::string &path)
 		std::filesystem::path dir = p.parent_path();
 		if (dir.empty())
 			return;
-		int dfd = ::open(dir.c_str(), O_RDONLY);
+		int dfd = kte::syscall::Open(dir.c_str(), O_RDONLY);
 		if (dfd < 0)
 			return;
-		(void) ::fsync(dfd);
-		(void) ::close(dfd);
+		(void) kte::syscall::Fsync(dfd);
+		(void) kte::syscall::Close(dfd);
 	} catch (...) {
 		// best-effort
 	}
@@ -146,9 +149,21 @@ atomic_write_file(const std::string &path, const char *data, std::size_t len, st
 	// mkstemp requires a mutable buffer.
 	std::vector<char> buf(tmpl_s.begin(), tmpl_s.end());
 	buf.push_back('\0');
-	int fd = ::mkstemp(buf.data());
+
+	// Retry on transient errors for temp file creation
+	int fd          = -1;
+	auto mkstemp_fn = [&]() -> bool {
+		// Reset buffer for each retry attempt
+		buf.assign(tmpl_s.begin(), tmpl_s.end());
+		buf.push_back('\0');
+		fd = kte::syscall::Mkstemp(buf.data());
+		return fd >= 0;
+	};
+
+	if (!kte::RetryOnTransientError(mkstemp_fn, kte::RetryPolicy::Aggressive(), err)) {
 		if (fd < 0) {
-		err = std::string("Failed to create temp file for save: ") + std::strerror(errno);
+			err = std::string("Failed to create temp file for save: ") + std::strerror(errno) + err;
+		}
 		return false;
 	}
 	std::string tmp_path(buf.data());
@@ -156,17 +171,23 @@ atomic_write_file(const std::string &path, const char *data, std::size_t len, st
 	// If the destination exists, carry over its permissions.
 	struct stat dst_st{};
 	if (::stat(path.c_str(), &dst_st) == 0) {
-		(void) ::fchmod(fd, dst_st.st_mode);
+		(void) kte::syscall::Fchmod(fd, dst_st.st_mode);
 	}

 	bool ok = write_all_fd(fd, data, len, err);
 	if (ok) {
-		if (::fsync(fd) != 0) {
-			err = std::string("fsync failed: ") + std::strerror(errno);
+		// Retry fsync on transient errors
+		auto fsync_fn = [&]() -> bool {
+			return kte::syscall::Fsync(fd) == 0;
+		};
+
+		std::string fsync_err;
+		if (!kte::RetryOnTransientError(fsync_fn, kte::RetryPolicy::Aggressive(), fsync_err)) {
+			err = std::string("fsync failed: ") + std::strerror(errno) + fsync_err;
 			ok  = false;
 		}
 	}
-	(void) ::close(fd);
+	(void) kte::syscall::Close(fd);

 	if (ok) {
 		if (::rename(tmp_path.c_str(), path.c_str()) != 0) {
@@ -411,17 +432,46 @@ Buffer::OpenFromFile(const std::string &path, std::string &err)
 	std::ifstream in(norm, std::ios::in | std::ios::binary);
 	if (!in) {
 		err = "Failed to open file: " + norm;
+		kte::ErrorHandler::Instance().Error("Buffer", err, norm);
 		return false;
 	}

 	// Read entire file into PieceTable as-is
 	std::string data;
 	in.seekg(0, std::ios::end);
+	if (!in) {
+		err = "Failed to seek to end of file: " + norm;
+		kte::ErrorHandler::Instance().Error("Buffer", err, norm);
+		return false;
+	}
 	auto sz = in.tellg();
+	if (sz < 0) {
+		err = "Failed to get file size: " + norm;
+		kte::ErrorHandler::Instance().Error("Buffer", err, norm);
+		return false;
+	}
 	if (sz > 0) {
 		data.resize(static_cast<std::size_t>(sz));
 		in.seekg(0, std::ios::beg);
+		if (!in) {
+			err = "Failed to seek to beginning of file: " + norm;
+			kte::ErrorHandler::Instance().Error("Buffer", err, norm);
+			return false;
+		}
 		in.read(data.data(), static_cast<std::streamsize>(data.size()));
+		if (!in && !in.eof()) {
+			err = "Failed to read file: " + norm;
+			kte::ErrorHandler::Instance().Error("Buffer", err, norm);
+			return false;
+		}
+		// Validate we read the expected number of bytes
+		const std::streamsize bytes_read = in.gcount();
+		if (bytes_read != static_cast<std::streamsize>(data.size())) {
+			err = "Partial read of file (expected " + std::to_string(data.size()) +
+			      " bytes, got " + std::to_string(bytes_read) + "): " + norm;
+			kte::ErrorHandler::Instance().Error("Buffer", err, norm);
+			return false;
+		}
 	}
 	content_.Clear();
 	if (!data.empty())
@@ -464,8 +514,10 @@ Buffer::Save(std::string &err) const
 		err = "Internal error: buffer materialization failed";
 		return false;
 	}
-	if (!atomic_write_file(filename_, data ? data : "", sz, err))
+	if (!atomic_write_file(filename_, data ? data : "", sz, err)) {
+		kte::ErrorHandler::Instance().Error("Buffer", err, filename_);
 		return false;
+	}
 	// Update observed on-disk identity after a successful save.
 	const_cast<Buffer *>(this)->RefreshOnDiskIdentity();
 	// Note: const method cannot change dirty_. Intentionally const to allow UI code
@@ -502,8 +554,10 @@ Buffer::SaveAs(const std::string &path, std::string &err)
 		err = "Internal error: buffer materialization failed";
 		return false;
 	}
-	if (!atomic_write_file(out_path, data ? data : "", sz, err))
+	if (!atomic_write_file(out_path, data ? data : "", sz, err)) {
+		kte::ErrorHandler::Instance().Error("Buffer", err, out_path);
 		return false;
+	}

 	filename_       = out_path;
 	is_file_backed_ = true;
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(kte)
 include(GNUInstallDirs)

 set(CMAKE_CXX_STANDARD 20)
-set(KTE_VERSION "1.6.6")
+set(KTE_VERSION "1.7.0")

 # Default to terminal-only build to avoid SDL/OpenGL dependency by default.
 # Enable with -DBUILD_GUI=ON when SDL2/OpenGL/Freetype are available.
@@ -39,7 +39,6 @@ if (MSVC)
    add_compile_options("/W4" "$<$<CONFIG:RELEASE>:/O2>")
 else ()
    add_compile_options(
-            "-static"
            "-Wall"
            "-Wextra"
            "-Werror"
@@ -142,6 +141,9 @@ set(COMMON_SOURCES
        HelpText.cc
        KKeymap.cc
        Swap.cc
+        ErrorHandler.cc
+        SyscallWrappers.cc
+        ErrorRecovery.cc
        TerminalInputHandler.cc
        TerminalRenderer.cc
        TerminalFrontend.cc
@@ -282,6 +284,11 @@ endif ()

 target_link_libraries(kte ${CURSES_LIBRARIES})

+# Static linking on Linux only (macOS does not support static linking of system libraries)
+if (NOT APPLE)
+    target_link_options(kte PRIVATE -static)
+endif ()
+
 if (KTE_ENABLE_TREESITTER)
    # Users can provide their own tree-sitter include/lib via cache variables
    set(TREESITTER_INCLUDE_DIR "" CACHE PATH "Path to tree-sitter include directory")
@@ -316,6 +323,7 @@ if (BUILD_TESTS)
            tests/test_swap_recorder.cc
            tests/test_swap_writer.cc
            tests/test_swap_replay.cc
+            tests/test_swap_edge_cases.cc
            tests/test_swap_recovery_prompt.cc
            tests/test_swap_cleanup.cc
            tests/test_swap_git_editor.cc
@@ -336,6 +344,9 @@ if (BUILD_TESTS)
            Command.cc
            HelpText.cc
            Swap.cc
+            ErrorHandler.cc
+            SyscallWrappers.cc
+            ErrorRecovery.cc
            KKeymap.cc
            SwapRecorder.h
            OptimizedSearch.cc
@@ -360,6 +371,11 @@ if (BUILD_TESTS)
            target_link_libraries(kte_tests ${TREESITTER_LIBRARY})
        endif ()
    endif ()
+
+    # Static linking on Linux only (macOS does not support static linking of system libraries)
+    if (NOT APPLE)
+        target_link_options(kte_tests PRIVATE -static)
+    endif ()
 endif ()

 if (BUILD_GUI)
@@ -399,6 +415,11 @@ if (BUILD_GUI)
        target_link_libraries(kge ${CURSES_LIBRARIES} imgui)
    endif ()

+    # Static linking on Linux only (macOS does not support static linking of system libraries)
+    if (NOT APPLE)
+        target_link_options(kge PRIVATE -static)
+    endif ()
+
    # On macOS, build kge as a proper .app bundle
    if (APPLE)
        # Define the icon file
--- a/ErrorHandler.cc
+++ b/ErrorHandler.cc
@@ -0,0 +1,318 @@
+#include "ErrorHandler.h"
+#include <chrono>
+#include <ctime>
+#include <iomanip>
+#include <sstream>
+#include <filesystem>
+#include <cstdlib>
+
+namespace fs = std::filesystem;
+
+namespace kte {
+ErrorHandler::ErrorHandler()
+{
+	// Determine log file path: ~/.local/state/kte/error.log
+	const char *home = std::getenv("HOME");
+	if (home) {
+		fs::path log_dir = fs::path(home) / ".local" / "state" / "kte";
+		try {
+			if (!fs::exists(log_dir)) {
+				fs::create_directories(log_dir);
+			}
+			log_file_path_ = (log_dir / "error.log").string();
+		} catch (...) {
+			// If we can't create the directory, disable file logging
+			file_logging_enabled_ = false;
+		}
+	} else {
+		// No HOME, disable file logging
+		file_logging_enabled_ = false;
+	}
+}
+
+
+ErrorHandler::~ErrorHandler()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	if (log_file_ &&log_file_
+	->
+	is_open()
+	)
+	{
+		log_file_->flush();
+		log_file_->close();
+	}
+}
+
+
+ErrorHandler &
+ErrorHandler::Instance()
+{
+	static ErrorHandler instance;
+	return instance;
+}
+
+
+void
+ErrorHandler::Report(ErrorSeverity severity, const std::string &component,
+                     const std::string &message, const std::string &context)
+{
+	ErrorRecord record;
+	record.timestamp_ns = now_ns();
+	record.severity     = severity;
+	record.component    = component;
+	record.message      = message;
+	record.context      = context;
+
+	{
+		std::lock_guard<std::mutex> lg(mtx_);
+
+		// Add to in-memory queue
+		errors_.push_back(record);
+		while (errors_.size() > 100) {
+			errors_.pop_front();
+		}
+
+		++total_error_count_;
+		if (severity == ErrorSeverity::Critical) {
+			++critical_error_count_;
+		}
+
+		// Write to log file if enabled
+		if (file_logging_enabled_) {
+			write_to_log(record);
+		}
+	}
+}
+
+
+void
+ErrorHandler::Info(const std::string &component, const std::string &message,
+                   const std::string &context)
+{
+	Report(ErrorSeverity::Info, component, message, context);
+}
+
+
+void
+ErrorHandler::Warning(const std::string &component, const std::string &message,
+                      const std::string &context)
+{
+	Report(ErrorSeverity::Warning, component, message, context);
+}
+
+
+void
+ErrorHandler::Error(const std::string &component, const std::string &message,
+                    const std::string &context)
+{
+	Report(ErrorSeverity::Error, component, message, context);
+}
+
+
+void
+ErrorHandler::Critical(const std::string &component, const std::string &message,
+                       const std::string &context)
+{
+	Report(ErrorSeverity::Critical, component, message, context);
+}
+
+
+bool
+ErrorHandler::HasErrors() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return !errors_.empty();
+}
+
+
+bool
+ErrorHandler::HasCriticalErrors() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return critical_error_count_ > 0;
+}
+
+
+std::string
+ErrorHandler::GetLastError() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	if (errors_.empty())
+		return "";
+
+	const ErrorRecord &e = errors_.back();
+	std::string result   = "[" + severity_to_string(e.severity) + "] ";
+	result               += e.component;
+	if (!e.context.empty()) {
+		result += " (" + e.context + ")";
+	}
+	result += ": " + e.message;
+	return result;
+}
+
+
+std::size_t
+ErrorHandler::GetErrorCount() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return total_error_count_;
+}
+
+
+std::size_t
+ErrorHandler::GetErrorCount(ErrorSeverity severity) const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	std::size_t count = 0;
+	for (const auto &e: errors_) {
+		if (e.severity == severity) {
+			++count;
+		}
+	}
+	return count;
+}
+
+
+std::vector<ErrorHandler::ErrorRecord>
+ErrorHandler::GetRecentErrors(std::size_t max_count) const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	std::vector<ErrorRecord> result;
+	result.reserve(std::min(max_count, errors_.size()));
+
+	// Return most recent first
+	auto it = errors_.rbegin();
+	for (std::size_t i = 0; i < max_count && it != errors_.rend(); ++i, ++it) {
+		result.push_back(*it);
+	}
+	return result;
+}
+
+
+void
+ErrorHandler::ClearErrors()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	errors_.clear();
+	total_error_count_    = 0;
+	critical_error_count_ = 0;
+}
+
+
+void
+ErrorHandler::SetFileLoggingEnabled(bool enabled)
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	file_logging_enabled_ = enabled;
+	if (!enabled && log_file_ && log_file_->is_open()) {
+		log_file_->flush();
+		log_file_->close();
+		log_file_.reset();
+	}
+}
+
+
+std::string
+ErrorHandler::GetLogFilePath() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return log_file_path_;
+}
+
+
+void
+ErrorHandler::write_to_log(const ErrorRecord &record)
+{
+	// Must be called with mtx_ held
+	if (log_file_path_.empty())
+		return;
+
+	ensure_log_file();
+	if (!log_file_ || !log_file_->is_open())
+		return;
+
+	// Format: [timestamp] [SEVERITY] component (context): message
+	std::string timestamp = format_timestamp(record.timestamp_ns);
+	std::string severity  = severity_to_string(record.severity);
+
+	*log_file_ << "[" << timestamp << "] [" << severity << "] " << record.component;
+	if (!record.context.empty()) {
+		*log_file_ << " (" << record.context << ")";
+	}
+	*log_file_ << ": " << record.message << "\n";
+	log_file_->flush();
+}
+
+
+void
+ErrorHandler::ensure_log_file()
+{
+	// Must be called with mtx_ held
+	if (log_file_ &&log_file_
+	->
+	is_open()
+	)
+	return;
+
+	if (log_file_path_.empty())
+		return;
+
+	try {
+		log_file_ = std::make_unique<std::ofstream>(log_file_path_,
+		                                            std::ios::app | std::ios::out);
+		if (!log_file_->is_open()) {
+			log_file_.reset();
+		}
+	} catch (...) {
+		log_file_.reset();
+	}
+}
+
+
+std::string
+ErrorHandler::format_timestamp(std::uint64_t timestamp_ns) const
+{
+	// Convert nanoseconds to time_t (seconds)
+	std::time_t seconds = static_cast<std::time_t>(timestamp_ns / 1000000000ULL);
+	std::uint64_t nanos = timestamp_ns % 1000000000ULL;
+
+	std::tm tm_buf{};
+#if defined(_WIN32)
+	localtime_s(&tm_buf, &seconds);
+#else
+	localtime_r(&seconds, &tm_buf);
+#endif
+
+	std::ostringstream oss;
+	oss << std::put_time(&tm_buf, "%Y-%m-%d %H:%M:%S");
+	oss << "." << std::setfill('0') << std::setw(3) << (nanos / 1000000ULL);
+	return oss.str();
+}
+
+
+std::string
+ErrorHandler::severity_to_string(ErrorSeverity severity) const
+{
+	switch (severity) {
+	case ErrorSeverity::Info:
+		return "INFO";
+	case ErrorSeverity::Warning:
+		return "WARNING";
+	case ErrorSeverity::Error:
+		return "ERROR";
+	case ErrorSeverity::Critical:
+		return "CRITICAL";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+
+std::uint64_t
+ErrorHandler::now_ns()
+{
+	using namespace std::chrono;
+	return duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count();
+}
+} // namespace kte
--- a/ErrorHandler.h
+++ b/ErrorHandler.h
@@ -0,0 +1,106 @@
+// ErrorHandler.h - Centralized error handling and logging for kte
+#pragma once
+
+#include <string>
+#include <vector>
+#include <deque>
+#include <mutex>
+#include <cstdint>
+#include <memory>
+#include <fstream>
+
+namespace kte {
+enum class ErrorSeverity {
+	Info, // Informational messages
+	Warning, // Non-critical issues
+	Error, // Errors that affect functionality but allow continuation
+	Critical // Critical errors that may cause data loss or crashes
+};
+
+// Centralized error handler with logging and in-memory error tracking
+class ErrorHandler {
+public:
+	struct ErrorRecord {
+		std::uint64_t timestamp_ns{0};
+		ErrorSeverity severity{ErrorSeverity::Error};
+		std::string component; // e.g., "SwapManager", "Buffer", "main"
+		std::string message;
+		std::string context; // e.g., filename, buffer name, operation
+	};
+
+	// Get the global ErrorHandler instance
+	static ErrorHandler &Instance();
+
+	// Report an error with severity, component, message, and optional context
+	void Report(ErrorSeverity severity, const std::string &component,
+	            const std::string &message, const std::string &context = "");
+
+	// Convenience methods for common severity levels
+	void Info(const std::string &component, const std::string &message,
+	          const std::string &context = "");
+
+	void Warning(const std::string &component, const std::string &message,
+	             const std::string &context = "");
+
+	void Error(const std::string &component, const std::string &message,
+	           const std::string &context = "");
+
+	void Critical(const std::string &component, const std::string &message,
+	              const std::string &context = "");
+
+	// Query error state (thread-safe)
+	bool HasErrors() const;
+
+	bool HasCriticalErrors() const;
+
+	std::string GetLastError() const;
+
+	std::size_t GetErrorCount() const;
+
+	std::size_t GetErrorCount(ErrorSeverity severity) const;
+
+	// Get recent errors (up to max_count, most recent first)
+	std::vector<ErrorRecord> GetRecentErrors(std::size_t max_count = 10) const;
+
+	// Clear in-memory error history (does not affect log file)
+	void ClearErrors();
+
+	// Enable/disable file logging (enabled by default)
+	void SetFileLoggingEnabled(bool enabled);
+
+	// Get the path to the error log file
+	std::string GetLogFilePath() const;
+
+private:
+	ErrorHandler();
+
+	~ErrorHandler();
+
+	// Non-copyable, non-movable
+	ErrorHandler(const ErrorHandler &) = delete;
+
+	ErrorHandler &operator=(const ErrorHandler &) = delete;
+
+	ErrorHandler(ErrorHandler &&) = delete;
+
+	ErrorHandler &operator=(ErrorHandler &&) = delete;
+
+	void write_to_log(const ErrorRecord &record);
+
+	void ensure_log_file();
+
+	std::string format_timestamp(std::uint64_t timestamp_ns) const;
+
+	std::string severity_to_string(ErrorSeverity severity) const;
+
+	static std::uint64_t now_ns();
+
+	mutable std::mutex mtx_;
+	std::deque<ErrorRecord> errors_; // bounded to max 100 entries
+	std::size_t total_error_count_{0};
+	std::size_t critical_error_count_{0};
+	bool file_logging_enabled_{true};
+	std::string log_file_path_;
+	std::unique_ptr<std::ofstream> log_file_;
+};
+} // namespace kte
--- a/ErrorRecovery.cc
+++ b/ErrorRecovery.cc
@@ -0,0 +1,157 @@
+// ErrorRecovery.cc - Error recovery mechanisms implementation
+#include "ErrorRecovery.h"
+#include <mutex>
+
+namespace kte {
+CircuitBreaker::CircuitBreaker(const Config &cfg)
+	: config_(cfg), state_(State::Closed), failure_count_(0), success_count_(0),
+	  last_failure_time_(std::chrono::steady_clock::time_point::min()),
+	  state_change_time_(std::chrono::steady_clock::now()) {}
+
+
+bool
+CircuitBreaker::AllowRequest()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+
+	const auto now = std::chrono::steady_clock::now();
+
+	switch (state_) {
+	case State::Closed:
+		// Normal operation, allow all requests
+		return true;
+
+	case State::Open: {
+		// Check if timeout has elapsed to transition to HalfOpen
+		const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
+			now - state_change_time_
+		);
+		if (elapsed >= config_.open_timeout) {
+			TransitionTo(State::HalfOpen);
+			return true; // Allow one request to test recovery
+		}
+		return false; // Circuit is open, reject request
+	}
+
+	case State::HalfOpen:
+		// Allow limited requests to test recovery
+		return true;
+	}
+
+	return false;
+}
+
+
+void
+CircuitBreaker::RecordSuccess()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+
+	switch (state_) {
+	case State::Closed:
+		// Reset failure count on success in normal operation
+		failure_count_ = 0;
+		break;
+
+	case State::HalfOpen:
+		++success_count_;
+		if (success_count_ >= config_.success_threshold) {
+			// Enough successes, close the circuit
+			TransitionTo(State::Closed);
+		}
+		break;
+
+	case State::Open:
+		// Shouldn't happen (requests rejected), but handle gracefully
+		break;
+	}
+}
+
+
+void
+CircuitBreaker::RecordFailure()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+
+	const auto now     = std::chrono::steady_clock::now();
+	last_failure_time_ = now;
+
+	switch (state_) {
+	case State::Closed:
+		// Check if we need to reset the failure count (window expired)
+		if (IsWindowExpired()) {
+			failure_count_ = 0;
+		}
+
+		++failure_count_;
+		if (failure_count_ >= config_.failure_threshold) {
+			// Too many failures, open the circuit
+			TransitionTo(State::Open);
+		}
+		break;
+
+	case State::HalfOpen:
+		// Failure during recovery test, reopen the circuit
+		TransitionTo(State::Open);
+		break;
+
+	case State::Open:
+		// Already open, just track the failure
+		++failure_count_;
+		break;
+	}
+}
+
+
+void
+CircuitBreaker::Reset()
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	TransitionTo(State::Closed);
+}
+
+
+void
+CircuitBreaker::TransitionTo(State new_state)
+{
+	if (state_ == new_state) {
+		return;
+	}
+
+	state_             = new_state;
+	state_change_time_ = std::chrono::steady_clock::now();
+
+	switch (new_state) {
+	case State::Closed:
+		failure_count_ = 0;
+		success_count_ = 0;
+		break;
+
+	case State::Open:
+		success_count_ = 0;
+		// Keep failure_count_ for diagnostics
+		break;
+
+	case State::HalfOpen:
+		success_count_ = 0;
+		// Keep failure_count_ for diagnostics
+		break;
+	}
+}
+
+
+bool
+CircuitBreaker::IsWindowExpired() const
+{
+	if (failure_count_ == 0) {
+		return false;
+	}
+
+	const auto now     = std::chrono::steady_clock::now();
+	const auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(
+		now - last_failure_time_
+	);
+
+	return elapsed >= config_.window;
+}
+} // namespace kte
--- a/ErrorRecovery.h
+++ b/ErrorRecovery.h
@@ -0,0 +1,170 @@
+// ErrorRecovery.h - Error recovery mechanisms for kte
+#pragma once
+
+#include <chrono>
+#include <cstddef>
+#include <functional>
+#include <string>
+#include <thread>
+#include <mutex>
+#include <cerrno>
+
+namespace kte {
+// Classify errno values as transient (retryable) or permanent
+inline bool
+IsTransientError(int err)
+{
+	switch (err) {
+	case EAGAIN:
+#if EAGAIN != EWOULDBLOCK
+	case EWOULDBLOCK: 
+#endif
+	case EBUSY:
+	case EIO: // I/O error (may be transient on network filesystems)
+	case ETIMEDOUT:
+	case ENOSPC: // Disk full (may become available)
+	case EDQUOT: // Quota exceeded (may become available)
+		return true;
+	default:
+		return false;
+	}
+}
+
+
+// RetryPolicy defines retry behavior for transient failures
+struct RetryPolicy {
+	std::size_t max_attempts{3}; // Maximum retry attempts
+	std::chrono::milliseconds initial_delay{100}; // Initial delay before first retry
+	double backoff_multiplier{2.0}; // Exponential backoff multiplier
+	std::chrono::milliseconds max_delay{5000}; // Maximum delay between retries
+
+	// Default policy: 3 attempts, 100ms initial, 2x backoff, 5s max
+	static RetryPolicy Default()
+	{
+		return RetryPolicy{};
+	}
+
+
+	// Aggressive policy for critical operations: more attempts, faster retries
+	static RetryPolicy Aggressive()
+	{
+		return RetryPolicy{5, std::chrono::milliseconds(50), 1.5, std::chrono::milliseconds(2000)};
+	}
+
+
+	// Conservative policy for non-critical operations: fewer attempts, slower retries
+	static RetryPolicy Conservative()
+	{
+		return RetryPolicy{2, std::chrono::milliseconds(200), 2.5, std::chrono::milliseconds(10000)};
+	}
+};
+
+// Retry a function with exponential backoff for transient errors
+// Returns true on success, false on permanent failure or exhausted retries
+// The function `fn` should return true on success, false on failure, and set errno on failure
+template<typename Func>
+bool
+RetryOnTransientError(Func fn, const RetryPolicy &policy, std::string &err)
+{
+	std::size_t attempt             = 0;
+	std::chrono::milliseconds delay = policy.initial_delay;
+
+	while (attempt < policy.max_attempts) {
+		++attempt;
+		errno = 0;
+		if (fn()) {
+			return true; // Success
+		}
+
+		int saved_errno = errno;
+		if (!IsTransientError(saved_errno)) {
+			// Permanent error, don't retry
+			return false;
+		}
+
+		if (attempt >= policy.max_attempts) {
+			// Exhausted retries
+			err += " (exhausted " + std::to_string(policy.max_attempts) + " retry attempts)";
+			return false;
+		}
+
+		// Sleep before retry
+		std::this_thread::sleep_for(delay);
+
+		// Exponential backoff
+		delay = std::chrono::milliseconds(
+			static_cast<long long>(delay.count() * policy.backoff_multiplier)
+		);
+		if (delay > policy.max_delay) {
+			delay = policy.max_delay;
+		}
+	}
+
+	return false;
+}
+
+
+// CircuitBreaker prevents repeated attempts to failing operations
+// States: Closed (normal), Open (failing, reject immediately), HalfOpen (testing recovery)
+class CircuitBreaker {
+public:
+	enum class State {
+		Closed, // Normal operation, allow all requests
+		Open, // Failing, reject requests immediately
+		HalfOpen // Testing recovery, allow limited requests
+	};
+
+	struct Config {
+		std::size_t failure_threshold; // Failures before opening circuit
+		std::chrono::seconds open_timeout; // Time before attempting recovery (Open → HalfOpen)
+		std::size_t success_threshold; // Successes in HalfOpen before closing
+		std::chrono::seconds window; // Time window for counting failures
+
+		Config()
+			: failure_threshold(5), open_timeout(30), success_threshold(2), window(60) {}
+	};
+
+
+	explicit CircuitBreaker(const Config &cfg = Config());
+
+
+	// Check if operation is allowed (returns false if circuit is Open)
+	bool AllowRequest();
+
+	// Record successful operation
+	void RecordSuccess();
+
+	// Record failed operation
+	void RecordFailure();
+
+	// Get current state
+	State GetState() const
+	{
+		return state_;
+	}
+
+
+	// Get failure count in current window
+	std::size_t GetFailureCount() const
+	{
+		return failure_count_;
+	}
+
+
+	// Reset circuit to Closed state (for testing or manual intervention)
+	void Reset();
+
+private:
+	void TransitionTo(State new_state);
+
+	bool IsWindowExpired() const;
+
+	Config config_;
+	State state_;
+	std::size_t failure_count_;
+	std::size_t success_count_;
+	std::chrono::steady_clock::time_point last_failure_time_;
+	std::chrono::steady_clock::time_point state_change_time_;
+	mutable std::mutex mtx_;
+};
+} // namespace kte
--- a/Swap.cc
+++ b/Swap.cc
@@ -1,5 +1,8 @@
 #include "Swap.h"
 #include "Buffer.h"
+#include "ErrorHandler.h"
+#include "SyscallWrappers.h"
+#include "ErrorRecovery.h"

 #include <algorithm>
 #include <chrono>
@@ -530,8 +533,7 @@ SwapManager::ComputeSidecarPath(const Buffer &buf)
 	// Unnamed buffers: unique within the process.
 	static std::atomic<std::uint64_t> ctr{0};
 	const std::uint64_t n  = ++ctr;
-	const int pid          = (int) ::getpid();
-	const std::string name = "unnamed-" + std::to_string(pid) + "-" + std::to_string(n) + ".swp";
+	const std::string name = "unnamed-" + std::to_string(n) + ".swp";
 	return (root / name).string();
 }

@@ -599,38 +601,68 @@ SwapManager::write_header(int fd)


 bool
-SwapManager::open_ctx(JournalCtx &ctx, const std::string &path)
+SwapManager::open_ctx(JournalCtx &ctx, const std::string &path, std::string &err)
 {
+	err.clear();
 	if (ctx.fd >= 0)
 		return true;
-	if (!ensure_parent_dir(path))
+	if (!ensure_parent_dir(path)) {
+		err = "Failed to create parent directory for swap file: " + path;
 		return false;
+	}
 	int flags = O_CREAT | O_WRONLY | O_APPEND;
 #ifdef O_CLOEXEC
 	flags |= O_CLOEXEC;
 #endif
-	int fd = ::open(path.c_str(), flags, 0600);
-	if (fd < 0)
+
+	// Retry on transient errors (ENOSPC, EDQUOT, EBUSY, etc.)
+	int fd       = -1;
+	auto open_fn = [&]() -> bool {
+		fd = kte::syscall::Open(path.c_str(), flags, 0600);
+		return fd >= 0;
+	};
+
+	if (!RetryOnTransientError(open_fn, RetryPolicy::Aggressive(), err)) {
+		if (fd < 0) {
+			int saved_errno = errno;
+			err = "Failed to open swap file '" + path + "': " + std::strerror(saved_errno) + err;
+		}
 		return false;
+	}
 	// Ensure permissions even if file already existed.
-	(void) ::fchmod(fd, 0600);
+	(void) kte::syscall::Fchmod(fd, 0600);
 	struct stat st{};
-	if (fstat(fd, &st) != 0) {
-		::close(fd);
+	if (kte::syscall::Fstat(fd, &st) != 0) {
+		int saved_errno = errno;
+		kte::syscall::Close(fd);
+		err = "Failed to fstat swap file '" + path + "': " + std::strerror(saved_errno);
 		return false;
 	}
 	// If an existing file is too small to contain the fixed header, truncate
 	// and restart.
 	if (st.st_size > 0 && st.st_size < 64) {
-		::close(fd);
+		kte::syscall::Close(fd);
 		int tflags = O_CREAT | O_WRONLY | O_TRUNC | O_APPEND;
 #ifdef O_CLOEXEC
 		tflags |= O_CLOEXEC;
 #endif
-		fd = ::open(path.c_str(), tflags, 0600);
-		if (fd < 0)
+
+		// Retry on transient errors for truncation open
+		fd             = -1;
+		auto reopen_fn = [&]() -> bool {
+			fd = kte::syscall::Open(path.c_str(), tflags, 0600);
+			return fd >= 0;
+		};
+
+		if (!RetryOnTransientError(reopen_fn, RetryPolicy::Aggressive(), err)) {
+			if (fd < 0) {
+				int saved_errno = errno;
+				err = "Failed to reopen swap file for truncation '" + path + "': " + std::strerror(
+					      saved_errno) + err;
+			}
 			return false;
-		(void) ::fchmod(fd, 0600);
+		}
+		(void) kte::syscall::Fchmod(fd, 0600);
 		st.st_size = 0;
 	}
 	ctx.fd   = fd;
@@ -638,6 +670,9 @@ SwapManager::open_ctx(JournalCtx &ctx, const std::string &path)
 	if (st.st_size == 0) {
 		ctx.header_ok         = write_header(fd);
 		ctx.approx_size_bytes = ctx.header_ok ? 64 : 0;
+		if (!ctx.header_ok) {
+			err = "Failed to write swap file header: " + path;
+		}
 	} else {
 		ctx.header_ok         = true; // stage 1: trust existing header
 		ctx.approx_size_bytes = static_cast<std::uint64_t>(st.st_size);
@@ -650,8 +685,8 @@ void
 SwapManager::close_ctx(JournalCtx &ctx)
 {
 	if (ctx.fd >= 0) {
-		(void) ::fsync(ctx.fd);
-		::close(ctx.fd);
+		(void) kte::syscall::Fsync(ctx.fd);
+		kte::syscall::Close(ctx.fd);
 		ctx.fd = -1;
 	}
 	ctx.header_ok = false;
@@ -659,47 +694,77 @@ SwapManager::close_ctx(JournalCtx &ctx)


 bool
-SwapManager::compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8_t> &chkpt_record)
+SwapManager::compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8_t> &chkpt_record, std::string &err)
 {
-	if (ctx.path.empty())
+	err.clear();
+	if (ctx.path.empty()) {
+		err = "Compact failed: empty path";
 		return false;
-	if (chkpt_record.empty())
+	}
+	if (chkpt_record.empty()) {
+		err = "Compact failed: empty checkpoint record";
 		return false;
+	}

 	// Close existing file before rename.
 	if (ctx.fd >= 0) {
-		(void) ::fsync(ctx.fd);
-		::close(ctx.fd);
+		(void) kte::syscall::Fsync(ctx.fd);
+		kte::syscall::Close(ctx.fd);
 		ctx.fd = -1;
 	}
 	ctx.header_ok = false;

 	const std::string tmp_path = ctx.path + ".tmp";
 	// Create the compacted file: header + checkpoint record.
-	if (!ensure_parent_dir(tmp_path))
+	if (!ensure_parent_dir(tmp_path)) {
+		err = "Failed to create parent directory for temp swap file: " + tmp_path;
 		return false;
+	}

 	int flags = O_CREAT | O_WRONLY | O_TRUNC;
 #ifdef O_CLOEXEC
 	flags |= O_CLOEXEC;
 #endif
-	int tfd = ::open(tmp_path.c_str(), flags, 0600);
-	if (tfd < 0)
+
+	// Retry on transient errors for temp file creation
+	int tfd          = -1;
+	auto open_tmp_fn = [&]() -> bool {
+		tfd = kte::syscall::Open(tmp_path.c_str(), flags, 0600);
+		return tfd >= 0;
+	};
+
+	if (!RetryOnTransientError(open_tmp_fn, RetryPolicy::Aggressive(), err)) {
+		if (tfd < 0) {
+			int saved_errno = errno;
+			err = "Failed to open temp swap file '" + tmp_path + "': " + std::strerror(saved_errno) + err;
+		}
 		return false;
-	(void) ::fchmod(tfd, 0600);
+	}
+	(void) kte::syscall::Fchmod(tfd, 0600);
 	bool ok = write_header(tfd);
 	if (ok)
 		ok = write_full(tfd, chkpt_record.data(), chkpt_record.size());
-	if (ok)
-		ok = (::fsync(tfd) == 0);
-	::close(tfd);
+	if (ok) {
+		if (kte::syscall::Fsync(tfd) != 0) {
+			int saved_errno = errno;
+			err = "Failed to fsync temp swap file '" + tmp_path + "': " + std::strerror(saved_errno);
+			ok = false;
+		}
+	}
+	kte::syscall::Close(tfd);
 	if (!ok) {
+		if (err.empty()) {
+			err = "Failed to write temp swap file: " + tmp_path;
+		}
 		std::remove(tmp_path.c_str());
 		return false;
 	}

 	// Atomic replace.
 	if (::rename(tmp_path.c_str(), ctx.path.c_str()) != 0) {
+		int saved_errno = errno;
+		err = "Failed to rename temp swap file '" + tmp_path + "' to '" + ctx.path + "': " + std::strerror(
+			      saved_errno);
 		std::remove(tmp_path.c_str());
 		return false;
 	}
@@ -713,10 +778,10 @@ SwapManager::compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8
 #ifdef O_DIRECTORY
 			dflags |= O_DIRECTORY;
 #endif
-			int dfd = ::open(dir.string().c_str(), dflags);
+			int dfd = kte::syscall::Open(dir.string().c_str(), dflags);
 			if (dfd >= 0) {
-				(void) ::fsync(dfd);
-				::close(dfd);
+				(void) kte::syscall::Fsync(dfd);
+				kte::syscall::Close(dfd);
 			}
 		}
 	} catch (...) {
@@ -724,8 +789,10 @@ SwapManager::compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8
 	}

 	// Re-open for further appends.
-	if (!open_ctx(ctx, ctx.path))
+	if (!open_ctx(ctx, ctx.path, err)) {
+		// err already set by open_ctx
 		return false;
+	}
 	ctx.approx_size_bytes = 64 + static_cast<std::uint64_t>(chkpt_record.size());
 	return true;
 }
@@ -970,7 +1037,13 @@ SwapManager::writer_loop()
 			continue;

 		for (const Pending &p: batch) {
+			try {
 				process_one(p);
+			} catch (const std::exception &e) {
+				report_error(std::string("Exception in process_one: ") + e.what(), p.buf);
+			} catch (...) {
+				report_error("Unknown exception in process_one", p.buf);
+			}
 			{
 				std::lock_guard<std::mutex> lg(mtx_);
 				if (p.seq > last_processed_)
@@ -982,6 +1055,7 @@ SwapManager::writer_loop()
 		}

 		// Throttled fsync: best-effort (grouped)
+		try {
 			std::vector<int> to_sync;
 			std::uint64_t now = now_ns();
 			{
@@ -998,7 +1072,12 @@ SwapManager::writer_loop()
 				}
 			}
 			for (int fd: to_sync) {
-			(void) ::fsync(fd);
+				(void) kte::syscall::Fsync(fd);
+			}
+		} catch (const std::exception &e) {
+			report_error(std::string("Exception in fsync operations: ") + e.what());
+		} catch (...) {
+			report_error("Unknown exception in fsync operations");
 		}
 	}
 	// Wake any waiters.
@@ -1011,6 +1090,36 @@ SwapManager::process_one(const Pending &p)
 {
 	if (!p.buf)
 		return;
+
+	// Check circuit breaker before processing
+	bool circuit_open = false;
+	{
+		std::lock_guard<std::mutex> lg(mtx_);
+		if (!circuit_breaker_.AllowRequest()) {
+			circuit_open = true;
+		}
+	}
+
+	if (circuit_open) {
+		// Circuit is open - graceful degradation: skip swap write
+		// This prevents repeated failures from overwhelming the system
+		// Swap recording will resume when circuit closes
+		static std::atomic<std::uint64_t> last_warning_ns{0};
+		const std::uint64_t now  = now_ns();
+		const std::uint64_t last = last_warning_ns.load();
+		// Log warning at most once per 60 seconds to avoid spam
+		if (now - last > 60000000000ULL) {
+			last_warning_ns.store(now);
+			ErrorHandler::Instance().Warning("SwapManager",
+			                                 "Swap operations temporarily disabled due to repeated failures (circuit breaker open)",
+			                                 p.buf && !p.buf->Filename().empty()
+				                                 ? p.buf->Filename()
+				                                 : "<unnamed>");
+		}
+		return;
+	}
+
+	try {
 		Buffer &buf = *p.buf;

 		JournalCtx *ctxp = nullptr;
@@ -1029,10 +1138,23 @@ SwapManager::process_one(const Pending &p)
 		}
 		if (!ctxp)
 			return;
-	if (!open_ctx(*ctxp, path))
+		std::string open_err;
+		if (!open_ctx(*ctxp, path, open_err)) {
+			report_error(open_err, p.buf);
+			{
+				std::lock_guard<std::mutex> lg(mtx_);
+				circuit_breaker_.RecordFailure();
+			}
 			return;
-	if (p.payload.size() > 0xFFFFFFu)
+		}
+		if (p.payload.size() > 0xFFFFFFu) {
+			report_error("Payload too large: " + std::to_string(p.payload.size()) + " bytes", p.buf);
+			{
+				std::lock_guard<std::mutex> lg(mtx_);
+				circuit_breaker_.RecordFailure();
+			}
 			return;
+		}

 		// Build record: [type u8][len u24][payload][crc32 u32]
 		std::uint8_t len3[3];
@@ -1063,18 +1185,50 @@ SwapManager::process_one(const Pending &p)

 		// Write (handle partial writes and check results)
 		bool ok = write_full(ctxp->fd, rec.data(), rec.size());
-	if (ok) {
+		if (!ok) {
+			int err = errno;
+			report_error("Failed to write swap record to '" + path + "': " + std::strerror(err), p.buf);
+			{
+				std::lock_guard<std::mutex> lg(mtx_);
+				circuit_breaker_.RecordFailure();
+			}
+			return;
+		}
 		ctxp->approx_size_bytes += static_cast<std::uint64_t>(rec.size());
 		if (p.urgent_flush) {
-			(void) ::fsync(ctxp->fd);
+			if (kte::syscall::Fsync(ctxp->fd) != 0) {
+				int err = errno;
+				report_error("Failed to fsync swap file '" + path + "': " + std::strerror(err), p.buf);
+			}
 			ctxp->last_fsync_ns = now_ns();
 		}
 		if (p.type == SwapRecType::CHKPT && compact_bytes > 0 &&
 		    ctxp->approx_size_bytes >= static_cast<std::uint64_t>(compact_bytes)) {
-			(void) compact_to_checkpoint(*ctxp, rec);
+			std::string compact_err;
+			if (!compact_to_checkpoint(*ctxp, rec, compact_err)) {
+				report_error(compact_err, p.buf);
+				// Note: compaction failure is not fatal, don't record circuit breaker failure
+			}
+		}
+
+		// Record success for circuit breaker
+		{
+			std::lock_guard<std::mutex> lg(mtx_);
+			circuit_breaker_.RecordSuccess();
+		}
+	} catch (const std::exception &e) {
+		report_error(std::string("Exception in process_one: ") + e.what(), p.buf);
+		{
+			std::lock_guard<std::mutex> lg(mtx_);
+			circuit_breaker_.RecordFailure();
+		}
+	} catch (...) {
+		report_error("Unknown exception in process_one", p.buf);
+		{
+			std::lock_guard<std::mutex> lg(mtx_);
+			circuit_breaker_.RecordFailure();
 		}
 	}
-	(void) ok; // best-effort; future work could mark ctx error state
 }


@@ -1185,8 +1339,10 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		switch (type) {
 		case SwapRecType::INS: {
 			std::size_t off = 0;
-			if (payload.empty()) {
-				err = "Swap record missing INS payload";
+			// INS payload: encver(1) + row(4) + col(4) + nbytes(4) + data(nbytes)
+			// Minimum: 1 + 4 + 4 + 4 = 13 bytes
+			if (payload.size() < 13) {
+				err = "INS payload too short (need at least 13 bytes)";
 				return false;
 			}
 			const std::uint8_t encver = payload[off++];
@@ -1197,7 +1353,7 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 			std::uint32_t row = 0, col = 0, nbytes = 0;
 			if (!parse_u32_le(payload, off, row) || !parse_u32_le(payload, off, col) || !parse_u32_le(
 				    payload, off, nbytes)) {
-				err = "Malformed INS payload";
+				err = "Malformed INS payload (failed to parse row/col/nbytes)";
 				return false;
 			}
 			if (off + nbytes > payload.size()) {
@@ -1210,8 +1366,10 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		}
 		case SwapRecType::DEL: {
 			std::size_t off = 0;
-			if (payload.empty()) {
-				err = "Swap record missing DEL payload";
+			// DEL payload: encver(1) + row(4) + col(4) + dlen(4)
+			// Minimum: 1 + 4 + 4 + 4 = 13 bytes
+			if (payload.size() < 13) {
+				err = "DEL payload too short (need at least 13 bytes)";
 				return false;
 			}
 			const std::uint8_t encver = payload[off++];
@@ -1222,7 +1380,7 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 			std::uint32_t row = 0, col = 0, dlen = 0;
 			if (!parse_u32_le(payload, off, row) || !parse_u32_le(payload, off, col) || !parse_u32_le(
 				    payload, off, dlen)) {
-				err = "Malformed DEL payload";
+				err = "Malformed DEL payload (failed to parse row/col/dlen)";
 				return false;
 			}
 			buf.delete_text((int) row, (int) col, (std::size_t) dlen);
@@ -1230,8 +1388,10 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		}
 		case SwapRecType::SPLIT: {
 			std::size_t off = 0;
-			if (payload.empty()) {
-				err = "Swap record missing SPLIT payload";
+			// SPLIT payload: encver(1) + row(4) + col(4)
+			// Minimum: 1 + 4 + 4 = 9 bytes
+			if (payload.size() < 9) {
+				err = "SPLIT payload too short (need at least 9 bytes)";
 				return false;
 			}
 			const std::uint8_t encver = payload[off++];
@@ -1241,7 +1401,7 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 			}
 			std::uint32_t row = 0, col = 0;
 			if (!parse_u32_le(payload, off, row) || !parse_u32_le(payload, off, col)) {
-				err = "Malformed SPLIT payload";
+				err = "Malformed SPLIT payload (failed to parse row/col)";
 				return false;
 			}
 			buf.split_line((int) row, (int) col);
@@ -1249,8 +1409,10 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		}
 		case SwapRecType::JOIN: {
 			std::size_t off = 0;
-			if (payload.empty()) {
-				err = "Swap record missing JOIN payload";
+			// JOIN payload: encver(1) + row(4)
+			// Minimum: 1 + 4 = 5 bytes
+			if (payload.size() < 5) {
+				err = "JOIN payload too short (need at least 5 bytes)";
 				return false;
 			}
 			const std::uint8_t encver = payload[off++];
@@ -1260,7 +1422,7 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 			}
 			std::uint32_t row = 0;
 			if (!parse_u32_le(payload, off, row)) {
-				err = "Malformed JOIN payload";
+				err = "Malformed JOIN payload (failed to parse row)";
 				return false;
 			}
 			buf.join_lines((int) row);
@@ -1268,8 +1430,10 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		}
 		case SwapRecType::CHKPT: {
 			std::size_t off = 0;
+			// CHKPT payload: encver(1) + nbytes(4) + data(nbytes)
+			// Minimum: 1 + 4 = 5 bytes
 			if (payload.size() < 5) {
-				err = "Malformed CHKPT payload";
+				err = "CHKPT payload too short (need at least 5 bytes)";
 				return false;
 			}
 			const std::uint8_t encver = payload[off++];
@@ -1279,7 +1443,7 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 			}
 			std::uint32_t nbytes = 0;
 			if (!parse_u32_le(payload, off, nbytes)) {
-				err = "Malformed CHKPT payload";
+				err = "Malformed CHKPT payload (failed to parse nbytes)";
 				return false;
 			}
 			if (off + nbytes > payload.size()) {
@@ -1296,4 +1460,61 @@ SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &
 		}
 	}
 }
+
+
+void
+SwapManager::report_error(const std::string &message, Buffer *buf)
+{
+	std::string context;
+	if (buf && !buf->Filename().empty()) {
+		context = buf->Filename();
+	} else if (buf) {
+		context = "<unnamed>";
+	} else {
+		context = "<unknown>";
+	}
+
+	// Report to centralized error handler
+	ErrorHandler::Instance().Error("SwapManager", message, context);
+
+	// Maintain local error tracking for backward compatibility
+	std::lock_guard<std::mutex> lg(mtx_);
+	SwapError err;
+	err.timestamp_ns = now_ns();
+	err.message      = message;
+	err.buffer_name  = context;
+	errors_.push_back(err);
+	// Bound the error queue to 100 entries
+	while (errors_.size() > 100) {
+		errors_.pop_front();
+	}
+	++total_error_count_;
+}
+
+
+bool
+SwapManager::HasErrors() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return !errors_.empty();
+}
+
+
+std::string
+SwapManager::GetLastError() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	if (errors_.empty())
+		return "";
+	const SwapError &e = errors_.back();
+	return "[" + e.buffer_name + "] " + e.message;
+}
+
+
+std::size_t
+SwapManager::GetErrorCount() const
+{
+	std::lock_guard<std::mutex> lg(mtx_);
+	return total_error_count_;
+}
 } // namespace kte
--- a/Swap.h
+++ b/Swap.h
@@ -10,10 +10,12 @@
 #include <memory>
 #include <mutex>
 #include <condition_variable>
+#include <deque>
 #include <thread>
 #include <atomic>

 #include "SwapRecorder.h"
+#include "ErrorRecovery.h"

 class Buffer;

@@ -131,6 +133,20 @@ public:
 	// Per-buffer toggle
 	void SetSuspended(Buffer &buf, bool on);

+	// Error reporting for background thread
+	struct SwapError {
+		std::uint64_t timestamp_ns{0};
+		std::string message;
+		std::string buffer_name; // filename or "<unnamed>"
+	};
+
+	// Query error state (thread-safe)
+	bool HasErrors() const;
+
+	std::string GetLastError() const;
+
+	std::size_t GetErrorCount() const;
+
 private:
 	class BufferRecorder final : public SwapRecorder {
 	public:
@@ -190,11 +206,12 @@ private:

 	static bool write_header(int fd);

-	static bool open_ctx(JournalCtx &ctx, const std::string &path);
+	static bool open_ctx(JournalCtx &ctx, const std::string &path, std::string &err);

 	static void close_ctx(JournalCtx &ctx);

-	static bool compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8_t> &chkpt_record);
+	static bool compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8_t> &chkpt_record,
+	                                  std::string &err);

 	static std::uint32_t crc32(const std::uint8_t *data, std::size_t len, std::uint32_t seed = 0);

@@ -210,11 +227,14 @@ private:

 	void process_one(const Pending &p);

+	// Error reporting helper (called from writer thread)
+	void report_error(const std::string &message, Buffer *buf = nullptr);
+
 	// State
 	SwapConfig cfg_{};
 	std::unordered_map<Buffer *, JournalCtx> journals_;
 	std::unordered_map<Buffer *, std::unique_ptr<BufferRecorder> > recorders_;
-	std::mutex mtx_;
+	mutable std::mutex mtx_;
 	std::condition_variable cv_;
 	std::vector<Pending> queue_;
 	std::uint64_t next_seq_{0};
@@ -222,5 +242,12 @@ private:
 	std::uint64_t inflight_{0};
 	std::atomic<bool> running_{false};
 	std::thread worker_;
+
+	// Error tracking (protected by mtx_)
+	std::deque<SwapError> errors_; // bounded to max 100 entries
+	std::size_t total_error_count_{0};
+
+	// Circuit breaker for swap operations (protected by mtx_)
+	CircuitBreaker circuit_breaker_;
 };
 } // namespace kte
--- a/SyscallWrappers.cc
+++ b/SyscallWrappers.cc
@@ -0,0 +1,76 @@
+#include "SyscallWrappers.h"
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <cerrno>
+#include <cstdlib>
+
+namespace kte {
+namespace syscall {
+int
+Open(const char *path, int flags, mode_t mode)
+{
+	int fd;
+	do {
+		fd = ::open(path, flags, mode);
+	} while (fd == -1 && errno == EINTR);
+	return fd;
+}
+
+
+int
+Close(int fd)
+{
+	int ret;
+	do {
+		ret = ::close(fd);
+	} while (ret == -1 && errno == EINTR);
+	return ret;
+}
+
+
+int
+Fsync(int fd)
+{
+	int ret;
+	do {
+		ret = ::fsync(fd);
+	} while (ret == -1 && errno == EINTR);
+	return ret;
+}
+
+
+int
+Fstat(int fd, struct stat *buf)
+{
+	int ret;
+	do {
+		ret = ::fstat(fd, buf);
+	} while (ret == -1 && errno == EINTR);
+	return ret;
+}
+
+
+int
+Fchmod(int fd, mode_t mode)
+{
+	int ret;
+	do {
+		ret = ::fchmod(fd, mode);
+	} while (ret == -1 && errno == EINTR);
+	return ret;
+}
+
+
+int
+Mkstemp(char *template_str)
+{
+	int fd;
+	do {
+		fd = ::mkstemp(template_str);
+	} while (fd == -1 && errno == EINTR);
+	return fd;
+}
+} // namespace syscall
+} // namespace kte
--- a/SyscallWrappers.h
+++ b/SyscallWrappers.h
@@ -0,0 +1,47 @@
+// SyscallWrappers.h - EINTR-safe syscall wrappers for kte
+#pragma once
+
+#include <string>
+#include <cstddef>
+#include <sys/stat.h>
+
+namespace kte {
+namespace syscall {
+// EINTR-safe wrapper for open(2).
+// Returns file descriptor on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+int Open(const char *path, int flags, mode_t mode = 0);
+
+// EINTR-safe wrapper for close(2).
+// Returns 0 on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+// Note: Some systems may not restart close() on EINTR, but we retry anyway
+// as recommended by POSIX.1-2008.
+int Close(int fd);
+
+// EINTR-safe wrapper for fsync(2).
+// Returns 0 on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+int Fsync(int fd);
+
+// EINTR-safe wrapper for fstat(2).
+// Returns 0 on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+int Fstat(int fd, struct stat *buf);
+
+// EINTR-safe wrapper for fchmod(2).
+// Returns 0 on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+int Fchmod(int fd, mode_t mode);
+
+// EINTR-safe wrapper for mkstemp(3).
+// Returns file descriptor on success, -1 on failure (errno set).
+// Automatically retries on EINTR.
+// Note: template_str must be a mutable buffer ending in "XXXXXX".
+int Mkstemp(char *template_str);
+
+// Note: rename(2) and unlink(2) are not wrapped because they operate on
+// filesystem metadata and typically complete atomically without EINTR.
+// If interrupted, they either succeed or fail without partial state.
+} // namespace syscall
+} // namespace kte
--- a/docs/DEVELOPER_GUIDE.md
+++ b/docs/DEVELOPER_GUIDE.md
@@ -11,7 +11,8 @@ codebase, make changes, and contribute effectively.
 4. [Building and Testing](#building-and-testing)
 5. [Making Changes](#making-changes)
 6. [Code Style](#code-style)
-7. [Common Tasks](#common-tasks)
+7. [Error Handling Conventions](#error-handling-conventions)
+8. [Common Tasks](#common-tasks)

 ## Architecture Overview

@@ -537,6 +538,491 @@ void maybeConsolidate() {
 }
 ```

+## Error Handling Conventions
+
+kte uses standardized error handling patterns to ensure consistency and
+reliability across the codebase. This section documents when to use each
+pattern and how to integrate with the centralized error handling system.
+
+### Error Propagation Patterns
+
+kte uses three standard patterns for error handling:
+
+#### 1. `bool` + `std::string &err` (I/O and Fallible Operations)
+
+**When to use**: Operations that can fail and need detailed error
+messages
+(file I/O, network operations, parsing, resource allocation).
+
+**Pattern**:
+
+```cpp
+bool OperationName(args..., std::string &err) {
+    err.clear();
+    
+    // Attempt operation
+    if (/* operation failed */) {
+        err = "Detailed error message with context";
+        ErrorHandler::Instance().Error("ComponentName", err, "optional_context");
+        return false;
+    }
+    
+    return true;
+}
+```
+
+**Examples**:
+
+- `Buffer::OpenFromFile(const std::string &path, std::string &err)`
+- `Buffer::Save(std::string &err)`
+-
+
+`SwapManager::ReplayFile(Buffer &buf, const std::string &path, std::string &err)`
+
+**Guidelines**:
+
+- Always clear `err` at the start of the function
+- Provide actionable error messages with context (file paths, operation
+  details)
+- Call `ErrorHandler::Instance().Error()` for centralized logging
+- Return `false` on failure, `true` on success
+- Capture `errno` immediately after syscall failures: `int saved_errno =
+  errno;`
+- Use `std::strerror(saved_errno)` for syscall error messages
+
+#### 2. `void` (Infallible State Changes)
+
+**When to use**: Operations that modify internal state and cannot fail
+(setters, cursor movement, flag toggles).
+
+**Pattern**:
+
+```cpp
+void SetProperty(Type value) {
+    property_ = value;
+    // Update related state if needed
+}
+```
+
+**Examples**:
+
+- `Buffer::SetCursor(std::size_t x, std::size_t y)`
+- `Buffer::SetDirty(bool d)`
+- `Editor::SetStatus(const std::string &msg)`
+
+**Guidelines**:
+
+- Use for simple state changes that cannot fail
+- No error reporting needed
+- Keep operations atomic and side-effect free when possible
+
+#### 3. `bool` without error parameter (Control Flow)
+
+**When to use**: Operations where success/failure is sufficient
+information
+and detailed error messages aren't needed (validation checks, control
+flow
+decisions).
+
+**Pattern**:
+
+```cpp
+bool CheckCondition() const {
+    return condition_is_met;
+}
+```
+
+**Examples**:
+
+- `Editor::SwitchTo(std::size_t index)` - returns false if index invalid
+- `Editor::CloseBuffer(std::size_t index)` - returns false if can't
+  close
+
+**Guidelines**:
+
+- Use when the caller only needs to know success/failure
+- Typically for validation or control flow decisions
+- Don't use for operations that need error diagnostics
+
+### ErrorHandler Integration
+
+All error-prone operations should report errors to the centralized
+`ErrorHandler` for logging and UI integration.
+
+**Severity Levels**:
+
+```cpp
+ErrorHandler::Instance().Info("Component", "message", "context");     // Informational
+ErrorHandler::Instance().Warning("Component", "message", "context");  // Warning
+ErrorHandler::Instance().Error("Component", "message", "context");    // Error
+ErrorHandler::Instance().Critical("Component", "message", "context"); // Critical
+```
+
+**When to use each severity**:
+
+- **Info**: Non-error events (file saved, operation completed)
+- **Warning**: Recoverable issues (external file modification detected)
+- **Error**: Operation failures (file I/O errors, allocation failures)
+- **Critical**: Fatal errors (unhandled exceptions, data corruption)
+
+**Component names**: Use the class name ("Buffer", "SwapManager",
+"Editor", "main")
+
+**Context**: Optional string providing additional context (filename,
+buffer
+name, operation details)
+
+### Error Handling in Different Contexts
+
+#### File I/O Operations
+
+```cpp
+bool Buffer::Save(std::string &err) const {
+    if (!is_file_backed_ || filename_.empty()) {
+        err = "Buffer is not file-backed; use SaveAs()";
+        return false;
+    }
+    
+    const std::size_t sz = content_.Size();
+    const char *data = sz ? content_.Data() : nullptr;
+    
+    if (!atomic_write_file(filename_, data ? data : "", sz, err)) {
+        ErrorHandler::Instance().Error("Buffer", err, filename_);
+        return false;
+    }
+    
+    return true;
+}
+```
+
+#### Syscall Error Handling with EINTR-Safe Wrappers
+
+kte provides EINTR-safe syscall wrappers in `SyscallWrappers.h` that
+automatically retry on `EINTR`. **Always use these wrappers instead of
+direct syscalls.**
+
+```cpp
+#include "SyscallWrappers.h"
+
+bool open_file(const std::string &path, std::string &err) {
+    int fd = kte::syscall::Open(path.c_str(), O_RDONLY);
+    if (fd < 0) {
+        int saved_errno = errno;  // Capture immediately!
+        err = "Failed to open file '" + path + "': " + std::strerror(saved_errno);
+        ErrorHandler::Instance().Error("Component", err, path);
+        return false;
+    }
+    // ... use fd
+    kte::syscall::Close(fd);
+    return true;
+}
+```
+
+**Available EINTR-safe wrappers**:
+
+- `kte::syscall::Open(path, flags, mode)` - wraps `open(2)`
+- `kte::syscall::Close(fd)` - wraps `close(2)`
+- `kte::syscall::Fsync(fd)` - wraps `fsync(2)`
+- `kte::syscall::Fstat(fd, buf)` - wraps `fstat(2)`
+- `kte::syscall::Fchmod(fd, mode)` - wraps `fchmod(2)`
+- `kte::syscall::Mkstemp(template)` - wraps `mkstemp(3)`
+
+**Note**: `rename(2)` and `unlink(2)` are NOT wrapped because they
+operate on filesystem metadata atomically and don't need EINTR retry.
+
+#### Background Thread Errors
+
+```cpp
+void background_worker() {
+    try {
+        // ... work
+    } catch (const std::exception &e) {
+        std::string msg = std::string("Exception in worker: ") + e.what();
+        ErrorHandler::Instance().Error("WorkerThread", msg);
+    } catch (...) {
+        ErrorHandler::Instance().Error("WorkerThread", "Unknown exception");
+    }
+}
+```
+
+#### Top-Level Exception Handling
+
+```cpp
+int main(int argc, char *argv[]) {
+    try {
+        // ... main logic
+        return 0;
+    } catch (const std::exception &e) {
+        std::string msg = std::string("Unhandled exception: ") + e.what();
+        ErrorHandler::Instance().Critical("main", msg);
+        std::cerr << "FATAL ERROR: " << e.what() << "\n";
+        return 1;
+    } catch (...) {
+        ErrorHandler::Instance().Critical("main", "Unknown exception");
+        std::cerr << "FATAL ERROR: Unknown exception\n";
+        return 1;
+    }
+}
+```
+
+### Error Handling Anti-Patterns
+
+**❌ Don't**: Silently ignore errors
+
+```cpp
+// BAD
+void process() {
+    std::string err;
+    if (!operation(err)) {
+        // Error ignored!
+    }
+}
+```
+
+**✅ Do**: Always handle or propagate errors
+
+```cpp
+// GOOD
+bool process(std::string &err) {
+    if (!operation(err)) {
+        // err already set by operation()
+        return false;
+    }
+    return true;
+}
+```
+
+**❌ Don't**: Use generic error messages
+
+```cpp
+// BAD
+err = "Operation failed";
+```
+
+**✅ Do**: Provide specific, actionable error messages
+
+```cpp
+// GOOD
+err = "Failed to open file '" + path + "': " + std::strerror(errno);
+```
+
+**❌ Don't**: Forget to capture errno
+
+```cpp
+// BAD
+if (::write(fd, data, len) < 0) {
+    // errno might be overwritten by other calls!
+    err = std::strerror(errno);
+}
+```
+
+**✅ Do**: Capture errno immediately
+
+```cpp
+// GOOD
+if (::write(fd, data, len) < 0) {
+    int saved_errno = errno;
+    err = std::strerror(saved_errno);
+}
+```
+
+### Error Log Location
+
+All errors are automatically logged to:
+
+```
+~/.local/state/kte/error.log
+```
+
+Log format:
+
+```
+[2026-02-17 20:12:34.567] [ERROR] SwapManager (buffer.txt): Failed to write swap record
+[2026-02-17 20:12:35.123] [CRITICAL] main: Unhandled exception: out of memory
+```
+
+### Migration Guide
+
+When updating existing code to follow these conventions:
+
+1. **Identify error-prone operations** - File I/O, syscalls, allocations
+2. **Add `std::string &err` parameter** if not present
+3. **Add ErrorHandler calls** at all error sites
+4. **Capture errno** for syscall failures
+5. **Update callers** to handle the error parameter
+6. **Write tests** that verify error handling
+
+### Error Recovery Mechanisms
+
+kte implements automatic error recovery for transient failures using
+retry logic and circuit breaker patterns.
+
+#### Transient Error Classification
+
+Transient errors are temporary failures that may succeed on retry:
+
+```cpp
+#include "ErrorRecovery.h"
+
+bool IsTransientError(int err);  // Returns true for EAGAIN, EWOULDBLOCK, EBUSY, EIO, ETIMEDOUT, ENOSPC, EDQUOT
+```
+
+**Transient errors**:
+
+- `EAGAIN` / `EWOULDBLOCK` - Resource temporarily unavailable
+- `EBUSY` - Device or resource busy
+- `EIO` - I/O error (may be transient on network filesystems)
+- `ETIMEDOUT` - Operation timed out
+- `ENOSPC` - No space left on device (may become available)
+- `EDQUOT` - Disk quota exceeded (may become available)
+
+**Permanent errors** (don't retry):
+
+- `ENOENT` - File not found
+- `EACCES` - Permission denied
+- `EINVAL` - Invalid argument
+- `ENOTDIR` - Not a directory
+
+#### Retry Policies
+
+Three predefined retry policies are available:
+
+```cpp
+// Default: 3 attempts, 100ms initial delay, 2x backoff, 5s max delay
+RetryPolicy::Default()
+
+// Aggressive: 5 attempts, 50ms initial delay, 1.5x backoff, 2s max delay
+// Use for critical operations (swap files, file saves)
+RetryPolicy::Aggressive()
+
+// Conservative: 2 attempts, 200ms initial delay, 2.5x backoff, 10s max delay
+// Use for non-critical operations
+RetryPolicy::Conservative()
+```
+
+#### Using RetryOnTransientError
+
+Wrap syscalls with automatic retry on transient errors:
+
+```cpp
+#include "ErrorRecovery.h"
+#include "SyscallWrappers.h"
+
+bool save_file(const std::string &path, std::string &err) {
+    int fd = -1;
+    auto open_fn = [&]() -> bool {
+        fd = kte::syscall::Open(path.c_str(), O_CREAT | O_WRONLY, 0644);
+        return fd >= 0;
+    };
+    
+    if (!kte::RetryOnTransientError(open_fn, kte::RetryPolicy::Aggressive(), err)) {
+        if (fd < 0) {
+            int saved_errno = errno;
+            err = "Failed to open file '" + path + "': " + std::strerror(saved_errno) + err;
+        }
+        return false;
+    }
+    
+    // ... use fd
+    kte::syscall::Close(fd);
+    return true;
+}
+```
+
+**Key points**:
+
+- Lambda must return `bool` (true = success, false = failure)
+- Lambda must set `errno` on failure for transient error detection
+- Use EINTR-safe syscall wrappers (`kte::syscall::*`) inside lambdas
+- Capture errno immediately after failure
+- Append retry info to error message (automatically added by
+  RetryOnTransientError)
+
+#### Circuit Breaker Pattern
+
+The circuit breaker prevents repeated attempts to failing operations,
+enabling graceful degradation.
+
+**States**:
+
+- **Closed** (normal): All requests allowed
+- **Open** (failing): Requests rejected immediately, operation disabled
+- **HalfOpen** (testing): Limited requests allowed to test recovery
+
+**Configuration** (SwapManager example):
+
+```cpp
+CircuitBreaker::Config cfg;
+cfg.failure_threshold = 5;      // Open after 5 failures
+cfg.timeout = std::chrono::seconds(30);  // Try recovery after 30s
+cfg.success_threshold = 2;      // Close after 2 successes in HalfOpen
+cfg.window = std::chrono::seconds(60);   // Count failures in 60s window
+
+CircuitBreaker breaker(cfg);
+```
+
+**Usage**:
+
+```cpp
+// Check before operation
+if (!breaker.AllowRequest()) {
+    // Circuit is open - graceful degradation
+    log_warning("Operation disabled due to repeated failures");
+    return;  // Skip operation
+}
+
+// Perform operation
+if (operation_succeeds()) {
+    breaker.RecordSuccess();
+} else {
+    breaker.RecordFailure();
+}
+```
+
+**SwapManager Integration**:
+
+The SwapManager uses a circuit breaker to handle repeated swap file
+failures:
+
+1. After 5 swap write failures in 60 seconds, circuit opens
+2. Swap recording is disabled (graceful degradation)
+3. Warning logged once per 60 seconds to avoid spam
+4. After 30 seconds, circuit enters HalfOpen state
+5. If 2 consecutive operations succeed, circuit closes and swap
+   recording resumes
+
+This ensures the editor remains functional even when swap files are
+unavailable (disk full, quota exceeded, filesystem errors).
+
+#### Graceful Degradation Strategies
+
+When operations fail repeatedly:
+
+1. **Disable non-critical features** - Swap recording can be disabled
+   without affecting editing
+2. **Log warnings** - Inform user of degraded operation via ErrorHandler
+3. **Rate-limit warnings** - Avoid log spam (e.g., once per 60 seconds)
+4. **Automatic recovery** - Circuit breaker automatically tests recovery
+5. **Preserve core functionality** - Editor remains usable without swap
+   files
+
+**Example** (from SwapManager):
+
+```cpp
+if (circuit_open) {
+    // Graceful degradation: skip swap write
+    static std::atomic<std::uint64_t> last_warning_ns{0};
+    const std::uint64_t now = now_ns();
+    if (now - last_warning_ns.load() > 60000000000ULL) {
+        last_warning_ns.store(now);
+        ErrorHandler::Instance().Warning("SwapManager", 
+            "Swap operations temporarily disabled due to repeated failures",
+            buffer_name);
+    }
+    return;  // Skip operation, editor continues normally
+}
+```
+
 ## Common Tasks

 ### Adding a New Command
--- a/docs/audits/error-propagation-standardization.md
+++ b/docs/audits/error-propagation-standardization.md
@@ -0,0 +1,549 @@
+# Error Propagation Standardization Report
+
+**Project:** kte (Kyle's Text Editor)  
+**Date:** 2026-02-17  
+**Auditor:** Error Propagation Standardization Review  
+**Language:** C++20
+
+---
+
+## Executive Summary
+
+This report documents the standardization of error propagation patterns
+across the kte codebase. Following the implementation of centralized
+error handling (ErrorHandler), this audit identifies inconsistencies in
+error propagation and provides concrete remediation recommendations.
+
+**Key Findings:**
+
+- **Dominant Pattern**: `bool + std::string &err` is used consistently
+  in Buffer and SwapManager for I/O operations
+- **Inconsistencies**: PieceTable has no error reporting mechanism; some
+  internal helpers lack error propagation
+- **Standard Chosen**: `bool + std::string &err` pattern (C++20 project,
+  std::expected not available)
+- **Documentation**: Comprehensive error handling conventions added to
+  DEVELOPER_GUIDE.md
+
+**Overall Assessment**: The codebase has a **solid foundation** with the
+`bool + err` pattern used consistently in critical I/O paths. Primary
+gaps are in PieceTable memory allocation error handling and some
+internal helper functions.
+
+---
+
+## 1. CURRENT STATE ANALYSIS
+
+### 1.1 Error Propagation Patterns Found
+
+#### Pattern 1: `bool + std::string &err` (Dominant)
+
+**Usage**: File I/O, swap operations, resource allocation
+
+**Examples**:
+
+- `Buffer::OpenFromFile(const std::string &path, std::string &err)` (
+  Buffer.h:72)
+- `Buffer::Save(std::string &err)` (Buffer.h:74)
+- `Buffer::SaveAs(const std::string &path, std::string &err)` (Buffer.h:
+  75)
+- `Editor::OpenFile(const std::string &path, std::string &err)` (
+  Editor.h:536)
+-
+`SwapManager::ReplayFile(Buffer &buf, const std::string &swap_path, std::string &err)` (
+Swap.h:104)
+-
+`SwapManager::open_ctx(JournalCtx &ctx, const std::string &path, std::string &err)` (
+Swap.h:208)
+-
+`SwapManager::compact_to_checkpoint(JournalCtx &ctx, const std::vector<std::uint8_t> &chkpt_record, std::string &err)` (
+Swap.h:212-213)
+
+**Assessment**: ✅ **Excellent** - Consistent, well-implemented,
+integrated with ErrorHandler
+
+#### Pattern 2: `void` (State Changes)
+
+**Usage**: Setters, cursor movement, flag toggles, internal state
+modifications
+
+**Examples**:
+
+- `Buffer::SetCursor(std::size_t x, std::size_t y)` (Buffer.h:348)
+- `Buffer::SetDirty(bool d)` (Buffer.h:368)
+- `Buffer::SetMark(std::size_t x, std::size_t y)` (Buffer.h:387)
+- `Buffer::insert_text(int row, int col, std::string_view text)` (
+  Buffer.h:545)
+- `Buffer::delete_text(int row, int col, std::size_t len)` (Buffer.h:
+  547)
+- `Editor::SetStatus(const std::string &msg)` (Editor.h:various)
+
+**Assessment**: ✅ **Appropriate** - These operations are infallible
+state changes
+
+#### Pattern 3: `bool` without error parameter (Control Flow)
+
+**Usage**: Validation checks, control flow decisions
+
+**Examples**:
+
+- `Editor::ProcessPendingOpens()` (Editor.h:544)
+- `Editor::ResolveRecoveryPrompt(bool yes)` (Editor.h:558)
+- `Editor::SwitchTo(std::size_t index)` (Editor.h:563)
+- `Editor::CloseBuffer(std::size_t index)` (Editor.h:565)
+
+**Assessment**: ✅ **Appropriate** - Success/failure is sufficient for
+control flow
+
+#### Pattern 4: No Error Reporting (PieceTable)
+
+**Usage**: Memory allocation, text manipulation
+
+**Examples**:
+
+- `void PieceTable::Reserve(std::size_t newCapacity)` (PieceTable.h:71)
+- `void PieceTable::Append(const char *s, std::size_t len)` (
+  PieceTable.h:75)
+-
+`void PieceTable::Insert(std::size_t byte_offset, const char *text, std::size_t len)` (
+PieceTable.h:118)
+- `char *PieceTable::Data()` (PieceTable.h:89-93) - returns nullptr on
+  allocation failure
+
+**Assessment**: ⚠️ **Gap** - Memory allocation failures are not reported
+
+---
+
+## 2. STANDARDIZATION DECISION
+
+### 2.1 Chosen Pattern: `bool + std::string &err`
+
+**Rationale**:
+
+1. **C++20 Project**: `std::expected` (C++23) is not available
+2. **Existing Adoption**: Already used consistently in Buffer,
+   SwapManager, Editor for I/O operations
+3. **Clear Semantics**: `bool` return indicates success/failure, `err`
+   provides details
+4. **ErrorHandler Integration**: Works seamlessly with centralized error
+   logging
+5. **Zero Overhead**: No exceptions, no dynamic allocation for error
+   paths
+6. **Testability**: Easy to verify error messages in unit tests
+
+**Alternative Considered**: `std::expected<T, std::string>` (C++23)
+
+- **Rejected**: Requires C++23, would require major refactoring, not
+  available in current toolchain
+
+### 2.2 Pattern Selection Guidelines
+
+| Operation Type      | Pattern                   | Example                                                                           |
+|---------------------|---------------------------|-----------------------------------------------------------------------------------|
+| File I/O            | `bool + std::string &err` | `Buffer::Save(std::string &err)`                                                  |
+| Syscalls            | `bool + std::string &err` | `open_ctx(JournalCtx &ctx, const std::string &path, std::string &err)`            |
+| Resource Allocation | `bool + std::string &err` | Future: `PieceTable::Reserve(std::size_t cap, std::string &err)`                  |
+| Parsing/Validation  | `bool + std::string &err` | `SwapManager::ReplayFile(Buffer &buf, const std::string &path, std::string &err)` |
+| State Changes       | `void`                    | `Buffer::SetCursor(std::size_t x, std::size_t y)`                                 |
+| Control Flow        | `bool` (no err)           | `Editor::SwitchTo(std::size_t index)`                                             |
+
+---
+
+## 3. INCONSISTENCIES AND GAPS
+
+### 3.1 PieceTable Memory Allocation (Severity: 6/10)
+
+**Finding**: PieceTable methods that allocate memory (`Reserve`,
+`Append`, `Insert`, `Data`) do not report allocation failures.
+
+**Impact**:
+
+- Memory allocation failures are silent
+- `Data()` returns `nullptr` on failure, but callers may not check
+- Large file operations could fail without user notification
+
+**Evidence**:
+
+```cpp
+// PieceTable.h:71
+void Reserve(std::size_t newCapacity);  // No error reporting
+
+// PieceTable.h:89-93
+char *Data();  // Returns nullptr on allocation failure
+```
+
+**Remediation Priority**: **Medium** - Memory allocation failures are
+rare on modern systems, but should be handled for robustness
+
+**Recommended Fix**:
+
+**Option 1: Add error parameter to fallible operations** (Preferred)
+
+```cpp
+// PieceTable.h
+bool Reserve(std::size_t newCapacity, std::string &err);
+bool Append(const char *s, std::size_t len, std::string &err);
+bool Insert(std::size_t byte_offset, const char *text, std::size_t len, std::string &err);
+
+// Returns nullptr on failure; check with HasMaterializationError()
+char *Data();
+bool HasMaterializationError() const;
+std::string GetMaterializationError() const;
+```
+
+**Option 2: Use exceptions for allocation failures** (Not recommended)
+
+PieceTable could throw `std::bad_alloc` on allocation failures, but this
+conflicts with the project's error handling philosophy and would require
+exception handling throughout the codebase.
+
+**Option 3: Status quo with improved documentation** (Minimal change)
+
+Document that `Data()` can return `nullptr` and callers must check. Add
+assertions in debug builds.
+
+```cpp
+// PieceTable.h
+// Returns pointer to materialized buffer, or nullptr if materialization fails.
+// Callers MUST check for nullptr before dereferencing.
+char *Data();
+```
+
+**Recommendation**: **Option 3** for now (document + assertions), *
+*Option 1** if memory allocation errors become a concern in production.
+
+### 3.2 Internal Helper Functions (Severity: 4/10)
+
+**Finding**: Some internal helper functions in Swap.cc and Buffer.cc use
+`bool` returns without error parameters.
+
+**Examples**:
+
+```cpp
+// Swap.cc:562
+static bool ensure_parent_dir(const std::string &path);  // No error details
+
+// Swap.cc:579
+static bool write_header(int fd);  // No error details
+
+// Buffer.cc:101
+static bool write_all_fd(int fd, const char *data, std::size_t len, std::string &err);  // ✅ Good
+```
+
+**Impact**: Limited - These are internal helpers called by functions
+that do report errors
+
+**Remediation Priority**: **Low** - Callers already provide error
+context
+
+**Recommended Fix**: Add error parameters to internal helpers for
+consistency
+
+```cpp
+// Swap.cc
+static bool ensure_parent_dir(const std::string &path, std::string &err);
+static bool write_header(int fd, std::string &err);
+```
+
+**Status**: **Deferred** - Low priority, callers already provide
+adequate error context
+
+### 3.3 Editor Control Flow Methods (Severity: 2/10)
+
+**Finding**: Editor methods like `SwitchTo()`, `CloseBuffer()` return
+`bool` without error details.
+
+**Assessment**: ✅ **Appropriate** - These are control flow decisions
+where success/failure is sufficient
+
+**Remediation**: **None needed** - Current pattern is correct for this
+use case
+
+---
+
+## 4. ERRORHANDLER INTEGRATION STATUS
+
+### 4.1 Components with ErrorHandler Integration
+
+✅ **Buffer** (Buffer.cc)
+
+- `OpenFromFile()` - Reports file open, seek, read errors
+- `Save()` - Reports write errors
+- `SaveAs()` - Reports write errors
+
+✅ **SwapManager** (Swap.cc)
+
+- `report_error()` - All swap file errors reported
+- Background thread errors captured and logged
+- Errno captured for all syscalls
+
+✅ **main** (main.cc)
+
+- Top-level exception handler reports Critical errors
+- Both `std::exception` and unknown exceptions captured
+
+### 4.2 Components Without ErrorHandler Integration
+
+⚠️ **PieceTable** (PieceTable.cc)
+
+- No error reporting mechanism
+- Memory allocation failures are silent
+
+⚠️ **Editor** (Editor.cc)
+
+- File operations delegate to Buffer (✅ covered)
+- Control flow methods don't need error reporting (✅ appropriate)
+
+⚠️ **Command** (Command.cc)
+
+- Commands use `Editor::SetStatus()` for user-facing messages
+- No ErrorHandler integration for command failures
+- **Assessment**: Commands are user-initiated actions; status messages
+  are appropriate
+
+---
+
+## 5. DOCUMENTATION STATUS
+
+### 5.1 Error Handling Conventions (DEVELOPER_GUIDE.md)
+
+✅ **Added comprehensive section** covering:
+
+- Three standard error propagation patterns
+- Pattern selection guidelines with decision tree
+- ErrorHandler integration requirements
+- Code examples for file I/O, syscalls, background threads, top-level
+  handlers
+- Anti-patterns and best practices
+- Error log location and format
+- Migration guide for updating existing code
+
+**Location**: `docs/DEVELOPER_GUIDE.md` section 7
+
+### 5.2 API Documentation
+
+⚠️ **Gap**: Individual function documentation in headers could be
+improved
+
+**Recommendation**: Add brief comments to public APIs documenting error
+behavior
+
+```cpp
+// Buffer.h
+// Opens a file and loads its content into the buffer.
+// Returns false on failure; err contains detailed error message.
+// Errors are logged to ErrorHandler.
+bool OpenFromFile(const std::string &path, std::string &err);
+```
+
+---
+
+## 6. REMEDIATION RECOMMENDATIONS
+
+### 6.1 High Priority (Severity 7-10)
+
+**None identified** - Critical error handling gaps were addressed in
+previous sessions:
+
+- ✅ Top-level exception handler added (Severity 9/10)
+- ✅ Background thread error reporting added (Severity 9/10)
+- ✅ File I/O error checking added (Severity 8/10)
+- ✅ Errno capture added to swap operations (Severity 7/10)
+- ✅ Centralized error handling implemented (Severity 7/10)
+
+### 6.2 Medium Priority (Severity 4-6)
+
+#### 6.2.1 PieceTable Memory Allocation Error Handling (Severity: 6/10)
+
+**Action**: Document that `Data()` can return `nullptr` and add debug
+assertions
+
+**Implementation**:
+
+```cpp
+// PieceTable.h
+// Returns pointer to materialized buffer, or nullptr if materialization fails
+// due to memory allocation error. Callers MUST check for nullptr.
+char *Data();
+
+// PieceTable.cc
+char *PieceTable::Data() {
+    materialize();
+    assert(materialized_ != nullptr && "PieceTable materialization failed");
+    return materialized_;
+}
+```
+
+**Effort**: Low (documentation + assertions)  
+**Risk**: Low (no API changes)  
+**Timeline**: Next maintenance cycle
+
+#### 6.2.2 Add Error Parameters to Internal Helpers (Severity: 4/10)
+
+**Action**: Add `std::string &err` parameters to `ensure_parent_dir()`
+and `write_header()`
+
+**Implementation**:
+
+```cpp
+// Swap.cc
+static bool ensure_parent_dir(const std::string &path, std::string &err) {
+    try {
+        fs::path p(path);
+        fs::path dir = p.parent_path();
+        if (dir.empty())
+            return true;
+        if (!fs::exists(dir))
+            fs::create_directories(dir);
+        return true;
+    } catch (const std::exception &e) {
+        err = std::string("Failed to create directory: ") + e.what();
+        return false;
+    } catch (...) {
+        err = "Failed to create directory: unknown error";
+        return false;
+    }
+}
+```
+
+**Effort**: Low (update 2 functions + call sites)  
+**Risk**: Low (internal helpers only)  
+**Timeline**: Next maintenance cycle
+
+### 6.3 Low Priority (Severity 1-3)
+
+#### 6.3.1 Add Function-Level Error Documentation (Severity: 3/10)
+
+**Action**: Add brief comments to public APIs documenting error behavior
+
+**Effort**: Medium (many functions to document)  
+**Risk**: None (documentation only)  
+**Timeline**: Ongoing as code is touched
+
+#### 6.3.2 Add ErrorHandler Integration to Commands (Severity: 2/10)
+
+**Action**: Consider logging command failures to ErrorHandler for
+diagnostics
+
+**Assessment**: **Not recommended** - Commands are user-initiated
+actions; status messages are more appropriate than error logs
+
+---
+
+## 7. TESTING RECOMMENDATIONS
+
+### 7.1 Error Handling Test Coverage
+
+**Current State**:
+
+- ✅ Swap file error handling tested (test_swap_edge_cases.cc)
+- ✅ Buffer I/O error handling tested (test_buffer_io.cc)
+- ⚠️ PieceTable allocation failure testing missing
+
+**Recommendations**:
+
+1. **Add PieceTable allocation failure tests** (if Option 1 from 3.1 is
+   implemented)
+2. **Add ErrorHandler query tests** - Verify error logging and retrieval
+3. **Add errno capture tests** - Verify errno is captured correctly in
+   syscall failures
+
+### 7.2 Test Examples
+
+```cpp
+// test_error_handler.cc
+TEST(ErrorHandler, LogsErrorsWithContext) {
+    ErrorHandler::Instance().Error("TestComponent", "Test error", "test.txt");
+    EXPECT_TRUE(ErrorHandler::Instance().HasErrors());
+    EXPECT_EQ(ErrorHandler::Instance().GetErrorCount(), 1);
+    std::string last = ErrorHandler::Instance().GetLastError();
+    EXPECT_TRUE(last.find("Test error") != std::string::npos);
+    EXPECT_TRUE(last.find("test.txt") != std::string::npos);
+}
+
+// test_piece_table.cc (if Option 1 implemented)
+TEST(PieceTable, ReportsAllocationFailure) {
+    PieceTable pt;
+    std::string err;
+    // Attempt to allocate huge buffer
+    bool ok = pt.Reserve(SIZE_MAX, err);
+    EXPECT_FALSE(ok);
+    EXPECT_FALSE(err.empty());
+}
+```
+
+---
+
+## 8. MIGRATION CHECKLIST
+
+For developers updating existing code to follow error handling
+conventions:
+
+- [ ] Identify all error-prone operations (file I/O, syscalls,
+  allocations)
+- [ ] Add `std::string &err` parameter if not present
+- [ ] Clear `err` at function start: `err.clear();`
+- [ ] Capture `errno` immediately after syscall failures:
+  `int saved_errno = errno;`
+- [ ] Build detailed error messages with context (paths, operation
+  details)
+- [ ] Call `ErrorHandler::Instance().Error()` at all error sites
+- [ ] Return `false` on failure, `true` on success
+- [ ] Update all call sites to handle the error parameter
+- [ ] Write unit tests that verify error handling
+- [ ] Update function documentation to describe error behavior
+
+---
+
+## 9. SUMMARY AND NEXT STEPS
+
+### 9.1 Achievements
+
+✅ **Standardized on `bool + std::string &err` pattern** for error-prone
+operations  
+✅ **Documented comprehensive error handling conventions** in
+DEVELOPER_GUIDE.md  
+✅ **Identified and prioritized remaining gaps** (PieceTable, internal
+helpers)  
+✅ **Integrated ErrorHandler** into Buffer, SwapManager, and main  
+✅ **Established clear pattern selection guidelines** for future
+development
+
+### 9.2 Remaining Work
+
+**Medium Priority**:
+
+1. Document PieceTable `Data()` nullptr behavior and add assertions
+2. Add error parameters to internal helper functions
+
+**Low Priority**:
+
+3. Add function-level error documentation to public APIs
+4. Add ErrorHandler query tests
+
+### 9.3 Conclusion
+
+The kte codebase has achieved **strong error handling consistency** with
+the `bool + std::string &err` pattern used uniformly across critical I/O
+paths. The centralized ErrorHandler provides comprehensive logging and
+UI integration. Remaining gaps are minor and primarily affect edge
+cases (memory allocation failures) that are rare in practice.
+
+**Overall Grade**: **B+ (8.5/10)**
+
+**Strengths**:
+
+- Consistent error propagation in Buffer and SwapManager
+- Comprehensive ErrorHandler integration
+- Excellent documentation in DEVELOPER_GUIDE.md
+- Errno capture for all syscalls
+- Top-level exception handling
+
+**Areas for Improvement**:
+
+- PieceTable memory allocation error handling
+- Internal helper function error propagation
+- Function-level API documentation
+
+The error handling infrastructure is **production-ready** and provides a
+solid foundation for reliable operation and debugging.
--- a/main.cc
+++ b/main.cc
@@ -20,6 +20,7 @@
 #include "Editor.h"
 #include "Frontend.h"
 #include "TerminalFrontend.h"
+#include "ErrorHandler.h"

 #if defined(KTE_BUILD_GUI)
 #if defined(KTE_USE_QT)
@@ -181,10 +182,13 @@ main(int argc, char *argv[])
 		return RunStressHighlighter(stress_seconds);
 	}

+	// Top-level exception handler to prevent data loss and ensure cleanup
+	try {
 		// Determine frontend
 #if !defined(KTE_BUILD_GUI)
 		if (req_gui) {
-		std::cerr << "kte: GUI not built. Reconfigure with -DBUILD_GUI=ON and required deps installed." <<
+			std::cerr << "kte: GUI not built. Reconfigure with -DBUILD_GUI=ON and required deps installed."
+				<<
 				std::endl;
 			return 2;
 		}
@@ -195,6 +199,9 @@ main(int argc, char *argv[])
 		} else if (req_term) {
 			use_gui = false;
 		} else {
+
+
+
 		// Default depends on build target: kge defaults to GUI, kte to terminal
 #if defined(KTE_DEFAULT_GUI)
 		use_gui = true;
@@ -301,4 +308,18 @@ main(int argc, char *argv[])
 		fe->Shutdown();

 		return 0;
+	} catch (const std::exception &e) {
+		std::string msg = std::string("Unhandled exception: ") + e.what();
+		kte::ErrorHandler::Instance().Critical("main", msg);
+		std::cerr << "\n*** FATAL ERROR ***\n"
+			<< "kte encountered an unhandled exception: " << e.what() << "\n"
+			<< "The editor will now exit. Any unsaved changes may be recovered from swap files.\n";
+		return 1;
+	} catch (...) {
+		kte::ErrorHandler::Instance().Critical("main", "Unknown exception");
+		std::cerr << "\n*** FATAL ERROR ***\n"
+			<< "kte encountered an unknown exception.\n"
+			<< "The editor will now exit. Any unsaved changes may be recovered from swap files.\n";
+		return 1;
+	}
 }
--- a/tests/test_swap_edge_cases.cc
+++ b/tests/test_swap_edge_cases.cc
@@ -0,0 +1,813 @@
+#include "Test.h"
+
+#include "Buffer.h"
+#include "Swap.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <fstream>
+#include <string>
+#include <vector>
+
+
+// CRC32 helper (same algorithm as SwapManager::crc32)
+static std::uint32_t
+crc32(const std::uint8_t *data, std::size_t len, std::uint32_t seed = 0)
+{
+	static std::uint32_t table[256];
+	static bool inited = false;
+	if (!inited) {
+		for (std::uint32_t i = 0; i < 256; ++i) {
+			std::uint32_t c = i;
+			for (int j = 0; j < 8; ++j)
+				c = (c & 1) ? (0xEDB88320u ^ (c >> 1)) : (c >> 1);
+			table[i] = c;
+		}
+		inited = true;
+	}
+	std::uint32_t c = ~seed;
+	for (std::size_t i = 0; i < len; ++i)
+		c = table[(c ^ data[i]) & 0xFFu] ^ (c >> 8);
+	return ~c;
+}
+
+
+// Build a valid 64-byte swap file header
+static std::string
+build_swap_header()
+{
+	std::uint8_t hdr[64];
+	std::memset(hdr, 0, sizeof(hdr));
+	// Magic
+	const std::uint8_t magic[8] = {'K', 'T', 'E', '_', 'S', 'W', 'P', '\0'};
+	std::memcpy(hdr, magic, 8);
+	// Version = 1 (little-endian)
+	hdr[8]  = 1;
+	hdr[9]  = 0;
+	hdr[10] = 0;
+	hdr[11] = 0;
+	// Flags = 0
+	// Created time (just use 0 for tests)
+	return std::string(reinterpret_cast<char *>(hdr), sizeof(hdr));
+}
+
+
+// Build a swap record: [type u8][len u24][payload][crc32 u32]
+static std::string
+build_swap_record(std::uint8_t type, const std::vector<std::uint8_t> &payload)
+{
+	std::vector<std::uint8_t> record;
+
+	// Record header: type(1) + length(3)
+	record.push_back(type);
+	std::uint32_t len = static_cast<std::uint32_t>(payload.size());
+	record.push_back(static_cast<std::uint8_t>(len & 0xFFu));
+	record.push_back(static_cast<std::uint8_t>((len >> 8) & 0xFFu));
+	record.push_back(static_cast<std::uint8_t>((len >> 16) & 0xFFu));
+
+	// Payload
+	record.insert(record.end(), payload.begin(), payload.end());
+
+	// CRC32 (compute over header + payload)
+	std::uint32_t crc = crc32(record.data(), record.size());
+	record.push_back(static_cast<std::uint8_t>(crc & 0xFFu));
+	record.push_back(static_cast<std::uint8_t>((crc >> 8) & 0xFFu));
+	record.push_back(static_cast<std::uint8_t>((crc >> 16) & 0xFFu));
+	record.push_back(static_cast<std::uint8_t>((crc >> 24) & 0xFFu));
+
+	return std::string(reinterpret_cast<char *>(record.data()), record.size());
+}
+
+
+// Build complete swap file with header and records
+static std::string
+build_swap_file(const std::vector<std::string> &records)
+{
+	std::string file = build_swap_header();
+	for (const auto &rec: records) {
+		file += rec;
+	}
+	return file;
+}
+
+
+// Write bytes to file
+static void
+write_file_bytes(const std::string &path, const std::string &bytes)
+{
+	std::ofstream out(path, std::ios::binary | std::ios::trunc);
+	out.write(bytes.data(), static_cast<std::streamsize>(bytes.size()));
+}
+
+
+// Helper to encode u32 little-endian
+static void
+put_u32_le(std::vector<std::uint8_t> &out, std::uint32_t v)
+{
+	out.push_back(static_cast<std::uint8_t>(v & 0xFFu));
+	out.push_back(static_cast<std::uint8_t>((v >> 8) & 0xFFu));
+	out.push_back(static_cast<std::uint8_t>((v >> 16) & 0xFFu));
+	out.push_back(static_cast<std::uint8_t>((v >> 24) & 0xFFu));
+}
+
+
+//=============================================================================
+// 1. MINIMUM VALID PAYLOAD SIZE TESTS
+//=============================================================================
+
+TEST (SwapEdge_INS_MinimumValidPayload)
+{
+	const std::string path      = "./.kte_ut_edge_ins_min.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_min.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record: encver(1) + row(4) + col(4) + nbytes(4) = 13 bytes minimum
+	// nbytes=0 means zero-length insertion
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 0); // nbytes=0
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_DEL_MinimumValidPayload)
+{
+	const std::string path      = "./.kte_ut_edge_del_min.txt";
+	const std::string swap_path = "./.kte_ut_edge_del_min.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// DEL record: encver(1) + row(4) + col(4) + dlen(4) = 13 bytes minimum
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 0); // dlen=0
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::DEL), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_SPLIT_MinimumValidPayload)
+{
+	const std::string path      = "./.kte_ut_edge_split_min.txt";
+	const std::string swap_path = "./.kte_ut_edge_split_min.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// SPLIT record: encver(1) + row(4) + col(4) = 9 bytes minimum
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::SPLIT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_JOIN_MinimumValidPayload)
+{
+	const std::string path      = "./.kte_ut_edge_join_min.txt";
+	const std::string swap_path = "./.kte_ut_edge_join_min.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\nworld\n");
+
+	// JOIN record: encver(1) + row(4) = 5 bytes minimum
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::JOIN), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_CHKPT_MinimumValidPayload)
+{
+	const std::string path      = "./.kte_ut_edge_chkpt_min.txt";
+	const std::string swap_path = "./.kte_ut_edge_chkpt_min.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// CHKPT record: encver(1) + nbytes(4) = 5 bytes minimum
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // nbytes=0
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::CHKPT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 2. TRUNCATED PAYLOAD TESTS (BELOW MINIMUM)
+//=============================================================================
+
+TEST (SwapEdge_INS_TruncatedPayload_1Byte)
+{
+	const std::string path      = "./.kte_ut_edge_ins_trunc1.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_trunc1.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with only 1 byte (just encver)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver only
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("INS payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_INS_TruncatedPayload_5Bytes)
+{
+	const std::string path      = "./.kte_ut_edge_ins_trunc5.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_trunc5.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with 5 bytes (encver + row only)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("INS payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_DEL_TruncatedPayload_9Bytes)
+{
+	const std::string path      = "./.kte_ut_edge_del_trunc9.txt";
+	const std::string swap_path = "./.kte_ut_edge_del_trunc9.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// DEL record with 9 bytes (encver + row + col, missing dlen)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	// missing dlen
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::DEL), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("DEL payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_SPLIT_TruncatedPayload_5Bytes)
+{
+	const std::string path      = "./.kte_ut_edge_split_trunc5.txt";
+	const std::string swap_path = "./.kte_ut_edge_split_trunc5.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// SPLIT record with 5 bytes (encver + row, missing col)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	// missing col
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::SPLIT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("SPLIT payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_JOIN_TruncatedPayload_1Byte)
+{
+	const std::string path      = "./.kte_ut_edge_join_trunc1.txt";
+	const std::string swap_path = "./.kte_ut_edge_join_trunc1.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\nworld\n");
+
+	// JOIN record with 1 byte (just encver)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver only
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::JOIN), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("JOIN payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_CHKPT_TruncatedPayload_3Bytes)
+{
+	const std::string path      = "./.kte_ut_edge_chkpt_trunc3.txt";
+	const std::string swap_path = "./.kte_ut_edge_chkpt_trunc3.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// CHKPT record with 3 bytes (encver + partial nbytes)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	payload.push_back(0); // partial nbytes (only 2 bytes instead of 4)
+	payload.push_back(0);
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::CHKPT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("CHKPT payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 3. DATA OVERFLOW TESTS
+//=============================================================================
+
+TEST (SwapEdge_INS_TruncatedData_NbytesExceedsPayload)
+{
+	const std::string path      = "./.kte_ut_edge_ins_overflow.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_overflow.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record where nbytes=100 but payload only contains 13 bytes total
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 100); // nbytes=100 (but no data follows)
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("Truncated INS payload bytes") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_CHKPT_TruncatedData_NbytesExceedsPayload)
+{
+	const std::string path      = "./.kte_ut_edge_chkpt_overflow.txt";
+	const std::string swap_path = "./.kte_ut_edge_chkpt_overflow.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// CHKPT record where nbytes=1000 but payload only contains 5 bytes total
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 1000); // nbytes=1000 (but no data follows)
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::CHKPT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("Truncated CHKPT payload bytes") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 4. UNSUPPORTED ENCODING VERSION TESTS
+//=============================================================================
+
+TEST (SwapEdge_INS_UnsupportedEncodingVersion)
+{
+	const std::string path      = "./.kte_ut_edge_ins_badenc.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_badenc.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with encver=2 (unsupported)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(2); // encver=2 (unsupported)
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 0); // nbytes
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("Unsupported swap payload encoding") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_CHKPT_UnsupportedEncodingVersion)
+{
+	const std::string path      = "./.kte_ut_edge_chkpt_badenc.txt";
+	const std::string swap_path = "./.kte_ut_edge_chkpt_badenc.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// CHKPT record with encver=99 (unsupported)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(99); // encver=99 (unsupported)
+	put_u32_le(payload, 0); // nbytes
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::CHKPT), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("Unsupported swap checkpoint encoding") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 5. BOUNDARY CONDITION TESTS
+//=============================================================================
+
+TEST (SwapEdge_INS_ExactlyEnoughBytes)
+{
+	const std::string path      = "./.kte_ut_edge_ins_exact.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_exact.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with nbytes=10 and exactly 23 bytes total (13 header + 10 data)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 10); // nbytes=10
+	// Add exactly 10 bytes of data
+	for (int i = 0; i < 10; i++) {
+		payload.push_back('X');
+	}
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_TRUE(kte::SwapManager::ReplayFile(b, swap_path, err));
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_INS_OneByteTooFew)
+{
+	const std::string path      = "./.kte_ut_edge_ins_toofew.txt";
+	const std::string swap_path = "./.kte_ut_edge_ins_toofew.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with nbytes=10 but only 22 bytes total (13 header + 9 data)
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1); // encver
+	put_u32_le(payload, 0); // row
+	put_u32_le(payload, 0); // col
+	put_u32_le(payload, 10); // nbytes=10
+	// Add only 9 bytes of data (one too few)
+	for (int i = 0; i < 9; i++) {
+		payload.push_back('X');
+	}
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("Truncated INS payload bytes") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 6. MIXED VALID AND INVALID RECORDS
+//=============================================================================
+
+TEST (SwapEdge_MixedRecords_ValidThenInvalid)
+{
+	const std::string path      = "./.kte_ut_edge_mixed1.txt";
+	const std::string swap_path = "./.kte_ut_edge_mixed1.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// First record: valid INS
+	std::vector<std::uint8_t> payload1;
+	payload1.push_back(1); // encver
+	put_u32_le(payload1, 0); // row
+	put_u32_le(payload1, 0); // col
+	put_u32_le(payload1, 1); // nbytes=1
+	payload1.push_back('X'); // data
+
+	std::string rec1 = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload1);
+
+	// Second record: truncated DEL
+	std::vector<std::uint8_t> payload2;
+	payload2.push_back(1); // encver only
+
+	std::string rec2 = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::DEL), payload2);
+
+	std::string file = build_swap_file({rec1, rec2});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("DEL payload too short") != std::string::npos);
+
+	// Verify first INS was applied before failure
+	auto view = b.GetLineView(0);
+	std::string line(view.data(), view.size());
+	ASSERT_TRUE(line.find('X') != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+TEST (SwapEdge_MixedRecords_MultipleValidOneInvalid)
+{
+	const std::string path      = "./.kte_ut_edge_mixed2.txt";
+	const std::string swap_path = "./.kte_ut_edge_mixed2.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "ab\n");
+
+	// First record: valid INS at (0,0)
+	std::vector<std::uint8_t> payload1;
+	payload1.push_back(1);
+	put_u32_le(payload1, 0);
+	put_u32_le(payload1, 0);
+	put_u32_le(payload1, 1);
+	payload1.push_back('X');
+	std::string rec1 = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload1);
+
+	// Second record: valid INS at (0,1)
+	std::vector<std::uint8_t> payload2;
+	payload2.push_back(1);
+	put_u32_le(payload2, 0);
+	put_u32_le(payload2, 1);
+	put_u32_le(payload2, 1);
+	payload2.push_back('Y');
+	std::string rec2 = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload2);
+
+	// Third record: truncated SPLIT
+	std::vector<std::uint8_t> payload3;
+	payload3.push_back(1); // encver only
+	std::string rec3 = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::SPLIT), payload3);
+
+	std::string file = build_swap_file({rec1, rec2, rec3});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("SPLIT payload too short") != std::string::npos);
+
+	// Verify first two INS were applied
+	auto view = b.GetLineView(0);
+	std::string line(view.data(), view.size());
+	ASSERT_TRUE(line.find('X') != std::string::npos);
+	ASSERT_TRUE(line.find('Y') != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 7. EMPTY PAYLOAD TEST
+//=============================================================================
+
+TEST (SwapEdge_EmptyPayload_INS)
+{
+	const std::string path      = "./.kte_ut_edge_empty.txt";
+	const std::string swap_path = "./.kte_ut_edge_empty.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// INS record with zero-length payload
+	std::vector<std::uint8_t> payload; // empty
+
+	std::string rec  = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("INS payload too short") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
+
+
+//=============================================================================
+// 8. CRC MISMATCH TEST
+//=============================================================================
+
+TEST (SwapEdge_ValidStructure_BadCRC)
+{
+	const std::string path      = "./.kte_ut_edge_badcrc.txt";
+	const std::string swap_path = "./.kte_ut_edge_badcrc.swp";
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+
+	write_file_bytes(path, "hello\n");
+
+	// Build a valid INS record
+	std::vector<std::uint8_t> payload;
+	payload.push_back(1);
+	put_u32_le(payload, 0);
+	put_u32_le(payload, 0);
+	put_u32_le(payload, 1);
+	payload.push_back('X');
+
+	std::string rec = build_swap_record(static_cast<std::uint8_t>(kte::SwapRecType::INS), payload);
+
+	// Corrupt the CRC (last 4 bytes)
+	rec[rec.size() - 1] ^= 0xFF;
+
+	std::string file = build_swap_file({rec});
+	write_file_bytes(swap_path, file);
+
+	Buffer b;
+	std::string err;
+	ASSERT_TRUE(b.OpenFromFile(path, err));
+	ASSERT_EQ(kte::SwapManager::ReplayFile(b, swap_path, err), false);
+	ASSERT_TRUE(err.find("CRC mismatch") != std::string::npos);
+
+	std::remove(path.c_str());
+	std::remove(swap_path.c_str());
+}
Author	SHA1	Message	Date
Kyle Isom	690c51b0f3	MacOS: remove static linking. Bump minor version.	2026-02-19 21:00:29 -08:00
Kyle Isom	0d87bc0b25	Introduce error recovery mechanisms with retry logic and circuit breaker integration. - Added `ErrorRecovery.cc` and `ErrorRecovery.h` for retry and circuit breaker implementations. - Enhanced swap file handling with transient error retries and exponential backoff (e.g., ENOSPC, EDQUOT). - Integrated circuit breaker into SwapManager to gracefully handle repeated failures, prevent system overload, and enable automatic recovery. - Updated `DEVELOPER_GUIDE.md` with comprehensive documentation on error recovery patterns and graceful degradation strategies. - Refined fsync, temp file creation, and swap file logic with retry-on-failure mechanisms for improved resilience.	2026-02-17 21:38:40 -08:00
Kyle Isom	daeeecb342	Standardize error handling patterns and improve ErrorHandler integration. - Added a comprehensive error propagation standardization report detailing dominant patterns, inconsistencies, and recommended remediations (`docs/audits/error-propagation-standardization.md`). - Integrated `ErrorHandler` into key components, including `main.cc` for robust exception reporting, and added centralized logging to a user state path. - Introduced EINTR-safe syscall wrappers (`SyscallWrappers.h`, `.cc`) to improve resilience of file and metadata operations. - Enhanced `DEVELOPER_GUIDE.md` with an error handling conventions section, covering pattern guidelines and best practices. - Identified gaps in `PieceTable` and internal helpers; deferred fixes with detailed recommendations for improved memory allocation error reporting.	2026-02-17 21:25:19 -08:00
Kyle Isom	a428b204a0	Improve exception robustness. - Introduced `test_swap_edge_cases.cc` with extensive tests for minimum payload sizes, truncated payloads, data overflows, unsupported encoding versions, CRC mismatches, and mixed valid/invalid records to ensure reliability under complex scenarios. - Enhanced `main.cc` with a top-level exception handler to prevent data loss and ensure cleanup during unexpected failures.	2026-02-17 20:12:09 -08:00
Kyle Isom	a21409e689	Remove PID from unnamed buffer swap names.	2026-02-17 17:17:55 -08:00