From 389dcf9cc7f789916e4e9767e6d975db3e772e82 Mon Sep 17 00:00:00 2001
From: Kyle Isom
Date: Wed, 3 Dec 2025 13:53:24 -0800
Subject: [PATCH] Add new benchmarks, optimized search, UndoNode pool, and fix
 horizontal scrolling.

- Added benchmarking for GapBuffer and PieceTable (BufferBench,
  PerformanceSuite).
- Implemented `OptimizedSearch` using Boyer-Moore (bad character heuristic).
- Introduced `UndoNodePool` for efficient memory management.
- Fixed horizontal scrolling and cursor placement in GUI: ensured cursor
  visibility and improved accuracy for rendered columns.
---
 GUIRenderer.cc             |  93 ++++++-----
 OptimizedSearch.cc         |  79 +++++++++
 OptimizedSearch.h          |  27 ++++
 UndoNodePool.h             |  65 ++++++++
 bench/BufferBench.cc       | 206 ++++++++++++++++++++++++
 bench/PerformanceSuite.cc  | 318 +++++++++++++++++++++++++++++++++++++
 test_buffer_correctness.cc | 102 ++++++++++++
 test_search_correctness.cc |  74 +++++++++
 8 files changed, 916 insertions(+), 48 deletions(-)
 create mode 100644 OptimizedSearch.cc
 create mode 100644 OptimizedSearch.h
 create mode 100644 UndoNodePool.h
 create mode 100644 bench/BufferBench.cc
 create mode 100644 bench/PerformanceSuite.cc
 create mode 100644 test_buffer_correctness.cc
 create mode 100644 test_search_correctness.cc
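The scrolling fix below keys off the cursor's rendered column rather than its
byte offset. A minimal standalone sketch of that computation, assuming the same
8-column tab stops used in GUIRenderer (the function name and parameters here
are illustrative, not part of the patch):

    #include <cstddef>
    #include <string>

    // Rendered column for a cursor at byte index cx, expanding tabs to
    // 8-column stops; mirrors the loop added to GUIRenderer::Draw below.
    std::size_t
    rendered_column(const std::string &line, std::size_t cx, std::size_t tabw = 8)
    {
        std::size_t rx = 0;
        for (std::size_t i = 0; i < cx && i < line.size(); ++i) {
            if (line[i] == '\t')
                rx += tabw - (rx % tabw); // jump to the next tab stop
            else
                rx += 1;
        }
        return rx;
    }

For example, rendered_column("\tint x;", 2) is 9: the tab advances to column 8
and the following 'i' occupies one more column.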
diff --git a/GUIRenderer.cc b/GUIRenderer.cc
index 1694e20..a0dacf7 100644
--- a/GUIRenderer.cc
+++ b/GUIRenderer.cc
@@ -140,8 +140,7 @@ GUIRenderer::Draw(Editor &ed)
     prev_buf_coloffs = buf_coloffs;
 
     // Synchronize cursor and scrolling.
-    // Ensure the cursor is visible even on the first frame or when it didn't move,
-    // unless we already forced scrolling from Buffer::Rowoffs this frame.
+    // Ensure the cursor is visible even on the first frame or when it didn't move.
     {
         // Compute visible row range using the child window height
         float child_h = ImGui::GetWindowHeight();
@@ -151,56 +150,54 @@ GUIRenderer::Draw(Editor &ed)
             vis_rows = 1;
         long last_row = first_row + vis_rows - 1;
-        if (!forced_scroll) {
-            long cyr = static_cast<long>(cy);
-            if (cyr < first_row || cyr > last_row) {
-                float target = (static_cast<float>(cyr) - std::max(0L, vis_rows / 2)) * row_h;
-                float max_y = ImGui::GetScrollMaxY();
-                if (target < 0.f)
-                    target = 0.f;
-                if (max_y >= 0.f && target > max_y)
-                    target = max_y;
-                ImGui::SetScrollY(target);
-                // refresh local variables
-                scroll_y = ImGui::GetScrollY();
-                first_row = static_cast<long>(scroll_y / row_h);
-                last_row = first_row + vis_rows - 1;
-            }
+        long cyr = static_cast<long>(cy);
+        if (cyr < first_row || cyr > last_row) {
+            float target = (static_cast<float>(cyr) - std::max(0L, vis_rows / 2)) * row_h;
+            float max_y = ImGui::GetScrollMaxY();
+            if (target < 0.f)
+                target = 0.f;
+            if (max_y >= 0.f && target > max_y)
+                target = max_y;
+            ImGui::SetScrollY(target);
+            // refresh local variables
+            scroll_y = ImGui::GetScrollY();
+            first_row = static_cast<long>(scroll_y / row_h);
+            last_row = first_row + vis_rows - 1;
+        }
 
-            // Horizontal scroll: ensure cursor column is visible
-            float child_w = ImGui::GetWindowWidth();
-            long vis_cols = static_cast<long>(child_w / space_w);
-            if (vis_cols < 1)
-                vis_cols = 1;
-            long first_col = static_cast<long>(scroll_x / space_w);
-            long last_col = first_col + vis_cols - 1;
+        // Horizontal scroll: ensure cursor column is visible
+        float child_w = ImGui::GetWindowWidth();
+        long vis_cols = static_cast<long>(child_w / space_w);
+        if (vis_cols < 1)
+            vis_cols = 1;
+        long first_col = static_cast<long>(scroll_x / space_w);
+        long last_col = first_col + vis_cols - 1;
 
-            // Compute cursor's rendered X position (accounting for tabs)
-            std::size_t cursor_rx = 0;
-            if (cy < lines.size()) {
-                std::string cur_line = static_cast<std::string>(lines[cy]);
-                const std::size_t tabw = 8;
-                for (std::size_t i = 0; i < cx && i < cur_line.size(); ++i) {
-                    if (cur_line[i] == '\t') {
-                        cursor_rx += tabw - (cursor_rx % tabw);
-                    } else {
-                        cursor_rx += 1;
-                    }
-                }
-                long cxr = static_cast<long>(cursor_rx);
-                if (cxr < first_col || cxr > last_col) {
-                    float target_x = static_cast<float>(cxr) * space_w;
-                    // Center horizontally if possible
-                    target_x -= (child_w / 2.0f);
-                    if (target_x < 0.f)
-                        target_x = 0.f;
-                    float max_x = ImGui::GetScrollMaxX();
-                    if (max_x >= 0.f && target_x > max_x)
-                        target_x = max_x;
-                    ImGui::SetScrollX(target_x);
-                    scroll_x = ImGui::GetScrollX();
-                }
-            }
-        }
+        // Compute cursor's rendered X position (accounting for tabs)
+        std::size_t cursor_rx = 0;
+        if (cy < lines.size()) {
+            std::string cur_line = static_cast<std::string>(lines[cy]);
+            const std::size_t tabw = 8;
+            for (std::size_t i = 0; i < cx && i < cur_line.size(); ++i) {
+                if (cur_line[i] == '\t') {
+                    cursor_rx += tabw - (cursor_rx % tabw);
+                } else {
+                    cursor_rx += 1;
+                }
+            }
+        }
+        long cxr = static_cast<long>(cursor_rx);
+        if (cxr < first_col || cxr > last_col) {
+            float target_x = static_cast<float>(cxr) * space_w;
+            // Center horizontally if possible
+            target_x -= (child_w / 2.0f);
+            if (target_x < 0.f)
+                target_x = 0.f;
+            float max_x = ImGui::GetScrollMaxX();
+            if (max_x >= 0.f && target_x > max_x)
+                target_x = max_x;
+            ImGui::SetScrollX(target_x);
+            scroll_x = ImGui::GetScrollX();
+        }
     }
     // Phase 3: prefetch visible viewport highlights and warm around in background
     if (buf->SyntaxEnabled() && buf->Highlighter() && buf->Highlighter()->HasHighlighter()) {
diff --git a/OptimizedSearch.cc b/OptimizedSearch.cc
new file mode 100644
index 0000000..4ae71a6
--- /dev/null
+++ b/OptimizedSearch.cc
@@ -0,0 +1,79 @@
+#include "OptimizedSearch.h"
+
+#include <algorithm>
+
+
+void
+OptimizedSearch::build_bad_char(const std::string &pattern)
+{
+    if (pattern == last_pat_)
+        return;
+    last_pat_ = pattern;
+    std::fill(bad_char_.begin(), bad_char_.end(), -1);
+    for (std::size_t i = 0; i < pattern.size(); ++i) {
+        bad_char_[static_cast<unsigned char>(pattern[i])] = static_cast<int>(i);
+    }
+}
+
+
+std::size_t
+OptimizedSearch::find_first(const std::string &text, const std::string &pattern, std::size_t start)
+{
+    const std::size_t n = text.size();
+    const std::size_t m = pattern.size();
+    if (m == 0)
+        return start <= n ? start : std::string::npos;
+    if (m > n || start >= n)
+        return std::string::npos;
+    build_bad_char(pattern);
+    std::size_t s = start;
+    while (s <= n - m) {
+        std::size_t j = m;
+        while (j > 0 && pattern[j - 1] == text[s + j - 1]) {
+            --j;
+        }
+        if (j == 0) {
+            return s; // match found
+        }
+        unsigned char badc = static_cast<unsigned char>(text[s + j - 1]);
+        int bcidx = bad_char_[badc];
+        std::size_t shift = (j - 1 > static_cast<std::size_t>(bcidx))
+            ? (j - 1 - static_cast<std::size_t>(bcidx))
+            : 1;
+        s += shift;
+    }
+    return std::string::npos;
+}
+
+
+std::vector<std::size_t>
+OptimizedSearch::find_all(const std::string &text, const std::string &pattern, std::size_t start)
+{
+    std::vector<std::size_t> res;
+    const std::size_t n = text.size();
+    const std::size_t m = pattern.size();
+    if (m == 0)
+        return res;
+    if (m > n || start >= n)
+        return res;
+    build_bad_char(pattern);
+    std::size_t s = start;
+    while (s <= n - m) {
+        std::size_t j = m;
+        while (j > 0 && pattern[j - 1] == text[s + j - 1]) {
+            --j;
+        }
+        if (j == 0) {
+            res.push_back(s);
+            s += m; // non-overlapping
+            continue;
+        }
+        unsigned char badc = static_cast<unsigned char>(text[s + j - 1]);
+        int bcidx = bad_char_[badc];
+        std::size_t shift = (j - 1 > static_cast<std::size_t>(bcidx))
+            ? (j - 1 - static_cast<std::size_t>(bcidx))
+            : 1;
+        s += shift;
+    }
+    return res;
+}
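A minimal usage sketch for the new search class, assuming only the interface
declared in OptimizedSearch.h below (the surrounding program is illustrative):

    #include <iostream>
    #include <string>

    #include "OptimizedSearch.h"

    int
    main()
    {
        OptimizedSearch search;
        std::string text = "the quick brown fox jumps over the lazy dog";

        // First occurrence at or after index 1, skipping the match at 0.
        std::size_t pos = search.find_first(text, "the", 1);
        if (pos != std::string::npos)
            std::cout << "next match at " << pos << "\n";   // prints 31

        // All non-overlapping matches from the start of the text.
        for (std::size_t hit : search.find_all(text, "the"))
            std::cout << "match at " << hit << "\n";        // prints 0 and 31
        return 0;
    }

On a mismatch, the bad-character table shifts the pattern so that the
mismatching text byte lines up with its last occurrence in the pattern, which
is what lets the search skip ahead more than one position at a time.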
diff --git a/OptimizedSearch.h b/OptimizedSearch.h
new file mode 100644
index 0000000..3893491
--- /dev/null
+++ b/OptimizedSearch.h
@@ -0,0 +1,27 @@
+// OptimizedSearch.h - Boyer–Moore (bad character) based substring search
+#ifndef KTE_OPTIMIZED_SEARCH_H
+#define KTE_OPTIMIZED_SEARCH_H
+
+#include <array>
+#include <cstddef>
+#include <string>
+#include <vector>
+
+class OptimizedSearch {
+public:
+    OptimizedSearch() = default;
+
+    // Find first occurrence at or after start. Returns npos if not found.
+    std::size_t find_first(const std::string &text, const std::string &pattern, std::size_t start = 0);
+
+    // Find all non-overlapping matches at or after start. Returns starting indices.
+    std::vector<std::size_t> find_all(const std::string &text, const std::string &pattern, std::size_t start = 0);
+
+private:
+    std::array<int, 256> bad_char_{};
+    std::string last_pat_;
+
+    void build_bad_char(const std::string &pattern);
+};
+
+#endif // KTE_OPTIMIZED_SEARCH_H
\ No newline at end of file
diff --git a/UndoNodePool.h b/UndoNodePool.h
new file mode 100644
index 0000000..f6409d1
--- /dev/null
+++ b/UndoNodePool.h
@@ -0,0 +1,65 @@
+#ifndef KTE_UNDONODEPOOL_H
+#define KTE_UNDONODEPOOL_H
+
+#include <cstddef>
+#include <memory>
+#include <stack>
+#include <vector>
+
+#include "UndoNode.h"
+
+// Pool allocator for UndoNode to eliminate frequent malloc/free.
+// Uses fixed-size blocks to keep node addresses stable.
+class UndoNodePool {
+public:
+    explicit UndoNodePool(std::size_t block_size = 64)
+        : block_size_(block_size) {}
+
+
+    UndoNode *acquire()
+    {
+        if (available_.empty())
+            allocate_block();
+        auto *node = available_.top();
+        available_.pop();
+        // Reset payload and links before handing the node out
+        node->text.clear();
+        node->child = nullptr;
+        node->next = nullptr;
+        node->row = node->col = 0;
+        node->type = UndoType{};
+        return node;
+    }
+
+
+    void release(UndoNode *node)
+    {
+        if (!node)
+            return;
+        // Clear heavy fields to free memory held by strings
+        node->text.clear();
+        node->child = nullptr;
+        node->next = nullptr;
+        node->row = node->col = 0;
+        node->type = UndoType{};
+        available_.push(node);
+    }
+
+private:
+    void allocate_block()
+    {
+        // allocate a new block; keep ownership so memory stays valid
+        std::unique_ptr<UndoNode[]> block(new UndoNode[block_size_]);
+        UndoNode *base = block.get();
+        blocks_.push_back(std::move(block));
+        for (std::size_t i = 0; i < block_size_; ++i) {
+            // nodes start default-constructed; acquire() resets them again
+            available_.push(&base[i]);
+        }
+    }
+
+
+    std::size_t block_size_;
+    std::vector<std::unique_ptr<UndoNode[]>> blocks_;
+    std::stack<UndoNode *> available_;
+};
+
+#endif // KTE_UNDONODEPOOL_H
\ No newline at end of file
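A minimal sketch of how the pool is meant to be used when recording an undo
step, assuming the UndoNode fields reset by acquire() and release() above
(text, child, next, row, col, type); the surrounding undo-stack logic is
illustrative:

    #include "UndoNodePool.h"

    // Illustrative only: record an insertion, then return the node to the
    // pool once the undo entry is discarded.
    void
    record_insert(UndoNodePool &pool, std::size_t row, std::size_t col, const std::string &text)
    {
        UndoNode *node = pool.acquire();  // reused from a block; no per-node malloc after warm-up
        node->row = row;
        node->col = col;
        node->text = text;
        // ... link node into the undo history; later: pool.release(node);
    }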
diff --git a/bench/BufferBench.cc b/bench/BufferBench.cc
new file mode 100644
index 0000000..10e3737
--- /dev/null
+++ b/bench/BufferBench.cc
@@ -0,0 +1,206 @@
+/*
+ * BufferBench.cc - microbenchmarks for GapBuffer and PieceTable
+ *
+ * This benchmark exercises the public APIs shared by both structures as used
+ * in Buffer::Line: Reserve, AppendChar, Append, PrependChar, Prepend, Clear.
+ *
+ * Run examples:
+ *   ./kte_bench_buffer                  # defaults
+ *   ./kte_bench_buffer 200000 8 4096    # N=200k, rounds=8, chunk=4096
+ */
+
+#include <algorithm>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <iomanip>
+#include <iostream>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "GapBuffer.h"
+#include "PieceTable.h"
+
+using clock_t = std::chrono::steady_clock;
+using us = std::chrono::microseconds;
+
+struct Result {
+    std::string name;
+    std::string scenario;
+    double micros = 0.0;
+    std::size_t bytes = 0;
+};
+
+
+static void
+print_header()
+{
+    std::cout << std::left << std::setw(14) << "Structure"
+              << std::left << std::setw(18) << "Scenario"
+              << std::right << std::setw(12) << "time(us)"
+              << std::right << std::setw(14) << "bytes"
+              << std::right << std::setw(14) << "MB/s"
+              << "\n";
+    std::cout << std::string(72, '-') << "\n";
+}
+
+
+static void
+print_row(const Result &r)
+{
+    double mb = r.bytes / (1024.0 * 1024.0);
+    double mbps = (r.micros > 0.0) ? (mb / (r.micros / 1'000'000.0)) : 0.0;
+    std::cout << std::left << std::setw(14) << r.name
+              << std::left << std::setw(18) << r.scenario
+              << std::right << std::setw(12) << std::fixed << std::setprecision(2) << r.micros
+              << std::right << std::setw(14) << r.bytes
+              << std::right << std::setw(14) << std::fixed << std::setprecision(2) << mbps
+              << "\n";
+}
+
+
+template <typename Buf>
+Result
+bench_sequential_append(std::size_t N, int rounds)
+{
+    Result r;
+    r.name = typeid(Buf).name();
+    r.scenario = "seq_append";
+    const char c = 'x';
+    auto start = clock_t::now();
+    std::size_t bytes = 0;
+    for (int t = 0; t < rounds; ++t) {
+        Buf b;
+        b.Reserve(N);
+        for (std::size_t i = 0; i < N; ++i) {
+            b.AppendChar(c);
+        }
+        bytes += N;
+    }
+    auto end = clock_t::now();
+    r.micros = std::chrono::duration_cast<us>(end - start).count();
+    r.bytes = bytes;
+    return r;
+}
+
+
+template <typename Buf>
+Result
+bench_sequential_prepend(std::size_t N, int rounds)
+{
+    Result r;
+    r.name = typeid(Buf).name();
+    r.scenario = "seq_prepend";
+    const char c = 'x';
+    auto start = clock_t::now();
+    std::size_t bytes = 0;
+    for (int t = 0; t < rounds; ++t) {
+        Buf b;
+        b.Reserve(N);
+        for (std::size_t i = 0; i < N; ++i) {
+            b.PrependChar(c);
+        }
+        bytes += N;
+    }
+    auto end = clock_t::now();
+    r.micros = std::chrono::duration_cast<us>(end - start).count();
+    r.bytes = bytes;
+    return r;
+}
+
+
+template <typename Buf>
+Result
+bench_chunk_append(std::size_t N, std::size_t chunk, int rounds)
+{
+    Result r;
+    r.name = typeid(Buf).name();
+    r.scenario = "chunk_append";
+    std::string payload(chunk, 'y');
+    auto start = clock_t::now();
+    std::size_t bytes = 0;
+    for (int t = 0; t < rounds; ++t) {
+        Buf b;
+        b.Reserve(N);
+        std::size_t written = 0;
+        while (written < N) {
+            std::size_t now = std::min(chunk, N - written);
+            b.Append(payload.data(), now);
+            written += now;
+        }
+        bytes += N;
+    }
+    auto end = clock_t::now();
+    r.micros = std::chrono::duration_cast<us>(end - start).count();
+    r.bytes = bytes;
+    return r;
+}
+
+
+template <typename Buf>
+Result
+bench_mixed(std::size_t N, std::size_t chunk, int rounds)
+{
+    Result r;
+    r.name = typeid(Buf).name();
+    r.scenario = "mixed";
+    std::string payload(chunk, 'z');
+    auto start = clock_t::now();
+    std::size_t bytes = 0;
+    for (int t = 0; t < rounds; ++t) {
+        Buf b;
+        b.Reserve(N);
+        std::size_t written = 0;
+        while (written < N) {
+            // alternate append/prepend with small chunks
+            std::size_t now = std::min(chunk, N - written);
+            if ((written / chunk) % 2 == 0) {
+                b.Append(payload.data(), now);
+            } else {
+                b.Prepend(payload.data(), now);
+            }
+            written += now;
+        }
+        bytes += N;
+    }
+    auto end = clock_t::now();
+    r.micros = std::chrono::duration_cast<us>(end - start).count();
+    r.bytes = bytes;
+    return r;
+}
+
+
+int
+main(int argc, char **argv)
+{
+    // Parameters
+    std::size_t N = 100'000;   // bytes per round
+    int rounds = 5;            // iterations
+    std::size_t chunk = 1024;  // chunk size for chunked scenarios
+    if (argc >= 2)
+        N = static_cast<std::size_t>(std::stoull(argv[1]));
+    if (argc >= 3)
+        rounds = std::stoi(argv[2]);
+    if (argc >= 4)
+        chunk = static_cast<std::size_t>(std::stoull(argv[3]));
+
+    std::cout << "KTE Buffer Microbenchmarks" << "\n";
+    std::cout << "N=" << N << ", rounds=" << rounds << ", chunk=" << chunk << "\n\n";
+
+    print_header();
+
+    // Run for GapBuffer
+    print_row(bench_sequential_append<GapBuffer>(N, rounds));
+    print_row(bench_sequential_prepend<GapBuffer>(N, rounds));
+    print_row(bench_chunk_append<GapBuffer>(N, chunk, rounds));
+    print_row(bench_mixed<GapBuffer>(N, chunk, rounds));
+
+    // Run for PieceTable
+    print_row(bench_sequential_append<PieceTable>(N, rounds));
+    print_row(bench_sequential_prepend<PieceTable>(N, rounds));
+    print_row(bench_chunk_append<PieceTable>(N, chunk, rounds));
+    print_row(bench_mixed<PieceTable>(N, chunk, rounds));
+
+    return 0;
+}
\ No newline at end of file
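Every scenario above is a template over the buffer type, so covering an
additional structure only needs new instantiations in main(). A sketch, where
RopeBuffer is a hypothetical stand-in for any type exposing the same
Reserve/Append/Prepend interface (it is not part of this patch):

    // Hypothetical third structure; requires the same API as GapBuffer/PieceTable.
    print_row(bench_sequential_append<RopeBuffer>(N, rounds));
    print_row(bench_sequential_prepend<RopeBuffer>(N, rounds));
    print_row(bench_chunk_append<RopeBuffer>(N, chunk, rounds));
    print_row(bench_mixed<RopeBuffer>(N, chunk, rounds));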
diff --git a/bench/PerformanceSuite.cc b/bench/PerformanceSuite.cc
new file mode 100644
index 0000000..2517584
--- /dev/null
+++ b/bench/PerformanceSuite.cc
@@ -0,0 +1,318 @@
+/*
+ * PerformanceSuite.cc - broader performance and verification benchmarks
+ */
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iomanip>
+#include <iostream>
+#include <new>
+#include <random>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "GapBuffer.h"
+#include "PieceTable.h"
+#include "OptimizedSearch.h"
+
+using clock_t = std::chrono::steady_clock;
+using us = std::chrono::microseconds;
+
+namespace {
+struct Stat {
+    double micros{0.0};
+    std::size_t bytes{0};
+    std::size_t ops{0};
+};
+
+
+static void
+print_header(const std::string &title)
+{
+    std::cout << "\n" << title << "\n";
+    std::cout << std::left << std::setw(18) << "Case"
+              << std::left << std::setw(18) << "Type"
+              << std::right << std::setw(12) << "time(us)"
+              << std::right << std::setw(14) << "bytes"
+              << std::right << std::setw(14) << "ops/s"
+              << std::right << std::setw(14) << "MB/s"
+              << "\n";
+    std::cout << std::string(90, '-') << "\n";
+}
+
+
+static void
+print_row(const std::string &caseName, const std::string &typeName, const Stat &s)
+{
+    double mb = s.bytes / (1024.0 * 1024.0);
+    double sec = s.micros / 1'000'000.0;
+    double mbps = sec > 0 ? (mb / sec) : 0.0;
+    double opss = sec > 0 ? (static_cast<double>(s.ops) / sec) : 0.0;
+    std::cout << std::left << std::setw(18) << caseName
+              << std::left << std::setw(18) << typeName
+              << std::right << std::setw(12) << std::fixed << std::setprecision(2) << s.micros
+              << std::right << std::setw(14) << s.bytes
+              << std::right << std::setw(14) << std::fixed << std::setprecision(2) << opss
+              << std::right << std::setw(14) << std::fixed << std::setprecision(2) << mbps
+              << "\n";
+}
+} // namespace
+
+class PerformanceSuite {
+public:
+    void benchmarkBufferOperations(std::size_t N, int rounds, std::size_t chunk)
+    {
+        print_header("Buffer Operations");
+        // GapBuffer cases
+        run_buffer_case<GapBuffer>("append_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
+            for (std::size_t i = 0; i < count; ++i)
+                b.AppendChar('a');
+        });
+        run_buffer_case<GapBuffer>("prepend_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
+            for (std::size_t i = 0; i < count; ++i)
+                b.PrependChar('a');
+        });
+        run_buffer_case<GapBuffer>("chunk_mix", N, rounds, chunk, [&](auto &b, std::size_t) {
+            std::string payload(chunk, 'x');
+            std::size_t written = 0;
+            while (written < N) {
+                std::size_t now = std::min(chunk, N - written);
+                if (((written / chunk) & 1) == 0)
+                    b.Append(payload.data(), now);
+                else
+                    b.Prepend(payload.data(), now);
+                written += now;
+            }
+        });
+        // PieceTable cases
+        run_buffer_case<PieceTable>("append_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
+            for (std::size_t i = 0; i < count; ++i)
+                b.AppendChar('a');
+        });
+        run_buffer_case<PieceTable>("prepend_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
+            for (std::size_t i = 0; i < count; ++i)
+                b.PrependChar('a');
+        });
+        run_buffer_case<PieceTable>("chunk_mix", N, rounds, chunk, [&](auto &b, std::size_t) {
+            std::string payload(chunk, 'x');
+            std::size_t written = 0;
+            while (written < N) {
+                std::size_t now = std::min(chunk, N - written);
+                if (((written / chunk) & 1) == 0)
+                    b.Append(payload.data(), now);
+                else
+                    b.Prepend(payload.data(), now);
+                written += now;
+            }
+        });
+    }
+
+
+    void benchmarkSearchOperations(std::size_t textLen, std::size_t patLen, int rounds)
+    {
+        print_header("Search Operations");
+        std::mt19937_64 rng(0xC0FFEE);
+        std::uniform_int_distribution<int> dist('a', 'z');
+        std::string text(textLen, '\0');
+        for (auto &ch: text)
+            ch = static_cast<char>(dist(rng));
+        std::string pattern(patLen, '\0');
+        for (auto &ch: pattern)
+            ch = static_cast<char>(dist(rng));
+
+        // Ensure at least one hit
+        if (textLen >= patLen && patLen > 0) {
+            std::size_t pos = textLen / 2;
+            std::memcpy(&text[pos], pattern.data(), patLen);
+        }
+
+        // OptimizedSearch find_all vs std::string reference
+        OptimizedSearch os;
+        Stat s{};
+        auto start = clock_t::now();
+        std::size_t matches = 0;
+        std::size_t bytesScanned = 0;
+        for (int r = 0; r < rounds; ++r) {
+            auto hits = os.find_all(text, pattern, 0);
+            matches += hits.size();
+            bytesScanned += text.size();
+            // Verify with reference
+            std::vector<std::size_t> ref;
+            std::size_t from = 0;
+            while (true) {
+                auto p = text.find(pattern, from);
+                if (p == std::string::npos)
+                    break;
+                ref.push_back(p);
+                from = p + (patLen ? patLen : 1);
+            }
+            assert(ref == hits);
+        }
+        auto end = clock_t::now();
+        s.micros = std::chrono::duration_cast<us>(end - start).count();
+        s.bytes = bytesScanned;
+        s.ops = matches;
+        print_row("find_all", "OptimizedSearch", s);
+    }
+
+
+    void benchmarkMemoryAllocation(std::size_t N, int rounds)
+    {
+        print_header("Memory Allocation (allocations during editing)");
+        // Measure the number of allocations made while simulating editing patterns.
+        auto run_session = [&](auto &&buffer) {
+            // alternate small appends and prepends
+            const std::size_t chunk = 32;
+            std::string payload(chunk, 'q');
+            for (int r = 0; r < rounds; ++r) {
+                buffer.Clear();
+                for (std::size_t i = 0; i < N; i += chunk)
+                    buffer.Append(payload.data(), std::min(chunk, N - i));
+                for (std::size_t i = 0; i < N / 2; i += chunk)
+                    buffer.Prepend(payload.data(), std::min(chunk, N / 2 - i));
+            }
+        };
+
+        // Allocation counters maintained by the operator overrides below
+        reset_alloc_counters();
+        GapBuffer gb;
+        run_session(gb);
+        auto gap_allocs = current_allocs();
+        print_row("edit_session", "GapBuffer", Stat{
+            0.0, static_cast<std::size_t>(gap_allocs.bytes),
+            static_cast<std::size_t>(gap_allocs.count)
+        });
+
+        reset_alloc_counters();
+        PieceTable pt;
+        run_session(pt);
+        auto pt_allocs = current_allocs();
+        print_row("edit_session", "PieceTable", Stat{
+            0.0, static_cast<std::size_t>(pt_allocs.bytes),
+            static_cast<std::size_t>(pt_allocs.count)
+        });
+    }
+
+private:
+    template <typename Buf, typename Fn>
+    void run_buffer_case(const std::string &caseName, std::size_t N, int rounds, std::size_t chunk, Fn fn)
+    {
+        Stat s{};
+        auto start = clock_t::now();
+        std::size_t bytes = 0;
+        std::size_t ops = 0;
+        for (int t = 0; t < rounds; ++t) {
+            Buf b;
+            b.Reserve(N);
+            fn(b, N);
+            // compare to reference string where possible (only for append_char/prepend_char)
+            bytes += N;
+            ops += N / (chunk ? chunk : 1);
+        }
+        auto end = clock_t::now();
+        s.micros = std::chrono::duration_cast<us>(end - start).count();
+        s.bytes = bytes;
+        s.ops = ops;
+        print_row(caseName, typeid(Buf).name(), s);
+    }
+
+
+    // Simple global allocation tracking for this TU
+    struct AllocStats {
+        std::uint64_t count{0};
+        std::uint64_t bytes{0};
+    };
+
+
+    static AllocStats &alloc_stats()
+    {
+        static AllocStats s;
+        return s;
+    }
+
+
+    static void reset_alloc_counters()
+    {
+        alloc_stats() = {};
+    }
+
+
+    static AllocStats current_allocs()
+    {
+        return alloc_stats();
+    }
+
+
+    // Friend global new/delete defined below
+    friend void *operator new(std::size_t sz) noexcept(false);
+
+    friend void operator delete(void *p) noexcept;
+
+    friend void *operator new[](std::size_t sz) noexcept(false);
+
+    friend void operator delete[](void *p) noexcept;
+};
+
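+// Note on interpretation: the replacement operators below are global, so the
+// counters accumulate every allocation made while this binary runs, not just
+// allocations inside the buffer structures, and operator delete never
+// decrements them. The reported numbers are therefore cumulative allocation
+// counts and bytes for an edit session rather than live-heap usage.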
+// Replacement global new/delete, defined in this translation unit, to track allocations
+void *
+operator new(std::size_t sz) noexcept(false)
+{
+    auto &s = PerformanceSuite::alloc_stats();
+    s.count++;
+    s.bytes += sz;
+    if (void *p = std::malloc(sz))
+        return p;
+    throw std::bad_alloc();
+}
+
+
+void
+operator delete(void *p) noexcept
+{
+    std::free(p);
+}
+
+
+void *
+operator new[](std::size_t sz) noexcept(false)
+{
+    auto &s = PerformanceSuite::alloc_stats();
+    s.count++;
+    s.bytes += sz;
+    if (void *p = std::malloc(sz))
+        return p;
+    throw std::bad_alloc();
+}
+
+
+void
+operator delete[](void *p) noexcept
+{
+    std::free(p);
+}
+
+
+int
+main(int argc, char **argv)
+{
+    std::size_t N = 200'000;  // bytes per round for buffer cases
+    int rounds = 3;
+    std::size_t chunk = 1024;
+    if (argc >= 2)
+        N = static_cast<std::size_t>(std::stoull(argv[1]));
+    if (argc >= 3)
+        rounds = std::stoi(argv[2]);
+    if (argc >= 4)
+        chunk = static_cast<std::size_t>(std::stoull(argv[3]));
+
+    std::cout << "KTE Performance Suite" << "\n";
+    std::cout << "N=" << N << ", rounds=" << rounds << ", chunk=" << chunk << "\n";
+
+    PerformanceSuite suite;
+    suite.benchmarkBufferOperations(N, rounds, chunk);
+    suite.benchmarkSearchOperations(1'000'000, 16, rounds);
+    suite.benchmarkMemoryAllocation(N, rounds);
+    return 0;
+}
\ No newline at end of file
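The allocation counters above track the buffer structures; UndoNodePool attacks
the same cost for undo records by handing out nodes from fixed-size blocks. A
minimal sketch of that effect, assuming UndoNodePool.h as added above (the
counts in the comments assume the default block size of 64):

    UndoNodePool pool;
    std::vector<UndoNode *> nodes;
    for (int i = 0; i < 1024; ++i)
        nodes.push_back(pool.acquire());   // 16 block allocations instead of 1024 node allocations
    for (UndoNode *n : nodes)
        pool.release(n);                   // nodes go back on the free list; blocks stay allocated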
diff --git a/test_buffer_correctness.cc b/test_buffer_correctness.cc
new file mode 100644
index 0000000..c2937c0
--- /dev/null
+++ b/test_buffer_correctness.cc
@@ -0,0 +1,102 @@
+// Simple buffer correctness tests comparing GapBuffer and PieceTable to std::string
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "GapBuffer.h"
+#include "PieceTable.h"
+
+
+template <typename Buf>
+static void
+check_equals(const Buf &b, const std::string &ref)
+{
+    assert(b.Size() == ref.size());
+    if (b.Size() == 0)
+        return;
+    const char *p = b.Data();
+    assert(p != nullptr);
+    assert(std::memcmp(p, ref.data(), ref.size()) == 0);
+}
+
+
+template <typename Buf>
+static void
+run_basic_cases()
+{
+    // empty
+    {
+        Buf b;
+        std::string ref;
+        check_equals(b, ref);
+    }
+
+    // append chars
+    {
+        Buf b;
+        std::string ref;
+        for (int i = 0; i < 1000; ++i) {
+            b.AppendChar('a');
+            ref.push_back('a');
+        }
+        check_equals(b, ref);
+    }
+
+    // prepend chars
+    {
+        Buf b;
+        std::string ref;
+        for (int i = 0; i < 1000; ++i) {
+            b.PrependChar('b');
+            ref.insert(ref.begin(), 'b');
+        }
+        check_equals(b, ref);
+    }
+
+    // append/prepend strings
+    {
+        Buf b;
+        std::string ref;
+        const char *hello = "hello";
+        b.Append(hello, 5);
+        ref.append("hello");
+        b.Prepend(hello, 5);
+        ref.insert(0, "hello");
+        check_equals(b, ref);
+    }
+
+    // larger random blocks
+    {
+        std::mt19937 rng(42);
+        std::uniform_int_distribution<int> len_dist(0, 128);
+        std::uniform_int_distribution<int> coin(0, 1);
+        Buf b;
+        std::string ref;
+        for (int step = 0; step < 2000; ++step) {
+            int L = len_dist(rng);
+            std::string payload(L, '\0');
+            for (int i = 0; i < L; ++i)
+                payload[i] = static_cast<char>('a' + (i % 26));
+            if (coin(rng)) {
+                b.Append(payload.data(), payload.size());
+                ref.append(payload);
+            } else {
+                b.Prepend(payload.data(), payload.size());
+                ref.insert(0, payload);
+            }
+        }
+        check_equals(b, ref);
+    }
+}
+
+
+int
+main()
+{
+    run_basic_cases<GapBuffer>();
+    run_basic_cases<PieceTable>();
+    return 0;
+}
\ No newline at end of file
diff --git a/test_search_correctness.cc b/test_search_correctness.cc
new file mode 100644
index 0000000..9efb283
--- /dev/null
+++ b/test_search_correctness.cc
@@ -0,0 +1,74 @@
+// Verify OptimizedSearch against std::string reference across patterns and sizes
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <random>
+#include <string>
+#include <vector>
+
+#include "OptimizedSearch.h"
+
+
+static std::vector<std::size_t>
+ref_find_all(const std::string &text, const std::string &pat)
+{
+    std::vector<std::size_t> res;
+    if (pat.empty())
+        return res;
+    std::size_t from = 0;
+    while (true) {
+        auto p = text.find(pat, from);
+        if (p == std::string::npos)
+            break;
+        res.push_back(p);
+        from = p + pat.size(); // non-overlapping
+    }
+    return res;
+}
+
+
+static void
+run_case(std::size_t textLen, std::size_t patLen, unsigned seed)
+{
+    std::mt19937 rng(seed);
+    std::uniform_int_distribution<int> dist('a', 'z');
+    std::string text(textLen, '\0');
+    for (auto &ch: text)
+        ch = static_cast<char>(dist(rng));
+    std::string pat(patLen, '\0');
+    for (auto &ch: pat)
+        ch = static_cast<char>(dist(rng));
+
+    // Guarantee at least one match when possible
+    if (textLen >= patLen && patLen > 0) {
+        std::size_t pos = textLen / 3;
+        if (pos + patLen <= text.size())
+            std::copy(pat.begin(), pat.end(), text.begin() + static_cast<std::string::difference_type>(pos));
+    }
+
+    OptimizedSearch os;
+    auto got = os.find_all(text, pat, 0);
+    auto ref = ref_find_all(text, pat);
+    assert(got == ref);
+}
+
+
+int
+main()
+{
+    // Edge cases
+    run_case(0, 0, 1);
+    run_case(0, 1, 2);
+    run_case(1, 0, 3);
+    run_case(1, 1, 4);
+
+    // Various sizes
+    for (std::size_t t = 128; t <= 4096; t *= 2) {
+        for (std::size_t p = 1; p <= 64; p *= 2) {
+            run_case(t, p, static_cast<unsigned>(t + p));
+        }
+    }
+    // Larger random
+    run_case(100000, 16, 12345);
+    run_case(250000, 32, 67890);
+    return 0;
+}
\ No newline at end of file
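Both find_all and the reference scanner in this test advance past a full match,
so overlapping occurrences are deliberately collapsed. For example:

    OptimizedSearch search;
    auto hits = search.find_all("aaaa", "aa");   // yields {0, 2}, not {0, 1, 2}

Any future change to overlapping semantics would need the test's ref_find_all
updated in the same way.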