Add new benchmarks, optimized search, UndoNode pool, and fix horizontal scrolling.

- Added benchmarking for GapBuffer and PieceTable (BufferBench, PerformanceSuite).
- Implemented `OptimizedSearch` using Boyer-Moore (bad character heuristic).
- Introduced `UndoNodePool` for efficient memory management.
- Fixed horizontal scrolling and cursor placement in GUI: ensured cursor visibility and improved accuracy for rendered columns.
This commit is contained in:
2025-12-03 13:53:24 -08:00
parent c98d9e717a
commit 389dcf9cc7
8 changed files with 916 additions and 48 deletions

View File

@@ -140,8 +140,7 @@ GUIRenderer::Draw(Editor &ed)
prev_buf_coloffs = buf_coloffs;
// Synchronize cursor and scrolling.
// Ensure the cursor is visible even on the first frame or when it didn't move,
// unless we already forced scrolling from Buffer::Rowoffs this frame.
// Ensure the cursor is visible even on the first frame or when it didn't move.
{
// Compute visible row range using the child window height
float child_h = ImGui::GetWindowHeight();
@@ -151,56 +150,54 @@ GUIRenderer::Draw(Editor &ed)
vis_rows = 1;
long last_row = first_row + vis_rows - 1;
if (!forced_scroll) {
long cyr = static_cast<long>(cy);
if (cyr < first_row || cyr > last_row) {
float target = (static_cast<float>(cyr) - std::max(0L, vis_rows / 2)) * row_h;
float max_y = ImGui::GetScrollMaxY();
if (target < 0.f)
target = 0.f;
if (max_y >= 0.f && target > max_y)
target = max_y;
ImGui::SetScrollY(target);
// refresh local variables
scroll_y = ImGui::GetScrollY();
first_row = static_cast<long>(scroll_y / row_h);
last_row = first_row + vis_rows - 1;
}
long cyr = static_cast<long>(cy);
if (cyr < first_row || cyr > last_row) {
float target = (static_cast<float>(cyr) - std::max(0L, vis_rows / 2)) * row_h;
float max_y = ImGui::GetScrollMaxY();
if (target < 0.f)
target = 0.f;
if (max_y >= 0.f && target > max_y)
target = max_y;
ImGui::SetScrollY(target);
// refresh local variables
scroll_y = ImGui::GetScrollY();
first_row = static_cast<long>(scroll_y / row_h);
last_row = first_row + vis_rows - 1;
}
// Horizontal scroll: ensure cursor column is visible
float child_w = ImGui::GetWindowWidth();
long vis_cols = static_cast<long>(child_w / space_w);
if (vis_cols < 1)
vis_cols = 1;
long first_col = static_cast<long>(scroll_x / space_w);
long last_col = first_col + vis_cols - 1;
// Horizontal scroll: ensure cursor column is visible
float child_w = ImGui::GetWindowWidth();
long vis_cols = static_cast<long>(child_w / space_w);
if (vis_cols < 1)
vis_cols = 1;
long first_col = static_cast<long>(scroll_x / space_w);
long last_col = first_col + vis_cols - 1;
// Compute cursor's rendered X position (accounting for tabs)
std::size_t cursor_rx = 0;
if (cy < lines.size()) {
std::string cur_line = static_cast<std::string>(lines[cy]);
const std::size_t tabw = 8;
for (std::size_t i = 0; i < cx && i < cur_line.size(); ++i) {
if (cur_line[i] == '\t') {
cursor_rx += tabw - (cursor_rx % tabw);
} else {
cursor_rx += 1;
}
// Compute cursor's rendered X position (accounting for tabs)
std::size_t cursor_rx = 0;
if (cy < lines.size()) {
std::string cur_line = static_cast<std::string>(lines[cy]);
const std::size_t tabw = 8;
for (std::size_t i = 0; i < cx && i < cur_line.size(); ++i) {
if (cur_line[i] == '\t') {
cursor_rx += tabw - (cursor_rx % tabw);
} else {
cursor_rx += 1;
}
}
long cxr = static_cast<long>(cursor_rx);
if (cxr < first_col || cxr > last_col) {
float target_x = static_cast<float>(cxr) * space_w;
// Center horizontally if possible
target_x -= (child_w / 2.0f);
if (target_x < 0.f)
target_x = 0.f;
float max_x = ImGui::GetScrollMaxX();
if (max_x >= 0.f && target_x > max_x)
target_x = max_x;
ImGui::SetScrollX(target_x);
scroll_x = ImGui::GetScrollX();
}
}
long cxr = static_cast<long>(cursor_rx);
if (cxr < first_col || cxr > last_col) {
float target_x = static_cast<float>(cxr) * space_w;
// Center horizontally if possible
target_x -= (child_w / 2.0f);
if (target_x < 0.f)
target_x = 0.f;
float max_x = ImGui::GetScrollMaxX();
if (max_x >= 0.f && target_x > max_x)
target_x = max_x;
ImGui::SetScrollX(target_x);
scroll_x = ImGui::GetScrollX();
}
// Phase 3: prefetch visible viewport highlights and warm around in background
if (buf->SyntaxEnabled() && buf->Highlighter() && buf->Highlighter()->HasHighlighter()) {

79
OptimizedSearch.cc Normal file
View File

@@ -0,0 +1,79 @@
#include "OptimizedSearch.h"
#include <algorithm>
void
OptimizedSearch::build_bad_char(const std::string &pattern)
{
	// The table is cached per instance: rebuild only when the pattern
	// actually changed since the last call.
	if (last_pat_ == pattern)
		return;
	last_pat_ = pattern;
	// -1 marks "character does not occur in the pattern".
	bad_char_.fill(-1);
	// Record the LAST occurrence index of every pattern byte.
	int idx = 0;
	for (const char ch: pattern) {
		bad_char_[static_cast<unsigned char>(ch)] = idx++;
	}
}
// Find the first occurrence of pattern in text at or after `start`.
// Returns the match index, or std::string::npos if there is none.
// An empty pattern matches at `start` (when start <= text.size()).
std::size_t
OptimizedSearch::find_first(const std::string &text, const std::string &pattern, std::size_t start)
{
	const std::size_t n = text.size();
	const std::size_t m = pattern.size();
	if (m == 0)
		return start <= n ? start : std::string::npos;
	if (m > n || start >= n)
		return std::string::npos;
	build_bad_char(pattern);
	std::size_t s = start;
	while (s <= n - m) {
		// Compare right-to-left; j ends at the mismatch position (1-based).
		std::size_t j = m;
		while (j > 0 && pattern[j - 1] == text[s + j - 1]) {
			--j;
		}
		if (j == 0) {
			return s; // match found
		}
		// Bad-character rule: align the mismatched text byte with its last
		// occurrence in the pattern. Use signed arithmetic so that a byte
		// absent from the pattern (table value -1) yields a shift of j —
		// the previous unsigned comparison collapsed that case to 1.
		const unsigned char badc = static_cast<unsigned char>(text[s + j - 1]);
		const long shift = static_cast<long>(j) - 1 - bad_char_[badc];
		s += static_cast<std::size_t>(shift > 1 ? shift : 1);
	}
	return std::string::npos;
}
// Find all non-overlapping occurrences of pattern in text at or after
// `start`. Returns starting indices in ascending order; an empty pattern
// yields no matches.
std::vector<std::size_t>
OptimizedSearch::find_all(const std::string &text, const std::string &pattern, std::size_t start)
{
	std::vector<std::size_t> res;
	const std::size_t n = text.size();
	const std::size_t m = pattern.size();
	if (m == 0)
		return res;
	if (m > n || start >= n)
		return res;
	build_bad_char(pattern);
	std::size_t s = start;
	while (s <= n - m) {
		// Compare right-to-left; j ends at the mismatch position (1-based).
		std::size_t j = m;
		while (j > 0 && pattern[j - 1] == text[s + j - 1]) {
			--j;
		}
		if (j == 0) {
			res.push_back(s);
			s += m; // non-overlapping: jump past the whole match
			continue;
		}
		// Bad-character rule with signed arithmetic: a byte absent from the
		// pattern (table value -1) shifts by j, not 1 — the previous
		// unsigned cast of -1 made the larger shift unreachable.
		const unsigned char badc = static_cast<unsigned char>(text[s + j - 1]);
		const long shift = static_cast<long>(j) - 1 - bad_char_[badc];
		s += static_cast<std::size_t>(shift > 1 ? shift : 1);
	}
	return res;
}

27
OptimizedSearch.h Normal file
View File

@@ -0,0 +1,27 @@
// OptimizedSearch.h - Boyer-Moore (bad-character heuristic) substring search
#ifndef KTE_OPTIMIZED_SEARCH_H
#define KTE_OPTIMIZED_SEARCH_H
#include <array>
#include <cstddef>
#include <string>
#include <vector>
// Boyer-Moore substring search using only the bad-character heuristic.
// The 256-entry last-occurrence table is cached per instance and rebuilt
// lazily when the pattern changes, so repeated searches with the same
// pattern skip the setup cost.
// NOTE(review): the cached table mutates on every search call, so a single
// instance is not safe to share across threads — confirm callers don't.
class OptimizedSearch {
public:
	OptimizedSearch() = default;
	// Find first occurrence at or after start. Returns npos if not found.
	std::size_t find_first(const std::string &text, const std::string &pattern, std::size_t start = 0);
	// Find all non-overlapping matches at or after start. Returns starting indices.
	std::vector<std::size_t> find_all(const std::string &text, const std::string &pattern, std::size_t start = 0);
private:
	// bad_char_[b] = last index of byte b in last_pat_, or -1 if absent.
	std::array<int, 256> bad_char_{};
	// Pattern the table was last built for (cache key).
	std::string last_pat_;
	void build_bad_char(const std::string &pattern);
};
#endif // KTE_OPTIMIZED_SEARCH_H

65
UndoNodePool.h Normal file
View File

@@ -0,0 +1,65 @@
#ifndef KTE_UNDONODEPOOL_H
#define KTE_UNDONODEPOOL_H
#include <stack>
#include <vector>
#include <memory>
#include "UndoNode.h"
// Pool allocator for UndoNode to eliminate frequent malloc/free.
// Uses fixed-size blocks to keep node addresses stable.
// Pool allocator for UndoNode to eliminate frequent malloc/free during
// editing. Nodes are allocated in fixed-size blocks that the pool owns for
// its whole lifetime, so handed-out node addresses stay stable.
class UndoNodePool {
public:
	explicit UndoNodePool(std::size_t block_size = 64)
		: block_size_(block_size) {}

	// Hand out a node in a fully reset state; allocates a fresh block when
	// the free list is empty. Note: nodes from new[] are default-constructed,
	// not zeroed, and recycled nodes hold stale data — so always reset here.
	UndoNode *acquire()
	{
		if (available_.empty())
			allocate_block();
		UndoNode *node = available_.top();
		available_.pop();
		reset_node(node);
		return node;
	}

	// Return a node to the pool. The node is reset immediately so an idle
	// pooled node does not pin heap memory through its text string.
	void release(UndoNode *node)
	{
		if (!node)
			return;
		reset_node(node);
		available_.push(node);
	}

private:
	// Reset every field. shrink_to_fit() actually releases the string's heap
	// buffer — clear() alone keeps the capacity allocated, which defeated the
	// stated goal of freeing memory held by idle nodes. (shrink_to_fit is a
	// non-binding request, but mainstream implementations honor it.)
	static void reset_node(UndoNode *node)
	{
		node->text.clear();
		node->text.shrink_to_fit();
		node->child = nullptr;
		node->next = nullptr;
		node->row = node->col = 0;
		node->type = UndoType{};
	}

	// Allocate a new block; ownership is retained in blocks_ so the memory
	// remains valid for as long as the pool lives.
	void allocate_block()
	{
		std::unique_ptr<UndoNode[]> block(new UndoNode[block_size_]);
		UndoNode *base = block.get();
		blocks_.push_back(std::move(block));
		for (std::size_t i = 0; i < block_size_; ++i)
			available_.push(&base[i]);
	}

	std::size_t block_size_;
	std::vector<std::unique_ptr<UndoNode[]> > blocks_;
	std::stack<UndoNode *> available_;
};
#endif // KTE_UNDONODEPOOL_H

206
bench/BufferBench.cc Normal file
View File

@@ -0,0 +1,206 @@
/*
* BufferBench.cc - microbenchmarks for GapBuffer and PieceTable
*
* This benchmark exercises the public APIs shared by both structures as used
* in Buffer::Line: Reserve, AppendChar, Append, PrependChar, Prepend, Clear.
*
* Run examples:
* ./kte_bench_buffer # defaults
* ./kte_bench_buffer 200000 8 4096 # N=200k, rounds=8, chunk=4096
*/
#include <chrono>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <random>
#include <string>
#include <vector>
#include <typeinfo>
#include "GapBuffer.h"
#include "PieceTable.h"
// Monotonic clock for timing (immune to wall-clock adjustments).
// NOTE(review): this alias shadows the C library's ::clock_t from <ctime>;
// it compiles here, but a distinct name (e.g. bench_clock) would be safer.
using clock_t = std::chrono::steady_clock;
using us = std::chrono::microseconds;
// One benchmark measurement: buffer type name (from typeid), scenario label,
// elapsed microseconds, and total payload bytes (used to derive MB/s).
struct Result {
std::string name;
std::string scenario;
double micros = 0.0;
std::size_t bytes = 0;
};
// Print the aligned column header for the results table, then a rule line.
static void
print_header()
{
	auto left = [](int width, const char *label) {
		std::cout << std::left << std::setw(width) << label;
	};
	auto right = [](int width, const char *label) {
		std::cout << std::right << std::setw(width) << label;
	};
	left(14, "Structure");
	left(18, "Scenario");
	right(12, "time(us)");
	right(14, "bytes");
	right(14, "MB/s");
	std::cout << "\n" << std::string(72, '-') << "\n";
}
// Print one result row; throughput is megabytes processed per second of
// elapsed time (0 when no time was recorded, avoiding division by zero).
static void
print_row(const Result &r)
{
	const double megabytes = r.bytes / (1024.0 * 1024.0);
	double throughput = 0.0;
	if (r.micros > 0.0)
		throughput = megabytes / (r.micros / 1'000'000.0);
	std::cout << std::left << std::setw(14) << r.name
	          << std::left << std::setw(18) << r.scenario
	          << std::right << std::setw(12) << std::fixed << std::setprecision(2) << r.micros
	          << std::right << std::setw(14) << r.bytes
	          << std::right << std::setw(14) << std::fixed << std::setprecision(2) << throughput
	          << "\n";
}
// Time N per-character appends into a freshly reserved buffer, repeated
// `rounds` times; reports total bytes written across all rounds.
template<typename Buf>
Result
bench_sequential_append(std::size_t N, int rounds)
{
	Result r;
	r.name = typeid(Buf).name();
	r.scenario = "seq_append";
	std::size_t total = 0;
	const auto t0 = clock_t::now();
	for (int round = 0; round < rounds; ++round) {
		Buf buf;
		buf.Reserve(N);
		for (std::size_t i = 0; i < N; ++i)
			buf.AppendChar('x');
		total += N;
	}
	const auto t1 = clock_t::now();
	r.micros = std::chrono::duration_cast<us>(t1 - t0).count();
	r.bytes = total;
	return r;
}
// Time N per-character prepends (the pathological direction for some buffer
// layouts), repeated `rounds` times; reports total bytes written.
template<typename Buf>
Result
bench_sequential_prepend(std::size_t N, int rounds)
{
	Result r;
	r.name = typeid(Buf).name();
	r.scenario = "seq_prepend";
	std::size_t total = 0;
	const auto t0 = clock_t::now();
	for (int round = 0; round < rounds; ++round) {
		Buf buf;
		buf.Reserve(N);
		for (std::size_t i = 0; i < N; ++i)
			buf.PrependChar('x');
		total += N;
	}
	const auto t1 = clock_t::now();
	r.micros = std::chrono::duration_cast<us>(t1 - t0).count();
	r.bytes = total;
	return r;
}
// Time appending `chunk`-byte blocks until N bytes are written, repeated
// `rounds` times. The payload is built once, outside the timed region.
template<typename Buf>
Result
bench_chunk_append(std::size_t N, std::size_t chunk, int rounds)
{
	Result r;
	r.name = typeid(Buf).name();
	r.scenario = "chunk_append";
	const std::string payload(chunk, 'y');
	std::size_t total = 0;
	const auto t0 = clock_t::now();
	for (int round = 0; round < rounds; ++round) {
		Buf buf;
		buf.Reserve(N);
		for (std::size_t written = 0; written < N;) {
			// final block may be short when chunk does not divide N
			const std::size_t step = std::min(chunk, N - written);
			buf.Append(payload.data(), step);
			written += step;
		}
		total += N;
	}
	const auto t1 = clock_t::now();
	r.micros = std::chrono::duration_cast<us>(t1 - t0).count();
	r.bytes = total;
	return r;
}
// Time alternating chunked appends and prepends — the mixed pattern that
// stresses structures optimized for one end only.
template<typename Buf>
Result
bench_mixed(std::size_t N, std::size_t chunk, int rounds)
{
	Result r;
	r.name = typeid(Buf).name();
	r.scenario = "mixed";
	const std::string payload(chunk, 'z');
	std::size_t total = 0;
	const auto t0 = clock_t::now();
	for (int round = 0; round < rounds; ++round) {
		Buf buf;
		buf.Reserve(N);
		for (std::size_t written = 0; written < N;) {
			const std::size_t step = std::min(chunk, N - written);
			// even-numbered chunks go to the back, odd ones to the front
			if ((written / chunk) % 2 == 0)
				buf.Append(payload.data(), step);
			else
				buf.Prepend(payload.data(), step);
			written += step;
		}
		total += N;
	}
	const auto t1 = clock_t::now();
	r.micros = std::chrono::duration_cast<us>(t1 - t0).count();
	r.bytes = total;
	return r;
}
// Entry point. Optional positional overrides: [N bytes] [rounds] [chunk].
int
main(int argc, char **argv)
{
	// Defaults
	std::size_t N = 100'000; // bytes per round
	int rounds = 5;          // iterations
	std::size_t chunk = 1024; // block size for chunked scenarios
	if (argc > 1)
		N = static_cast<std::size_t>(std::stoull(argv[1]));
	if (argc > 2)
		rounds = std::stoi(argv[2]);
	if (argc > 3)
		chunk = static_cast<std::size_t>(std::stoull(argv[3]));
	std::cout << "KTE Buffer Microbenchmarks" << "\n";
	std::cout << "N=" << N << ", rounds=" << rounds << ", chunk=" << chunk << "\n\n";
	print_header();
	// All four scenarios per structure, GapBuffer first.
	print_row(bench_sequential_append<GapBuffer>(N, rounds));
	print_row(bench_sequential_prepend<GapBuffer>(N, rounds));
	print_row(bench_chunk_append<GapBuffer>(N, chunk, rounds));
	print_row(bench_mixed<GapBuffer>(N, chunk, rounds));
	print_row(bench_sequential_append<PieceTable>(N, rounds));
	print_row(bench_sequential_prepend<PieceTable>(N, rounds));
	print_row(bench_chunk_append<PieceTable>(N, chunk, rounds));
	print_row(bench_mixed<PieceTable>(N, chunk, rounds));
	return 0;
}

318
bench/PerformanceSuite.cc Normal file
View File

@@ -0,0 +1,318 @@
/*
* PerformanceSuite.cc - broader performance and verification benchmarks
*/
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <new>
#include <random>
#include <string>
#include <typeinfo>
#include <vector>
#include "GapBuffer.h"
#include "PieceTable.h"
#include "OptimizedSearch.h"
using clock_t = std::chrono::steady_clock;
using us = std::chrono::microseconds;
namespace {
// One measurement: elapsed microseconds, bytes processed, operation count.
struct Stat {
	double micros{0.0};
	std::size_t bytes{0};
	std::size_t ops{0};
};
// Print a section title, the aligned column header, and a rule line.
static void
print_header(const std::string &title)
{
	std::cout << "\n" << title << "\n";
	std::cout << std::left << std::setw(18) << "Case"
	          << std::left << std::setw(18) << "Type"
	          << std::right << std::setw(12) << "time(us)"
	          << std::right << std::setw(14) << "bytes"
	          << std::right << std::setw(14) << "ops/s"
	          << std::right << std::setw(14) << "MB/s"
	          << "\n";
	std::cout << std::string(90, '-') << "\n";
}
// Print one row; ops/s and MB/s are derived from elapsed seconds, guarding
// against division by zero when no time was recorded.
static void
print_row(const std::string &caseName, const std::string &typeName, const Stat &s)
{
	const double seconds = s.micros / 1'000'000.0;
	const double megabytes = s.bytes / (1024.0 * 1024.0);
	const double mbps = seconds > 0 ? (megabytes / seconds) : 0.0;
	const double opss = seconds > 0 ? (static_cast<double>(s.ops) / seconds) : 0.0;
	std::cout << std::left << std::setw(18) << caseName
	          << std::left << std::setw(18) << typeName
	          << std::right << std::setw(12) << std::fixed << std::setprecision(2) << s.micros
	          << std::right << std::setw(14) << s.bytes
	          << std::right << std::setw(14) << std::fixed << std::setprecision(2) << opss
	          << std::right << std::setw(14) << std::fixed << std::setprecision(2) << mbps
	          << "\n";
}
} // namespace
// Benchmark driver for buffer edits, substring search, and allocation
// counting. Allocation counts come from the replacement global operator
// new/delete defined after this class, which record into alloc_stats().
class PerformanceSuite {
public:
// Per-character append/prepend plus mixed chunked edits, run for both
// GapBuffer and PieceTable. N = bytes per round, chunk = block size.
void benchmarkBufferOperations(std::size_t N, int rounds, std::size_t chunk)
{
print_header("Buffer Operations");
run_buffer_case<GapBuffer>("append_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
for (std::size_t i = 0; i < count; ++i)
b.AppendChar('a');
});
run_buffer_case<GapBuffer>("prepend_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
for (std::size_t i = 0; i < count; ++i)
b.PrependChar('a');
});
run_buffer_case<GapBuffer>("chunk_mix", N, rounds, chunk, [&](auto &b, std::size_t) {
std::string payload(chunk, 'x');
std::size_t written = 0;
while (written < N) {
std::size_t now = std::min(chunk, N - written);
if (((written / chunk) & 1) == 0)
b.Append(payload.data(), now);
else
b.Prepend(payload.data(), now);
written += now;
}
});
run_buffer_case<PieceTable>("append_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
for (std::size_t i = 0; i < count; ++i)
b.AppendChar('a');
});
run_buffer_case<PieceTable>("prepend_char", N, rounds, chunk, [&](auto &b, std::size_t count) {
for (std::size_t i = 0; i < count; ++i)
b.PrependChar('a');
});
run_buffer_case<PieceTable>("chunk_mix", N, rounds, chunk, [&](auto &b, std::size_t) {
std::string payload(chunk, 'x');
std::size_t written = 0;
while (written < N) {
std::size_t now = std::min(chunk, N - written);
if (((written / chunk) & 1) == 0)
b.Append(payload.data(), now);
else
b.Prepend(payload.data(), now);
written += now;
}
});
}
// Time OptimizedSearch::find_all over random lowercase text with one planted
// match, verifying every round against std::string::find.
// NOTE(review): the reference verification runs inside the timed region, so
// reported time includes it; the assert is compiled out under NDEBUG.
void benchmarkSearchOperations(std::size_t textLen, std::size_t patLen, int rounds)
{
print_header("Search Operations");
std::mt19937_64 rng(0xC0FFEE);
std::uniform_int_distribution<int> dist('a', 'z');
std::string text(textLen, '\0');
for (auto &ch: text)
ch = static_cast<char>(dist(rng));
std::string pattern(patLen, '\0');
for (auto &ch: pattern)
ch = static_cast<char>(dist(rng));
// Ensure at least one hit by planting the pattern mid-text
if (textLen >= patLen && patLen > 0) {
std::size_t pos = textLen / 2;
std::memcpy(&text[pos], pattern.data(), patLen);
}
// OptimizedSearch find_all vs std::string reference
OptimizedSearch os;
Stat s{};
auto start = clock_t::now();
std::size_t matches = 0;
std::size_t bytesScanned = 0;
for (int r = 0; r < rounds; ++r) {
auto hits = os.find_all(text, pattern, 0);
matches += hits.size();
bytesScanned += text.size();
// Verify with reference (non-overlapping, matching find_all semantics)
std::vector<std::size_t> ref;
std::size_t from = 0;
while (true) {
auto p = text.find(pattern, from);
if (p == std::string::npos)
break;
ref.push_back(p);
from = p + (patLen ? patLen : 1);
}
assert(ref == hits);
}
auto end = clock_t::now();
s.micros = std::chrono::duration_cast<us>(end - start).count();
s.bytes = bytesScanned;
s.ops = matches;
print_row("find_all", "OptimizedSearch", s);
}
// Count heap allocations made during a simulated editing session for each
// buffer type. Reported time is always 0; bytes = allocated bytes and
// ops = allocation count as recorded by the replacement operators below.
void benchmarkMemoryAllocation(std::size_t N, int rounds)
{
print_header("Memory Allocation (allocations during editing)");
// Measure number of allocations by simulating editing patterns.
auto run_session = [&](auto &&buffer) {
// alternate small appends and prepends
const std::size_t chunk = 32;
std::string payload(chunk, 'q');
for (int r = 0; r < rounds; ++r) {
buffer.Clear();
for (std::size_t i = 0; i < N; i += chunk)
buffer.Append(payload.data(), std::min(chunk, N - i));
for (std::size_t i = 0; i < N / 2; i += chunk)
buffer.Prepend(payload.data(), std::min(chunk, N / 2 - i));
}
};
// NOTE(review): the replacement operator new/delete are program-wide, not
// TU-local, so these counters also capture any unrelated allocations made
// while a session runs.
reset_alloc_counters();
GapBuffer gb;
run_session(gb);
auto gap_allocs = current_allocs();
print_row("edit_session", "GapBuffer", Stat{
0.0, static_cast<std::size_t>(gap_allocs.bytes),
static_cast<std::size_t>(gap_allocs.count)
});
reset_alloc_counters();
PieceTable pt;
run_session(pt);
auto pt_allocs = current_allocs();
print_row("edit_session", "PieceTable", Stat{
0.0, static_cast<std::size_t>(pt_allocs.bytes),
static_cast<std::size_t>(pt_allocs.count)
});
}
private:
// Shared harness: construct a buffer, Reserve(N), run fn, and report total
// bytes plus a derived per-chunk operation count.
template<typename Buf, typename Fn>
void run_buffer_case(const std::string &caseName, std::size_t N, int rounds, std::size_t chunk, Fn fn)
{
Stat s{};
auto start = clock_t::now();
std::size_t bytes = 0;
std::size_t ops = 0;
for (int t = 0; t < rounds; ++t) {
Buf b;
b.Reserve(N);
fn(b, N);
// compare to reference string where possible (only for append_char/prepend_char)
bytes += N;
ops += N / (chunk ? chunk : 1);
}
auto end = clock_t::now();
s.micros = std::chrono::duration_cast<us>(end - start).count();
s.bytes = bytes;
s.ops = ops;
print_row(caseName, typeid(Buf).name(), s);
}
// Allocation counters recorded by the replacement operator new/delete.
struct AllocStats {
std::uint64_t count{0};
std::uint64_t bytes{0};
};
// Function-local static so the replacement operators (free functions) and
// the benchmark methods share a single counter instance.
static AllocStats &alloc_stats()
{
static AllocStats s;
return s;
}
static void reset_alloc_counters()
{
alloc_stats() = {};
}
static AllocStats current_allocs()
{
return alloc_stats();
}
// The replacement global new/delete defined below need access to the
// private alloc_stats().
friend void *operator new(std::size_t sz) noexcept(false);
friend void operator delete(void *p) noexcept;
friend void *operator new[](std::size_t sz) noexcept(false);
friend void operator delete[](void *p) noexcept;
};
// Replacement global allocation functions that count allocations into
// PerformanceSuite::alloc_stats().
// NOTE(review): replacing ::operator new/delete takes effect program-wide,
// not "only in this translation unit" — linking this TU changes allocation
// behavior for the entire binary. Deallocations are deliberately not
// counted (only allocation count and bytes matter to the benchmark).
void *
operator new(std::size_t sz) noexcept(false)
{
	auto &s = PerformanceSuite::alloc_stats();
	s.count++;
	s.bytes += sz;
	if (void *p = std::malloc(sz))
		return p;
	throw std::bad_alloc();
}
void
operator delete(void *p) noexcept
{
	std::free(p);
}
void *
operator new[](std::size_t sz) noexcept(false)
{
	auto &s = PerformanceSuite::alloc_stats();
	s.count++;
	s.bytes += sz;
	if (void *p = std::malloc(sz))
		return p;
	throw std::bad_alloc();
}
void
operator delete[](void *p) noexcept
{
	std::free(p);
}
// Entry point. Optional positional overrides: [N bytes] [rounds] [chunk].
int
main(int argc, char **argv)
{
	// Defaults for the buffer cases; the search case uses a fixed 1 MB text.
	std::size_t N = 200'000;
	int rounds = 3;
	std::size_t chunk = 1024;
	if (argc > 1)
		N = static_cast<std::size_t>(std::stoull(argv[1]));
	if (argc > 2)
		rounds = std::stoi(argv[2]);
	if (argc > 3)
		chunk = static_cast<std::size_t>(std::stoull(argv[3]));
	std::cout << "KTE Performance Suite" << "\n";
	std::cout << "N=" << N << ", rounds=" << rounds << ", chunk=" << chunk << "\n";
	PerformanceSuite suite;
	suite.benchmarkBufferOperations(N, rounds, chunk);
	suite.benchmarkSearchOperations(1'000'000, 16, rounds);
	suite.benchmarkMemoryAllocation(N, rounds);
	return 0;
}

102
test_buffer_correctness.cc Normal file
View File

@@ -0,0 +1,102 @@
// Simple buffer correctness tests comparing GapBuffer and PieceTable to std::string
#include <cassert>
#include <cstddef>
#include <cstring>
#include <random>
#include <string>
#include <vector>
#include "GapBuffer.h"
#include "PieceTable.h"
// Assert that the buffer's contents match the reference string byte-for-byte.
// An empty buffer is accepted without dereferencing Data().
template<typename Buf>
static void
check_equals(const Buf &b, const std::string &ref)
{
	assert(b.Size() == ref.size());
	if (ref.empty())
		return;
	const char *data = b.Data();
	assert(data != nullptr);
	assert(std::memcmp(data, ref.data(), ref.size()) == 0);
}
// Exercise one buffer implementation against std::string as the oracle:
// empty state, per-character appends/prepends, string-sized edits, and a
// randomized sequence of variable-length block edits at both ends.
template<typename Buf>
static void
run_basic_cases()
{
	// A freshly constructed buffer is empty.
	{
		Buf buf;
		check_equals(buf, std::string());
	}
	// 1000 single-character appends.
	{
		Buf buf;
		std::string expect;
		for (int i = 0; i < 1000; ++i) {
			buf.AppendChar('a');
			expect.push_back('a');
		}
		check_equals(buf, expect);
	}
	// 1000 single-character prepends.
	{
		Buf buf;
		std::string expect;
		for (int i = 0; i < 1000; ++i) {
			buf.PrependChar('b');
			expect.insert(expect.begin(), 'b');
		}
		check_equals(buf, expect);
	}
	// String-sized append followed by prepend.
	{
		Buf buf;
		std::string expect;
		const char *hello = "hello";
		buf.Append(hello, 5);
		expect.append("hello");
		buf.Prepend(hello, 5);
		expect.insert(0, "hello");
		check_equals(buf, expect);
	}
	// Randomized block edits: 2000 steps of 0-128 byte payloads, coin-flip
	// between append and prepend, checked once at the end.
	{
		std::mt19937 rng(42);
		std::uniform_int_distribution<int> len_dist(0, 128);
		std::uniform_int_distribution<int> coin(0, 1);
		Buf buf;
		std::string expect;
		for (int step = 0; step < 2000; ++step) {
			const int L = len_dist(rng);
			std::string payload(L, '\0');
			for (int i = 0; i < L; ++i)
				payload[i] = static_cast<char>('a' + (i % 26));
			if (coin(rng)) {
				buf.Append(payload.data(), payload.size());
				expect.append(payload);
			} else {
				buf.Prepend(payload.data(), payload.size());
				expect.insert(0, payload);
			}
		}
		check_equals(buf, expect);
	}
}
// Run the full correctness suite against both buffer implementations.
// Any mismatch aborts via the asserts inside check_equals.
int
main()
{
	run_basic_cases<GapBuffer>();
	run_basic_cases<PieceTable>();
	return 0;
}

View File

@@ -0,0 +1,74 @@
// Verify OptimizedSearch against std::string reference across patterns and sizes
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <random>
#include <string>
#include <vector>
#include "OptimizedSearch.h"
// Reference oracle: all non-overlapping match positions of pat in text,
// computed with std::string::find. Empty patterns yield no matches.
static std::vector<std::size_t>
ref_find_all(const std::string &text, const std::string &pat)
{
	std::vector<std::size_t> hits;
	if (pat.empty())
		return hits;
	for (std::size_t from = 0;;) {
		const auto p = text.find(pat, from);
		if (p == std::string::npos)
			break;
		hits.push_back(p);
		from = p + pat.size(); // skip past the match: non-overlapping
	}
	return hits;
}
static void
run_case(std::size_t textLen, std::size_t patLen, unsigned seed)
{
std::mt19937 rng(seed);
std::uniform_int_distribution<int> dist('a', 'z');
std::string text(textLen, '\0');
for (auto &ch: text)
ch = static_cast<char>(dist(rng));
std::string pat(patLen, '\0');
for (auto &ch: pat)
ch = static_cast<char>(dist(rng));
// Guarantee at least one match when possible
if (textLen >= patLen && patLen > 0) {
std::size_t pos = textLen / 3;
if (pos + patLen <= text.size())
std::copy(pat.begin(), pat.end(), text.begin() + static_cast<long>(pos));
}
OptimizedSearch os;
auto got = os.find_all(text, pat, 0);
auto ref = ref_find_all(text, pat);
assert(got == ref);
}
// Drive run_case across degenerate sizes, a power-of-two grid, and two
// larger random inputs. Failures abort via the assert inside run_case.
int
main()
{
	// Degenerate combinations of empty text / empty pattern.
	run_case(0, 0, 1);
	run_case(0, 1, 2);
	run_case(1, 0, 3);
	run_case(1, 1, 4);
	// Power-of-two grid of text and pattern lengths.
	for (std::size_t textLen = 128; textLen <= 4096; textLen *= 2)
		for (std::size_t patLen = 1; patLen <= 64; patLen *= 2)
			run_case(textLen, patLen, static_cast<unsigned>(textLen + patLen));
	// Larger random inputs.
	run_case(100000, 16, 12345);
	run_case(250000, 32, 67890);
	return 0;
}