Files
kte/test_utfcodec.cc
Kyle Isom 33bbb5b98f Add SQL, Erlang, and Forth highlighter implementations and tests for LSP process and transport handling.
- Added highlighters for new languages (SQL, Erlang, Forth) with filetype recognition.
- Updated and reorganized syntax files to maintain consistency and modularity.
- Introduced LSP transport framing unit tests and JSON decoding/dispatch tests.
- Refactored `LspManager`, integrating UTF-16/UTF-8 position conversions and robust diagnostics handling.
- Enhanced server start/restart logic with workspace root detection and logging to improve LSP usability.
2025-12-02 00:15:15 -08:00

101 lines
3.1 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// test_utfcodec.cc - simple tests for UtfCodec helpers
#include <cassert>
#include <cstdio>
#include <string>
#include <string_view>
#include "lsp/UtfCodec.h"
using namespace kte::lsp;
// Stub line provider: both arguments are ignored and the result is always
// an empty (default-constructed) view.
static std::string_view
lp(const std::string & /*unused*/, int /*unused*/)
{
    return {};
}
// Test driver for the UtfCodec helpers.
//
// Each scoped section builds one line of text and checks the conversions
// between UTF-8 columns (counted in code points) and UTF-16 code units.
// Non-ASCII test strings are spelled as explicit UTF-8 byte escapes so the
// bytes under test do not depend on the source-file encoding and the
// literals stay plain `char` arrays (a `u8"..."` literal cannot initialise a
// std::string from C++20 on).
//
// NOTE: the checks use assert(), so they are compiled out when NDEBUG is
// defined; build this test without NDEBUG.
//
// Returns 0 after printing an OK banner; a failed assertion aborts.
int
main()
{
    // ASCII: each scalar = 1 UTF-16 unit
    {
        const std::string s = "hello"; // 5 ASCII
        assert(utf8ColToUtf16Units(s, 0) == 0);
        assert(utf8ColToUtf16Units(s, 3) == 3);
        assert(utf16UnitsToUtf8Col(s, 3) == 3);
        assert(utf16UnitsToUtf8Col(s, 10) == 5); // clamp to EOL
    }
    // BMP multibyte (e.g., U+00FC, U+03B1) -> still 1 UTF-16 unit each
    {
        // "a" U+00FC U+03B1 "b"; bytes: a [C3 BC] [CE B1] b.
        // The literal is split before "b" because a hex escape would
        // otherwise swallow the following hex digit.
        const std::string s = "a\xC3\xBC\xCE\xB1" "b";
        // columns by codepoints: a(0), U+00FC(1), U+03B1(2), b(3), EOL(4)
        assert(utf8ColToUtf16Units(s, 0) == 0);
        assert(utf8ColToUtf16Units(s, 1) == 1);
        assert(utf8ColToUtf16Units(s, 2) == 2);
        assert(utf8ColToUtf16Units(s, 4) == 4); // end of line: 4 code points -> 4 units
        assert(utf16UnitsToUtf8Col(s, 0) == 0);
        assert(utf16UnitsToUtf8Col(s, 2) == 2);
        assert(utf16UnitsToUtf8Col(s, 4) == 4);
    }
    // Non-BMP (emoji) -> 2 UTF-16 units per code point
    {
        // "A" U+1F600 "B"; U+1F600 encodes as F0 9F 98 80 in UTF-8.
        const std::string s = "A\xF0\x9F\x98\x80" "B";
        // codepoints: A, U+1F600, B => utf8 columns 0..3
        // utf16 units: A(1), U+1F600(2), B(1); cumulative: 0,1,3,4
        assert(utf8ColToUtf16Units(s, 0) == 0);
        assert(utf8ColToUtf16Units(s, 1) == 1); // after A
        assert(utf8ColToUtf16Units(s, 2) == 3); // after the emoji (2 units)
        assert(utf8ColToUtf16Units(s, 3) == 4); // after B
        assert(utf16UnitsToUtf8Col(s, 0) == 0);
        assert(utf16UnitsToUtf8Col(s, 1) == 1);  // A
        assert(utf16UnitsToUtf8Col(s, 2) == 1);  // mid-surrogate -> stays before the emoji
        assert(utf16UnitsToUtf8Col(s, 3) == 2);  // end of the emoji
        assert(utf16UnitsToUtf8Col(s, 4) == 3);  // after B
        assert(utf16UnitsToUtf8Col(s, 10) == 3); // clamp
    }
    // Invalid UTF-8: treat invalid byte as U+FFFD (1 UTF-16 unit), consume 1 byte
    {
        std::string s;
        s.push_back('X');
        s.push_back(char(0xFF)); // invalid single byte
        s.push_back('Y');
        // Columns by codepoints as we decode: 'X', U+FFFD, 'Y'
        assert(utf8ColToUtf16Units(s, 0) == 0);
        assert(utf8ColToUtf16Units(s, 1) == 1);
        assert(utf8ColToUtf16Units(s, 2) == 2);
        assert(utf8ColToUtf16Units(s, 3) == 3);
        assert(utf16UnitsToUtf8Col(s, 0) == 0);
        assert(utf16UnitsToUtf8Col(s, 1) == 1);
        assert(utf16UnitsToUtf8Col(s, 2) == 2);
        assert(utf16UnitsToUtf8Col(s, 3) == 3);
    }
    // Position/Range helpers with a simple provider
    {
        // Single-line document: "A" U+1F600 "B".
        std::string lines[] = {"A\xF0\x9F\x98\x80" "B"};
        // Serves line 0 from `lines`; any other line number yields an empty view.
        LineProvider provider = [&](const std::string &, int line) -> std::string_view {
            return (line == 0) ? std::string_view(lines[0]) : std::string_view();
        };
        Position p8{0, 2}; // after the emoji in utf8 columns
        Position p16 = toUtf16("file:///x", p8, provider);
        assert(p16.line == 0 && p16.character == 3);
        Position back = toUtf8("file:///x", p16, provider);
        assert(back.line == 0 && back.character == 2);
        Range r8{{0, 1}, {0, 3}}; // from after A to after B
        Range r16 = toUtf16("file:///x", r8, provider);
        assert(r16.start.character == 1 && r16.end.character == 4);
    }
    std::puts("test_utfcodec: OK");
    return 0;
}