// Change summary:
//   - Added highlighters for new languages (SQL, Erlang, Forth) with filetype recognition.
//   - Updated and reorganized syntax files to maintain consistency and modularity.
//   - Introduced LSP transport framing unit tests and JSON decoding/dispatch tests.
//   - Refactored `LspManager`, integrating UTF-16/UTF-8 position conversions and robust diagnostics handling.
//   - Enhanced server start/restart logic with workspace root detection and logging to improve LSP usability.
101 lines
3.1 KiB
C++
101 lines
3.1 KiB
C++
// test_utfcodec.cc - simple tests for UtfCodec helpers
|
||
#include <cassert>
|
||
#include <cstdio>
|
||
#include <string>
|
||
#include <string_view>
|
||
|
||
#include "lsp/UtfCodec.h"
|
||
|
||
using namespace kte::lsp;
|
||
|
||
|
||
// Placeholder line provider: ignores both arguments and always yields an
// empty view. Nothing in the visible part of this file references it, so it is
// marked [[maybe_unused]] to keep -Wunused-function quiet.
[[maybe_unused]] static std::string_view
lp(const std::string &, int)
{
	return std::string_view();
}
|
||
|
||
|
||
int
|
||
main()
|
||
{
|
||
// ASCII: each scalar = 1 UTF-16 unit
|
||
{
|
||
std::string s = "hello"; // 5 ASCII
|
||
assert(utf8ColToUtf16Units(s, 0) == 0);
|
||
assert(utf8ColToUtf16Units(s, 3) == 3);
|
||
assert(utf16UnitsToUtf8Col(s, 3) == 3);
|
||
assert(utf16UnitsToUtf8Col(s, 10) == 5); // clamp to EOL
|
||
}
|
||
|
||
// BMP multibyte (e.g., ü U+00FC, α U+03B1) -> still 1 UTF-16 unit
|
||
{
|
||
std::string s = u8"aüαb"; // bytes: a [C3 BC] [CE B1] b
|
||
// columns by codepoints: a(0), ü(1), α(2), b(3)
|
||
assert(utf8ColToUtf16Units(s, 0) == 0);
|
||
assert(utf8ColToUtf16Units(s, 1) == 1);
|
||
assert(utf8ColToUtf16Units(s, 2) == 2);
|
||
assert(utf8ColToUtf16Units(s, 4) == 4); // past EOL clamps to 4 units
|
||
|
||
assert(utf16UnitsToUtf8Col(s, 0) == 0);
|
||
assert(utf16UnitsToUtf8Col(s, 2) == 2);
|
||
assert(utf16UnitsToUtf8Col(s, 4) == 4);
|
||
}
|
||
|
||
// Non-BMP (emoji) -> 2 UTF-16 units per code point
|
||
{
|
||
std::string s = u8"A😀B"; // U+1F600 between A and B
|
||
// codepoints: A, 😀, B => utf8 columns 0..3
|
||
// utf16 units: A(1), 😀(2), B(1) cumulative: 0,1,3,4
|
||
assert(utf8ColToUtf16Units(s, 0) == 0);
|
||
assert(utf8ColToUtf16Units(s, 1) == 1); // after A
|
||
assert(utf8ColToUtf16Units(s, 2) == 3); // after 😀 (2 units)
|
||
assert(utf8ColToUtf16Units(s, 3) == 4); // after B
|
||
|
||
assert(utf16UnitsToUtf8Col(s, 0) == 0);
|
||
assert(utf16UnitsToUtf8Col(s, 1) == 1); // A
|
||
assert(utf16UnitsToUtf8Col(s, 2) == 1); // mid-surrogate -> stays before 😀
|
||
assert(utf16UnitsToUtf8Col(s, 3) == 2); // end of 😀
|
||
assert(utf16UnitsToUtf8Col(s, 4) == 3); // after B
|
||
assert(utf16UnitsToUtf8Col(s, 10) == 3); // clamp
|
||
}
|
||
|
||
// Invalid UTF-8: treat invalid byte as U+FFFD (1 UTF-16 unit), consume 1 byte
|
||
{
|
||
std::string s;
|
||
s.push_back('X');
|
||
s.push_back(char(0xFF)); // invalid single byte
|
||
s.push_back('Y');
|
||
// Columns by codepoints as we decode: 'X', U+FFFD, 'Y'
|
||
assert(utf8ColToUtf16Units(s, 0) == 0);
|
||
assert(utf8ColToUtf16Units(s, 1) == 1);
|
||
assert(utf8ColToUtf16Units(s, 2) == 2);
|
||
assert(utf8ColToUtf16Units(s, 3) == 3);
|
||
|
||
assert(utf16UnitsToUtf8Col(s, 0) == 0);
|
||
assert(utf16UnitsToUtf8Col(s, 1) == 1);
|
||
assert(utf16UnitsToUtf8Col(s, 2) == 2);
|
||
assert(utf16UnitsToUtf8Col(s, 3) == 3);
|
||
}
|
||
|
||
// Position/Range helpers with a simple provider
|
||
{
|
||
std::string lines[] = {u8"A😀B"};
|
||
LineProvider provider = [&](const std::string &, int line) -> std::string_view {
|
||
return (line == 0) ? std::string_view(lines[0]) : std::string_view();
|
||
};
|
||
Position p8{0, 2}; // after 😀 in utf8 columns
|
||
Position p16 = toUtf16("file:///x", p8, provider);
|
||
assert(p16.line == 0 && p16.character == 3);
|
||
|
||
Position back = toUtf8("file:///x", p16, provider);
|
||
assert(back.line == 0 && back.character == 2);
|
||
|
||
Range r8{{0, 1}, {0, 3}}; // A|😀|B end
|
||
Range r16 = toUtf16("file:///x", r8, provider);
|
||
assert(r16.start.character == 1 && r16.end.character == 4);
|
||
}
|
||
|
||
std::puts("test_utfcodec: OK");
|
||
return 0;
|
||
} |