Add SQL, Erlang, and Forth highlighter implementations and tests for LSP process and transport handling.

- Added highlighters for new languages (SQL, Erlang, Forth) with filetype recognition.
- Updated and reorganized syntax files to maintain consistency and modularity.
- Introduced LSP transport framing unit tests and JSON decoding/dispatch tests.
- Refactored `LspManager`, integrating UTF-16/UTF-8 position conversions and robust diagnostics handling.
- Enhanced server start/restart logic with workspace root detection and logging to improve LSP usability.
This commit is contained in:
2025-12-02 00:15:15 -08:00
parent e089c6e4d1
commit 33bbb5b98f
68 changed files with 29571 additions and 945 deletions

101
test_utfcodec.cc Normal file
View File

@@ -0,0 +1,101 @@
// test_utfcodec.cc - simple tests for UtfCodec helpers
#include <cassert>
#include <cstdio>
#include <string>
#include <string_view>
#include "lsp/UtfCodec.h"
using namespace kte::lsp;
// Stub line lookup: both arguments are ignored and the result is always an
// empty view, regardless of the URI or line number passed in.
static std::string_view
lp(const std::string & /*uri*/, int /*line*/)
{
	return {};
}
// Test driver for the UtfCodec helpers. Each braced scope below is an
// independent scenario: ASCII, BMP multibyte, non-BMP (surrogate pairs),
// invalid UTF-8, and the Position/Range conversion wrappers.
//
// Returns 0 and prints "test_utfcodec: OK" when every assertion holds.
// NOTE: the checks rely on assert(), so they are compiled out if NDEBUG is
// defined; build this test without NDEBUG.
int
main()
{
	// ASCII: each scalar = 1 UTF-16 unit
	{
		std::string s = "hello"; // 5 ASCII
		assert(utf8ColToUtf16Units(s, 0) == 0);
		assert(utf8ColToUtf16Units(s, 3) == 3);
		assert(utf16UnitsToUtf8Col(s, 3) == 3);
		assert(utf16UnitsToUtf8Col(s, 10) == 5); // clamp to EOL
	}
	// BMP multibyte (e.g., ü U+00FC, α U+03B1) -> still 1 UTF-16 unit
	{
		// Written with explicit byte escapes so the content cannot be
		// altered by source-encoding mishaps. The literal is split before
		// 'b' because a hex escape is greedy and would otherwise swallow
		// the 'b' as another hex digit.
		std::string s = "a\xC3\xBC\xCE\xB1" "b"; // bytes: a [C3 BC] [CE B1] b
		// columns by codepoints: a(0), ü(1), α(2), b(3)
		assert(utf8ColToUtf16Units(s, 0) == 0);
		assert(utf8ColToUtf16Units(s, 1) == 1);
		assert(utf8ColToUtf16Units(s, 2) == 2);
		assert(utf8ColToUtf16Units(s, 4) == 4); // EOL (4 code points) -> 4 units
		assert(utf16UnitsToUtf8Col(s, 0) == 0);
		assert(utf16UnitsToUtf8Col(s, 2) == 2);
		assert(utf16UnitsToUtf8Col(s, 4) == 4);
	}
	// Non-BMP (emoji) -> 2 UTF-16 units per code point
	{
		std::string s = u8"A😀B"; // U+1F600 between A and B
		// codepoints: A, 😀, B => utf8 columns 0..3
		// utf16 units: A(1), 😀(2), B(1) cumulative: 0,1,3,4
		assert(utf8ColToUtf16Units(s, 0) == 0);
		assert(utf8ColToUtf16Units(s, 1) == 1); // after A
		assert(utf8ColToUtf16Units(s, 2) == 3); // after 😀 (2 units)
		assert(utf8ColToUtf16Units(s, 3) == 4); // after B
		assert(utf16UnitsToUtf8Col(s, 0) == 0);
		assert(utf16UnitsToUtf8Col(s, 1) == 1); // A
		assert(utf16UnitsToUtf8Col(s, 2) == 1); // mid-surrogate -> stays before 😀
		assert(utf16UnitsToUtf8Col(s, 3) == 2); // end of 😀
		assert(utf16UnitsToUtf8Col(s, 4) == 3); // after B
		assert(utf16UnitsToUtf8Col(s, 10) == 3); // clamp
	}
	// Invalid UTF-8: treat invalid byte as U+FFFD (1 UTF-16 unit), consume 1 byte
	{
		std::string s;
		s.push_back('X');
		s.push_back(char(0xFF)); // invalid single byte
		s.push_back('Y');
		// Columns by codepoints as we decode: 'X', U+FFFD, 'Y'
		assert(utf8ColToUtf16Units(s, 0) == 0);
		assert(utf8ColToUtf16Units(s, 1) == 1);
		assert(utf8ColToUtf16Units(s, 2) == 2);
		assert(utf8ColToUtf16Units(s, 3) == 3);
		assert(utf16UnitsToUtf8Col(s, 0) == 0);
		assert(utf16UnitsToUtf8Col(s, 1) == 1);
		assert(utf16UnitsToUtf8Col(s, 2) == 2);
		assert(utf16UnitsToUtf8Col(s, 3) == 3);
	}
	// Position/Range helpers with a simple provider
	{
		std::string lines[] = {u8"A😀B"};
		// The provider serves the single test line for line 0 and an empty
		// view for any other line; the URI argument is ignored.
		LineProvider provider = [&](const std::string &, int line) -> std::string_view {
			return (line == 0) ? std::string_view(lines[0]) : std::string_view();
		};
		Position p8{0, 2}; // after 😀 in utf8 columns
		Position p16 = toUtf16("file:///x", p8, provider);
		assert(p16.line == 0 && p16.character == 3);
		// Round trip back to UTF-8 columns must land on the same position.
		Position back = toUtf8("file:///x", p16, provider);
		assert(back.line == 0 && back.character == 2);
		Range r8{{0, 1}, {0, 3}}; // A|😀|B end
		Range r16 = toUtf16("file:///x", r8, provider);
		assert(r16.start.character == 1 && r16.end.character == 4);
	}
	std::puts("test_utfcodec: OK");
	return 0;
}