LSP integration steps 1-4, part of 5.

This commit is contained in:
2025-12-01 20:09:49 -08:00
parent ceef6af3ae
commit e089c6e4d1
56 changed files with 3685 additions and 1638 deletions

View File

@@ -3,168 +3,277 @@
#include <cctype>
namespace kte {
static bool
is_digit(char c)
{
return c >= '0' && c <= '9';
}
static bool is_digit(char c) { return c >= '0' && c <= '9'; }
CppHighlighter::CppHighlighter()
{
const char *kw[] = {
"if","else","for","while","do","switch","case","default","break","continue",
"return","goto","struct","class","namespace","using","template","typename",
"public","private","protected","virtual","override","const","constexpr","auto",
"static","inline","operator","new","delete","try","catch","throw","friend",
"enum","union","extern","volatile","mutable","noexcept","sizeof","this"
};
for (auto s: kw) keywords_.insert(s);
const char *types[] = {
"int","long","short","char","signed","unsigned","float","double","void",
"bool","wchar_t","size_t","ptrdiff_t","uint8_t","uint16_t","uint32_t","uint64_t",
"int8_t","int16_t","int32_t","int64_t"
};
for (auto s: types) types_.insert(s);
const char *kw[] = {
"if", "else", "for", "while", "do", "switch", "case", "default", "break", "continue",
"return", "goto", "struct", "class", "namespace", "using", "template", "typename",
"public", "private", "protected", "virtual", "override", "const", "constexpr", "auto",
"static", "inline", "operator", "new", "delete", "try", "catch", "throw", "friend",
"enum", "union", "extern", "volatile", "mutable", "noexcept", "sizeof", "this"
};
for (auto s: kw)
keywords_.insert(s);
const char *types[] = {
"int", "long", "short", "char", "signed", "unsigned", "float", "double", "void",
"bool", "wchar_t", "size_t", "ptrdiff_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
"int8_t", "int16_t", "int32_t", "int64_t"
};
for (auto s: types)
types_.insert(s);
}
bool CppHighlighter::is_ident_start(char c) { return std::isalpha(static_cast<unsigned char>(c)) || c == '_'; }
bool CppHighlighter::is_ident_char(char c) { return std::isalnum(static_cast<unsigned char>(c)) || c == '_'; }
void CppHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
bool
CppHighlighter::is_ident_start(char c)
{
// Stateless entry simply delegates to stateful with a clean previous state
StatefulHighlighter::LineState prev;
(void)HighlightLineStateful(buf, row, prev, out);
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
StatefulHighlighter::LineState CppHighlighter::HighlightLineStateful(const Buffer &buf,
int row,
const LineState &prev,
std::vector<HighlightSpan> &out) const
bool
CppHighlighter::is_ident_char(char c)
{
const auto &rows = buf.Rows();
StatefulHighlighter::LineState state = prev;
if (row < 0 || static_cast<std::size_t>(row) >= rows.size()) return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
if (s.empty()) return state;
auto push = [&](int a, int b, TokenKind k){ if (b> a) out.push_back({a,b,k}); };
int n = static_cast<int>(s.size());
int bol = 0; while (bol < n && (s[bol] == ' ' || s[bol] == '\t')) ++bol;
int i = 0;
// Continue multi-line raw string from previous line
if (state.in_raw_string) {
std::string needle = ")" + state.raw_delim + "\"";
auto pos = s.find(needle);
if (pos == std::string::npos) {
push(0, n, TokenKind::String);
state.in_raw_string = true;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(0, end, TokenKind::String);
i = end;
state.in_raw_string = false;
state.raw_delim.clear();
}
}
// Continue multi-line block comment from previous line
if (state.in_block_comment) {
int j = i;
while (i + 1 < n) {
if (s[i] == '*' && s[i+1] == '/') { i += 2; push(j, i, TokenKind::Comment); state.in_block_comment = false; break; }
++i;
}
if (state.in_block_comment) { push(j, n, TokenKind::Comment); return state; }
}
while (i < n) {
char c = s[i];
// Preprocessor at beginning of line (after leading whitespace)
if (i == bol && c == '#') { push(0, n, TokenKind::Preproc); break; }
// Whitespace
if (c == ' ' || c == '\t') {
int j = i+1; while (j < n && (s[j] == ' ' || s[j] == '\t')) ++j; push(i,j,TokenKind::Whitespace); i=j; continue;
}
// Line comment
if (c == '/' && i+1 < n && s[i+1] == '/') { push(i, n, TokenKind::Comment); break; }
// Block comment
if (c == '/' && i+1 < n && s[i+1] == '*') {
int j = i+2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j+1] == '/') { j += 2; closed = true; break; }
++j;
}
if (closed) { push(i, j, TokenKind::Comment); i = j; continue; }
// Spill to next lines
push(i, n, TokenKind::Comment);
state.in_block_comment = true;
return state;
}
// Raw string start: very simple detection: R"delim(
if (c == 'R' && i+1 < n && s[i+1] == '"') {
int k = i + 2;
std::string delim;
while (k < n && s[k] != '(') { delim.push_back(s[k]); ++k; }
if (k < n && s[k] == '(') {
int body_start = k + 1;
std::string needle = ")" + delim + "\"";
auto pos = s.find(needle, static_cast<std::size_t>(body_start));
if (pos == std::string::npos) {
push(i, n, TokenKind::String);
state.in_raw_string = true;
state.raw_delim = delim;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(i, end, TokenKind::String);
i = end;
continue;
}
}
// If malformed, just treat 'R' as identifier fallback
}
// Regular string literal
if (c == '"') {
int j = i+1; bool esc=false; while (j < n) { char d = s[j++]; if (esc) { esc=false; continue; } if (d == '\\') { esc=true; continue; } if (d == '"') break; }
push(i, j, TokenKind::String); i = j; continue;
}
// Char literal
if (c == '\'') {
int j = i+1; bool esc=false; while (j < n) { char d = s[j++]; if (esc) { esc=false; continue; } if (d == '\\') { esc=true; continue; } if (d == '\'') break; }
push(i, j, TokenKind::Char); i = j; continue;
}
// Number literal (simple)
if (is_digit(c) || (c == '.' && i+1 < n && is_digit(s[i+1]))) {
int j = i+1; while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j]=='.' || s[j]=='x' || s[j]=='X' || s[j]=='b' || s[j]=='B' || s[j]=='_')) ++j;
push(i, j, TokenKind::Number); i = j; continue;
}
// Identifier / keyword / type
if (is_ident_start(c)) {
int j = i+1; while (j < n && is_ident_char(s[j])) ++j; std::string id = s.substr(i, j-i);
TokenKind k = TokenKind::Identifier; if (keywords_.count(id)) k = TokenKind::Keyword; else if (types_.count(id)) k = TokenKind::Type; push(i, j, k); i = j; continue;
}
// Operators and punctuation (single char for now)
TokenKind kind = TokenKind::Operator;
if (std::ispunct(static_cast<unsigned char>(c)) && c != '_' && c != '#') {
if (c==';' || c==',' || c=='(' || c==')' || c=='{' || c=='}' || c=='[' || c==']') kind = TokenKind::Punctuation;
push(i, i+1, kind); ++i; continue;
}
// Fallback
push(i, i+1, TokenKind::Default); ++i;
}
return state;
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
} // namespace kte
void
CppHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
// Stateless entry simply delegates to stateful with a clean previous state
StatefulHighlighter::LineState prev;
(void) HighlightLineStateful(buf, row, prev, out);
}
StatefulHighlighter::LineState
CppHighlighter::HighlightLineStateful(const Buffer &buf,
int row,
const LineState &prev,
std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
StatefulHighlighter::LineState state = prev;
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
if (s.empty())
return state;
auto push = [&](int a, int b, TokenKind k) {
if (b > a)
out.push_back({a, b, k});
};
int n = static_cast<int>(s.size());
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
int i = 0;
// Continue multi-line raw string from previous line
if (state.in_raw_string) {
std::string needle = ")" + state.raw_delim + "\"";
auto pos = s.find(needle);
if (pos == std::string::npos) {
push(0, n, TokenKind::String);
state.in_raw_string = true;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(0, end, TokenKind::String);
i = end;
state.in_raw_string = false;
state.raw_delim.clear();
}
}
// Continue multi-line block comment from previous line
if (state.in_block_comment) {
int j = i;
while (i + 1 < n) {
if (s[i] == '*' && s[i + 1] == '/') {
i += 2;
push(j, i, TokenKind::Comment);
state.in_block_comment = false;
break;
}
++i;
}
if (state.in_block_comment) {
push(j, n, TokenKind::Comment);
return state;
}
}
while (i < n) {
char c = s[i];
// Preprocessor at beginning of line (after leading whitespace)
if (i == bol && c == '#') {
push(0, n, TokenKind::Preproc);
break;
}
// Whitespace
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(i, j, TokenKind::Whitespace);
i = j;
continue;
}
// Line comment
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
push(i, n, TokenKind::Comment);
break;
}
// Block comment
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
int j = i + 2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
j += 2;
closed = true;
break;
}
++j;
}
if (closed) {
push(i, j, TokenKind::Comment);
i = j;
continue;
}
// Spill to next lines
push(i, n, TokenKind::Comment);
state.in_block_comment = true;
return state;
}
// Raw string start: very simple detection: R"delim(
if (c == 'R' && i + 1 < n && s[i + 1] == '"') {
int k = i + 2;
std::string delim;
while (k < n && s[k] != '(') {
delim.push_back(s[k]);
++k;
}
if (k < n && s[k] == '(') {
int body_start = k + 1;
std::string needle = ")" + delim + "\"";
auto pos = s.find(needle, static_cast<std::size_t>(body_start));
if (pos == std::string::npos) {
push(i, n, TokenKind::String);
state.in_raw_string = true;
state.raw_delim = delim;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(i, end, TokenKind::String);
i = end;
continue;
}
}
// If malformed, just treat 'R' as identifier fallback
}
// Regular string literal
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(i, j, TokenKind::String);
i = j;
continue;
}
// Char literal
if (c == '\'') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '\'')
break;
}
push(i, j, TokenKind::Char);
i = j;
continue;
}
// Number literal (simple)
if (is_digit(c) || (c == '.' && i + 1 < n && is_digit(s[i + 1]))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'x' ||
s[j] == 'X' || s[j] == 'b' || s[j] == 'B' || s[j] == '_'))
++j;
push(i, j, TokenKind::Number);
i = j;
continue;
}
// Identifier / keyword / type
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = TokenKind::Identifier;
if (keywords_.count(id))
k = TokenKind::Keyword;
else if (types_.count(id))
k = TokenKind::Type;
push(i, j, k);
i = j;
continue;
}
// Operators and punctuation (single char for now)
TokenKind kind = TokenKind::Operator;
if (std::ispunct(static_cast<unsigned char>(c)) && c != '_' && c != '#') {
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
']')
kind = TokenKind::Punctuation;
push(i, i + 1, kind);
++i;
continue;
}
// Fallback
push(i, i + 1, TokenKind::Default);
++i;
}
return state;
}
} // namespace kte