Add SQL, Erlang, and Forth highlighter implementations and tests for LSP process and transport handling.
- Added highlighters for new languages (SQL, Erlang, Forth) with filetype recognition.
- Updated and reorganized syntax files to maintain consistency and modularity.
- Introduced LSP transport framing unit tests and JSON decoding/dispatch tests.
- Refactored `LspManager`, integrating UTF-16/UTF-8 position conversions and robust diagnostics handling.
- Enhanced server start/restart logic with workspace root detection and logging to improve LSP usability.
This commit is contained in:
279
syntax/CppHighlighter.cc
Normal file
279
syntax/CppHighlighter.cc
Normal file
@@ -0,0 +1,279 @@
|
||||
#include "CppHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
// Locale-independent check for an ASCII decimal digit ('0' through '9').
static bool
is_digit(char c)
{
	return '0' <= c && c <= '9';
}
|
||||
|
||||
|
||||
// Fills the keyword and builtin-type lookup sets consulted when an
// identifier is classified during tokenization.
CppHighlighter::CppHighlighter()
{
	static const char *const kKeywordTable[] = {
		"if", "else", "for", "while", "do", "switch", "case", "default", "break", "continue",
		"return", "goto", "struct", "class", "namespace", "using", "template", "typename",
		"public", "private", "protected", "virtual", "override", "const", "constexpr", "auto",
		"static", "inline", "operator", "new", "delete", "try", "catch", "throw", "friend",
		"enum", "union", "extern", "volatile", "mutable", "noexcept", "sizeof", "this"
	};
	static const char *const kTypeTable[] = {
		"int", "long", "short", "char", "signed", "unsigned", "float", "double", "void",
		"bool", "wchar_t", "size_t", "ptrdiff_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
		"int8_t", "int16_t", "int32_t", "int64_t"
	};

	keywords_.insert(std::begin(kKeywordTable), std::end(kKeywordTable));
	types_.insert(std::begin(kTypeTable), std::end(kTypeTable));
}
|
||||
|
||||
|
||||
bool
|
||||
CppHighlighter::is_ident_start(char c)
|
||||
{
|
||||
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
CppHighlighter::is_ident_char(char c)
|
||||
{
|
||||
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
CppHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
// Stateless entry simply delegates to stateful with a clean previous state
|
||||
StatefulHighlighter::LineState prev;
|
||||
(void) HighlightLineStateful(buf, row, prev, out);
|
||||
}
|
||||
|
||||
|
||||
// Scans a quoted literal whose opening quote is at `open`. Backslash escapes
// are honoured. Returns the index one past the closing quote, or the line
// length when the literal is not terminated on this line.
static int
cpp_scan_quoted(const std::string &s, int open, char quote)
{
	const int n = static_cast<int>(s.size());
	int j = open + 1;
	while (j < n) {
		const char d = s[j++];
		if (d == '\\') {
			// Skip the escaped character, if there is one.
			if (j < n)
				++j;
			continue;
		}
		if (d == quote)
			break;
	}
	return j;
}


// Scans a numeric literal starting at `start` using the preprocessing-number
// shape: alphanumerics, '.', '_', a sign directly after e/E/p/P, and a digit
// separator (') when it is followed by an alphanumeric or '_'.
// Returns the index one past the literal.
static int
cpp_scan_pp_number(const std::string &s, int start)
{
	const int n = static_cast<int>(s.size());
	int j = start + 1;
	while (j < n) {
		const char d = s[j];
		if (std::isalnum(static_cast<unsigned char>(d)) || d == '.' || d == '_') {
			++j;
			continue;
		}
		const char before = s[j - 1];
		if ((d == '+' || d == '-') &&
		    (before == 'e' || before == 'E' || before == 'p' || before == 'P')) {
			++j;
			continue;
		}
		if (d == '\'' && j + 1 < n &&
		    (std::isalnum(static_cast<unsigned char>(s[j + 1])) || s[j + 1] == '_')) {
			++j;
			continue;
		}
		break;
	}
	return j;
}


// Parses the delimiter of a raw string literal. `open_quote` is the index of
// the '"' that follows 'R'. On success stores the delimiter in `delim` and
// returns the index of the first body character (one past '('). Returns -1
// when the text is not a well-formed raw-string opener: the delimiter may not
// contain whitespace, control characters, ')' or '\\', and is at most 16
// characters long.
static int
cpp_raw_string_body_start(const std::string &s, int open_quote, std::string &delim)
{
	const std::size_t kMaxDelimLen = 16;
	const int n = static_cast<int>(s.size());
	delim.clear();
	int k = open_quote + 1;
	while (k < n && s[k] != '(') {
		const unsigned char uc = static_cast<unsigned char>(s[k]);
		if (s[k] == ')' || s[k] == '\\' || std::isspace(uc) || std::iscntrl(uc) ||
		    delim.size() >= kMaxDelimLen)
			return -1;
		delim.push_back(s[k]);
		++k;
	}
	return (k < n) ? k + 1 : -1;
}


// Tokenizes one line of C/C++ and appends the spans to `out`.
//
// buf  - buffer whose Rows() supplies the line text.
// row  - index of the line; out-of-range rows produce no spans.
// prev - state at the end of the previous line (open block comment or open
//        raw string together with its delimiter).
// out  - receives the spans as half-open column ranges [start, end).
//
// Returns the state at the end of this line, to be passed as `prev` for the
// next one. Empty and out-of-range rows return `prev` unchanged.
StatefulHighlighter::LineState
CppHighlighter::HighlightLineStateful(const Buffer &buf,
                                      int row,
                                      const LineState &prev,
                                      std::vector<HighlightSpan> &out) const
{
	const auto &rows = buf.Rows();
	StatefulHighlighter::LineState state = prev;
	if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
		return state;
	const std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
	if (s.empty())
		return state;

	// Appends a span, silently dropping empty ranges.
	auto push = [&out](int a, int b, TokenKind k) {
		if (b > a)
			out.push_back({a, b, k});
	};

	const int n = static_cast<int>(s.size());

	// Column of the first non-blank character; '#' only starts a
	// preprocessor directive when it sits exactly there.
	int bol = 0;
	while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
		++bol;

	int i = 0;

	// Continue a raw string opened on an earlier line.
	if (state.in_raw_string) {
		const std::string needle = ")" + state.raw_delim + "\"";
		const auto pos = s.find(needle);
		if (pos == std::string::npos) {
			push(0, n, TokenKind::String);
			return state;
		}
		i = static_cast<int>(pos + needle.size());
		push(0, i, TokenKind::String);
		state.in_raw_string = false;
		state.raw_delim.clear();
	}

	// Continue a block comment opened on an earlier line.
	if (state.in_block_comment) {
		const auto close = s.find("*/", static_cast<std::size_t>(i));
		if (close == std::string::npos) {
			push(i, n, TokenKind::Comment);
			return state;
		}
		const int end = static_cast<int>(close) + 2;
		push(i, end, TokenKind::Comment);
		i = end;
		state.in_block_comment = false;
	}

	while (i < n) {
		const char c = s[i];

		// Preprocessor directive: '#' as the first non-blank character.
		// The span starts at '#' so it does not overlap the whitespace
		// span already emitted for the indentation.
		if (i == bol && c == '#') {
			push(i, n, TokenKind::Preproc);
			break;
		}

		// Whitespace run
		if (c == ' ' || c == '\t') {
			int j = i + 1;
			while (j < n && (s[j] == ' ' || s[j] == '\t'))
				++j;
			push(i, j, TokenKind::Whitespace);
			i = j;
			continue;
		}

		// Line comment
		if (c == '/' && i + 1 < n && s[i + 1] == '/') {
			push(i, n, TokenKind::Comment);
			break;
		}

		// Block comment; the search starts after "/*" so "/*/" is not
		// mistaken for a closed comment.
		if (c == '/' && i + 1 < n && s[i + 1] == '*') {
			const auto close = s.find("*/", static_cast<std::size_t>(i) + 2);
			if (close == std::string::npos) {
				// Spill to following lines.
				push(i, n, TokenKind::Comment);
				state.in_block_comment = true;
				return state;
			}
			const int end = static_cast<int>(close) + 2;
			push(i, end, TokenKind::Comment);
			i = end;
			continue;
		}

		// Raw string: R"delim( ... )delim"
		if (c == 'R' && i + 1 < n && s[i + 1] == '"') {
			std::string delim;
			const int body_start = cpp_raw_string_body_start(s, i + 1, delim);
			if (body_start >= 0) {
				const std::string needle = ")" + delim + "\"";
				const auto pos = s.find(needle, static_cast<std::size_t>(body_start));
				if (pos == std::string::npos) {
					push(i, n, TokenKind::String);
					state.in_raw_string = true;
					state.raw_delim = delim;
					return state;
				}
				const int end = static_cast<int>(pos + needle.size());
				push(i, end, TokenKind::String);
				i = end;
				continue;
			}
			// Malformed opener: fall through so 'R' is handled as an
			// identifier and the quote as an ordinary string.
		}

		// Regular string literal
		if (c == '"') {
			const int j = cpp_scan_quoted(s, i, '"');
			push(i, j, TokenKind::String);
			i = j;
			continue;
		}

		// Character literal
		if (c == '\'') {
			const int j = cpp_scan_quoted(s, i, '\'');
			push(i, j, TokenKind::Char);
			i = j;
			continue;
		}

		// Numeric literal (also ".5")
		if (is_digit(c) || (c == '.' && i + 1 < n && is_digit(s[i + 1]))) {
			const int j = cpp_scan_pp_number(s, i);
			push(i, j, TokenKind::Number);
			i = j;
			continue;
		}

		// Identifier / keyword / builtin type
		if (is_ident_start(c)) {
			int j = i + 1;
			while (j < n && is_ident_char(s[j]))
				++j;
			const std::string id = s.substr(i, j - i);
			TokenKind k = TokenKind::Identifier;
			if (keywords_.count(id))
				k = TokenKind::Keyword;
			else if (types_.count(id))
				k = TokenKind::Type;
			push(i, j, k);
			i = j;
			continue;
		}

		// Operators and punctuation, one character at a time. A '#' that
		// is not at the start of the line falls through to Default.
		if (std::ispunct(static_cast<unsigned char>(c)) && c != '_' && c != '#') {
			TokenKind kind = TokenKind::Operator;
			if (c == ';' || c == ',' || c == '(' || c == ')' ||
			    c == '{' || c == '}' || c == '[' || c == ']')
				kind = TokenKind::Punctuation;
			push(i, i + 1, kind);
			++i;
			continue;
		}

		// Anything else (e.g. non-ASCII bytes)
		push(i, i + 1, TokenKind::Default);
		++i;
	}

	return state;
}
|
||||
} // namespace kte
|
||||
35
syntax/CppHighlighter.h
Normal file
35
syntax/CppHighlighter.h
Normal file
@@ -0,0 +1,35 @@
|
||||
// CppHighlighter.h - minimal C/C++ line highlighter (stateful: tracks block comments and raw strings across lines)
|
||||
#pragma once
|
||||
|
||||
#include <regex>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
class Buffer;
|
||||
|
||||
namespace kte {
|
||||
class CppHighlighter final : public StatefulHighlighter {
|
||||
public:
|
||||
CppHighlighter();
|
||||
|
||||
~CppHighlighter() override = default;
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
LineState HighlightLineStateful(const Buffer &buf,
|
||||
int row,
|
||||
const LineState &prev,
|
||||
std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> keywords_;
|
||||
std::unordered_set<std::string> types_;
|
||||
|
||||
static bool is_ident_start(char c);
|
||||
|
||||
static bool is_ident_char(char c);
|
||||
};
|
||||
} // namespace kte
|
||||
159
syntax/ErlangHighlighter.cc
Normal file
159
syntax/ErlangHighlighter.cc
Normal file
@@ -0,0 +1,159 @@
|
||||
#include "ErlangHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` can open an atom, variable or quoted atom: a letter,
// an underscore, or a single quote.
static bool
is_ident_start(char c)
{
	if (c == '_' || c == '\'')
		return true;
	return std::isalpha(static_cast<unsigned char>(c)) != 0;
}
|
||||
|
||||
|
||||
// True when `c` may continue an unquoted identifier: alphanumerics plus
// '_', '@', ':' and '?'.
static bool
is_ident_char(char c)
{
	switch (c) {
	case '_':
	case '@':
	case ':':
	case '?':
		return true;
	default:
		return std::isalnum(static_cast<unsigned char>(c)) != 0;
	}
}
|
||||
|
||||
|
||||
// Fills the keyword set: Erlang reserved words plus the common module
// attribute / preprocessor names (module, export, define, ...).
// "or", "bsl" and "bsr" are reserved words that were missing next to their
// siblings ("and", "xor", "band", "bor", ...).
ErlangHighlighter::ErlangHighlighter()
{
	static const char *const kWords[] = {
		"after", "begin", "case", "catch", "cond", "div", "end", "fun", "if", "let", "of",
		"receive", "when", "try", "rem", "and", "andalso", "or", "orelse", "not", "band", "bor",
		"bxor", "bnot", "bsl", "bsr", "xor", "module", "export", "import", "record", "define",
		"undef", "include", "include_lib"
	};
	for (const char *word: kWords)
		kws_.insert(word);
}
|
||||
|
||||
|
||||
void
|
||||
ErlangHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// comment
|
||||
if (c == '%') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
// strings
|
||||
if (c == '"') {
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// char literal $X
|
||||
if (c == '$') {
|
||||
int j = i + 1;
|
||||
if (j < n && s[j] == '\\' && j + 1 < n)
|
||||
j += 2;
|
||||
else if (j < n)
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Char);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// numbers
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '#' || s[j] == '.' ||
|
||||
s[j] == '_'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// atoms/variables/identifiers (including quoted atoms)
|
||||
if (is_ident_start(c)) {
|
||||
// quoted atom: '...'
|
||||
if (c == '\'') {
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (d == '\'') {
|
||||
if (j < n && s[j] == '\'') {
|
||||
++j;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (d == '\\')
|
||||
esc = !esc;
|
||||
}
|
||||
push(out, i, j, TokenKind::Identifier);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
int j = i + 1;
|
||||
while (j < n && is_ident_char(s[j]))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
// lowercase leading -> atom/function/module; uppercase or '_' -> variable
|
||||
TokenKind k = TokenKind::Identifier;
|
||||
// keyword check (lowercase)
|
||||
std::string lower;
|
||||
lower.reserve(id.size());
|
||||
for (char ch: id)
|
||||
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
|
||||
if (kws_.count(lower))
|
||||
k = TokenKind::Keyword;
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Operator;
|
||||
if (c == ',' || c == ';' || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c ==
|
||||
'}')
|
||||
k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
17
syntax/ErlangHighlighter.h
Normal file
17
syntax/ErlangHighlighter.h
Normal file
@@ -0,0 +1,17 @@
|
||||
// ErlangHighlighter.h - simple Erlang highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class ErlangHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
ErlangHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
};
|
||||
} // namespace kte
|
||||
121
syntax/ForthHighlighter.cc
Normal file
121
syntax/ForthHighlighter.cc
Normal file
@@ -0,0 +1,121 @@
|
||||
#include "ForthHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` may continue a Forth word. Forth words are delimited by
// whitespace and may contain any printable character, so every graphic
// character is accepted (this is what lets "cell+", "char+" and "+-" be
// scanned as single words and matched against the keyword table).
// The double quote is excluded so that the text following ." or s" is still
// picked up by the string scanner.
static bool
is_word_char(char c)
{
	if (c == '"')
		return false;
	return std::isgraph(static_cast<unsigned char>(c)) != 0;
}
|
||||
|
||||
|
||||
// Fills the keyword set with core Forth words, grouped by purpose.
// Lookups are done on the lowercased word, so entries are lowercase.
ForthHighlighter::ForthHighlighter()
{
	static const char *const kWords[] = {
		// definitions and control flow
		":", ";", "if", "else", "then", "begin", "until", "while", "repeat",
		"do", "loop", "+loop", "leave", "again", "case", "of", "endof", "endcase",
		// output and input
		".", ".r", ".s", ".\"", ",", "cr", "emit", "type", "key",
		// arithmetic
		"+", "-", "*", "/", "mod", "/mod", "+-", "abs", "min", "max",
		// stack manipulation
		"dup", "drop", "swap", "over", "rot", "-rot", "nip", "tuck", "pick", "roll",
		// bitwise / logic
		"and", "or", "xor", "invert", "lshift", "rshift",
		// defining words and memory
		"variable", "constant", "value", "to", "create", "does>", "allot",
		"cells", "cell+", "chars", "char+",
		// compiler state
		"[", "]", "immediate",
		// string words
		"s\""
	};
	kws_.insert(std::begin(kWords), std::end(kWords));
}
|
||||
|
||||
|
||||
void
|
||||
ForthHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// backslash comment to end of line
|
||||
if (c == '\\') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
// parenthesis comment ( ... ) if at word boundary
|
||||
if (c == '(') {
|
||||
int j = i + 1;
|
||||
while (j < n && s[j] != ')')
|
||||
++j;
|
||||
if (j < n)
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Comment);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// strings: ." ... " and S" ... " and raw "..."
|
||||
if (c == '"') {
|
||||
int j = i + 1;
|
||||
while (j < n && s[j] != '"')
|
||||
++j;
|
||||
if (j < n)
|
||||
++j;
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '#'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// word/identifier
|
||||
if (std::isalpha(static_cast<unsigned char>(c)) || std::ispunct(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && is_word_char(s[j]))
|
||||
++j;
|
||||
std::string w = s.substr(i, j - i);
|
||||
// normalize to lowercase for keyword compare (Forth is case-insensitive typically)
|
||||
std::string lower;
|
||||
lower.reserve(w.size());
|
||||
for (char ch: w)
|
||||
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
|
||||
TokenKind k = kws_.count(lower) ? TokenKind::Keyword : TokenKind::Identifier;
|
||||
// Single-char punctuation fallback
|
||||
if (w.size() == 1 && std::ispunct(static_cast<unsigned char>(w[0])) && !kws_.count(lower)) {
|
||||
k = (w[0] == '(' || w[0] == ')' || w[0] == ',')
|
||||
? TokenKind::Punctuation
|
||||
: TokenKind::Operator;
|
||||
}
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
17
syntax/ForthHighlighter.h
Normal file
17
syntax/ForthHighlighter.h
Normal file
@@ -0,0 +1,17 @@
|
||||
// ForthHighlighter.h - simple Forth highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class ForthHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
ForthHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
};
|
||||
} // namespace kte
|
||||
157
syntax/GoHighlighter.cc
Normal file
157
syntax/GoHighlighter.cc
Normal file
@@ -0,0 +1,157 @@
|
||||
#include "GoHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` may begin an identifier: '_' or a letter per std::isalpha.
static bool
is_ident_start(char c)
{
	const bool letter = std::isalpha(static_cast<unsigned char>(c)) != 0;
	return letter || c == '_';
}
|
||||
|
||||
|
||||
// True when `c` may continue an identifier: '_' or an alphanumeric
// character per std::isalnum.
static bool
is_ident_char(char c)
{
	const bool alnum = std::isalnum(static_cast<unsigned char>(c)) != 0;
	return alnum || c == '_';
}
|
||||
|
||||
|
||||
// Fills the keyword and predeclared-type lookup sets used to classify
// identifiers.
GoHighlighter::GoHighlighter()
{
	static const char *const kKeywordTable[] = {
		"break", "case", "chan", "const", "continue", "default", "defer", "else", "fallthrough", "for", "func",
		"go", "goto", "if", "import", "interface", "map", "package", "range", "return", "select", "struct",
		"switch", "type", "var"
	};
	static const char *const kTypeTable[] = {
		"bool", "byte", "complex64", "complex128", "error", "float32", "float64", "int", "int8", "int16",
		"int32", "int64", "rune", "string", "uint", "uint8", "uint16", "uint32", "uint64", "uintptr"
	};

	kws_.insert(std::begin(kKeywordTable), std::end(kKeywordTable));
	types_.insert(std::begin(kTypeTable), std::end(kTypeTable));
}
|
||||
|
||||
|
||||
void
|
||||
GoHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
int bol = 0;
|
||||
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
|
||||
++bol;
|
||||
// line comment
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
|
||||
int j = i + 2;
|
||||
bool closed = false;
|
||||
while (j + 1 <= n) {
|
||||
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
|
||||
j += 2;
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
++j;
|
||||
}
|
||||
if (!closed) {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
} else {
|
||||
push(out, i, j, TokenKind::Comment);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (c == '"' || c == '`') {
|
||||
char q = c;
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
if (q == '`') {
|
||||
while (j < n && s[j] != '`')
|
||||
++j;
|
||||
if (j < n)
|
||||
++j;
|
||||
} else {
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
}
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'x' ||
|
||||
s[j] == 'X' || s[j] == '_'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (is_ident_start(c)) {
|
||||
int j = i + 1;
|
||||
while (j < n && is_ident_char(s[j]))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
TokenKind k = TokenKind::Identifier;
|
||||
if (kws_.count(id))
|
||||
k = TokenKind::Keyword;
|
||||
else if (types_.count(id))
|
||||
k = TokenKind::Type;
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Operator;
|
||||
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
|
||||
']')
|
||||
k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
18
syntax/GoHighlighter.h
Normal file
18
syntax/GoHighlighter.h
Normal file
@@ -0,0 +1,18 @@
|
||||
// GoHighlighter.h - simple Go highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class GoHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
GoHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
std::unordered_set<std::string> types_;
|
||||
};
|
||||
} // namespace kte
|
||||
209
syntax/HighlighterEngine.cc
Normal file
209
syntax/HighlighterEngine.cc
Normal file
@@ -0,0 +1,209 @@
|
||||
#include "HighlighterEngine.h"
|
||||
#include "../Buffer.h"
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <thread>
|
||||
|
||||
namespace kte {
|
||||
// Default construction: no highlighter installed, empty caches, and no
// background worker (it is started on demand by ensure_worker_started()).
HighlighterEngine::HighlighterEngine() = default;
|
||||
|
||||
|
||||
// Shuts down the background warmer, if one was started, and waits for it
// to finish before the members it uses are destroyed.
HighlighterEngine::~HighlighterEngine()
{
	if (!worker_running_.load())
		return;

	{
		std::lock_guard<std::mutex> guard(mtx_);
		worker_running_.store(false);
		// Pretend there is a request so the wait predicate is satisfied
		// and the worker wakes up to observe the stop flag.
		has_request_ = true;
	}
	cv_.notify_one();

	if (worker_.joinable())
		worker_.join();
}
|
||||
|
||||
|
||||
void
|
||||
HighlighterEngine::SetHighlighter(std::unique_ptr<LanguageHighlighter> hl)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
hl_ = std::move(hl);
|
||||
cache_.clear();
|
||||
state_cache_.clear();
|
||||
state_last_contig_.clear();
|
||||
}
|
||||
|
||||
|
||||
// Returns the highlight spans for `row` at buffer version `buf_version`,
// computing and caching them when the cached entry is missing or stale.
//
// buf         - buffer to tokenize.
// row         - line index.
// buf_version - version stamp of `buf`; a cached entry is reused only when
//               its stamp matches.
//
// The mutex is held for the whole call. This function is invoked both by the
// caller's thread and by the background warmer, and it reads hl_ and writes
// span vectors that live inside cache_; doing that work with the lock
// released lets two threads fill the same entry at once, lets a reader see
// an entry stamped as current while its spans are still being produced, and
// lets SetHighlighter()/InvalidateFrom() pull the highlighter or the entry
// out from under the computation.
//
// The returned reference points into the cache and remains valid until the
// entry is removed by InvalidateFrom() or SetHighlighter().
// NOTE(review): `buf` is read here on whichever thread calls; keeping the
// buffer unchanged while the warmer runs is the caller's responsibility.
const LineHighlight &
HighlighterEngine::GetLine(const Buffer &buf, int row, std::uint64_t buf_version) const
{
	std::lock_guard<std::mutex> lock(mtx_);

	auto hit = cache_.find(row);
	if (hit != cache_.end() && hit->second.version == buf_version)
		return hit->second;

	// Reuse the existing slot (and its capacity) when there is one.
	LineHighlight &slot = cache_[row];
	slot.version = buf_version;
	slot.spans.clear();

	if (!hl_)
		return slot;

	auto *stateful = dynamic_cast<StatefulHighlighter *>(hl_.get());
	if (stateful == nullptr) {
		hl_->HighlightLine(buf, row, slot.spans);
		return slot;
	}

	// Stateful path: resume from the closest earlier row whose end-of-line
	// state is cached for this version, or from the top of the buffer.
	int start_row = -1;
	const StateEntry *resume = nullptr;
	for (const auto &kv: state_cache_) {
		if (kv.first < row && kv.first > start_row && kv.second.version == buf_version) {
			start_row = kv.first;
			resume = &kv.second;
		}
	}

	StatefulHighlighter::LineState cur_state;
	if (resume != nullptr)
		cur_state = resume->state;

	// Walk forward to `row`. Spans of intermediate rows are only needed to
	// advance the state, so they go into a scratch vector.
	std::vector<HighlightSpan> scratch;
	for (int r = start_row + 1; r <= row; ++r) {
		std::vector<HighlightSpan> *dst = &slot.spans;
		if (r != row) {
			scratch.clear();
			dst = &scratch;
		}
		cur_state = stateful->HighlightLineStateful(buf, r, cur_state, *dst);

		StateEntry &entry = state_cache_[r];
		entry.version = buf_version;
		entry.state = cur_state;
	}

	return slot;
}
|
||||
|
||||
|
||||
void
|
||||
HighlighterEngine::InvalidateFrom(int row)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
if (cache_.empty())
|
||||
return;
|
||||
// Simple implementation: erase all rows >= row
|
||||
for (auto it = cache_.begin(); it != cache_.end();) {
|
||||
if (it->first >= row)
|
||||
it = cache_.erase(it);
|
||||
else
|
||||
++it;
|
||||
}
|
||||
if (!state_cache_.empty()) {
|
||||
for (auto it = state_cache_.begin(); it != state_cache_.end();) {
|
||||
if (it->first >= row)
|
||||
it = state_cache_.erase(it);
|
||||
else
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
HighlighterEngine::ensure_worker_started() const
|
||||
{
|
||||
if (worker_running_.load())
|
||||
return;
|
||||
worker_running_.store(true);
|
||||
worker_ = std::thread([this]() {
|
||||
this->worker_loop();
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
HighlighterEngine::worker_loop() const
|
||||
{
|
||||
std::unique_lock<std::mutex> lock(mtx_);
|
||||
while (worker_running_.load()) {
|
||||
cv_.wait(lock, [this]() {
|
||||
return has_request_ || !worker_running_.load();
|
||||
});
|
||||
if (!worker_running_.load())
|
||||
break;
|
||||
WarmRequest req = pending_;
|
||||
has_request_ = false;
|
||||
// Copy locals then release lock while computing
|
||||
lock.unlock();
|
||||
if (req.buf) {
|
||||
int start = std::max(0, req.start_row);
|
||||
int end = std::max(start, req.end_row);
|
||||
for (int r = start; r <= end; ++r) {
|
||||
// Re-check version staleness quickly by peeking cache version; not strictly necessary
|
||||
// Compute line; GetLine is thread-safe
|
||||
(void) this->GetLine(*req.buf, r, req.version);
|
||||
}
|
||||
}
|
||||
lock.lock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
HighlighterEngine::PrefetchViewport(const Buffer &buf, int first_row, int row_count, std::uint64_t buf_version,
|
||||
int warm_margin) const
|
||||
{
|
||||
if (row_count <= 0)
|
||||
return;
|
||||
// Synchronously compute visible rows to ensure cache hits during draw
|
||||
int start = std::max(0, first_row);
|
||||
int end = start + row_count - 1;
|
||||
int max_rows = static_cast<int>(buf.Nrows());
|
||||
if (start >= max_rows)
|
||||
return;
|
||||
if (end >= max_rows)
|
||||
end = max_rows - 1;
|
||||
|
||||
for (int r = start; r <= end; ++r) {
|
||||
(void) GetLine(buf, r, buf_version);
|
||||
}
|
||||
|
||||
// Enqueue background warm-around
|
||||
int warm_start = std::max(0, start - warm_margin);
|
||||
int warm_end = std::min(max_rows - 1, end + warm_margin);
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx_);
|
||||
pending_.buf = &buf;
|
||||
pending_.version = buf_version;
|
||||
pending_.start_row = warm_start;
|
||||
pending_.end_row = warm_end;
|
||||
has_request_ = true;
|
||||
}
|
||||
ensure_worker_started();
|
||||
cv_.notify_one();
|
||||
}
|
||||
} // namespace kte
|
||||
85
syntax/HighlighterEngine.h
Normal file
85
syntax/HighlighterEngine.h
Normal file
@@ -0,0 +1,85 @@
|
||||
// HighlighterEngine.h - caching layer for per-line highlights
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <atomic>
|
||||
#include <thread>
|
||||
|
||||
#include "../Highlight.h"
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
class Buffer;
|
||||
|
||||
namespace kte {
|
||||
class HighlighterEngine {
|
||||
public:
|
||||
HighlighterEngine();
|
||||
|
||||
~HighlighterEngine();
|
||||
|
||||
void SetHighlighter(std::unique_ptr<LanguageHighlighter> hl);
|
||||
|
||||
// Retrieve highlights for a given line and buffer version.
|
||||
// If cache is stale, recompute using the current highlighter.
|
||||
const LineHighlight &GetLine(const Buffer &buf, int row, std::uint64_t buf_version) const;
|
||||
|
||||
// Invalidate cached lines from row (inclusive)
|
||||
void InvalidateFrom(int row);
|
||||
|
||||
|
||||
bool HasHighlighter() const
|
||||
{
|
||||
return static_cast<bool>(hl_);
|
||||
}
|
||||
|
||||
|
||||
// Phase 3: viewport-first prefetch and background warming
|
||||
// Compute only the visible range now, and enqueue a background warm-around task.
|
||||
// warm_margin: how many extra lines above/below to warm in the background.
|
||||
void PrefetchViewport(const Buffer &buf, int first_row, int row_count, std::uint64_t buf_version,
|
||||
int warm_margin = 200) const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<LanguageHighlighter> hl_;
|
||||
// Simple cache by row index (mutable to allow caching in const GetLine)
|
||||
mutable std::unordered_map<int, LineHighlight> cache_;
|
||||
|
||||
// For stateful highlighters, remember per-line state (state after finishing that row)
|
||||
struct StateEntry {
|
||||
std::uint64_t version{0};
|
||||
// Using the interface type; forward-declare via header
|
||||
StatefulHighlighter::LineState state;
|
||||
};
|
||||
|
||||
mutable std::unordered_map<int, StateEntry> state_cache_;
|
||||
|
||||
// Track best known contiguous state row for a given version to avoid O(n) scans
|
||||
mutable std::unordered_map<std::uint64_t, int> state_last_contig_;
|
||||
|
||||
// Thread-safety for caches and background worker state
|
||||
mutable std::mutex mtx_;
|
||||
|
||||
// Background warmer
|
||||
struct WarmRequest {
|
||||
const Buffer *buf{nullptr};
|
||||
std::uint64_t version{0};
|
||||
int start_row{0};
|
||||
int end_row{0}; // inclusive
|
||||
};
|
||||
|
||||
mutable std::condition_variable cv_;
|
||||
mutable std::thread worker_;
|
||||
mutable std::atomic<bool> worker_running_{false};
|
||||
mutable bool has_request_{false};
|
||||
mutable WarmRequest pending_{};
|
||||
|
||||
void ensure_worker_started() const;
|
||||
|
||||
void worker_loop() const;
|
||||
};
|
||||
} // namespace kte
|
||||
247
syntax/HighlighterRegistry.cc
Normal file
247
syntax/HighlighterRegistry.cc
Normal file
@@ -0,0 +1,247 @@
|
||||
#include "HighlighterRegistry.h"
|
||||
#include "CppHighlighter.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <vector>
|
||||
#include <cctype>
|
||||
|
||||
// Forward declare simple highlighters implemented in this project
|
||||
namespace kte {
|
||||
// Registration storage
|
||||
struct RegEntry {
|
||||
std::string ft; // normalized
|
||||
HighlighterRegistry::Factory factory;
|
||||
};
|
||||
|
||||
|
||||
// Accessor for the list of registered factories. The list is a function-local
// static, so it is created the first time this is called.
static std::vector<RegEntry> &
registry()
{
	static std::vector<RegEntry> entries;
	return entries;
}
|
||||
|
||||
|
||||
class JSONHighlighter;
|
||||
class MarkdownHighlighter;
|
||||
class ShellHighlighter;
|
||||
class GoHighlighter;
|
||||
class PythonHighlighter;
|
||||
class RustHighlighter;
|
||||
class LispHighlighter;
|
||||
class SqlHighlighter;
|
||||
class ErlangHighlighter;
|
||||
class ForthHighlighter;
|
||||
}
|
||||
|
||||
// Headers for the above
|
||||
#include "JsonHighlighter.h"
|
||||
#include "MarkdownHighlighter.h"
|
||||
#include "ShellHighlighter.h"
|
||||
#include "GoHighlighter.h"
|
||||
#include "PythonHighlighter.h"
|
||||
#include "RustHighlighter.h"
|
||||
#include "LispHighlighter.h"
|
||||
#include "SqlHighlighter.h"
|
||||
#include "ErlangHighlighter.h"
|
||||
#include "ForthHighlighter.h"
|
||||
|
||||
namespace kte {
|
||||
// Return a copy of `s` with every character passed through std::tolower.
static std::string
to_lower(std::string_view s)
{
	std::string lowered;
	lowered.reserve(s.size());
	for (const char ch: s) {
		// Go through unsigned char: std::tolower is undefined for negative values.
		lowered.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
	}
	return lowered;
}
|
||||
|
||||
|
||||
std::string
|
||||
HighlighterRegistry::Normalize(std::string_view ft)
|
||||
{
|
||||
std::string f = to_lower(ft);
|
||||
if (f == "c" || f == "c++" || f == "cc" || f == "hpp" || f == "hh" || f == "h" || f == "cxx")
|
||||
return "cpp";
|
||||
if (f == "cpp")
|
||||
return "cpp";
|
||||
if (f == "json")
|
||||
return "json";
|
||||
if (f == "markdown" || f == "md" || f == "mkd" || f == "mdown")
|
||||
return "markdown";
|
||||
if (f == "shell" || f == "sh" || f == "bash" || f == "zsh" || f == "ksh" || f == "fish")
|
||||
return "shell";
|
||||
if (f == "go" || f == "golang")
|
||||
return "go";
|
||||
if (f == "py" || f == "python")
|
||||
return "python";
|
||||
if (f == "rs" || f == "rust")
|
||||
return "rust";
|
||||
if (f == "lisp" || f == "scheme" || f == "scm" || f == "rkt" || f == "el" || f == "clj" || f == "cljc" || f ==
|
||||
"cl")
|
||||
return "lisp";
|
||||
if (f == "sql" || f == "sqlite" || f == "sqlite3")
|
||||
return "sql";
|
||||
if (f == "erlang" || f == "erl" || f == "hrl")
|
||||
return "erlang";
|
||||
if (f == "forth" || f == "fth" || f == "4th" || f == "fs")
|
||||
return "forth";
|
||||
return f;
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<LanguageHighlighter>
|
||||
HighlighterRegistry::CreateFor(std::string_view filetype)
|
||||
{
|
||||
std::string ft = Normalize(filetype);
|
||||
// Prefer externally registered factories
|
||||
for (const auto &e: registry()) {
|
||||
if (e.ft == ft && e.factory)
|
||||
return e.factory();
|
||||
}
|
||||
if (ft == "cpp")
|
||||
return std::make_unique<CppHighlighter>();
|
||||
if (ft == "json")
|
||||
return std::make_unique<JSONHighlighter>();
|
||||
if (ft == "markdown")
|
||||
return std::make_unique<MarkdownHighlighter>();
|
||||
if (ft == "shell")
|
||||
return std::make_unique<ShellHighlighter>();
|
||||
if (ft == "go")
|
||||
return std::make_unique<GoHighlighter>();
|
||||
if (ft == "python")
|
||||
return std::make_unique<PythonHighlighter>();
|
||||
if (ft == "rust")
|
||||
return std::make_unique<RustHighlighter>();
|
||||
if (ft == "lisp")
|
||||
return std::make_unique<LispHighlighter>();
|
||||
if (ft == "sql")
|
||||
return std::make_unique<SqlHighlighter>();
|
||||
if (ft == "erlang")
|
||||
return std::make_unique<ErlangHighlighter>();
|
||||
if (ft == "forth")
|
||||
return std::make_unique<ForthHighlighter>();
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
||||
static std::string
|
||||
shebang_to_ft(std::string_view first_line)
|
||||
{
|
||||
if (first_line.size() < 2 || first_line.substr(0, 2) != "#!")
|
||||
return "";
|
||||
std::string low = to_lower(first_line);
|
||||
if (low.find("python") != std::string::npos)
|
||||
return "python";
|
||||
if (low.find("bash") != std::string::npos)
|
||||
return "shell";
|
||||
if (low.find("sh") != std::string::npos)
|
||||
return "shell";
|
||||
if (low.find("zsh") != std::string::npos)
|
||||
return "shell";
|
||||
if (low.find("fish") != std::string::npos)
|
||||
return "shell";
|
||||
if (low.find("scheme") != std::string::npos || low.find("racket") != std::string::npos || low.find("guile") !=
|
||||
std::string::npos)
|
||||
return "lisp";
|
||||
return "";
|
||||
}
|
||||
|
||||
|
||||
std::string
|
||||
HighlighterRegistry::DetectForPath(std::string_view path, std::string_view first_line)
|
||||
{
|
||||
// Extension
|
||||
std::string p(path);
|
||||
std::error_code ec;
|
||||
std::string ext = std::filesystem::path(p).extension().string();
|
||||
for (auto &ch: ext)
|
||||
ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
|
||||
if (!ext.empty()) {
|
||||
if (ext == ".c" || ext == ".cc" || ext == ".cpp" || ext == ".cxx" || ext == ".h" || ext == ".hpp" || ext
|
||||
== ".hh")
|
||||
return "cpp";
|
||||
if (ext == ".json")
|
||||
return "json";
|
||||
if (ext == ".md" || ext == ".markdown" || ext == ".mkd")
|
||||
return "markdown";
|
||||
if (ext == ".sh" || ext == ".bash" || ext == ".zsh" || ext == ".ksh" || ext == ".fish")
|
||||
return "shell";
|
||||
if (ext == ".go")
|
||||
return "go";
|
||||
if (ext == ".py")
|
||||
return "python";
|
||||
if (ext == ".rs")
|
||||
return "rust";
|
||||
if (ext == ".lisp" || ext == ".scm" || ext == ".rkt" || ext == ".el" || ext == ".clj" || ext == ".cljc"
|
||||
|| ext == ".cl")
|
||||
return "lisp";
|
||||
if (ext == ".sql" || ext == ".sqlite")
|
||||
return "sql";
|
||||
if (ext == ".erl" || ext == ".hrl")
|
||||
return "erlang";
|
||||
if (ext == ".forth" || ext == ".fth" || ext == ".4th" || ext == ".fs")
|
||||
return "forth";
|
||||
}
|
||||
// Shebang
|
||||
std::string ft = shebang_to_ft(first_line);
|
||||
return ft;
|
||||
}
|
||||
} // namespace kte
|
||||
|
||||
// Extensibility API implementations
|
||||
namespace kte {
|
||||
void
|
||||
HighlighterRegistry::Register(std::string_view filetype, Factory factory, bool override_existing)
|
||||
{
|
||||
std::string ft = Normalize(filetype);
|
||||
for (auto &e: registry()) {
|
||||
if (e.ft == ft) {
|
||||
if (override_existing)
|
||||
e.factory = std::move(factory);
|
||||
return;
|
||||
}
|
||||
}
|
||||
registry().push_back(RegEntry{ft, std::move(factory)});
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
HighlighterRegistry::IsRegistered(std::string_view filetype)
|
||||
{
|
||||
std::string ft = Normalize(filetype);
|
||||
for (const auto &e: registry())
|
||||
if (e.ft == ft)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
std::vector<std::string>
|
||||
HighlighterRegistry::RegisteredFiletypes()
|
||||
{
|
||||
std::vector<std::string> out;
|
||||
out.reserve(registry().size());
|
||||
for (const auto &e: registry())
|
||||
out.push_back(e.ft);
|
||||
return out;
|
||||
}
|
||||
|
||||
#ifdef KTE_ENABLE_TREESITTER
|
||||
// Forward declare adapter factory
|
||||
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char *filetype,
|
||||
const void * (*get_lang)());
|
||||
|
||||
void
|
||||
HighlighterRegistry::RegisterTreeSitter(std::string_view filetype,
|
||||
const TSLanguage * (*get_language)())
|
||||
{
|
||||
std::string ft = Normalize(filetype);
|
||||
Register(ft, [ft, get_language]() {
|
||||
return CreateTreeSitterHighlighter(ft.c_str(), reinterpret_cast<const void* (*)()>(get_language));
|
||||
}, /*override_existing=*/true);
|
||||
}
|
||||
#endif
|
||||
} // namespace kte
|
||||
47
syntax/HighlighterRegistry.h
Normal file
47
syntax/HighlighterRegistry.h
Normal file
@@ -0,0 +1,47 @@
|
||||
// HighlighterRegistry.h - create/detect language highlighters and allow external registration
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
namespace kte {
|
||||
class HighlighterRegistry {
|
||||
public:
|
||||
using Factory = std::function<std::unique_ptr<LanguageHighlighter>()>;
|
||||
|
||||
// Create a highlighter for normalized filetype id (e.g., "cpp", "json", "markdown", "shell", "go", "python", "rust", "lisp").
|
||||
static std::unique_ptr<LanguageHighlighter> CreateFor(std::string_view filetype);
|
||||
|
||||
// Detect filetype by path extension and shebang (first line).
|
||||
// Returns normalized id or empty string if unknown.
|
||||
static std::string DetectForPath(std::string_view path, std::string_view first_line);
|
||||
|
||||
// Normalize various aliases/extensions to canonical ids.
|
||||
static std::string Normalize(std::string_view ft);
|
||||
|
||||
// Extensibility: allow external code to register highlighters at runtime.
|
||||
// The filetype key is normalized via Normalize(). If a factory is already registered for the
|
||||
// normalized key and override=false, the existing factory is kept.
|
||||
static void Register(std::string_view filetype, Factory factory, bool override_existing = true);
|
||||
|
||||
// Returns true if a factory is registered for the (normalized) filetype.
|
||||
static bool IsRegistered(std::string_view filetype);
|
||||
|
||||
// Return a list of currently registered (normalized) filetypes. Primarily for diagnostics/tests.
|
||||
static std::vector<std::string> RegisteredFiletypes();
|
||||
|
||||
#ifdef KTE_ENABLE_TREESITTER
|
||||
// Forward declaration to avoid hard dependency when disabled.
|
||||
struct TSLanguage;
|
||||
// Convenience: register a Tree-sitter-backed highlighter for a filetype.
|
||||
// The getter should return a non-null language pointer for the grammar.
|
||||
static void RegisterTreeSitter(std::string_view filetype,
|
||||
const TSLanguage * (*get_language)());
|
||||
#endif
|
||||
};
|
||||
} // namespace kte
|
||||
90
syntax/JsonHighlighter.cc
Normal file
90
syntax/JsonHighlighter.cc
Normal file
@@ -0,0 +1,90 @@
|
||||
#include "JsonHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
// True for the ASCII decimal digits '0' through '9'.
static bool
is_digit(char c)
{
	return !(c < '0' || c > '9');
}
|
||||
|
||||
|
||||
void
|
||||
JSONHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
auto push = [&](int a, int b, TokenKind k) {
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
};
|
||||
|
||||
int i = 0;
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (c == '"') {
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
}
|
||||
push(i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (is_digit(c) || (c == '-' && i + 1 < n && is_digit(s[i + 1]))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isdigit(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'e' ||
|
||||
s[j] == 'E' || s[j] == '+' || s[j] == '-' || s[j] == '_'))
|
||||
++j;
|
||||
push(i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// booleans/null
|
||||
if (std::isalpha(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && std::isalpha(static_cast<unsigned char>(s[j])))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
if (id == "true" || id == "false" || id == "null")
|
||||
push(i, j, TokenKind::Constant);
|
||||
else
|
||||
push(i, j, TokenKind::Identifier);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// punctuation
|
||||
if (c == '{' || c == '}' || c == '[' || c == ']' || c == ',' || c == ':') {
|
||||
push(i, i + 1, TokenKind::Punctuation);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
// fallback
|
||||
push(i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
12
syntax/JsonHighlighter.h
Normal file
12
syntax/JsonHighlighter.h
Normal file
@@ -0,0 +1,12 @@
|
||||
// JsonHighlighter.h - simple JSON line highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <vector>
|
||||
|
||||
namespace kte {
|
||||
class JSONHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
};
|
||||
} // namespace kte
|
||||
51
syntax/LanguageHighlighter.h
Normal file
51
syntax/LanguageHighlighter.h
Normal file
@@ -0,0 +1,51 @@
|
||||
// LanguageHighlighter.h - interface for line-based highlighters
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "../Highlight.h"
|
||||
|
||||
class Buffer;
|
||||
|
||||
namespace kte {
|
||||
class LanguageHighlighter {
|
||||
public:
|
||||
virtual ~LanguageHighlighter() = default;
|
||||
|
||||
// Produce highlight spans for a given buffer row. Implementations should append to out.
|
||||
virtual void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const = 0;
|
||||
|
||||
|
||||
virtual bool Stateful() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Optional extension for stateful highlighters (e.g., multi-line comments/strings).
|
||||
// Engines may detect and use this via dynamic_cast without breaking stateless impls.
|
||||
class StatefulHighlighter : public LanguageHighlighter {
|
||||
public:
|
||||
struct LineState {
|
||||
bool in_block_comment{false};
|
||||
bool in_raw_string{false};
|
||||
// For raw strings, remember the delimiter between the opening R"delim( and closing )delim"
|
||||
std::string raw_delim;
|
||||
};
|
||||
|
||||
// Highlight one line given the previous line state; return the resulting state after this line.
|
||||
// Implementations should append spans for this line to out and compute the next state.
|
||||
virtual LineState HighlightLineStateful(const Buffer &buf,
|
||||
int row,
|
||||
const LineState &prev,
|
||||
std::vector<HighlightSpan> &out) const = 0;
|
||||
|
||||
|
||||
bool Stateful() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // namespace kte
|
||||
107
syntax/LispHighlighter.cc
Normal file
107
syntax/LispHighlighter.cc
Normal file
@@ -0,0 +1,107 @@
|
||||
#include "LispHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// Fill the keyword set with the special forms highlighted as keywords.
LispHighlighter::LispHighlighter()
{
	static const char *const kSpecialForms[] = {
		"defun", "lambda", "let", "let*", "define", "set!", "if", "cond", "begin", "quote", "quasiquote",
		"unquote", "unquote-splicing", "loop", "do", "and", "or", "not"
	};
	kws_.insert(std::begin(kSpecialForms), std::end(kSpecialForms));
}
|
||||
|
||||
|
||||
void
|
||||
LispHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
int bol = 0;
|
||||
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
|
||||
++bol;
|
||||
if (bol < n && s[bol] == ';') {
|
||||
push(out, bol, n, TokenKind::Comment);
|
||||
if (bol > 0)
|
||||
push(out, 0, bol, TokenKind::Whitespace);
|
||||
return;
|
||||
}
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (c == ';') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
if (c == '"') {
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isalpha(static_cast<unsigned char>(c)) || c == '*' || c == '-' || c == '+' || c == '/' || c ==
|
||||
'_') {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '*' || s[j] == '-' ||
|
||||
s[j] == '+' || s[j] == '/' || s[j] == '_' || s[j] == '!'))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
TokenKind k = kws_.count(id) ? TokenKind::Keyword : TokenKind::Identifier;
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isdigit(static_cast<unsigned char>(s[j])) || s[j] == '.'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
17
syntax/LispHighlighter.h
Normal file
17
syntax/LispHighlighter.h
Normal file
@@ -0,0 +1,17 @@
|
||||
// LispHighlighter.h - simple Lisp/Scheme family highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class LispHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
LispHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
};
|
||||
} // namespace kte
|
||||
132
syntax/MarkdownHighlighter.cc
Normal file
132
syntax/MarkdownHighlighter.cc
Normal file
@@ -0,0 +1,132 @@
|
||||
#include "MarkdownHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push_span(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
MarkdownHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
LineState st; // not used in stateless entry
|
||||
(void) HighlightLineStateful(buf, row, st, out);
|
||||
}
|
||||
|
||||
|
||||
// Highlight one Markdown row given the state left by the previous row.
// The `in_block_comment` flag of LineState is reused to mean "inside a fenced
// code block". Appends spans to `out` and returns the state after this row;
// a row index outside the buffer returns `prev` unchanged with no spans.
StatefulHighlighter::LineState
MarkdownHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
                                           std::vector<HighlightSpan> &out) const
{
	StatefulHighlighter::LineState state = prev;
	const auto &rows = buf.Rows();
	if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
		return state;
	const std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
	const int n = static_cast<int>(s.size());

	const auto is_blank = [](char ch) {
		return ch == ' ' || ch == '\t';
	};

	// Inside a fenced code block: everything up to and including a closing
	// ``` is String; whatever follows the fence is Default.
	if (state.in_block_comment) {
		const auto fence = s.find("```");
		if (fence == std::string::npos) {
			push_span(out, 0, n, TokenKind::String);
			return state; // still inside the fence
		}
		const int end = static_cast<int>(fence + 3);
		push_span(out, 0, end, TokenKind::String);
		push_span(out, end, n, TokenKind::Default);
		state.in_block_comment = false;
		return state;
	}

	// Skip leading blanks to find the first significant column.
	int bol = 0;
	while (bol < n && is_blank(s[bol]))
		++bol;

	// Fenced code block start.
	if (bol + 3 <= n && s.compare(bol, 3, "```") == 0) {
		push_span(out, bol, n, TokenKind::String);
		state.in_block_comment = true; // enter fenced mode
		return state;
	}

	// ATX heading: 1-6 '#' followed by a blank or end of line. Anything else
	// starting with '#' (e.g. "#hashtag" or seven hashes) is ordinary text and
	// falls through to the inline scanner below.
	if (bol < n && s[bol] == '#') {
		int j = bol;
		while (j < n && s[j] == '#')
			++j;
		const int level = j - bol;
		if (level <= 6 && (j == n || is_blank(s[j]))) {
			// Hashes and heading text share one Keyword span so the line stands out.
			push_span(out, bol, n, TokenKind::Keyword);
			return state;
		}
	}

	// Inline elements: code spans, emphasis, links, whitespace.
	int i = 0;
	while (i < n) {
		const char c = s[i];
		if (c == '`') {
			// Code span: up to the next backtick, or end of line if unterminated.
			int j = i + 1;
			while (j < n && s[j] != '`')
				++j;
			if (j < n)
				++j;
			push_span(out, i, j, TokenKind::String);
			i = j;
			continue;
		}
		if (c == '*' || c == '_') {
			// Emphasis: from the marker through the next identical marker.
			const char m = c;
			int j = i + 1;
			while (j < n && s[j] != m)
				++j;
			if (j < n)
				++j;
			push_span(out, i, j, TokenKind::Type);
			i = j;
			continue;
		}
		if (c == '[') {
			// Link: [text] optionally followed directly by (url).
			int j = i + 1;
			while (j < n && s[j] != ']')
				++j;
			if (j < n)
				++j; // include ]
			if (j < n && s[j] == '(') {
				while (j < n && s[j] != ')')
					++j;
				if (j < n)
					++j; // include )
			}
			push_span(out, i, j, TokenKind::Function);
			i = j;
			continue;
		}
		if (is_blank(c)) {
			int j = i + 1;
			while (j < n && is_blank(s[j]))
				++j;
			push_span(out, i, j, TokenKind::Whitespace);
			i = j;
			continue;
		}
		// Fallback: one Default span per character.
		push_span(out, i, i + 1, TokenKind::Default);
		++i;
	}
	return state;
}
|
||||
} // namespace kte
|
||||
14
syntax/MarkdownHighlighter.h
Normal file
14
syntax/MarkdownHighlighter.h
Normal file
@@ -0,0 +1,14 @@
|
||||
// MarkdownHighlighter.h - simple Markdown highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
namespace kte {
|
||||
class MarkdownHighlighter final : public StatefulHighlighter {
|
||||
public:
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
LineState HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
|
||||
std::vector<HighlightSpan> &out) const override;
|
||||
};
|
||||
} // namespace kte
|
||||
17
syntax/NullHighlighter.cc
Normal file
17
syntax/NullHighlighter.cc
Normal file
@@ -0,0 +1,17 @@
|
||||
#include "NullHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
|
||||
namespace kte {
|
||||
void
|
||||
NullHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
if (n <= 0)
|
||||
return;
|
||||
out.push_back({0, n, TokenKind::Default});
|
||||
}
|
||||
} // namespace kte
|
||||
11
syntax/NullHighlighter.h
Normal file
11
syntax/NullHighlighter.h
Normal file
@@ -0,0 +1,11 @@
|
||||
// NullHighlighter.h - default highlighter that emits a single Default span per line
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
namespace kte {
|
||||
class NullHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
};
|
||||
} // namespace kte
|
||||
172
syntax/PythonHighlighter.cc
Normal file
172
syntax/PythonHighlighter.cc
Normal file
@@ -0,0 +1,172 @@
|
||||
#include "PythonHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` may begin an identifier: a letter or an underscore.
static bool
is_ident_start(char c)
{
	if (c == '_')
		return true;
	return std::isalpha(static_cast<unsigned char>(c)) != 0;
}
|
||||
|
||||
|
||||
// True when `c` may continue an identifier: a letter, a digit or an underscore.
static bool
is_ident_char(char c)
{
	if (c == '_')
		return true;
	return std::isalnum(static_cast<unsigned char>(c)) != 0;
}
|
||||
|
||||
|
||||
// Fill the keyword set with Python's reserved words, including the
// coroutine keywords `async` and `await`.
PythonHighlighter::PythonHighlighter()
{
	static const char *const kKeywords[] = {
		"and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif",
		"else", "except", "False", "finally", "for", "from", "global", "if", "import", "in", "is",
		"lambda", "None", "nonlocal", "not", "or", "pass", "raise", "return", "True", "try", "while",
		"with", "yield"
	};
	for (const char *word: kKeywords)
		kws_.insert(word);
}
|
||||
|
||||
|
||||
void
|
||||
PythonHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
LineState st;
|
||||
(void) HighlightLineStateful(buf, row, st, out);
|
||||
}
|
||||
|
||||
|
||||
// Highlight one Python row given the state left by the previous row.
// An open triple-quoted string is tracked through LineState::in_raw_string,
// with raw_delim holding the delimiter (''' or """). Appends spans to `out`
// left to right and returns the state after this row; a row index outside the
// buffer returns `prev` unchanged with no spans.
StatefulHighlighter::LineState
PythonHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
                                         std::vector<HighlightSpan> &out) const
{
	StatefulHighlighter::LineState state = prev;
	const auto &rows = buf.Rows();
	if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
		return state;
	const std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
	const int n = static_cast<int>(s.size());

	int i = 0;

	// Continuation of a triple-quoted string opened on an earlier row.
	if (state.in_raw_string && (state.raw_delim == "'''" || state.raw_delim == "\"\"\"")) {
		const auto close = s.find(state.raw_delim);
		if (close == std::string::npos) {
			push(out, 0, n, TokenKind::String);
			return state; // still inside
		}
		i = static_cast<int>(close + state.raw_delim.size());
		push(out, 0, i, TokenKind::String);
		state.in_raw_string = false;
		state.raw_delim.clear();
		// Fall through: the rest of the row is tokenized normally from `i`, so a
		// trailing comment or a newly opened triple-quoted string is recognised.
	}

	while (i < n) {
		const char c = s[i];
		if (c == ' ' || c == '\t') {
			int j = i + 1;
			while (j < n && (s[j] == ' ' || s[j] == '\t'))
				++j;
			push(out, i, j, TokenKind::Whitespace);
			i = j;
			continue;
		}
		if (c == '#') {
			// Comment: runs to end of line. Strings are consumed whole below,
			// so a '#' inside a string never reaches this branch.
			push(out, i, n, TokenKind::Comment);
			break;
		}
		if (c == '"' || c == '\'') {
			const char q = c;
			if (i + 2 < n && s[i + 1] == q && s[i + 2] == q) {
				// Triple-quoted string: may close on this row or stay open.
				const std::string delim(3, q);
				const auto close = s.find(delim, static_cast<std::size_t>(i + 3));
				if (close == std::string::npos) {
					push(out, i, n, TokenKind::String);
					state.in_raw_string = true;
					state.raw_delim = delim;
					return state;
				}
				const int end = static_cast<int>(close + 3);
				push(out, i, end, TokenKind::String);
				i = end;
				continue;
			}
			// Single-line string: runs to the matching quote, or to end of line.
			int j = i + 1;
			bool esc = false;
			while (j < n) {
				const char d = s[j++];
				if (esc) {
					esc = false;
					continue;
				}
				if (d == '\\') {
					esc = true;
					continue;
				}
				if (d == q)
					break;
			}
			push(out, i, j, TokenKind::String);
			i = j;
			continue;
		}
		if (std::isdigit(static_cast<unsigned char>(c))) {
			// Number: digits plus letters, '.' and '_' (covers 0x1F, 1e9, 1_000).
			int j = i + 1;
			while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
				++j;
			push(out, i, j, TokenKind::Number);
			i = j;
			continue;
		}
		if (is_ident_start(c)) {
			int j = i + 1;
			while (j < n && is_ident_char(s[j]))
				++j;
			const std::string id = s.substr(i, j - i);
			const TokenKind k = kws_.count(id) ? TokenKind::Keyword : TokenKind::Identifier;
			push(out, i, j, k);
			i = j;
			continue;
		}
		if (std::ispunct(static_cast<unsigned char>(c))) {
			TokenKind k = TokenKind::Operator;
			if (c == ':' || c == ',' || c == '(' || c == ')' || c == '[' || c == ']')
				k = TokenKind::Punctuation;
			push(out, i, i + 1, k);
			++i;
			continue;
		}
		push(out, i, i + 1, TokenKind::Default);
		++i;
	}
	return state;
}
|
||||
} // namespace kte
|
||||
20
syntax/PythonHighlighter.h
Normal file
20
syntax/PythonHighlighter.h
Normal file
@@ -0,0 +1,20 @@
|
||||
// PythonHighlighter.h - simple Python highlighter with triple-quote state
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class PythonHighlighter final : public StatefulHighlighter {
|
||||
public:
|
||||
PythonHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
LineState HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
|
||||
std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
};
|
||||
} // namespace kte
|
||||
145
syntax/RustHighlighter.cc
Normal file
145
syntax/RustHighlighter.cc
Normal file
@@ -0,0 +1,145 @@
|
||||
#include "RustHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` may begin an identifier: an alphabetic character or an underscore.
static bool
is_ident_start(char c)
{
	if (c == '_')
		return true;
	// Go through unsigned char so bytes >= 0x80 are never passed to isalpha as negatives.
	const unsigned char uc = static_cast<unsigned char>(c);
	return std::isalpha(uc) != 0;
}
|
||||
|
||||
|
||||
// True when `c` may continue an identifier: an alphanumeric character or an underscore.
static bool
is_ident_char(char c)
{
	if (c == '_')
		return true;
	// Go through unsigned char so bytes >= 0x80 are never passed to isalnum as negatives.
	const unsigned char uc = static_cast<unsigned char>(c);
	return std::isalnum(uc) != 0;
}
|
||||
|
||||
|
||||
// Populates the two lookup tables used by HighlightLine: reserved words (reported as
// keywords) and built-in primitive type names (reported as types).
RustHighlighter::RustHighlighter()
{
	const char *const reserved_words[] = {
		"as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for", "if",
		"impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self",
		"static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "dyn", "async",
		"await", "try"
	};
	const char *const primitive_types[] = {
		"u8", "u16", "u32", "u64", "u128", "usize", "i8", "i16", "i32", "i64", "i128", "isize", "f32", "f64",
		"bool", "char", "str"
	};

	for (const char *word: reserved_words)
		kws_.emplace(word);
	for (const char *name: primitive_types)
		types_.emplace(name);
}
|
||||
|
||||
|
||||
void
|
||||
RustHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
|
||||
int j = i + 2;
|
||||
bool closed = false;
|
||||
while (j + 1 <= n) {
|
||||
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
|
||||
j += 2;
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
++j;
|
||||
}
|
||||
if (!closed) {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
} else {
|
||||
push(out, i, j, TokenKind::Comment);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (c == '"') {
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (is_ident_start(c)) {
|
||||
int j = i + 1;
|
||||
while (j < n && is_ident_char(s[j]))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
TokenKind k = TokenKind::Identifier;
|
||||
if (kws_.count(id))
|
||||
k = TokenKind::Keyword;
|
||||
else if (types_.count(id))
|
||||
k = TokenKind::Type;
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Operator;
|
||||
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
|
||||
']')
|
||||
k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
18
syntax/RustHighlighter.h
Normal file
18
syntax/RustHighlighter.h
Normal file
@@ -0,0 +1,18 @@
|
||||
// RustHighlighter.h - simple Rust highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class RustHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
RustHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
std::unordered_set<std::string> types_;
|
||||
};
|
||||
} // namespace kte
|
||||
105
syntax/ShellHighlighter.cc
Normal file
105
syntax/ShellHighlighter.cc
Normal file
@@ -0,0 +1,105 @@
|
||||
#include "ShellHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ShellHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
// if first non-space is '#', whole line is comment
|
||||
int bol = 0;
|
||||
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
|
||||
++bol;
|
||||
if (bol < n && s[bol] == '#') {
|
||||
push(out, bol, n, TokenKind::Comment);
|
||||
if (bol > 0)
|
||||
push(out, 0, bol, TokenKind::Whitespace);
|
||||
return;
|
||||
}
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (c == '#') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
if (c == '\'' || c == '"') {
|
||||
char q = c;
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (q == '"') {
|
||||
if (esc) {
|
||||
esc = false;
|
||||
continue;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = true;
|
||||
continue;
|
||||
}
|
||||
if (d == '"')
|
||||
break;
|
||||
} else {
|
||||
if (d == '\'')
|
||||
break;
|
||||
}
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// simple keywords
|
||||
if (std::isalpha(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '_'))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
static const char *kws[] = {
|
||||
"if", "then", "fi", "for", "in", "do", "done", "case", "esac", "while", "function",
|
||||
"elif", "else"
|
||||
};
|
||||
bool kw = false;
|
||||
for (auto k: kws)
|
||||
if (id == k) {
|
||||
kw = true;
|
||||
break;
|
||||
}
|
||||
push(out, i, j, kw ? TokenKind::Keyword : TokenKind::Identifier);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Operator;
|
||||
if (c == '(' || c == ')' || c == '{' || c == '}' || c == ',' || c == ';')
|
||||
k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
11
syntax/ShellHighlighter.h
Normal file
11
syntax/ShellHighlighter.h
Normal file
@@ -0,0 +1,11 @@
|
||||
// ShellHighlighter.h - simple POSIX shell highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
namespace kte {
|
||||
class ShellHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
};
|
||||
} // namespace kte
|
||||
156
syntax/SqlHighlighter.cc
Normal file
156
syntax/SqlHighlighter.cc
Normal file
@@ -0,0 +1,156 @@
|
||||
#include "SqlHighlighter.h"
|
||||
#include "../Buffer.h"
|
||||
#include <cctype>
|
||||
|
||||
namespace kte {
|
||||
static void
|
||||
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
|
||||
{
|
||||
if (b > a)
|
||||
out.push_back({a, b, k});
|
||||
}
|
||||
|
||||
|
||||
// True when `c` may begin an SQL identifier: an alphabetic character or an underscore.
static bool
is_ident_start(char c)
{
	if (c == '_')
		return true;
	// Go through unsigned char so bytes >= 0x80 are never passed to isalpha as negatives.
	const unsigned char uc = static_cast<unsigned char>(c);
	return std::isalpha(uc) != 0;
}
|
||||
|
||||
|
||||
// True when `c` may continue an SQL identifier: an alphanumeric character, an
// underscore or a dollar sign.
static bool
is_ident_char(char c)
{
	switch (c) {
	case '_':
	case '$':
		return true;
	default:
		// Go through unsigned char so bytes >= 0x80 are never passed to isalnum as negatives.
		return std::isalnum(static_cast<unsigned char>(c)) != 0;
	}
}
|
||||
|
||||
|
||||
// Populates the keyword and type-name tables. All entries are lower case because
// HighlightLine lower-cases an identifier before looking it up.
SqlHighlighter::SqlHighlighter()
{
	const char *const reserved_words[] = {
		"select", "insert", "update", "delete", "from", "where", "group", "by", "order", "limit",
		"offset", "values", "into", "create", "table", "index", "unique", "on", "as", "and", "or",
		"not", "null", "is", "primary", "key", "constraint", "foreign", "references", "drop", "alter",
		"add", "column", "rename", "to", "if", "exists", "join", "left", "right", "inner", "outer",
		"cross", "using", "set", "distinct", "having", "union", "all", "case", "when", "then", "else",
		"end", "pragma", "transaction", "begin", "commit", "rollback", "replace"
	};
	const char *const type_names[] = {
		"integer", "real", "text", "blob", "numeric", "boolean", "date", "datetime"
	};

	for (const char *word: reserved_words)
		kws_.emplace(word);
	for (const char *name: type_names)
		types_.emplace(name);
}
|
||||
|
||||
|
||||
void
|
||||
SqlHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
|
||||
{
|
||||
const auto &rows = buf.Rows();
|
||||
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
|
||||
return;
|
||||
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
|
||||
int n = static_cast<int>(s.size());
|
||||
int i = 0;
|
||||
|
||||
while (i < n) {
|
||||
char c = s[i];
|
||||
if (c == ' ' || c == '\t') {
|
||||
int j = i + 1;
|
||||
while (j < n && (s[j] == ' ' || s[j] == '\t'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Whitespace);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
// line comments: -- ...
|
||||
if (c == '-' && i + 1 < n && s[i + 1] == '-') {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
}
|
||||
// simple block comment on same line: /* ... */
|
||||
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
|
||||
int j = i + 2;
|
||||
bool closed = false;
|
||||
while (j + 1 <= n) {
|
||||
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
|
||||
j += 2;
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
++j;
|
||||
}
|
||||
if (!closed) {
|
||||
push(out, i, n, TokenKind::Comment);
|
||||
break;
|
||||
} else {
|
||||
push(out, i, j, TokenKind::Comment);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// strings: '...' or "..."
|
||||
if (c == '\'' || c == '"') {
|
||||
char q = c;
|
||||
int j = i + 1;
|
||||
bool esc = false;
|
||||
while (j < n) {
|
||||
char d = s[j++];
|
||||
if (d == q) {
|
||||
// Handle doubled quote escaping for SQL single quotes
|
||||
if (q == '\'' && j < n && s[j] == '\'') {
|
||||
++j;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (d == '\\') {
|
||||
esc = !esc;
|
||||
} else {
|
||||
esc = false;
|
||||
}
|
||||
}
|
||||
push(out, i, j, TokenKind::String);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::isdigit(static_cast<unsigned char>(c))) {
|
||||
int j = i + 1;
|
||||
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
|
||||
++j;
|
||||
push(out, i, j, TokenKind::Number);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (is_ident_start(c)) {
|
||||
int j = i + 1;
|
||||
while (j < n && is_ident_char(s[j]))
|
||||
++j;
|
||||
std::string id = s.substr(i, j - i);
|
||||
std::string lower;
|
||||
lower.reserve(id.size());
|
||||
for (char ch: id)
|
||||
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
|
||||
TokenKind k = TokenKind::Identifier;
|
||||
if (kws_.count(lower))
|
||||
k = TokenKind::Keyword;
|
||||
else if (types_.count(lower))
|
||||
k = TokenKind::Type;
|
||||
push(out, i, j, k);
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
if (std::ispunct(static_cast<unsigned char>(c))) {
|
||||
TokenKind k = TokenKind::Operator;
|
||||
if (c == ',' || c == ';' || c == '(' || c == ')')
|
||||
k = TokenKind::Punctuation;
|
||||
push(out, i, i + 1, k);
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
push(out, i, i + 1, TokenKind::Default);
|
||||
++i;
|
||||
}
|
||||
}
|
||||
} // namespace kte
|
||||
18
syntax/SqlHighlighter.h
Normal file
18
syntax/SqlHighlighter.h
Normal file
@@ -0,0 +1,18 @@
|
||||
// SqlHighlighter.h - simple SQL/SQLite highlighter
|
||||
#pragma once
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
#include <unordered_set>
|
||||
|
||||
namespace kte {
|
||||
class SqlHighlighter final : public LanguageHighlighter {
|
||||
public:
|
||||
SqlHighlighter();
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
std::unordered_set<std::string> kws_;
|
||||
std::unordered_set<std::string> types_;
|
||||
};
|
||||
} // namespace kte
|
||||
51
syntax/TreeSitterHighlighter.cc
Normal file
51
syntax/TreeSitterHighlighter.cc
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "../TreeSitterHighlighter.h"
|
||||
|
||||
#ifdef KTE_ENABLE_TREESITTER
|
||||
|
||||
#include "Buffer.h"
|
||||
#include <utility>
|
||||
|
||||
namespace kte {
|
||||
TreeSitterHighlighter::TreeSitterHighlighter(const TSLanguage *lang, std::string filetype)
|
||||
: language_(lang), filetype_(std::move(filetype)) {}
|
||||
|
||||
|
||||
// Hands cleanup to disposeParser().
TreeSitterHighlighter::~TreeSitterHighlighter()
{
	this->disposeParser();
}
|
||||
|
||||
|
||||
void
|
||||
TreeSitterHighlighter::ensureParsed(const Buffer & /*buf*/) const
|
||||
{
|
||||
// Intentionally a stub to avoid pulling the Tree-sitter API and library by default.
|
||||
// In future, when linking against tree-sitter, initialize parser_, set language_,
|
||||
// and build tree_ from the buffer contents.
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
TreeSitterHighlighter::disposeParser() const
|
||||
{
|
||||
// Stub; nothing to dispose when not actually creating parser/tree
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
TreeSitterHighlighter::HighlightLine(const Buffer &/*buf*/, int /*row*/, std::vector<HighlightSpan> &/*out*/) const
|
||||
{
|
||||
// For now, no-op. When tree-sitter is wired, map nodes to TokenKind spans per line.
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<LanguageHighlighter>
|
||||
CreateTreeSitterHighlighter(const char *filetype,
|
||||
const void * (*get_lang)())
|
||||
{
|
||||
const auto *lang = reinterpret_cast<const TSLanguage *>(get_lang ? get_lang() : nullptr);
|
||||
return std::make_unique < TreeSitterHighlighter > (lang, filetype ? std::string(filetype) : std::string());
|
||||
}
|
||||
} // namespace kte
|
||||
|
||||
#endif // KTE_ENABLE_TREESITTER
|
||||
48
syntax/TreeSitterHighlighter.h
Normal file
48
syntax/TreeSitterHighlighter.h
Normal file
@@ -0,0 +1,48 @@
|
||||
// TreeSitterHighlighter.h - optional adapter for Tree-sitter (behind KTE_ENABLE_TREESITTER)
|
||||
#pragma once
|
||||
|
||||
#ifdef KTE_ENABLE_TREESITTER
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "LanguageHighlighter.h"
|
||||
|
||||
// Forward-declare Tree-sitter C API to avoid hard coupling in headers if includes are not present
|
||||
extern "C" {
|
||||
struct TSLanguage;
|
||||
struct TSParser;
|
||||
struct TSTree;
|
||||
}
|
||||
|
||||
namespace kte {
|
||||
// A minimal adapter that uses Tree-sitter to parse the whole buffer and then, for now,
|
||||
// does very limited token classification. This acts as a scaffold for future richer
|
||||
// queries. If no queries are provided, it currently produces no spans (safe fallback).
|
||||
class TreeSitterHighlighter : public LanguageHighlighter {
|
||||
public:
|
||||
explicit TreeSitterHighlighter(const TSLanguage *lang, std::string filetype);
|
||||
|
||||
~TreeSitterHighlighter() override;
|
||||
|
||||
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
|
||||
|
||||
private:
|
||||
const TSLanguage *language_{nullptr};
|
||||
std::string filetype_;
|
||||
// Lazy parser to avoid startup cost; mutable to allow creation in const method
|
||||
mutable TSParser *parser_{nullptr};
|
||||
mutable TSTree *tree_{nullptr};
|
||||
|
||||
void ensureParsed(const Buffer &buf) const;
|
||||
|
||||
void disposeParser() const;
|
||||
};
|
||||
|
||||
// Factory used by HighlighterRegistry when registering via RegisterTreeSitter.
|
||||
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char *filetype,
|
||||
const void * (*get_lang)());
|
||||
} // namespace kte
|
||||
|
||||
#endif // KTE_ENABLE_TREESITTER
|
||||
Reference in New Issue
Block a user