Add SQL, Erlang, and Forth highlighter implementations and tests for LSP process and transport handling.

- Added highlighters for new languages (SQL, Erlang, Forth) with filetype recognition.
- Updated and reorganized syntax files to maintain consistency and modularity.
- Introduced LSP transport framing unit tests and JSON decoding/dispatch tests.
- Refactored `LspManager`, integrating UTF-16/UTF-8 position conversions and robust diagnostics handling.
- Enhanced server start/restart logic with workspace root detection and logging to improve LSP usability.
This commit is contained in:
2025-12-02 00:15:15 -08:00
parent e089c6e4d1
commit 33bbb5b98f
68 changed files with 29571 additions and 945 deletions

279
syntax/CppHighlighter.cc Normal file
View File

@@ -0,0 +1,279 @@
#include "CppHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static bool
is_digit(char c)
{
return c >= '0' && c <= '9';
}
CppHighlighter::CppHighlighter()
{
const char *kw[] = {
"if", "else", "for", "while", "do", "switch", "case", "default", "break", "continue",
"return", "goto", "struct", "class", "namespace", "using", "template", "typename",
"public", "private", "protected", "virtual", "override", "const", "constexpr", "auto",
"static", "inline", "operator", "new", "delete", "try", "catch", "throw", "friend",
"enum", "union", "extern", "volatile", "mutable", "noexcept", "sizeof", "this"
};
for (auto s: kw)
keywords_.insert(s);
const char *types[] = {
"int", "long", "short", "char", "signed", "unsigned", "float", "double", "void",
"bool", "wchar_t", "size_t", "ptrdiff_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t",
"int8_t", "int16_t", "int32_t", "int64_t"
};
for (auto s: types)
types_.insert(s);
}
bool
CppHighlighter::is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
bool
CppHighlighter::is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
void
CppHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
// Stateless entry simply delegates to stateful with a clean previous state
StatefulHighlighter::LineState prev;
(void) HighlightLineStateful(buf, row, prev, out);
}
StatefulHighlighter::LineState
CppHighlighter::HighlightLineStateful(const Buffer &buf,
int row,
const LineState &prev,
std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
StatefulHighlighter::LineState state = prev;
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
if (s.empty())
return state;
auto push = [&](int a, int b, TokenKind k) {
if (b > a)
out.push_back({a, b, k});
};
int n = static_cast<int>(s.size());
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
int i = 0;
// Continue multi-line raw string from previous line
if (state.in_raw_string) {
std::string needle = ")" + state.raw_delim + "\"";
auto pos = s.find(needle);
if (pos == std::string::npos) {
push(0, n, TokenKind::String);
state.in_raw_string = true;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(0, end, TokenKind::String);
i = end;
state.in_raw_string = false;
state.raw_delim.clear();
}
}
// Continue multi-line block comment from previous line
if (state.in_block_comment) {
int j = i;
while (i + 1 < n) {
if (s[i] == '*' && s[i + 1] == '/') {
i += 2;
push(j, i, TokenKind::Comment);
state.in_block_comment = false;
break;
}
++i;
}
if (state.in_block_comment) {
push(j, n, TokenKind::Comment);
return state;
}
}
while (i < n) {
char c = s[i];
// Preprocessor at beginning of line (after leading whitespace)
if (i == bol && c == '#') {
push(0, n, TokenKind::Preproc);
break;
}
// Whitespace
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(i, j, TokenKind::Whitespace);
i = j;
continue;
}
// Line comment
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
push(i, n, TokenKind::Comment);
break;
}
// Block comment
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
int j = i + 2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
j += 2;
closed = true;
break;
}
++j;
}
if (closed) {
push(i, j, TokenKind::Comment);
i = j;
continue;
}
// Spill to next lines
push(i, n, TokenKind::Comment);
state.in_block_comment = true;
return state;
}
// Raw string start: very simple detection: R"delim(
if (c == 'R' && i + 1 < n && s[i + 1] == '"') {
int k = i + 2;
std::string delim;
while (k < n && s[k] != '(') {
delim.push_back(s[k]);
++k;
}
if (k < n && s[k] == '(') {
int body_start = k + 1;
std::string needle = ")" + delim + "\"";
auto pos = s.find(needle, static_cast<std::size_t>(body_start));
if (pos == std::string::npos) {
push(i, n, TokenKind::String);
state.in_raw_string = true;
state.raw_delim = delim;
return state;
} else {
int end = static_cast<int>(pos + needle.size());
push(i, end, TokenKind::String);
i = end;
continue;
}
}
// If malformed, just treat 'R' as identifier fallback
}
// Regular string literal
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(i, j, TokenKind::String);
i = j;
continue;
}
// Char literal
if (c == '\'') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '\'')
break;
}
push(i, j, TokenKind::Char);
i = j;
continue;
}
// Number literal (simple)
if (is_digit(c) || (c == '.' && i + 1 < n && is_digit(s[i + 1]))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'x' ||
s[j] == 'X' || s[j] == 'b' || s[j] == 'B' || s[j] == '_'))
++j;
push(i, j, TokenKind::Number);
i = j;
continue;
}
// Identifier / keyword / type
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = TokenKind::Identifier;
if (keywords_.count(id))
k = TokenKind::Keyword;
else if (types_.count(id))
k = TokenKind::Type;
push(i, j, k);
i = j;
continue;
}
// Operators and punctuation (single char for now)
TokenKind kind = TokenKind::Operator;
if (std::ispunct(static_cast<unsigned char>(c)) && c != '_' && c != '#') {
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
']')
kind = TokenKind::Punctuation;
push(i, i + 1, kind);
++i;
continue;
}
// Fallback
push(i, i + 1, TokenKind::Default);
++i;
}
return state;
}
} // namespace kte

35
syntax/CppHighlighter.h Normal file
View File

@@ -0,0 +1,35 @@
// CppHighlighter.h - minimal stateless C/C++ line highlighter
#pragma once
#include <regex>
#include <string>
#include <unordered_set>
#include <vector>
#include "LanguageHighlighter.h"
class Buffer;
namespace kte {
class CppHighlighter final : public StatefulHighlighter {
public:
CppHighlighter();
~CppHighlighter() override = default;
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
LineState HighlightLineStateful(const Buffer &buf,
int row,
const LineState &prev,
std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> keywords_;
std::unordered_set<std::string> types_;
static bool is_ident_start(char c);
static bool is_ident_char(char c);
};
} // namespace kte

159
syntax/ErlangHighlighter.cc Normal file
View File

@@ -0,0 +1,159 @@
#include "ErlangHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_' || c == '\'';
}
static bool
is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '@' || c == ':' || c == '?';
}
ErlangHighlighter::ErlangHighlighter()
{
const char *kw[] = {
"after", "begin", "case", "catch", "cond", "div", "end", "fun", "if", "let", "of",
"receive", "when", "try", "rem", "and", "andalso", "orelse", "not", "band", "bor", "bxor",
"bnot", "xor", "module", "export", "import", "record", "define", "undef", "include", "include_lib"
};
for (auto s: kw)
kws_.insert(s);
}
void
ErlangHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
// comment
if (c == '%') {
push(out, i, n, TokenKind::Comment);
break;
}
// strings
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
// char literal $X
if (c == '$') {
int j = i + 1;
if (j < n && s[j] == '\\' && j + 1 < n)
j += 2;
else if (j < n)
++j;
push(out, i, j, TokenKind::Char);
i = j;
continue;
}
// numbers
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '#' || s[j] == '.' ||
s[j] == '_'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
// atoms/variables/identifiers (including quoted atoms)
if (is_ident_start(c)) {
// quoted atom: '...'
if (c == '\'') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (d == '\'') {
if (j < n && s[j] == '\'') {
++j;
continue;
}
break;
}
if (d == '\\')
esc = !esc;
}
push(out, i, j, TokenKind::Identifier);
i = j;
continue;
}
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
// lowercase leading -> atom/function/module; uppercase or '_' -> variable
TokenKind k = TokenKind::Identifier;
// keyword check (lowercase)
std::string lower;
lower.reserve(id.size());
for (char ch: id)
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
if (kws_.count(lower))
k = TokenKind::Keyword;
push(out, i, j, k);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == ',' || c == ';' || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c ==
'}')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

View File

@@ -0,0 +1,17 @@
// ErlangHighlighter.h - simple Erlang highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class ErlangHighlighter final : public LanguageHighlighter {
public:
ErlangHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
};
} // namespace kte

121
syntax/ForthHighlighter.cc Normal file
View File

@@ -0,0 +1,121 @@
#include "ForthHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_word_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '>' || c == '<' || c == '?';
}
ForthHighlighter::ForthHighlighter()
{
const char *kw[] = {
":", ";", "if", "else", "then", "begin", "until", "while", "repeat",
"do", "loop", "+loop", "leave", "again", "case", "of", "endof", "endcase",
".", ".r", ".s", ".\"", ",", "cr", "emit", "type", "key",
"+", "-", "*", "/", "mod", "/mod", "+-", "abs", "min", "max",
"dup", "drop", "swap", "over", "rot", "-rot", "nip", "tuck", "pick", "roll",
"and", "or", "xor", "invert", "lshift", "rshift",
"variable", "constant", "value", "to", "create", "does>", "allot", ",",
"cells", "cell+", "chars", "char+",
"[", "]", "immediate",
"s\"", ".\""
};
for (auto s: kw)
kws_.insert(s);
}
void
ForthHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
// backslash comment to end of line
if (c == '\\') {
push(out, i, n, TokenKind::Comment);
break;
}
// parenthesis comment ( ... ) if at word boundary
if (c == '(') {
int j = i + 1;
while (j < n && s[j] != ')')
++j;
if (j < n)
++j;
push(out, i, j, TokenKind::Comment);
i = j;
continue;
}
// strings: ." ... " and S" ... " and raw "..."
if (c == '"') {
int j = i + 1;
while (j < n && s[j] != '"')
++j;
if (j < n)
++j;
push(out, i, j, TokenKind::String);
i = j;
continue;
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '#'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
// word/identifier
if (std::isalpha(static_cast<unsigned char>(c)) || std::ispunct(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && is_word_char(s[j]))
++j;
std::string w = s.substr(i, j - i);
// normalize to lowercase for keyword compare (Forth is case-insensitive typically)
std::string lower;
lower.reserve(w.size());
for (char ch: w)
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
TokenKind k = kws_.count(lower) ? TokenKind::Keyword : TokenKind::Identifier;
// Single-char punctuation fallback
if (w.size() == 1 && std::ispunct(static_cast<unsigned char>(w[0])) && !kws_.count(lower)) {
k = (w[0] == '(' || w[0] == ')' || w[0] == ',')
? TokenKind::Punctuation
: TokenKind::Operator;
}
push(out, i, j, k);
i = j;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

17
syntax/ForthHighlighter.h Normal file
View File

@@ -0,0 +1,17 @@
// ForthHighlighter.h - simple Forth highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class ForthHighlighter final : public LanguageHighlighter {
public:
ForthHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
};
} // namespace kte

157
syntax/GoHighlighter.cc Normal file
View File

@@ -0,0 +1,157 @@
#include "GoHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
static bool
is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
GoHighlighter::GoHighlighter()
{
const char *kw[] = {
"break", "case", "chan", "const", "continue", "default", "defer", "else", "fallthrough", "for", "func",
"go", "goto", "if", "import", "interface", "map", "package", "range", "return", "select", "struct",
"switch", "type", "var"
};
for (auto s: kw)
kws_.insert(s);
const char *tp[] = {
"bool", "byte", "complex64", "complex128", "error", "float32", "float64", "int", "int8", "int16",
"int32", "int64", "rune", "string", "uint", "uint8", "uint16", "uint32", "uint64", "uintptr"
};
for (auto s: tp)
types_.insert(s);
}
void
GoHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
// line comment
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
push(out, i, n, TokenKind::Comment);
break;
}
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
int j = i + 2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
j += 2;
closed = true;
break;
}
++j;
}
if (!closed) {
push(out, i, n, TokenKind::Comment);
break;
} else {
push(out, i, j, TokenKind::Comment);
i = j;
continue;
}
}
if (c == '"' || c == '`') {
char q = c;
int j = i + 1;
bool esc = false;
if (q == '`') {
while (j < n && s[j] != '`')
++j;
if (j < n)
++j;
} else {
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'x' ||
s[j] == 'X' || s[j] == '_'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = TokenKind::Identifier;
if (kws_.count(id))
k = TokenKind::Keyword;
else if (types_.count(id))
k = TokenKind::Type;
push(out, i, j, k);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
']')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

18
syntax/GoHighlighter.h Normal file
View File

@@ -0,0 +1,18 @@
// GoHighlighter.h - simple Go highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class GoHighlighter final : public LanguageHighlighter {
public:
GoHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
std::unordered_set<std::string> types_;
};
} // namespace kte

209
syntax/HighlighterEngine.cc Normal file
View File

@@ -0,0 +1,209 @@
#include "HighlighterEngine.h"
#include "../Buffer.h"
#include "LanguageHighlighter.h"
#include <thread>
namespace kte {
HighlighterEngine::HighlighterEngine() = default;
HighlighterEngine::~HighlighterEngine()
{
// stop background worker
if (worker_running_.load()) {
{
std::lock_guard<std::mutex> lock(mtx_);
worker_running_.store(false);
has_request_ = true; // wake it up to exit
}
cv_.notify_one();
if (worker_.joinable())
worker_.join();
}
}
void
HighlighterEngine::SetHighlighter(std::unique_ptr<LanguageHighlighter> hl)
{
std::lock_guard<std::mutex> lock(mtx_);
hl_ = std::move(hl);
cache_.clear();
state_cache_.clear();
state_last_contig_.clear();
}
const LineHighlight &
HighlighterEngine::GetLine(const Buffer &buf, int row, std::uint64_t buf_version) const
{
std::unique_lock<std::mutex> lock(mtx_);
auto it = cache_.find(row);
if (it != cache_.end() && it->second.version == buf_version) {
return it->second;
}
// Prepare destination slot to reuse its capacity and avoid allocations
LineHighlight &slot = cache_[row];
slot.version = buf_version;
slot.spans.clear();
if (!hl_) {
return slot;
}
// Copy shared_ptr-like raw pointer for use outside critical sections
LanguageHighlighter *hl_ptr = hl_.get();
bool is_stateful = dynamic_cast<StatefulHighlighter *>(hl_ptr) != nullptr;
if (!is_stateful) {
// Stateless fast path: we can release the lock while computing to reduce contention
auto &out = slot.spans;
lock.unlock();
hl_ptr->HighlightLine(buf, row, out);
return cache_.at(row);
}
// Stateful path: we need to walk from a known previous state. Keep lock while consulting caches,
// but release during heavy computation.
auto *stateful = static_cast<StatefulHighlighter *>(hl_ptr);
StatefulHighlighter::LineState prev_state;
int start_row = -1;
if (!state_cache_.empty()) {
// linear search over map (unordered), track best candidate
int best = -1;
for (const auto &kv: state_cache_) {
int r = kv.first;
if (r <= row - 1 && kv.second.version == buf_version) {
if (r > best)
best = r;
}
}
if (best >= 0) {
start_row = best;
prev_state = state_cache_.at(best).state;
}
}
// We'll compute states and the target line's spans without holding the lock for most of the work.
// Create a local copy of prev_state and iterate rows; we will update caches under lock.
lock.unlock();
StatefulHighlighter::LineState cur_state = prev_state;
for (int r = start_row + 1; r <= row; ++r) {
std::vector<HighlightSpan> tmp;
std::vector<HighlightSpan> &out = (r == row) ? slot.spans : tmp;
auto next_state = stateful->HighlightLineStateful(buf, r, cur_state, out);
// Update state cache for r
std::lock_guard<std::mutex> gl(mtx_);
StateEntry se;
se.version = buf_version;
se.state = next_state;
state_cache_[r] = se;
cur_state = next_state;
}
// Return reference under lock to ensure slot's address stability in map
lock.lock();
return cache_.at(row);
}
void
HighlighterEngine::InvalidateFrom(int row)
{
std::lock_guard<std::mutex> lock(mtx_);
if (cache_.empty())
return;
// Simple implementation: erase all rows >= row
for (auto it = cache_.begin(); it != cache_.end();) {
if (it->first >= row)
it = cache_.erase(it);
else
++it;
}
if (!state_cache_.empty()) {
for (auto it = state_cache_.begin(); it != state_cache_.end();) {
if (it->first >= row)
it = state_cache_.erase(it);
else
++it;
}
}
}
void
HighlighterEngine::ensure_worker_started() const
{
if (worker_running_.load())
return;
worker_running_.store(true);
worker_ = std::thread([this]() {
this->worker_loop();
});
}
void
HighlighterEngine::worker_loop() const
{
std::unique_lock<std::mutex> lock(mtx_);
while (worker_running_.load()) {
cv_.wait(lock, [this]() {
return has_request_ || !worker_running_.load();
});
if (!worker_running_.load())
break;
WarmRequest req = pending_;
has_request_ = false;
// Copy locals then release lock while computing
lock.unlock();
if (req.buf) {
int start = std::max(0, req.start_row);
int end = std::max(start, req.end_row);
for (int r = start; r <= end; ++r) {
// Re-check version staleness quickly by peeking cache version; not strictly necessary
// Compute line; GetLine is thread-safe
(void) this->GetLine(*req.buf, r, req.version);
}
}
lock.lock();
}
}
void
HighlighterEngine::PrefetchViewport(const Buffer &buf, int first_row, int row_count, std::uint64_t buf_version,
int warm_margin) const
{
if (row_count <= 0)
return;
// Synchronously compute visible rows to ensure cache hits during draw
int start = std::max(0, first_row);
int end = start + row_count - 1;
int max_rows = static_cast<int>(buf.Nrows());
if (start >= max_rows)
return;
if (end >= max_rows)
end = max_rows - 1;
for (int r = start; r <= end; ++r) {
(void) GetLine(buf, r, buf_version);
}
// Enqueue background warm-around
int warm_start = std::max(0, start - warm_margin);
int warm_end = std::min(max_rows - 1, end + warm_margin);
{
std::lock_guard<std::mutex> lock(mtx_);
pending_.buf = &buf;
pending_.version = buf_version;
pending_.start_row = warm_start;
pending_.end_row = warm_end;
has_request_ = true;
}
ensure_worker_started();
cv_.notify_one();
}
} // namespace kte

View File

@@ -0,0 +1,85 @@
// HighlighterEngine.h - caching layer for per-line highlights
#pragma once
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <vector>
#include <mutex>
#include <condition_variable>
#include <atomic>
#include <thread>
#include "../Highlight.h"
#include "LanguageHighlighter.h"
class Buffer;
namespace kte {
class HighlighterEngine {
public:
HighlighterEngine();
~HighlighterEngine();
void SetHighlighter(std::unique_ptr<LanguageHighlighter> hl);
// Retrieve highlights for a given line and buffer version.
// If cache is stale, recompute using the current highlighter.
const LineHighlight &GetLine(const Buffer &buf, int row, std::uint64_t buf_version) const;
// Invalidate cached lines from row (inclusive)
void InvalidateFrom(int row);
bool HasHighlighter() const
{
return static_cast<bool>(hl_);
}
// Phase 3: viewport-first prefetch and background warming
// Compute only the visible range now, and enqueue a background warm-around task.
// warm_margin: how many extra lines above/below to warm in the background.
void PrefetchViewport(const Buffer &buf, int first_row, int row_count, std::uint64_t buf_version,
int warm_margin = 200) const;
private:
std::unique_ptr<LanguageHighlighter> hl_;
// Simple cache by row index (mutable to allow caching in const GetLine)
mutable std::unordered_map<int, LineHighlight> cache_;
// For stateful highlighters, remember per-line state (state after finishing that row)
struct StateEntry {
std::uint64_t version{0};
// Using the interface type; forward-declare via header
StatefulHighlighter::LineState state;
};
mutable std::unordered_map<int, StateEntry> state_cache_;
// Track best known contiguous state row for a given version to avoid O(n) scans
mutable std::unordered_map<std::uint64_t, int> state_last_contig_;
// Thread-safety for caches and background worker state
mutable std::mutex mtx_;
// Background warmer
struct WarmRequest {
const Buffer *buf{nullptr};
std::uint64_t version{0};
int start_row{0};
int end_row{0}; // inclusive
};
mutable std::condition_variable cv_;
mutable std::thread worker_;
mutable std::atomic<bool> worker_running_{false};
mutable bool has_request_{false};
mutable WarmRequest pending_{};
void ensure_worker_started() const;
void worker_loop() const;
};
} // namespace kte

View File

@@ -0,0 +1,247 @@
#include "HighlighterRegistry.h"
#include "CppHighlighter.h"
#include <algorithm>
#include <filesystem>
#include <vector>
#include <cctype>
// Forward declare simple highlighters implemented in this project
namespace kte {
// Registration storage
struct RegEntry {
std::string ft; // normalized
HighlighterRegistry::Factory factory;
};
static std::vector<RegEntry> &
registry()
{
static std::vector<RegEntry> reg;
return reg;
}
class JSONHighlighter;
class MarkdownHighlighter;
class ShellHighlighter;
class GoHighlighter;
class PythonHighlighter;
class RustHighlighter;
class LispHighlighter;
class SqlHighlighter;
class ErlangHighlighter;
class ForthHighlighter;
}
// Headers for the above
#include "JsonHighlighter.h"
#include "MarkdownHighlighter.h"
#include "ShellHighlighter.h"
#include "GoHighlighter.h"
#include "PythonHighlighter.h"
#include "RustHighlighter.h"
#include "LispHighlighter.h"
#include "SqlHighlighter.h"
#include "ErlangHighlighter.h"
#include "ForthHighlighter.h"
namespace kte {
static std::string
to_lower(std::string_view s)
{
std::string r(s);
std::transform(r.begin(), r.end(), r.begin(), [](unsigned char c) {
return static_cast<char>(std::tolower(c));
});
return r;
}
std::string
HighlighterRegistry::Normalize(std::string_view ft)
{
std::string f = to_lower(ft);
if (f == "c" || f == "c++" || f == "cc" || f == "hpp" || f == "hh" || f == "h" || f == "cxx")
return "cpp";
if (f == "cpp")
return "cpp";
if (f == "json")
return "json";
if (f == "markdown" || f == "md" || f == "mkd" || f == "mdown")
return "markdown";
if (f == "shell" || f == "sh" || f == "bash" || f == "zsh" || f == "ksh" || f == "fish")
return "shell";
if (f == "go" || f == "golang")
return "go";
if (f == "py" || f == "python")
return "python";
if (f == "rs" || f == "rust")
return "rust";
if (f == "lisp" || f == "scheme" || f == "scm" || f == "rkt" || f == "el" || f == "clj" || f == "cljc" || f ==
"cl")
return "lisp";
if (f == "sql" || f == "sqlite" || f == "sqlite3")
return "sql";
if (f == "erlang" || f == "erl" || f == "hrl")
return "erlang";
if (f == "forth" || f == "fth" || f == "4th" || f == "fs")
return "forth";
return f;
}
std::unique_ptr<LanguageHighlighter>
HighlighterRegistry::CreateFor(std::string_view filetype)
{
std::string ft = Normalize(filetype);
// Prefer externally registered factories
for (const auto &e: registry()) {
if (e.ft == ft && e.factory)
return e.factory();
}
if (ft == "cpp")
return std::make_unique<CppHighlighter>();
if (ft == "json")
return std::make_unique<JSONHighlighter>();
if (ft == "markdown")
return std::make_unique<MarkdownHighlighter>();
if (ft == "shell")
return std::make_unique<ShellHighlighter>();
if (ft == "go")
return std::make_unique<GoHighlighter>();
if (ft == "python")
return std::make_unique<PythonHighlighter>();
if (ft == "rust")
return std::make_unique<RustHighlighter>();
if (ft == "lisp")
return std::make_unique<LispHighlighter>();
if (ft == "sql")
return std::make_unique<SqlHighlighter>();
if (ft == "erlang")
return std::make_unique<ErlangHighlighter>();
if (ft == "forth")
return std::make_unique<ForthHighlighter>();
return nullptr;
}
static std::string
shebang_to_ft(std::string_view first_line)
{
if (first_line.size() < 2 || first_line.substr(0, 2) != "#!")
return "";
std::string low = to_lower(first_line);
if (low.find("python") != std::string::npos)
return "python";
if (low.find("bash") != std::string::npos)
return "shell";
if (low.find("sh") != std::string::npos)
return "shell";
if (low.find("zsh") != std::string::npos)
return "shell";
if (low.find("fish") != std::string::npos)
return "shell";
if (low.find("scheme") != std::string::npos || low.find("racket") != std::string::npos || low.find("guile") !=
std::string::npos)
return "lisp";
return "";
}
std::string
HighlighterRegistry::DetectForPath(std::string_view path, std::string_view first_line)
{
// Extension
std::string p(path);
std::error_code ec;
std::string ext = std::filesystem::path(p).extension().string();
for (auto &ch: ext)
ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
if (!ext.empty()) {
if (ext == ".c" || ext == ".cc" || ext == ".cpp" || ext == ".cxx" || ext == ".h" || ext == ".hpp" || ext
== ".hh")
return "cpp";
if (ext == ".json")
return "json";
if (ext == ".md" || ext == ".markdown" || ext == ".mkd")
return "markdown";
if (ext == ".sh" || ext == ".bash" || ext == ".zsh" || ext == ".ksh" || ext == ".fish")
return "shell";
if (ext == ".go")
return "go";
if (ext == ".py")
return "python";
if (ext == ".rs")
return "rust";
if (ext == ".lisp" || ext == ".scm" || ext == ".rkt" || ext == ".el" || ext == ".clj" || ext == ".cljc"
|| ext == ".cl")
return "lisp";
if (ext == ".sql" || ext == ".sqlite")
return "sql";
if (ext == ".erl" || ext == ".hrl")
return "erlang";
if (ext == ".forth" || ext == ".fth" || ext == ".4th" || ext == ".fs")
return "forth";
}
// Shebang
std::string ft = shebang_to_ft(first_line);
return ft;
}
} // namespace kte
// Extensibility API implementations
namespace kte {
void
HighlighterRegistry::Register(std::string_view filetype, Factory factory, bool override_existing)
{
std::string ft = Normalize(filetype);
for (auto &e: registry()) {
if (e.ft == ft) {
if (override_existing)
e.factory = std::move(factory);
return;
}
}
registry().push_back(RegEntry{ft, std::move(factory)});
}
bool
HighlighterRegistry::IsRegistered(std::string_view filetype)
{
std::string ft = Normalize(filetype);
for (const auto &e: registry())
if (e.ft == ft)
return true;
return false;
}
std::vector<std::string>
HighlighterRegistry::RegisteredFiletypes()
{
std::vector<std::string> out;
out.reserve(registry().size());
for (const auto &e: registry())
out.push_back(e.ft);
return out;
}
#ifdef KTE_ENABLE_TREESITTER
// Forward declare adapter factory
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char *filetype,
const void * (*get_lang)());
void
HighlighterRegistry::RegisterTreeSitter(std::string_view filetype,
const TSLanguage * (*get_language)())
{
std::string ft = Normalize(filetype);
Register(ft, [ft, get_language]() {
return CreateTreeSitterHighlighter(ft.c_str(), reinterpret_cast<const void* (*)()>(get_language));
}, /*override_existing=*/true);
}
#endif
} // namespace kte

View File

@@ -0,0 +1,47 @@
// HighlighterRegistry.h - create/detect language highlighters and allow external registration
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#include "LanguageHighlighter.h"
namespace kte {
class HighlighterRegistry {
public:
using Factory = std::function<std::unique_ptr<LanguageHighlighter>()>;
// Create a highlighter for normalized filetype id (e.g., "cpp", "json", "markdown", "shell", "go", "python", "rust", "lisp").
static std::unique_ptr<LanguageHighlighter> CreateFor(std::string_view filetype);
// Detect filetype by path extension and shebang (first line).
// Returns normalized id or empty string if unknown.
static std::string DetectForPath(std::string_view path, std::string_view first_line);
// Normalize various aliases/extensions to canonical ids.
static std::string Normalize(std::string_view ft);
// Extensibility: allow external code to register highlighters at runtime.
// The filetype key is normalized via Normalize(). If a factory is already registered for the
// normalized key and override=false, the existing factory is kept.
static void Register(std::string_view filetype, Factory factory, bool override_existing = true);
// Returns true if a factory is registered for the (normalized) filetype.
static bool IsRegistered(std::string_view filetype);
// Return a list of currently registered (normalized) filetypes. Primarily for diagnostics/tests.
static std::vector<std::string> RegisteredFiletypes();
#ifdef KTE_ENABLE_TREESITTER
// Forward declaration to avoid hard dependency when disabled.
struct TSLanguage;
// Convenience: register a Tree-sitter-backed highlighter for a filetype.
// The getter should return a non-null language pointer for the grammar.
static void RegisterTreeSitter(std::string_view filetype,
const TSLanguage * (*get_language)());
#endif
};
} // namespace kte

90
syntax/JsonHighlighter.cc Normal file
View File

@@ -0,0 +1,90 @@
#include "JsonHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static bool
is_digit(char c)
{
return c >= '0' && c <= '9';
}
void
JSONHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
auto push = [&](int a, int b, TokenKind k) {
if (b > a)
out.push_back({a, b, k});
};
int i = 0;
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(i, j, TokenKind::String);
i = j;
continue;
}
if (is_digit(c) || (c == '-' && i + 1 < n && is_digit(s[i + 1]))) {
int j = i + 1;
while (j < n && (std::isdigit(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == 'e' ||
s[j] == 'E' || s[j] == '+' || s[j] == '-' || s[j] == '_'))
++j;
push(i, j, TokenKind::Number);
i = j;
continue;
}
// booleans/null
if (std::isalpha(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && std::isalpha(static_cast<unsigned char>(s[j])))
++j;
std::string id = s.substr(i, j - i);
if (id == "true" || id == "false" || id == "null")
push(i, j, TokenKind::Constant);
else
push(i, j, TokenKind::Identifier);
i = j;
continue;
}
// punctuation
if (c == '{' || c == '}' || c == '[' || c == ']' || c == ',' || c == ':') {
push(i, i + 1, TokenKind::Punctuation);
++i;
continue;
}
// fallback
push(i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

12
syntax/JsonHighlighter.h Normal file
View File

@@ -0,0 +1,12 @@
// JsonHighlighter.h - simple JSON line highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <vector>
namespace kte {
class JSONHighlighter final : public LanguageHighlighter {
public:
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
};
} // namespace kte

View File

@@ -0,0 +1,51 @@
// LanguageHighlighter.h - interface for line-based highlighters
#pragma once
#include <memory>
#include <vector>
#include <string>
#include "../Highlight.h"
class Buffer;
namespace kte {
class LanguageHighlighter {
public:
virtual ~LanguageHighlighter() = default;
// Produce highlight spans for a given buffer row. Implementations should append to out.
virtual void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const = 0;
virtual bool Stateful() const
{
return false;
}
};
// Optional extension for stateful highlighters (e.g., multi-line comments/strings).
// Engines may detect and use this via dynamic_cast without breaking stateless impls.
class StatefulHighlighter : public LanguageHighlighter {
public:
struct LineState {
bool in_block_comment{false};
bool in_raw_string{false};
// For raw strings, remember the delimiter between the opening R"delim( and closing )delim"
std::string raw_delim;
};
// Highlight one line given the previous line state; return the resulting state after this line.
// Implementations should append spans for this line to out and compute the next state.
virtual LineState HighlightLineStateful(const Buffer &buf,
int row,
const LineState &prev,
std::vector<HighlightSpan> &out) const = 0;
bool Stateful() const override
{
return true;
}
};
} // namespace kte

107
syntax/LispHighlighter.cc Normal file
View File

@@ -0,0 +1,107 @@
#include "LispHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
LispHighlighter::LispHighlighter()
{
const char *kw[] = {
"defun", "lambda", "let", "let*", "define", "set!", "if", "cond", "begin", "quote", "quasiquote",
"unquote", "unquote-splicing", "loop", "do", "and", "or", "not"
};
for (auto s: kw)
kws_.insert(s);
}
void
LispHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
if (bol < n && s[bol] == ';') {
push(out, bol, n, TokenKind::Comment);
if (bol > 0)
push(out, 0, bol, TokenKind::Whitespace);
return;
}
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == ';') {
push(out, i, n, TokenKind::Comment);
break;
}
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
if (std::isalpha(static_cast<unsigned char>(c)) || c == '*' || c == '-' || c == '+' || c == '/' || c ==
'_') {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '*' || s[j] == '-' ||
s[j] == '+' || s[j] == '/' || s[j] == '_' || s[j] == '!'))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = kws_.count(id) ? TokenKind::Keyword : TokenKind::Identifier;
push(out, i, j, k);
i = j;
continue;
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isdigit(static_cast<unsigned char>(s[j])) || s[j] == '.'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

17
syntax/LispHighlighter.h Normal file
View File

@@ -0,0 +1,17 @@
// LispHighlighter.h - simple Lisp/Scheme family highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class LispHighlighter final : public LanguageHighlighter {
public:
LispHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
};
} // namespace kte

View File

@@ -0,0 +1,132 @@
#include "MarkdownHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push_span(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
void
MarkdownHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
LineState st; // not used in stateless entry
(void) HighlightLineStateful(buf, row, st, out);
}
StatefulHighlighter::LineState
MarkdownHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
std::vector<HighlightSpan> &out) const
{
StatefulHighlighter::LineState state = prev;
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
// Reuse in_block_comment flag as "in fenced code" state.
if (state.in_block_comment) {
// If line contains closing fence ``` then close after it
auto pos = s.find("```");
if (pos == std::string::npos) {
push_span(out, 0, n, TokenKind::String);
state.in_block_comment = true;
return state;
} else {
int end = static_cast<int>(pos + 3);
push_span(out, 0, end, TokenKind::String);
// rest of line processed normally after fence
int i = end;
// whitespace
if (i < n)
push_span(out, i, n, TokenKind::Default);
state.in_block_comment = false;
return state;
}
}
// Detect fenced code block start at beginning (allow leading spaces)
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
if (bol + 3 <= n && s.compare(bol, 3, "```") == 0) {
push_span(out, bol, n, TokenKind::String);
state.in_block_comment = true; // enter fenced mode
return state;
}
// Headings: lines starting with 1-6 '#'
if (bol < n && s[bol] == '#') {
int j = bol;
while (j < n && s[j] == '#')
++j; // hashes
// include following space and text as Keyword to stand out
push_span(out, bol, n, TokenKind::Keyword);
return state;
}
// Process inline: emphasis and code spans
int i = 0;
while (i < n) {
char c = s[i];
if (c == '`') {
int j = i + 1;
while (j < n && s[j] != '`')
++j;
if (j < n)
++j;
push_span(out, i, j, TokenKind::String);
i = j;
continue;
}
if (c == '*' || c == '_') {
// bold/italic markers: treat the marker and until next same marker as Type to highlight
char m = c;
int j = i + 1;
while (j < n && s[j] != m)
++j;
if (j < n)
++j;
push_span(out, i, j, TokenKind::Type);
i = j;
continue;
}
// links []() minimal: treat [text](url) as Function
if (c == '[') {
int j = i + 1;
while (j < n && s[j] != ']')
++j;
if (j < n)
++j; // include ]
if (j < n && s[j] == '(') {
while (j < n && s[j] != ')')
++j;
if (j < n)
++j;
}
push_span(out, i, j, TokenKind::Function);
i = j;
continue;
}
// whitespace
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push_span(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
// fallback: default single char
push_span(out, i, i + 1, TokenKind::Default);
++i;
}
return state;
}
} // namespace kte

View File

@@ -0,0 +1,14 @@
// MarkdownHighlighter.h - simple Markdown highlighter
#pragma once
#include "LanguageHighlighter.h"
namespace kte {
class MarkdownHighlighter final : public StatefulHighlighter {
public:
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
LineState HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
std::vector<HighlightSpan> &out) const override;
};
} // namespace kte

17
syntax/NullHighlighter.cc Normal file
View File

@@ -0,0 +1,17 @@
#include "NullHighlighter.h"
#include "../Buffer.h"
namespace kte {
void
NullHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
if (n <= 0)
return;
out.push_back({0, n, TokenKind::Default});
}
} // namespace kte

11
syntax/NullHighlighter.h Normal file
View File

@@ -0,0 +1,11 @@
// NullHighlighter.h - default highlighter that emits a single Default span per line
#pragma once
#include "LanguageHighlighter.h"
namespace kte {
class NullHighlighter final : public LanguageHighlighter {
public:
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
};
} // namespace kte

172
syntax/PythonHighlighter.cc Normal file
View File

@@ -0,0 +1,172 @@
#include "PythonHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
static bool
is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
PythonHighlighter::PythonHighlighter()
{
const char *kw[] = {
"and", "as", "assert", "break", "class", "continue", "def", "del", "elif", "else", "except", "False",
"finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "None", "nonlocal", "not",
"or", "pass", "raise", "return", "True", "try", "while", "with", "yield"
};
for (auto s: kw)
kws_.insert(s);
}
void
PythonHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
LineState st;
(void) HighlightLineStateful(buf, row, st, out);
}
StatefulHighlighter::LineState
PythonHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
std::vector<HighlightSpan> &out) const
{
StatefulHighlighter::LineState state = prev;
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
// Triple-quoted string continuation uses in_raw_string with raw_delim either "'''" or "\"\"\""
if (state.in_raw_string && (state.raw_delim == "'''" || state.raw_delim == "\"\"\"")) {
auto pos = s.find(state.raw_delim);
if (pos == std::string::npos) {
push(out, 0, n, TokenKind::String);
return state; // still inside
} else {
int end = static_cast<int>(pos + static_cast<int>(state.raw_delim.size()));
push(out, 0, end, TokenKind::String);
// remainder processed normally
s = s.substr(end);
n = static_cast<int>(s.size());
state.in_raw_string = false;
state.raw_delim.clear();
// Continue parsing remainder as a separate small loop
int base = end;
// original offset, but we already emitted to 'out' with base=0; following spans should be from 'end'
// For simplicity, mark rest as Default
if (n > 0)
push(out, base, base + n, TokenKind::Default);
return state;
}
}
int i = 0;
// Detect comment start '#', ignoring inside strings
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == '#') {
push(out, i, n, TokenKind::Comment);
break;
}
// Strings: triple quotes and single-line
if (c == '"' || c == '\'') {
char q = c;
// triple?
if (i + 2 < n && s[i + 1] == q && s[i + 2] == q) {
std::string delim(3, q);
int j = i + 3; // search for closing triple
auto pos = s.find(delim, static_cast<std::size_t>(j));
if (pos == std::string::npos) {
push(out, i, n, TokenKind::String);
state.in_raw_string = true;
state.raw_delim = delim;
return state;
} else {
int end = static_cast<int>(pos + 3);
push(out, i, end, TokenKind::String);
i = end;
continue;
}
} else {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == q)
break;
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = TokenKind::Identifier;
if (kws_.count(id))
k = TokenKind::Keyword;
push(out, i, j, k);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == ':' || c == ',' || c == '(' || c == ')' || c == '[' || c == ']')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
return state;
}
} // namespace kte

View File

@@ -0,0 +1,20 @@
// PythonHighlighter.h - simple Python highlighter with triple-quote state
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class PythonHighlighter final : public StatefulHighlighter {
public:
PythonHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
LineState HighlightLineStateful(const Buffer &buf, int row, const LineState &prev,
std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
};
} // namespace kte

145
syntax/RustHighlighter.cc Normal file
View File

@@ -0,0 +1,145 @@
#include "RustHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
static bool
is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_';
}
RustHighlighter::RustHighlighter()
{
const char *kw[] = {
"as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", "for", "if",
"impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", "return", "self", "Self",
"static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "dyn", "async",
"await", "try"
};
for (auto s: kw)
kws_.insert(s);
const char *tp[] = {
"u8", "u16", "u32", "u64", "u128", "usize", "i8", "i16", "i32", "i64", "i128", "isize", "f32", "f64",
"bool", "char", "str"
};
for (auto s: tp)
types_.insert(s);
}
void
RustHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == '/' && i + 1 < n && s[i + 1] == '/') {
push(out, i, n, TokenKind::Comment);
break;
}
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
int j = i + 2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
j += 2;
closed = true;
break;
}
++j;
}
if (!closed) {
push(out, i, n, TokenKind::Comment);
break;
} else {
push(out, i, j, TokenKind::Comment);
i = j;
continue;
}
}
if (c == '"') {
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
TokenKind k = TokenKind::Identifier;
if (kws_.count(id))
k = TokenKind::Keyword;
else if (types_.count(id))
k = TokenKind::Type;
push(out, i, j, k);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c ==
']')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

18
syntax/RustHighlighter.h Normal file
View File

@@ -0,0 +1,18 @@
// RustHighlighter.h - simple Rust highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class RustHighlighter final : public LanguageHighlighter {
public:
RustHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
std::unordered_set<std::string> types_;
};
} // namespace kte

105
syntax/ShellHighlighter.cc Normal file
View File

@@ -0,0 +1,105 @@
#include "ShellHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
void
ShellHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
// if first non-space is '#', whole line is comment
int bol = 0;
while (bol < n && (s[bol] == ' ' || s[bol] == '\t'))
++bol;
if (bol < n && s[bol] == '#') {
push(out, bol, n, TokenKind::Comment);
if (bol > 0)
push(out, 0, bol, TokenKind::Whitespace);
return;
}
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
if (c == '#') {
push(out, i, n, TokenKind::Comment);
break;
}
if (c == '\'' || c == '"') {
char q = c;
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (q == '"') {
if (esc) {
esc = false;
continue;
}
if (d == '\\') {
esc = true;
continue;
}
if (d == '"')
break;
} else {
if (d == '\'')
break;
}
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
// simple keywords
if (std::isalpha(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '_'))
++j;
std::string id = s.substr(i, j - i);
static const char *kws[] = {
"if", "then", "fi", "for", "in", "do", "done", "case", "esac", "while", "function",
"elif", "else"
};
bool kw = false;
for (auto k: kws)
if (id == k) {
kw = true;
break;
}
push(out, i, j, kw ? TokenKind::Keyword : TokenKind::Identifier);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == '(' || c == ')' || c == '{' || c == '}' || c == ',' || c == ';')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

11
syntax/ShellHighlighter.h Normal file
View File

@@ -0,0 +1,11 @@
// ShellHighlighter.h - simple POSIX shell highlighter
#pragma once
#include "LanguageHighlighter.h"
namespace kte {
class ShellHighlighter final : public LanguageHighlighter {
public:
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
};
} // namespace kte

156
syntax/SqlHighlighter.cc Normal file
View File

@@ -0,0 +1,156 @@
#include "SqlHighlighter.h"
#include "../Buffer.h"
#include <cctype>
namespace kte {
static void
push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k)
{
if (b > a)
out.push_back({a, b, k});
}
static bool
is_ident_start(char c)
{
return std::isalpha(static_cast<unsigned char>(c)) || c == '_';
}
static bool
is_ident_char(char c)
{
return std::isalnum(static_cast<unsigned char>(c)) || c == '_' || c == '$';
}
SqlHighlighter::SqlHighlighter()
{
const char *kw[] = {
"select", "insert", "update", "delete", "from", "where", "group", "by", "order", "limit",
"offset", "values", "into", "create", "table", "index", "unique", "on", "as", "and", "or",
"not", "null", "is", "primary", "key", "constraint", "foreign", "references", "drop", "alter",
"add", "column", "rename", "to", "if", "exists", "join", "left", "right", "inner", "outer",
"cross", "using", "set", "distinct", "having", "union", "all", "case", "when", "then", "else",
"end", "pragma", "transaction", "begin", "commit", "rollback", "replace"
};
for (auto s: kw)
kws_.insert(s);
const char *types[] = {"integer", "real", "text", "blob", "numeric", "boolean", "date", "datetime"};
for (auto s: types)
types_.insert(s);
}
void
SqlHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size())
return;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
int i = 0;
while (i < n) {
char c = s[i];
if (c == ' ' || c == '\t') {
int j = i + 1;
while (j < n && (s[j] == ' ' || s[j] == '\t'))
++j;
push(out, i, j, TokenKind::Whitespace);
i = j;
continue;
}
// line comments: -- ...
if (c == '-' && i + 1 < n && s[i + 1] == '-') {
push(out, i, n, TokenKind::Comment);
break;
}
// simple block comment on same line: /* ... */
if (c == '/' && i + 1 < n && s[i + 1] == '*') {
int j = i + 2;
bool closed = false;
while (j + 1 <= n) {
if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') {
j += 2;
closed = true;
break;
}
++j;
}
if (!closed) {
push(out, i, n, TokenKind::Comment);
break;
} else {
push(out, i, j, TokenKind::Comment);
i = j;
continue;
}
}
// strings: '...' or "..."
if (c == '\'' || c == '"') {
char q = c;
int j = i + 1;
bool esc = false;
while (j < n) {
char d = s[j++];
if (d == q) {
// Handle doubled quote escaping for SQL single quotes
if (q == '\'' && j < n && s[j] == '\'') {
++j;
continue;
}
break;
}
if (d == '\\') {
esc = !esc;
} else {
esc = false;
}
}
push(out, i, j, TokenKind::String);
i = j;
continue;
}
if (std::isdigit(static_cast<unsigned char>(c))) {
int j = i + 1;
while (j < n && (std::isalnum(static_cast<unsigned char>(s[j])) || s[j] == '.' || s[j] == '_'))
++j;
push(out, i, j, TokenKind::Number);
i = j;
continue;
}
if (is_ident_start(c)) {
int j = i + 1;
while (j < n && is_ident_char(s[j]))
++j;
std::string id = s.substr(i, j - i);
std::string lower;
lower.reserve(id.size());
for (char ch: id)
lower.push_back(static_cast<char>(std::tolower(static_cast<unsigned char>(ch))));
TokenKind k = TokenKind::Identifier;
if (kws_.count(lower))
k = TokenKind::Keyword;
else if (types_.count(lower))
k = TokenKind::Type;
push(out, i, j, k);
i = j;
continue;
}
if (std::ispunct(static_cast<unsigned char>(c))) {
TokenKind k = TokenKind::Operator;
if (c == ',' || c == ';' || c == '(' || c == ')')
k = TokenKind::Punctuation;
push(out, i, i + 1, k);
++i;
continue;
}
push(out, i, i + 1, TokenKind::Default);
++i;
}
}
} // namespace kte

18
syntax/SqlHighlighter.h Normal file
View File

@@ -0,0 +1,18 @@
// SqlHighlighter.h - simple SQL/SQLite highlighter
#pragma once
#include "LanguageHighlighter.h"
#include <unordered_set>
namespace kte {
class SqlHighlighter final : public LanguageHighlighter {
public:
SqlHighlighter();
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
std::unordered_set<std::string> kws_;
std::unordered_set<std::string> types_;
};
} // namespace kte

View File

@@ -0,0 +1,51 @@
#include "../TreeSitterHighlighter.h"
#ifdef KTE_ENABLE_TREESITTER
#include "Buffer.h"
#include <utility>
namespace kte {
TreeSitterHighlighter::TreeSitterHighlighter(const TSLanguage *lang, std::string filetype)
: language_(lang), filetype_(std::move(filetype)) {}
TreeSitterHighlighter::~TreeSitterHighlighter()
{
disposeParser();
}
void
TreeSitterHighlighter::ensureParsed(const Buffer & /*buf*/) const
{
// Intentionally a stub to avoid pulling the Tree-sitter API and library by default.
// In future, when linking against tree-sitter, initialize parser_, set language_,
// and build tree_ from the buffer contents.
}
void
TreeSitterHighlighter::disposeParser() const
{
// Stub; nothing to dispose when not actually creating parser/tree
}
void
TreeSitterHighlighter::HighlightLine(const Buffer &/*buf*/, int /*row*/, std::vector<HighlightSpan> &/*out*/) const
{
// For now, no-op. When tree-sitter is wired, map nodes to TokenKind spans per line.
}
std::unique_ptr<LanguageHighlighter>
CreateTreeSitterHighlighter(const char *filetype,
const void * (*get_lang)())
{
const auto *lang = reinterpret_cast<const TSLanguage *>(get_lang ? get_lang() : nullptr);
return std::make_unique < TreeSitterHighlighter > (lang, filetype ? std::string(filetype) : std::string());
}
} // namespace kte
#endif // KTE_ENABLE_TREESITTER

View File

@@ -0,0 +1,48 @@
// TreeSitterHighlighter.h - optional adapter for Tree-sitter (behind KTE_ENABLE_TREESITTER)
#pragma once
#ifdef KTE_ENABLE_TREESITTER
#include <memory>
#include <string>
#include <vector>
#include "LanguageHighlighter.h"
// Forward-declare Tree-sitter C API to avoid hard coupling in headers if includes are not present
extern "C" {
struct TSLanguage;
struct TSParser;
struct TSTree;
}
namespace kte {
// A minimal adapter that uses Tree-sitter to parse the whole buffer and then, for now,
// does very limited token classification. This acts as a scaffold for future richer
// queries. If no queries are provided, it currently produces no spans (safe fallback).
class TreeSitterHighlighter : public LanguageHighlighter {
public:
explicit TreeSitterHighlighter(const TSLanguage *lang, std::string filetype);
~TreeSitterHighlighter() override;
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
const TSLanguage *language_{nullptr};
std::string filetype_;
// Lazy parser to avoid startup cost; mutable to allow creation in const method
mutable TSParser *parser_{nullptr};
mutable TSTree *tree_{nullptr};
void ensureParsed(const Buffer &buf) const;
void disposeParser() const;
};
// Factory used by HighlighterRegistry when registering via RegisterTreeSitter.
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char *filetype,
const void * (*get_lang)());
} // namespace kte
#endif // KTE_ENABLE_TREESITTER