Files
kte/PythonHighlighter.cc
Kyle Isom 1a77f28ce4 Add syntax highlighting infrastructure
- Introduced `HighlighterRegistry` with support for multiple language highlighters (e.g., JSON, Markdown, Python).
- Added `JsonHighlighter` implementation for basic JSON syntax highlighting.
2025-12-01 18:20:36 -08:00

86 lines
4.4 KiB
C++

#include "PythonHighlighter.h"
#include "Buffer.h"
#include <cctype>
namespace kte {
static void push(std::vector<HighlightSpan> &out, int a, int b, TokenKind k){ if (b>a) out.push_back({a,b,k}); }
static bool is_ident_start(char c){ return std::isalpha(static_cast<unsigned char>(c)) || c=='_'; }
static bool is_ident_char(char c){ return std::isalnum(static_cast<unsigned char>(c)) || c=='_'; }
PythonHighlighter::PythonHighlighter()
{
const char* kw[] = {"and","as","assert","break","class","continue","def","del","elif","else","except","False","finally","for","from","global","if","import","in","is","lambda","None","nonlocal","not","or","pass","raise","return","True","try","while","with","yield"};
for (auto s: kw) kws_.insert(s);
}
void PythonHighlighter::HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const
{
LineState st; (void)HighlightLineStateful(buf, row, st, out);
}
StatefulHighlighter::LineState PythonHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev, std::vector<HighlightSpan> &out) const
{
StatefulHighlighter::LineState state = prev;
const auto &rows = buf.Rows();
if (row < 0 || static_cast<std::size_t>(row) >= rows.size()) return state;
std::string s = static_cast<std::string>(rows[static_cast<std::size_t>(row)]);
int n = static_cast<int>(s.size());
// Triple-quoted string continuation uses in_raw_string with raw_delim either "'''" or "\"\"\""
if (state.in_raw_string && (state.raw_delim == "'''" || state.raw_delim == "\"\"\"")) {
auto pos = s.find(state.raw_delim);
if (pos == std::string::npos) {
push(out, 0, n, TokenKind::String);
return state; // still inside
} else {
int end = static_cast<int>(pos + static_cast<int>(state.raw_delim.size()));
push(out, 0, end, TokenKind::String);
// remainder processed normally
s = s.substr(end);
n = static_cast<int>(s.size());
state.in_raw_string = false; state.raw_delim.clear();
// Continue parsing remainder as a separate small loop
int base = end; // original offset, but we already emitted to 'out' with base=0; following spans should be from 'end'
// For simplicity, mark rest as Default
if (n>0) push(out, base, base + n, TokenKind::Default);
return state;
}
}
int i = 0;
// Detect comment start '#', ignoring inside strings
while (i < n) {
char c = s[i];
if (c==' '||c=='\t') { int j=i+1; while (j<n && (s[j]==' '||s[j]=='\t')) ++j; push(out,i,j,TokenKind::Whitespace); i=j; continue; }
if (c=='#') { push(out,i,n,TokenKind::Comment); break; }
// Strings: triple quotes and single-line
if (c=='"' || c=='\'') {
char q=c;
// triple?
if (i+2 < n && s[i+1]==q && s[i+2]==q) {
std::string delim(3, q);
int j = i+3; // search for closing triple
auto pos = s.find(delim, static_cast<std::size_t>(j));
if (pos == std::string::npos) {
push(out,i,n,TokenKind::String);
state.in_raw_string = true; state.raw_delim = delim; return state;
} else {
int end = static_cast<int>(pos + 3);
push(out,i,end,TokenKind::String); i=end; continue;
}
} else {
int j=i+1; bool esc=false; while (j<n) { char d=s[j++]; if (esc){esc=false; continue;} if (d=='\\'){esc=true; continue;} if (d==q) break; }
push(out,i,j,TokenKind::String); i=j; continue;
}
}
if (std::isdigit(static_cast<unsigned char>(c))) { int j=i+1; while (j<n && (std::isalnum(static_cast<unsigned char>(s[j]))||s[j]=='.'||s[j]=='_' )) ++j; push(out,i,j,TokenKind::Number); i=j; continue; }
if (is_ident_start(c)) { int j=i+1; while (j<n && is_ident_char(s[j])) ++j; std::string id=s.substr(i,j-i); TokenKind k=TokenKind::Identifier; if (kws_.count(id)) k=TokenKind::Keyword; push(out,i,j,k); i=j; continue; }
if (std::ispunct(static_cast<unsigned char>(c))) { TokenKind k=TokenKind::Operator; if (c==':'||c==','||c=='('||c==')'||c=='['||c==']') k=TokenKind::Punctuation; push(out,i,i+1,k); ++i; continue; }
push(out,i,i+1,TokenKind::Default); ++i;
}
return state;
}
} // namespace kte