#include "CppHighlighter.h" #include "../Buffer.h" #include namespace kte { static bool is_digit(char c) { return c >= '0' && c <= '9'; } CppHighlighter::CppHighlighter() { const char *kw[] = { "if", "else", "for", "while", "do", "switch", "case", "default", "break", "continue", "return", "goto", "struct", "class", "namespace", "using", "template", "typename", "public", "private", "protected", "virtual", "override", "const", "constexpr", "auto", "static", "inline", "operator", "new", "delete", "try", "catch", "throw", "friend", "enum", "union", "extern", "volatile", "mutable", "noexcept", "sizeof", "this" }; for (auto s: kw) keywords_.insert(s); const char *types[] = { "int", "long", "short", "char", "signed", "unsigned", "float", "double", "void", "bool", "wchar_t", "size_t", "ptrdiff_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t" }; for (auto s: types) types_.insert(s); } bool CppHighlighter::is_ident_start(char c) { return std::isalpha(static_cast(c)) || c == '_'; } bool CppHighlighter::is_ident_char(char c) { return std::isalnum(static_cast(c)) || c == '_'; } void CppHighlighter::HighlightLine(const Buffer &buf, int row, std::vector &out) const { // Stateless entry simply delegates to stateful with a clean previous state StatefulHighlighter::LineState prev; (void) HighlightLineStateful(buf, row, prev, out); } StatefulHighlighter::LineState CppHighlighter::HighlightLineStateful(const Buffer &buf, int row, const LineState &prev, std::vector &out) const { const auto &rows = buf.Rows(); StatefulHighlighter::LineState state = prev; if (row < 0 || static_cast(row) >= rows.size()) return state; std::string s = static_cast(rows[static_cast(row)]); if (s.empty()) return state; auto push = [&](int a, int b, TokenKind k) { if (b > a) out.push_back({a, b, k}); }; int n = static_cast(s.size()); int bol = 0; while (bol < n && (s[bol] == ' ' || s[bol] == '\t')) ++bol; int i = 0; // Continue multi-line raw string from previous line if (state.in_raw_string) { std::string needle = ")" + state.raw_delim + "\""; auto pos = s.find(needle); if (pos == std::string::npos) { push(0, n, TokenKind::String); state.in_raw_string = true; return state; } else { int end = static_cast(pos + needle.size()); push(0, end, TokenKind::String); i = end; state.in_raw_string = false; state.raw_delim.clear(); } } // Continue multi-line block comment from previous line if (state.in_block_comment) { int j = i; while (i + 1 < n) { if (s[i] == '*' && s[i + 1] == '/') { i += 2; push(j, i, TokenKind::Comment); state.in_block_comment = false; break; } ++i; } if (state.in_block_comment) { push(j, n, TokenKind::Comment); return state; } } while (i < n) { char c = s[i]; // Preprocessor at beginning of line (after leading whitespace) if (i == bol && c == '#') { push(0, n, TokenKind::Preproc); break; } // Whitespace if (c == ' ' || c == '\t') { int j = i + 1; while (j < n && (s[j] == ' ' || s[j] == '\t')) ++j; push(i, j, TokenKind::Whitespace); i = j; continue; } // Line comment if (c == '/' && i + 1 < n && s[i + 1] == '/') { push(i, n, TokenKind::Comment); break; } // Block comment if (c == '/' && i + 1 < n && s[i + 1] == '*') { int j = i + 2; bool closed = false; while (j + 1 <= n) { if (j + 1 < n && s[j] == '*' && s[j + 1] == '/') { j += 2; closed = true; break; } ++j; } if (closed) { push(i, j, TokenKind::Comment); i = j; continue; } // Spill to next lines push(i, n, TokenKind::Comment); state.in_block_comment = true; return state; } // Raw string start: very simple detection: R"delim( if (c == 'R' && i + 1 < n && s[i + 1] == '"') { int k = i + 2; std::string delim; while (k < n && s[k] != '(') { delim.push_back(s[k]); ++k; } if (k < n && s[k] == '(') { int body_start = k + 1; std::string needle = ")" + delim + "\""; auto pos = s.find(needle, static_cast(body_start)); if (pos == std::string::npos) { push(i, n, TokenKind::String); state.in_raw_string = true; state.raw_delim = delim; return state; } else { int end = static_cast(pos + needle.size()); push(i, end, TokenKind::String); i = end; continue; } } // If malformed, just treat 'R' as identifier fallback } // Regular string literal if (c == '"') { int j = i + 1; bool esc = false; while (j < n) { char d = s[j++]; if (esc) { esc = false; continue; } if (d == '\\') { esc = true; continue; } if (d == '"') break; } push(i, j, TokenKind::String); i = j; continue; } // Char literal if (c == '\'') { int j = i + 1; bool esc = false; while (j < n) { char d = s[j++]; if (esc) { esc = false; continue; } if (d == '\\') { esc = true; continue; } if (d == '\'') break; } push(i, j, TokenKind::Char); i = j; continue; } // Number literal (simple) if (is_digit(c) || (c == '.' && i + 1 < n && is_digit(s[i + 1]))) { int j = i + 1; while (j < n && (std::isalnum(static_cast(s[j])) || s[j] == '.' || s[j] == 'x' || s[j] == 'X' || s[j] == 'b' || s[j] == 'B' || s[j] == '_')) ++j; push(i, j, TokenKind::Number); i = j; continue; } // Identifier / keyword / type if (is_ident_start(c)) { int j = i + 1; while (j < n && is_ident_char(s[j])) ++j; std::string id = s.substr(i, j - i); TokenKind k = TokenKind::Identifier; if (keywords_.count(id)) k = TokenKind::Keyword; else if (types_.count(id)) k = TokenKind::Type; push(i, j, k); i = j; continue; } // Operators and punctuation (single char for now) TokenKind kind = TokenKind::Operator; if (std::ispunct(static_cast(c)) && c != '_' && c != '#') { if (c == ';' || c == ',' || c == '(' || c == ')' || c == '{' || c == '}' || c == '[' || c == ']') kind = TokenKind::Punctuation; push(i, i + 1, kind); ++i; continue; } // Fallback push(i, i + 1, TokenKind::Default); ++i; } return state; } } // namespace kte