Add extensible highlighter registration and Tree-sitter support.
Some checks failed
Release / Bump Homebrew formula (push) Has been cancelled
Release / Build Linux amd64 (push) Has been cancelled
Release / Build Linux arm64 (push) Has been cancelled
Release / Build macOS arm64 (.app) (push) Has been cancelled
Release / Create GitHub Release (push) Has been cancelled

- Implemented runtime API for registering custom highlighters.
- Added optional Tree-sitter integration for advanced syntax parsing (disabled by default).
- Updated buffer initialization and copying to support dynamic highlighter configuration.
- Introduced `NullHighlighter` as a fallback for unsupported filetypes.
- Enhanced CMake configuration with `KTE_ENABLE_TREESITTER` option.
This commit is contained in:
2025-12-01 19:04:37 -08:00
parent e62cf3ee28
commit ceef6af3ae
8 changed files with 318 additions and 6 deletions

View File

@@ -6,6 +6,9 @@
#include "Buffer.h"
#include "UndoSystem.h"
#include "UndoTree.h"
// For reconstructing highlighter state on copies
#include "HighlighterRegistry.h"
#include "NullHighlighter.h"
Buffer::Buffer()
@@ -40,9 +43,32 @@ Buffer::Buffer(const Buffer &other)
mark_set_ = other.mark_set_;
mark_curx_ = other.mark_curx_;
mark_cury_ = other.mark_cury_;
// Copy syntax/highlighting flags
version_ = other.version_;
syntax_enabled_ = other.syntax_enabled_;
filetype_ = other.filetype_;
// Fresh undo system for the copy
undo_tree_ = std::make_unique<UndoTree>();
undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
// Recreate a highlighter engine for this copy based on filetype/syntax state
if (syntax_enabled_) {
// Allocate engine and install an appropriate highlighter
highlighter_ = std::make_unique<kte::HighlighterEngine>();
if (!filetype_.empty()) {
auto hl = kte::HighlighterRegistry::CreateFor(filetype_);
if (hl) {
highlighter_->SetHighlighter(std::move(hl));
} else {
// Unsupported filetype -> NullHighlighter keeps syntax pipeline active
highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
} else {
// No filetype -> keep syntax enabled but use NullHighlighter
highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
// Fresh engine has empty caches; nothing to invalidate
}
}
@@ -65,9 +91,28 @@ Buffer::operator=(const Buffer &other)
mark_set_ = other.mark_set_;
mark_curx_ = other.mark_curx_;
mark_cury_ = other.mark_cury_;
version_ = other.version_;
syntax_enabled_ = other.syntax_enabled_;
filetype_ = other.filetype_;
// Recreate undo system for this instance
undo_tree_ = std::make_unique<UndoTree>();
undo_sys_ = std::make_unique<UndoSystem>(*this, *undo_tree_);
// Recreate highlighter engine consistent with syntax settings
highlighter_.reset();
if (syntax_enabled_) {
highlighter_ = std::make_unique<kte::HighlighterEngine>();
if (!filetype_.empty()) {
auto hl = kte::HighlighterRegistry::CreateFor(filetype_);
if (hl) {
highlighter_->SetHighlighter(std::move(hl));
} else {
highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
} else {
highlighter_->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
}
return *this;
}
@@ -91,6 +136,11 @@ Buffer::Buffer(Buffer &&other) noexcept
undo_tree_(std::move(other.undo_tree_)),
undo_sys_(std::move(other.undo_sys_))
{
// Move syntax/highlighting state
version_ = other.version_;
syntax_enabled_ = other.syntax_enabled_;
filetype_ = std::move(other.filetype_);
highlighter_ = std::move(other.highlighter_);
// Update UndoSystem's buffer reference to point to this object
if (undo_sys_) {
undo_sys_->UpdateBufferReference(*this);
@@ -122,6 +172,12 @@ Buffer::operator=(Buffer &&other) noexcept
undo_tree_ = std::move(other.undo_tree_);
undo_sys_ = std::move(other.undo_sys_);
// Move syntax/highlighting state
version_ = other.version_;
syntax_enabled_ = other.syntax_enabled_;
filetype_ = std::move(other.filetype_);
highlighter_ = std::move(other.highlighter_);
// Update UndoSystem's buffer reference to point to this object
if (undo_sys_) {
undo_sys_->UpdateBufferReference(*this);

View File

@@ -13,6 +13,7 @@ set(BUILD_TESTS OFF CACHE BOOL "Enable building test programs.")
option(KTE_USE_PIECE_TABLE "Use PieceTable instead of GapBuffer implementation" ON)
set(KTE_FONT_SIZE "18.0" CACHE STRING "Default font size for GUI")
option(KTE_UNDO_DEBUG "Enable undo instrumentation logs" OFF)
option(KTE_ENABLE_TREESITTER "Enable optional Tree-sitter highlighter adapter" OFF)
if (CMAKE_HOST_UNIX)
message(STATUS "Build system is POSIX.")
@@ -37,6 +38,9 @@ else ()
endif ()
add_compile_definitions(KGE_PLATFORM=${CMAKE_HOST_SYSTEM_NAME})
add_compile_definitions(KTE_VERSION_STR="v${KTE_VERSION}")
if (KTE_ENABLE_TREESITTER)
add_compile_definitions(KTE_ENABLE_TREESITTER)
endif ()
message(STATUS "Build system: ${CMAKE_HOST_SYSTEM_NAME}")
@@ -80,6 +84,11 @@ set(COMMON_SOURCES
LispHighlighter.cc
)
if (KTE_ENABLE_TREESITTER)
list(APPEND COMMON_SOURCES
TreeSitterHighlighter.cc)
endif ()
set(COMMON_HEADERS
GapBuffer.h
PieceTable.h
@@ -116,6 +125,11 @@ set(COMMON_HEADERS
LispHighlighter.h
)
if (KTE_ENABLE_TREESITTER)
list(APPEND COMMON_HEADERS
TreeSitterHighlighter.h)
endif ()
# kte (terminal-first) executable
add_executable(kte
main.cc
@@ -132,6 +146,18 @@ endif ()
target_link_libraries(kte ${CURSES_LIBRARIES})
if (KTE_ENABLE_TREESITTER)
# Users can provide their own tree-sitter include/lib via cache variables
set(TREESITTER_INCLUDE_DIR "" CACHE PATH "Path to tree-sitter include directory")
set(TREESITTER_LIBRARY "" CACHE FILEPATH "Path to tree-sitter library (.a/.dylib)")
if (TREESITTER_INCLUDE_DIR)
target_include_directories(kte PRIVATE ${TREESITTER_INCLUDE_DIR})
endif ()
if (TREESITTER_LIBRARY)
target_link_libraries(kte ${TREESITTER_LIBRARY})
endif ()
endif ()
install(TARGETS kte
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
@@ -157,6 +183,14 @@ if (BUILD_TESTS)
target_link_libraries(test_undo ${CURSES_LIBRARIES})
if (KTE_ENABLE_TREESITTER)
if (TREESITTER_INCLUDE_DIR)
target_include_directories(test_undo PRIVATE ${TREESITTER_INCLUDE_DIR})
endif ()
if (TREESITTER_LIBRARY)
target_link_libraries(test_undo ${TREESITTER_LIBRARY})
endif ()
endif ()
endif ()
if (${BUILD_GUI})

View File

@@ -1,6 +1,8 @@
#include <algorithm>
#include <utility>
#include <filesystem>
#include "HighlighterRegistry.h"
#include "NullHighlighter.h"
#include "Editor.h"
#include "HighlighterRegistry.h"
@@ -218,11 +220,32 @@ Editor::OpenFile(const std::string &path, std::string &err)
bool
Editor::SwitchTo(std::size_t index)
{
if (index >= buffers_.size()) {
return false;
}
curbuf_ = index;
return true;
if (index >= buffers_.size()) {
return false;
}
curbuf_ = index;
// Robustness: ensure a valid highlighter is installed when switching buffers
Buffer &b = buffers_[curbuf_];
if (b.SyntaxEnabled()) {
b.EnsureHighlighter();
if (auto *eng = b.Highlighter()) {
if (!eng->HasHighlighter()) {
// Try to set based on existing filetype; fall back to NullHighlighter
if (!b.Filetype().empty()) {
auto hl = kte::HighlighterRegistry::CreateFor(b.Filetype());
if (hl) {
eng->SetHighlighter(std::move(hl));
} else {
eng->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
} else {
eng->SetHighlighter(std::make_unique<kte::NullHighlighter>());
}
eng->InvalidateFrom(0);
}
}
}
return true;
}

View File

@@ -3,9 +3,22 @@
#include <algorithm>
#include <filesystem>
#include <vector>
#include <cctype>
// Forward declare simple highlighters implemented in this project
namespace kte {
// Registration storage
struct RegEntry {
std::string ft; // normalized
HighlighterRegistry::Factory factory;
};
static std::vector<RegEntry> &registry() {
static std::vector<RegEntry> reg;
return reg;
}
class JSONHighlighter; class MarkdownHighlighter; class ShellHighlighter;
class GoHighlighter; class PythonHighlighter; class RustHighlighter; class LispHighlighter;
}
@@ -45,6 +58,10 @@ std::string HighlighterRegistry::Normalize(std::string_view ft)
std::unique_ptr<LanguageHighlighter> HighlighterRegistry::CreateFor(std::string_view filetype)
{
std::string ft = Normalize(filetype);
// Prefer externally registered factories
for (const auto &e : registry()) {
if (e.ft == ft && e.factory) return e.factory();
}
if (ft == "cpp") return std::make_unique<CppHighlighter>();
if (ft == "json") return std::make_unique<JSONHighlighter>();
if (ft == "markdown") return std::make_unique<MarkdownHighlighter>();
@@ -91,3 +108,50 @@ std::string HighlighterRegistry::DetectForPath(std::string_view path, std::strin
}
} // namespace kte
// Extensibility API implementations
namespace kte {
void HighlighterRegistry::Register(std::string_view filetype, Factory factory, bool override_existing)
{
std::string ft = Normalize(filetype);
for (auto &e : registry()) {
if (e.ft == ft) {
if (override_existing) e.factory = std::move(factory);
return;
}
}
registry().push_back(RegEntry{ft, std::move(factory)});
}
bool HighlighterRegistry::IsRegistered(std::string_view filetype)
{
std::string ft = Normalize(filetype);
for (const auto &e : registry()) if (e.ft == ft) return true;
return false;
}
std::vector<std::string> HighlighterRegistry::RegisteredFiletypes()
{
std::vector<std::string> out;
out.reserve(registry().size());
for (const auto &e : registry()) out.push_back(e.ft);
return out;
}
#ifdef KTE_ENABLE_TREESITTER
// Forward declare adapter factory
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char* filetype,
const void* (*get_lang)());
void HighlighterRegistry::RegisterTreeSitter(std::string_view filetype,
const TSLanguage* (*get_language)())
{
std::string ft = Normalize(filetype);
Register(ft, [ft, get_language]() {
return CreateTreeSitterHighlighter(ft.c_str(), reinterpret_cast<const void* (*)()>(get_language));
}, /*override_existing=*/true);
}
#endif
} // namespace kte

View File

@@ -1,10 +1,11 @@
// HighlighterRegistry.h - create/detect language highlighters
// HighlighterRegistry.h - create/detect language highlighters and allow external registration
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
#include "LanguageHighlighter.h"
@@ -12,6 +13,8 @@ namespace kte {
class HighlighterRegistry {
public:
using Factory = std::function<std::unique_ptr<LanguageHighlighter>()>;
// Create a highlighter for normalized filetype id (e.g., "cpp", "json", "markdown", "shell", "go", "python", "rust", "lisp").
static std::unique_ptr<LanguageHighlighter> CreateFor(std::string_view filetype);
@@ -21,6 +24,26 @@ public:
// Normalize various aliases/extensions to canonical ids.
static std::string Normalize(std::string_view ft);
// Extensibility: allow external code to register highlighters at runtime.
// The filetype key is normalized via Normalize(). If a factory is already registered for the
// normalized key and override=false, the existing factory is kept.
static void Register(std::string_view filetype, Factory factory, bool override_existing = true);
// Returns true if a factory is registered for the (normalized) filetype.
static bool IsRegistered(std::string_view filetype);
// Return a list of currently registered (normalized) filetypes. Primarily for diagnostics/tests.
static std::vector<std::string> RegisteredFiletypes();
#ifdef KTE_ENABLE_TREESITTER
// Forward declaration to avoid hard dependency when disabled.
struct TSLanguage;
// Convenience: register a Tree-sitter-backed highlighter for a filetype.
// The getter should return a non-null language pointer for the grammar.
static void RegisterTreeSitter(std::string_view filetype,
const TSLanguage* (*get_language)());
#endif
};
} // namespace kte

46
TreeSitterHighlighter.cc Normal file
View File

@@ -0,0 +1,46 @@
#include "TreeSitterHighlighter.h"
#ifdef KTE_ENABLE_TREESITTER
#include "Buffer.h"
#include <utility>
namespace kte {
TreeSitterHighlighter::TreeSitterHighlighter(const TSLanguage* lang, std::string filetype)
: language_(lang), filetype_(std::move(filetype))
{
}
TreeSitterHighlighter::~TreeSitterHighlighter()
{
disposeParser();
}
void TreeSitterHighlighter::ensureParsed(const Buffer& /*buf*/) const
{
// Intentionally a stub to avoid pulling the Tree-sitter API and library by default.
// In future, when linking against tree-sitter, initialize parser_, set language_,
// and build tree_ from the buffer contents.
}
void TreeSitterHighlighter::disposeParser() const
{
// Stub; nothing to dispose when not actually creating parser/tree
}
void TreeSitterHighlighter::HighlightLine(const Buffer &/*buf*/, int /*row*/, std::vector<HighlightSpan> &/*out*/) const
{
// For now, no-op. When tree-sitter is wired, map nodes to TokenKind spans per line.
}
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char* filetype,
const void* (*get_lang)())
{
const auto* lang = reinterpret_cast<const TSLanguage*>(get_lang ? get_lang() : nullptr);
return std::make_unique<TreeSitterHighlighter>(lang, filetype ? std::string(filetype) : std::string());
}
} // namespace kte
#endif // KTE_ENABLE_TREESITTER

48
TreeSitterHighlighter.h Normal file
View File

@@ -0,0 +1,48 @@
// TreeSitterHighlighter.h - optional adapter for Tree-sitter (behind KTE_ENABLE_TREESITTER)
#pragma once
#ifdef KTE_ENABLE_TREESITTER
#include <memory>
#include <string>
#include <vector>
#include "LanguageHighlighter.h"
// Forward-declare Tree-sitter C API to avoid hard coupling in headers if includes are not present
extern "C" {
struct TSLanguage;
struct TSParser;
struct TSTree;
}
namespace kte {
// A minimal adapter that uses Tree-sitter to parse the whole buffer and then, for now,
// does very limited token classification. This acts as a scaffold for future richer
// queries. If no queries are provided, it currently produces no spans (safe fallback).
class TreeSitterHighlighter : public LanguageHighlighter {
public:
explicit TreeSitterHighlighter(const TSLanguage* lang, std::string filetype);
~TreeSitterHighlighter() override;
void HighlightLine(const Buffer &buf, int row, std::vector<HighlightSpan> &out) const override;
private:
const TSLanguage* language_{nullptr};
std::string filetype_;
// Lazy parser to avoid startup cost; mutable to allow creation in const method
mutable TSParser* parser_{nullptr};
mutable TSTree* tree_{nullptr};
void ensureParsed(const Buffer& buf) const;
void disposeParser() const;
};
// Factory used by HighlighterRegistry when registering via RegisterTreeSitter.
std::unique_ptr<LanguageHighlighter> CreateTreeSitterHighlighter(const char* filetype,
const void* (*get_lang)());
} // namespace kte
#endif // KTE_ENABLE_TREESITTER

View File

@@ -50,3 +50,21 @@ Renderer integration
- Terminal and GUI renderers request line spans via `Highlighter()->GetLine(buf, row, buf.Version())`.
- Search highlight and cursor overlays take precedence over syntax colors.
Extensibility (Phase 4)
-----------------------
- Public registration API: external code can register custom highlighters by filetype.
- Use `HighlighterRegistry::Register("mylang", []{ return std::make_unique<MyHighlighter>(); });`
- Registered factories are preferred over built-ins for the same filetype key.
- Filetype keys are normalized via `HighlighterRegistry::Normalize()`.
- Optional Tree-sitter adapter: disabled by default to keep dependencies minimal.
- Enable with CMake option `-DKTE_ENABLE_TREESITTER=ON` and provide
`-DTREESITTER_INCLUDE_DIR=...` and `-DTREESITTER_LIBRARY=...` if needed.
- Register a Tree-sitter-backed highlighter for a language (example assumes you link a grammar):
```c++
extern "C" const TSLanguage* tree_sitter_c();
kte::HighlighterRegistry::RegisterTreeSitter("c", &tree_sitter_c);
```
- Current adapter is a stub scaffold; it compiles and integrates cleanly when enabled, but
intentionally emits no spans until Tree-sitter node-to-token mapping is implemented.