From a8abda4b87fcbb08e907783289e8d8e6ae04eeef Mon Sep 17 00:00:00 2001 From: Kyle Isom Date: Sun, 11 Jan 2026 11:39:08 -0800 Subject: [PATCH] Unicode improvements and version bump. - Added full UTF-8 support for terminal rendering, including multi-width character handling. - Improved font handling in ImGui with expanded glyph support (Greek, Mathematical Operators). - Updated locale initialization to enable proper character rendering. - Bumped version to 1.5.8. --- CMakeLists.txt | 2 +- ImGuiFrontend.cc | 41 ++++++++++-- TerminalRenderer.cc | 160 +++++++++++++++++++++++++++++--------------- fonts/Font.cc | 39 +++++++++-- main.cc | 3 + 5 files changed, 178 insertions(+), 67 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4be4ebf..776d81b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(kte) include(GNUInstallDirs) set(CMAKE_CXX_STANDARD 20) -set(KTE_VERSION "1.5.7") +set(KTE_VERSION "1.5.8") # Default to terminal-only build to avoid SDL/OpenGL dependency by default. # Enable with -DBUILD_GUI=ON when SDL2/OpenGL/Freetype are available. diff --git a/ImGuiFrontend.cc b/ImGuiFrontend.cc index d775206..682a499 100644 --- a/ImGuiFrontend.cc +++ b/ImGuiFrontend.cc @@ -357,14 +357,43 @@ GUIFrontend::LoadGuiFont_(const char * /*path*/, const float size_px) { const ImGuiIO &io = ImGui::GetIO(); io.Fonts->Clear(); - const ImFont *font = io.Fonts->AddFontFromMemoryCompressedTTF( + + ImFontConfig config; + config.MergeMode = false; + + // Load Basic Latin + Latin Supplement + io.Fonts->AddFontFromMemoryCompressedTTF( kte::Fonts::DefaultFontData, kte::Fonts::DefaultFontSize, - size_px); - if (!font) { - font = io.Fonts->AddFontDefault(); - } - (void) font; + size_px, + &config, + io.Fonts->GetGlyphRangesDefault()); + + // Merge Greek and Coptic + config.MergeMode = true; + static const ImWchar greek_ranges[] = { + 0x0370, 0x03FF, // Greek and Coptic + 0, + }; + io.Fonts->AddFontFromMemoryCompressedTTF( + kte::Fonts::DefaultFontData, + kte::Fonts::DefaultFontSize, + size_px, + &config, + greek_ranges); + + // Merge Mathematical Operators + static const ImWchar math_ranges[] = { + 0x2200, 0x22FF, // Mathematical Operators + 0, + }; + io.Fonts->AddFontFromMemoryCompressedTTF( + kte::Fonts::DefaultFontData, + kte::Fonts::DefaultFontSize, + size_px, + &config, + math_ranges); + io.Fonts->Build(); return true; } \ No newline at end of file diff --git a/TerminalRenderer.cc b/TerminalRenderer.cc index 52d7c08..d23b2bd 100644 --- a/TerminalRenderer.cc +++ b/TerminalRenderer.cc @@ -1,3 +1,6 @@ +#include +#define _XOPEN_SOURCE_EXTENDED 1 +#include #include #include #include @@ -157,35 +160,52 @@ TerminalRenderer::Draw(Editor &ed) // Map to simple attributes; search highlight uses A_STANDOUT which takes precedence below attrset(A_NORMAL); switch (k) { - case kte::TokenKind::Keyword: - case kte::TokenKind::Type: - case kte::TokenKind::Constant: - case kte::TokenKind::Function: - attron(A_BOLD); - break; - case kte::TokenKind::Comment: - attron(A_DIM); - break; - case kte::TokenKind::String: - case kte::TokenKind::Char: - case kte::TokenKind::Number: - // standout a bit using A_UNDERLINE if available - attron(A_UNDERLINE); - break; - default: - break; + case kte::TokenKind::Keyword: + case kte::TokenKind::Type: + case kte::TokenKind::Constant: + case kte::TokenKind::Function: + attron(A_BOLD); + break; + case kte::TokenKind::Comment: + attron(A_DIM); + break; + case kte::TokenKind::String: + case kte::TokenKind::Char: + case kte::TokenKind::Number: + // standout a bit using A_UNDERLINE if available + attron(A_UNDERLINE); + break; + default: + break; } }; while (written < cols) { - char ch = ' '; bool from_src = false; + wchar_t wch = L' '; + int wch_len = 1; + int disp_w = 1; + if (src_i < line.size()) { - unsigned char c = static_cast(line[src_i]); - if (c == '\t') { + // Decode UTF-8 + std::mbstate_t state = std::mbstate_t(); + size_t res = std::mbrtowc( + &wch, &line[src_i], line.size() - src_i, &state); + if (res == (size_t) -1 || res == (size_t) -2) { + // Invalid or incomplete; treat as single byte + wch = static_cast(line[src_i]); + wch_len = 1; + } else if (res == 0) { + wch = L'\0'; + wch_len = 1; + } else { + wch_len = static_cast(res); + } + + if (wch == L'\t') { std::size_t next_tab = tabw - (render_col % tabw); if (render_col + next_tab <= coloffs) { render_col += next_tab; - ++src_i; + src_i += wch_len; continue; } // Emit spaces for tab @@ -194,7 +214,7 @@ TerminalRenderer::Draw(Editor &ed) std::size_t to_skip = std::min( next_tab, coloffs - render_col); render_col += to_skip; - next_tab -= to_skip; + next_tab -= to_skip; } // Now render visible spaces while (next_tab > 0 && written < cols) { @@ -233,23 +253,34 @@ TerminalRenderer::Draw(Editor &ed) ++render_col; --next_tab; } - ++src_i; + src_i += wch_len; continue; } else { // normal char + disp_w = wcwidth(wch); + if (disp_w < 0) + disp_w = 1; // non-printable or similar + if (render_col < coloffs) { - ++render_col; - ++src_i; + render_col += disp_w; + src_i += wch_len; continue; } - ch = static_cast(c); from_src = true; } } else { // beyond EOL, fill spaces - ch = ' '; + wch = L' '; + wch_len = 1; + disp_w = 1; from_src = false; } + + if (written + disp_w > cols) { + // would overflow, just break + break; + } + bool in_hl = search_mode && from_src && is_src_in_hl(src_i); bool in_cur = has_current && li == cur_my && from_src && src_i >= cur_mx && src_i < @@ -273,11 +304,20 @@ TerminalRenderer::Draw(Editor &ed) if (!in_hl && from_src) { apply_token_attr(token_at(src_i)); } - addch(static_cast(ch)); - ++written; - ++render_col; + + if (from_src) { + cchar_t cch; + wchar_t warr[2] = {wch, L'\0'}; + setcchar(&cch, warr, A_NORMAL, 0, nullptr); + add_wch(&cch); + } else { + addch(' '); + } + + written += disp_w; + render_col += disp_w; if (from_src) - ++src_i; + src_i += wch_len; if (src_i >= line.size() && written >= cols) break; } @@ -297,23 +337,35 @@ TerminalRenderer::Draw(Editor &ed) // Place terminal cursor at logical position accounting for tabs and coloffs. // Recompute the rendered X using the same logic as the drawing loop to avoid // any drift between the command-layer computation and the terminal renderer. - std::size_t cy = buf->Cury(); - std::size_t cx = buf->Curx(); - int cur_y = static_cast(cy) - static_cast(buf->Rowoffs()); + std::size_t cy = buf->Cury(); + std::size_t cx = buf->Curx(); + int cur_y = static_cast(cy) - static_cast(buf->Rowoffs()); std::size_t rx_recomputed = 0; if (cy < lines.size()) { const std::string line_for_cursor = static_cast(lines[cy]); - std::size_t src_i_cur = 0; - std::size_t render_col_cur = 0; + std::size_t src_i_cur = 0; + std::size_t render_col_cur = 0; while (src_i_cur < line_for_cursor.size() && src_i_cur < cx) { - unsigned char ccur = static_cast(line_for_cursor[src_i_cur]); - if (ccur == '\t') { - std::size_t next_tab = tabw - (render_col_cur % tabw); - render_col_cur += next_tab; - ++src_i_cur; + std::mbstate_t state = std::mbstate_t(); + wchar_t wch; + size_t res = std::mbrtowc( + &wch, &line_for_cursor[src_i_cur], line_for_cursor.size() - src_i_cur, + &state); + + if (res == (size_t) -1 || res == (size_t) -2) { + render_col_cur += 1; + src_i_cur += 1; + } else if (res == 0) { + src_i_cur += 1; } else { - ++render_col_cur; - ++src_i_cur; + if (wch == L'\t') { + std::size_t next_tab = tabw - (render_col_cur % tabw); + render_col_cur += next_tab; + } else { + int dw = wcwidth(wch); + render_col_cur += (dw < 0) ? 1 : dw; + } + src_i_cur += res; } } rx_recomputed = render_col_cur; @@ -403,9 +455,9 @@ TerminalRenderer::Draw(Editor &ed) { const char *app = "kte"; left.reserve(256); - left += app; - left += " "; - left += KTE_VERSION_STR; // already includes leading 'v' + left += app; + left += " "; + left += KTE_VERSION_STR; // already includes leading 'v' const Buffer *b = buf; std::string fname; if (b) { @@ -426,11 +478,11 @@ TerminalRenderer::Draw(Editor &ed) std::size_t total = ed.BufferCount(); if (total > 0) { std::size_t idx1 = ed.CurrentBufferIndex() + 1; // human-friendly 1-based - left += "["; - left += std::to_string(static_cast(idx1)); - left += "/"; - left += std::to_string(static_cast(total)); - left += "] "; + left += "["; + left += std::to_string(static_cast(idx1)); + left += "/"; + left += std::to_string(static_cast(total)); + left += "] "; } } left += fname; @@ -442,9 +494,9 @@ TerminalRenderer::Draw(Editor &ed) // Append total line count as "L" if (b) { unsigned long lcount = static_cast(b->Rows().size()); - left += " "; - left += std::to_string(lcount); - left += "L"; + left += " "; + left += std::to_string(lcount); + left += "L"; } } diff --git a/fonts/Font.cc b/fonts/Font.cc index 427c61c..8d86df1 100644 --- a/fonts/Font.cc +++ b/fonts/Font.cc @@ -8,16 +8,43 @@ Font::Load(const float size) const { const ImGuiIO &io = ImGui::GetIO(); io.Fonts->Clear(); - const ImFont *font = io.Fonts->AddFontFromMemoryCompressedTTF( + + ImFontConfig config; + config.MergeMode = false; + + // Load Basic Latin + Latin Supplement + io.Fonts->AddFontFromMemoryCompressedTTF( this->data_, this->size_, - size); + size, + &config, + io.Fonts->GetGlyphRangesDefault()); - if (!font) { - font = io.Fonts->AddFontDefault(); - } + // Merge Greek and Coptic + config.MergeMode = true; + static const ImWchar greek_ranges[] = { + 0x0370, 0x03FF, // Greek and Coptic + 0, + }; + io.Fonts->AddFontFromMemoryCompressedTTF( + this->data_, + this->size_, + size, + &config, + greek_ranges); + + // Merge Mathematical Operators + static const ImWchar math_ranges[] = { + 0x2200, 0x22FF, // Mathematical Operators + 0, + }; + io.Fonts->AddFontFromMemoryCompressedTTF( + this->data_, + this->size_, + size, + &config, + math_ranges); - (void) font; io.Fonts->Build(); } } // namespace kte::Fonts \ No newline at end of file diff --git a/main.cc b/main.cc index 5e22b3e..55f0ab8 100644 --- a/main.cc +++ b/main.cc @@ -1,3 +1,4 @@ +#include #include #include #include @@ -113,6 +114,8 @@ RunStressHighlighter(unsigned seconds) int main(int argc, const char *argv[]) { + std::setlocale(LC_ALL, ""); + Editor editor; // CLI parsing using getopt_long