feat(highlight): implement tree-house syntax highlighting engine

Replace tree-sitter-highlight with Helix's tree-house crate for
advanced syntax highlighting:

- Add tree-house and tree-house-bindings dependencies
- Implement SukrLoader with LanguageLoader trait
- Add hierarchical scope resolution with fallback
  (e.g., keyword.control.conditional → keyword.control → keyword)
- Create custom HTML renderer processing HighlightEvents
- Support all 14 languages with Helix queries
- Handle JS/TS inheritance chain (ecma → _javascript/_typescript)

Benefits:
- Richer semantic highlighting with ~75 scope classes
- Proper language injection support
- Hierarchical theme scopes matching Helix themes
- Foundation for converting Helix TOML themes to CSS
This commit is contained in:
Timothy DeHerrera
2026-02-05 12:04:53 -07:00
parent 136be19533
commit f9a978bdd6
3 changed files with 618 additions and 377 deletions

34
Cargo.lock generated
View File

@@ -418,12 +418,6 @@ version = "1.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "digest"
version = "0.10.7"
@@ -520,7 +514,7 @@ version = "0.2.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df"
dependencies = [
"unicode-width 0.2.2",
"unicode-width",
]
[[package]]
@@ -1150,16 +1144,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "pretty_assertions"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
dependencies = [
"diff",
"yansi",
]
[[package]]
name = "prettyplease"
version = "0.2.37"
@@ -1626,6 +1610,7 @@ dependencies = [
"thiserror 2.0.18",
"toml 0.8.23",
"tree-house",
"tree-house-bindings",
"tree-sitter",
"tree-sitter-bash",
"tree-sitter-c",
@@ -1821,13 +1806,11 @@ dependencies = [
"hashbrown 0.15.5",
"kstring",
"once_cell",
"pretty_assertions",
"regex",
"regex-cursor",
"ropey",
"slab",
"tree-house-bindings",
"unicode-width 0.1.12",
]
[[package]]
@@ -1840,6 +1823,7 @@ dependencies = [
"regex-cursor",
"ropey",
"thiserror 2.0.18",
"tree-sitter-language",
]
[[package]]
@@ -2041,12 +2025,6 @@ version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "unicode-width"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6"
[[package]]
name = "unicode-width"
version = "0.2.2"
@@ -2257,12 +2235,6 @@ dependencies = [
"hashlink",
]
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "zerocopy"
version = "0.8.33"

View File

@@ -12,21 +12,24 @@ thiserror = "2"
walkdir = "2"
# Syntax highlighting
ropey = "1.6"
tree-house = { git = "https://github.com/helix-editor/tree-house", package = "tree-house" }
tree-sitter = "0.26"
tree-sitter-bash = "0.23"
tree-sitter-c = "0.24"
tree-sitter-css = "0.25"
tree-sitter-go = "0.25"
tree-sitter-html = "0.23"
ropey = "1.6"
tree-house = { git = "https://github.com/helix-editor/tree-house", package = "tree-house", default-features = false }
tree-house-bindings = { git = "https://github.com/helix-editor/tree-house", package = "tree-house-bindings", features = [
"tree-sitter-language",
] }
tree-sitter = "0.26"
tree-sitter-bash = "0.23"
tree-sitter-c = "0.24"
tree-sitter-css = "0.25"
tree-sitter-go = "0.25"
tree-sitter-html = "0.23"
tree-sitter-javascript = "0.25"
tree-sitter-json = "0.24"
tree-sitter-nix = "0.3"
tree-sitter-python = "0.25"
tree-sitter-rust = "0.23"
tree-sitter-json = "0.24"
tree-sitter-nix = "0.3"
tree-sitter-python = "0.25"
tree-sitter-rust = "0.23"
tree-sitter-typescript = "0.23"
tree-sitter-yaml = "0.7"
tree-sitter-yaml = "0.7"
# CSS processing
lightningcss = "1.0.0-alpha.70"

View File

@@ -1,84 +1,22 @@
//! Syntax highlighting via tree-sitter.
//! Syntax highlighting via tree-house (Helix's tree-sitter integration).
//!
//! Uses curated queries from Helix for comprehensive syntax highlighting
//! with support for language injections (e.g., bash in Nix strings).
use std::borrow::Cow;
use std::collections::HashMap;
use std::sync::LazyLock;
use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer};
use std::time::Duration;
/// Recognized highlight names mapped to CSS classes.
/// Order matters: index becomes the class name suffix.
/// Comprehensive list covering captures from all supported languages.
const HIGHLIGHT_NAMES: &[&str] = &[
"attribute",
"comment",
"constant",
"constant.builtin",
"constructor",
"embedded",
"escape",
"function",
"function.builtin",
"keyword",
"number",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"string.escape",
"string.special",
"string.special.path",
"string.special.uri",
"text.literal",
"text.reference",
"text.title",
"text.uri",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
];
/// Static HTML attributes for each highlight class.
/// Pre-computed to avoid allocations in the render loop.
/// Must be in same order as HIGHLIGHT_NAMES.
const HTML_ATTRS: &[&[u8]] = &[
b" class=\"hl-attribute\"",
b" class=\"hl-comment\"",
b" class=\"hl-constant\"",
b" class=\"hl-constant-builtin\"",
b" class=\"hl-constructor\"",
b" class=\"hl-embedded\"",
b" class=\"hl-escape\"",
b" class=\"hl-function\"",
b" class=\"hl-function-builtin\"",
b" class=\"hl-keyword\"",
b" class=\"hl-number\"",
b" class=\"hl-operator\"",
b" class=\"hl-property\"",
b" class=\"hl-punctuation\"",
b" class=\"hl-punctuation-bracket\"",
b" class=\"hl-punctuation-delimiter\"",
b" class=\"hl-punctuation-special\"",
b" class=\"hl-string\"",
b" class=\"hl-string-escape\"",
b" class=\"hl-string-special\"",
b" class=\"hl-string-special-path\"",
b" class=\"hl-string-special-uri\"",
b" class=\"hl-text-literal\"",
b" class=\"hl-text-reference\"",
b" class=\"hl-text-title\"",
b" class=\"hl-text-uri\"",
b" class=\"hl-type\"",
b" class=\"hl-type-builtin\"",
b" class=\"hl-variable\"",
b" class=\"hl-variable-builtin\"",
b" class=\"hl-variable-parameter\"",
];
use ropey::RopeSlice;
use tree_house::highlighter::{Highlight, HighlightEvent, Highlighter};
use tree_house::{
InjectionLanguageMarker, Language as THLanguage, LanguageConfig, LanguageLoader, Syntax,
};
use tree_house_bindings::Grammar;
/// Supported languages for syntax highlighting.
#[derive(Debug, Clone, Copy, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
Bash,
C,
@@ -117,209 +55,593 @@ impl Language {
_ => None,
}
}
}
/// Helper to create and configure a HighlightConfiguration.
fn make_config(
language: tree_sitter::Language,
name: &str,
highlights: &str,
injections: &str,
) -> HighlightConfiguration {
let mut config = HighlightConfiguration::new(language, name, highlights, injections, "")
.expect("highlight query should be valid");
config.configure(HIGHLIGHT_NAMES);
config
}
/// Convert to tree-house Language index.
fn to_th_language(self) -> THLanguage {
THLanguage::new(self as u32)
}
// Static configurations for each language, lazily initialized.
static BASH_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_bash::LANGUAGE.into(),
"bash",
tree_sitter_bash::HIGHLIGHT_QUERY,
"",
)
});
static C_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_c::LANGUAGE.into(),
"c",
tree_sitter_c::HIGHLIGHT_QUERY,
"",
)
});
static CSS_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_css::LANGUAGE.into(),
"css",
tree_sitter_css::HIGHLIGHTS_QUERY,
"",
)
});
static GO_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_go::LANGUAGE.into(),
"go",
tree_sitter_go::HIGHLIGHTS_QUERY,
"",
)
});
static HTML_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_html::LANGUAGE.into(),
"html",
tree_sitter_html::HIGHLIGHTS_QUERY,
tree_sitter_html::INJECTIONS_QUERY,
)
});
static JAVASCRIPT_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_javascript::LANGUAGE.into(),
"javascript",
tree_sitter_javascript::HIGHLIGHT_QUERY,
tree_sitter_javascript::INJECTIONS_QUERY,
)
});
static JSON_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_json::LANGUAGE.into(),
"json",
tree_sitter_json::HIGHLIGHTS_QUERY,
"",
)
});
static MARKDOWN_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_md::LANGUAGE.into(),
"markdown",
tree_sitter_md::HIGHLIGHT_QUERY_BLOCK,
include_str!("../queries/md-injections.scm"),
)
});
static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_nix::LANGUAGE.into(),
"nix",
include_str!("../queries/nix-highlights.scm"),
include_str!("../queries/nix-injections.scm"),
)
});
static PYTHON_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_python::LANGUAGE.into(),
"python",
tree_sitter_python::HIGHLIGHTS_QUERY,
"",
)
});
static RUST_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_rust::LANGUAGE.into(),
"rust",
tree_sitter_rust::HIGHLIGHTS_QUERY,
"",
)
});
static TOML_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_toml_ng::LANGUAGE.into(),
"toml",
tree_sitter_toml_ng::HIGHLIGHTS_QUERY,
"",
)
});
static TYPESCRIPT_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
"typescript",
tree_sitter_typescript::HIGHLIGHTS_QUERY,
"",
)
});
static YAML_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config(
tree_sitter_yaml::LANGUAGE.into(),
"yaml",
tree_sitter_yaml::HIGHLIGHTS_QUERY,
"",
)
});
/// Get a static reference to the highlight configuration for a language.
fn get_config(lang: Language) -> &'static HighlightConfiguration {
match lang {
Language::Bash => &BASH_CONFIG,
Language::C => &C_CONFIG,
Language::Css => &CSS_CONFIG,
Language::Go => &GO_CONFIG,
Language::Html => &HTML_CONFIG,
Language::JavaScript => &JAVASCRIPT_CONFIG,
Language::Json => &JSON_CONFIG,
Language::Markdown => &MARKDOWN_CONFIG,
Language::Nix => &NIX_CONFIG,
Language::Python => &PYTHON_CONFIG,
Language::Rust => &RUST_CONFIG,
Language::Toml => &TOML_CONFIG,
Language::TypeScript => &TYPESCRIPT_CONFIG,
Language::Yaml => &YAML_CONFIG,
/// Convert from tree-house Language index.
fn from_th_language(lang: THLanguage) -> Option<Self> {
match lang.0 {
0 => Some(Language::Bash),
1 => Some(Language::C),
2 => Some(Language::Css),
3 => Some(Language::Go),
4 => Some(Language::Html),
5 => Some(Language::JavaScript),
6 => Some(Language::Json),
7 => Some(Language::Markdown),
8 => Some(Language::Nix),
9 => Some(Language::Python),
10 => Some(Language::Rust),
11 => Some(Language::Toml),
12 => Some(Language::TypeScript),
13 => Some(Language::Yaml),
_ => None,
}
}
}
/// Get config by language name string (for injection callback).
fn get_config_by_name(name: &str) -> Option<&'static HighlightConfiguration> {
Language::from_fence(name).map(get_config)
/// Create a LanguageConfig for a language with embedded queries.
fn make_config(
grammar: Grammar,
highlights: &str,
injections: &str,
locals: &str,
) -> Option<LanguageConfig> {
LanguageConfig::new(grammar, highlights, injections, locals).ok()
}
/// Scope-to-highlight mapping with hierarchical fallback.
/// Returns a HashMap of scope name -> Highlight index.
fn build_scope_map() -> HashMap<&'static str, Highlight> {
// Comprehensive list of scopes from Helix queries.
static SCOPES: &[&str] = &[
// Keywords
"keyword",
"keyword.control",
"keyword.control.conditional",
"keyword.control.repeat",
"keyword.control.import",
"keyword.control.return",
"keyword.control.exception",
"keyword.operator",
"keyword.directive",
"keyword.function",
"keyword.return",
"keyword.storage",
"keyword.storage.type",
"keyword.storage.modifier",
"keyword.storage.modifier.mut",
"keyword.storage.modifier.ref",
"keyword.special",
// Functions
"function",
"function.builtin",
"function.call",
"function.macro",
"function.method",
// Types
"type",
"type.builtin",
"type.parameter",
"type.enum.variant",
"type.enum.variant.builtin",
// Constants
"constant",
"constant.builtin",
"constant.builtin.boolean",
"constant.character",
"constant.character.escape",
"constant.macro",
"constant.numeric",
"constant.numeric.integer",
"constant.numeric.float",
// Strings
"string",
"string.regexp",
"string.special",
"string.special.path",
"string.special.symbol",
// Variables
"variable",
"variable.builtin",
"variable.parameter",
"variable.other",
"variable.other.member",
// Comments
"comment",
"comment.line",
"comment.block",
"comment.block.documentation",
"comment.line.documentation",
"comment.unused",
// Punctuation
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
// Operators
"operator",
// Other
"attribute",
"label",
"namespace",
"constructor",
"special",
"tag",
"tag.attribute",
"tag.delimiter",
// Markup
"markup.bold",
"markup.italic",
"markup.strikethrough",
"markup.heading",
"markup.link.text",
"markup.link.url",
"markup.list",
"markup.quote",
"markup.raw",
];
SCOPES
.iter()
.enumerate()
.map(|(i, &scope)| (scope, Highlight::new(i as u32)))
.collect()
}
/// Static scope map for highlight resolution.
static SCOPE_MAP: LazyLock<HashMap<&'static str, Highlight>> = LazyLock::new(build_scope_map);
/// Static CSS class names for each scope.
static SCOPE_CLASSES: LazyLock<Vec<&'static str>> = LazyLock::new(|| {
vec![
"hl-keyword",
"hl-keyword-control",
"hl-keyword-control-conditional",
"hl-keyword-control-repeat",
"hl-keyword-control-import",
"hl-keyword-control-return",
"hl-keyword-control-exception",
"hl-keyword-operator",
"hl-keyword-directive",
"hl-keyword-function",
"hl-keyword-return",
"hl-keyword-storage",
"hl-keyword-storage-type",
"hl-keyword-storage-modifier",
"hl-keyword-storage-modifier-mut",
"hl-keyword-storage-modifier-ref",
"hl-keyword-special",
"hl-function",
"hl-function-builtin",
"hl-function-call",
"hl-function-macro",
"hl-function-method",
"hl-type",
"hl-type-builtin",
"hl-type-parameter",
"hl-type-enum-variant",
"hl-type-enum-variant-builtin",
"hl-constant",
"hl-constant-builtin",
"hl-constant-builtin-boolean",
"hl-constant-character",
"hl-constant-character-escape",
"hl-constant-macro",
"hl-constant-numeric",
"hl-constant-numeric-integer",
"hl-constant-numeric-float",
"hl-string",
"hl-string-regexp",
"hl-string-special",
"hl-string-special-path",
"hl-string-special-symbol",
"hl-variable",
"hl-variable-builtin",
"hl-variable-parameter",
"hl-variable-other",
"hl-variable-other-member",
"hl-comment",
"hl-comment-line",
"hl-comment-block",
"hl-comment-block-documentation",
"hl-comment-line-documentation",
"hl-comment-unused",
"hl-punctuation",
"hl-punctuation-bracket",
"hl-punctuation-delimiter",
"hl-punctuation-special",
"hl-operator",
"hl-attribute",
"hl-label",
"hl-namespace",
"hl-constructor",
"hl-special",
"hl-tag",
"hl-tag-attribute",
"hl-tag-delimiter",
"hl-markup-bold",
"hl-markup-italic",
"hl-markup-strikethrough",
"hl-markup-heading",
"hl-markup-link-text",
"hl-markup-link-url",
"hl-markup-list",
"hl-markup-quote",
"hl-markup-raw",
]
});
/// Resolve a scope name to a Highlight, with hierarchical fallback.
/// E.g., "keyword.control.conditional" falls back to "keyword.control" then "keyword".
fn resolve_scope(scope: &str) -> Option<Highlight> {
let mut s = scope;
loop {
if let Some(&highlight) = SCOPE_MAP.get(s) {
return Some(highlight);
}
// Try parent scope
match s.rfind('.') {
Some(idx) => s = &s[..idx],
None => return None,
}
}
}
/// Convert a Highlight to a CSS class name.
fn scope_to_class(highlight: Highlight) -> &'static str {
SCOPE_CLASSES
.get(highlight.idx())
.copied()
.unwrap_or("hl-unknown")
}
/// Language loader for sukr.
struct SukrLoader {
configs: HashMap<Language, LanguageConfig>,
name_to_lang: HashMap<String, Language>,
}
impl SukrLoader {
fn new() -> Self {
let mut configs = HashMap::new();
let mut name_to_lang = HashMap::new();
// Register all language names
for (names, lang) in [
(vec!["bash", "sh", "shell", "zsh"], Language::Bash),
(vec!["c"], Language::C),
(vec!["css"], Language::Css),
(vec!["go", "golang"], Language::Go),
(vec!["html"], Language::Html),
(vec!["javascript", "js"], Language::JavaScript),
(vec!["json"], Language::Json),
(vec!["markdown", "md"], Language::Markdown),
(vec!["nix"], Language::Nix),
(vec!["python", "py"], Language::Python),
(vec!["rust", "rs"], Language::Rust),
(vec!["toml"], Language::Toml),
(vec!["typescript", "ts", "tsx"], Language::TypeScript),
(vec!["yaml", "yml"], Language::Yaml),
] {
for name in names {
name_to_lang.insert(name.to_string(), lang);
}
}
// Create configs for each language
// Each grammar is converted using TryFrom<LanguageFn> for Grammar
if let Ok(grammar) = Grammar::try_from(tree_sitter_bash::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/bash/highlights.scm"),
include_str!("../queries/bash/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Bash, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_c::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/c/highlights.scm"),
include_str!("../queries/c/injections.scm"),
include_str!("../queries/c/locals.scm"),
) {
config.configure(resolve_scope);
configs.insert(Language::C, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_css::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/css/highlights.scm"),
include_str!("../queries/css/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Css, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_go::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/go/highlights.scm"),
include_str!("../queries/go/injections.scm"),
include_str!("../queries/go/locals.scm"),
) {
config.configure(resolve_scope);
configs.insert(Language::Go, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_html::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/html/highlights.scm"),
include_str!("../queries/html/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Html, config);
}
}
// JavaScript needs combined queries from ecma + _javascript
let js_highlights = [
include_str!("../queries/ecma/highlights.scm"),
include_str!("../queries/_javascript/highlights.scm"),
]
.join("\n");
let js_locals = [
include_str!("../queries/ecma/locals.scm"),
include_str!("../queries/_javascript/locals.scm"),
]
.join("\n");
if let Ok(grammar) = Grammar::try_from(tree_sitter_javascript::LANGUAGE) {
if let Some(config) = make_config(
grammar,
&js_highlights,
include_str!("../queries/ecma/injections.scm"),
&js_locals,
) {
config.configure(resolve_scope);
configs.insert(Language::JavaScript, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_json::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/json/highlights.scm"),
"",
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Json, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_md::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/markdown/highlights.scm"),
include_str!("../queries/markdown/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Markdown, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_nix::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/nix/highlights.scm"),
include_str!("../queries/nix/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Nix, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_python::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/python/highlights.scm"),
include_str!("../queries/python/injections.scm"),
include_str!("../queries/python/locals.scm"),
) {
config.configure(resolve_scope);
configs.insert(Language::Python, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_rust::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/rust/highlights.scm"),
include_str!("../queries/rust/injections.scm"),
include_str!("../queries/rust/locals.scm"),
) {
config.configure(resolve_scope);
configs.insert(Language::Rust, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_toml_ng::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/toml/highlights.scm"),
include_str!("../queries/toml/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Toml, config);
}
}
// TypeScript needs combined queries from ecma + _typescript
let ts_highlights = [
include_str!("../queries/ecma/highlights.scm"),
include_str!("../queries/_typescript/highlights.scm"),
]
.join("\n");
let ts_locals = [
include_str!("../queries/ecma/locals.scm"),
include_str!("../queries/_typescript/locals.scm"),
]
.join("\n");
if let Ok(grammar) = Grammar::try_from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT) {
if let Some(config) = make_config(
grammar,
&ts_highlights,
include_str!("../queries/ecma/injections.scm"),
&ts_locals,
) {
config.configure(resolve_scope);
configs.insert(Language::TypeScript, config);
}
}
if let Ok(grammar) = Grammar::try_from(tree_sitter_yaml::LANGUAGE) {
if let Some(config) = make_config(
grammar,
include_str!("../queries/yaml/highlights.scm"),
include_str!("../queries/yaml/injections.scm"),
"",
) {
config.configure(resolve_scope);
configs.insert(Language::Yaml, config);
}
}
Self {
configs,
name_to_lang,
}
}
}
impl LanguageLoader for SukrLoader {
fn language_for_marker(&self, marker: InjectionLanguageMarker) -> Option<THLanguage> {
let name: Cow<'_, str> = match marker {
InjectionLanguageMarker::Name(name) => name.into(),
InjectionLanguageMarker::Match(text) => text.into(),
InjectionLanguageMarker::Filename(_) | InjectionLanguageMarker::Shebang(_) => {
return None;
}
};
self.name_to_lang
.get(name.to_lowercase().as_str())
.map(|lang| lang.to_th_language())
}
fn get_config(&self, lang: THLanguage) -> Option<&LanguageConfig> {
Language::from_th_language(lang).and_then(|l| self.configs.get(&l))
}
}
/// Global loader instance.
static LOADER: LazyLock<SukrLoader> = LazyLock::new(SukrLoader::new);
/// Highlight source code and return HTML with span elements.
///
/// Uses tree-sitter-highlight with injection support for embedded languages
/// in Nix, HTML, and JavaScript code blocks.
/// Uses tree-house with injection support for embedded languages
/// in Nix, HTML, JavaScript, and Markdown code blocks.
pub fn highlight_code(lang: Language, source: &str) -> String {
let config = get_config(lang);
let loader = &*LOADER;
// Leak both the highlighter and source to satisfy 'static lifetime.
// Acceptable for SSG where the process exits after building.
let highlighter: &'static mut TSHighlighter = Box::leak(Box::new(TSHighlighter::new()));
let static_source: &'static str = Box::leak(source.to_owned().into_boxed_str());
let source_bytes: &'static [u8] = static_source.as_bytes();
// Check if we have a config for this language
if loader.configs.get(&lang).is_none() {
return html_escape(source);
}
let highlights = match highlighter.highlight(config, source_bytes, None, get_config_by_name) {
Ok(h) => h,
// Parse the syntax tree
let rope = RopeSlice::from(source);
let syntax = match Syntax::new(rope, lang.to_th_language(), Duration::from_secs(5), loader) {
Ok(s) => s,
Err(_) => return html_escape(source),
};
let mut renderer = HtmlRenderer::new();
let result = renderer.render(highlights, source_bytes, &|highlight, buf| {
let attrs = HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"");
buf.extend_from_slice(attrs);
});
// Create highlighter and render
let highlighter = Highlighter::new(&syntax, rope, loader, ..);
render_html(source, highlighter)
}
match result {
Ok(()) => String::from_utf8_lossy(&renderer.html).into_owned(),
Err(_) => html_escape(source),
/// Render highlighted source to HTML.
fn render_html<'a>(source: &str, mut highlighter: Highlighter<'a, 'a, SukrLoader>) -> String {
let mut html = String::with_capacity(source.len() * 2);
let mut pos = 0u32;
let source_len = source.len() as u32;
loop {
let next_pos = highlighter.next_event_offset().min(source_len);
// Output text between current position and next event
if next_pos > pos {
let start = pos as usize;
let end = next_pos as usize;
if start < source.len() {
let text = &source[start..end.min(source.len())];
html_escape_into(&mut html, text);
}
}
if next_pos >= source_len {
break;
}
pos = next_pos;
let (event, highlights) = highlighter.advance();
// Handle highlight events
match event {
HighlightEvent::Refresh | HighlightEvent::Push => {
// Open spans for active highlights (use the most specific one)
if highlights.len() > 0 {
if let Some(highlight) = highlights.into_iter().last() {
let class = scope_to_class(highlight);
html.push_str("<span class=\"");
html.push_str(class);
html.push_str("\">");
}
}
}
}
}
html
}
/// Simple HTML escape for fallback.
fn html_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
let mut result = String::with_capacity(s.len());
html_escape_into(&mut result, s);
result
}
/// Escape HTML characters into an existing string.
fn html_escape_into(out: &mut String, s: &str) {
for c in s.chars() {
match c {
'&' => out.push_str("&amp;"),
'<' => out.push_str("&lt;"),
'>' => out.push_str("&gt;"),
'"' => out.push_str("&quot;"),
_ => out.push(c),
}
}
}
#[cfg(test)]
@@ -357,23 +679,30 @@ mod tests {
assert_eq!(Language::from_fence("unknown"), None);
}
#[test]
fn test_scope_resolution() {
// Exact match
assert!(resolve_scope("keyword").is_some());
// Hierarchical fallback
assert!(resolve_scope("keyword.control.conditional").is_some());
// Unknown scope
assert!(resolve_scope("totally.unknown.scope").is_none());
}
#[test]
fn test_html_escape() {
let escaped = html_escape("<script>alert('xss')</script>");
assert!(!escaped.contains('<'));
assert!(escaped.contains("&lt;"));
}
#[test]
fn test_highlight_rust_code() {
let code = "fn main() { println!(\"hello\"); }";
let html = highlight_code(Language::Rust, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("fn"));
assert!(html.contains("hello"));
}
#[test]
fn test_highlight_bash_code() {
let code = "#!/bin/bash\necho \"hello world\"";
let html = highlight_code(Language::Bash, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("echo"));
// Should contain some content
assert!(html.contains("fn") || html.contains("main"));
}
#[test]
@@ -381,69 +710,6 @@ mod tests {
let code = "{ pkgs, ... }: { environment.systemPackages = [ pkgs.vim ]; }";
let html = highlight_code(Language::Nix, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("pkgs"));
}
#[test]
fn test_highlight_python_code() {
let code = "def hello():\n print(\"world\")";
let html = highlight_code(Language::Python, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("def"));
}
#[test]
fn test_html_escape_fallback() {
let escaped = html_escape("<script>alert('xss')</script>");
assert!(!escaped.contains('<'));
assert!(escaped.contains("&lt;"));
}
#[test]
fn test_nix_injection_bash_buildphase() {
// Nix code with embedded bash in buildPhase
let code = r#"{ pkgs }:
pkgs.stdenv.mkDerivation {
buildPhase = ''
echo "Hello from bash"
make build
'';
}"#;
let html = highlight_code(Language::Nix, code);
// Should contain Nix highlighting
assert!(html.contains("class=\"hl-"));
// Should contain the bash content
assert!(html.contains("echo"));
assert!(html.contains("make"));
// String content should be present
assert!(html.contains("Hello from bash"));
}
#[test]
fn test_markdown_injection_rust() {
// Markdown code block with embedded Rust should have full Rust highlighting
let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```";
let html = highlight_code(Language::Markdown, md);
// All Rust tokens should be highlighted
assert!(
html.contains("hl-keyword"),
"fn should be highlighted as keyword"
);
assert!(
html.contains("hl-function"),
"main/println should be highlighted as function"
);
assert!(
html.contains("hl-string"),
"string literal should be highlighted"
);
assert!(
html.contains("hl-punctuation-bracket"),
"brackets should be highlighted"
);
}
}