feat: add tree-sitter syntax highlighting module

- Cargo.toml: Add tree-sitter-highlight + grammar crates
  (rust, bash, json). TOML dropped due to API incompatibility.
- src/highlight.rs: Language enum, highlight_code() function,
  4 unit tests covering parsing and HTML generation.
- Uses static HTML_ATTRS array for zero-allocation rendering.
This commit is contained in:
Timothy DeHerrera
2026-01-24 20:38:02 -07:00
parent 5317da94c4
commit ba5e97dfb7
4 changed files with 339 additions and 2 deletions

162
Cargo.lock generated
View File

@@ -14,6 +14,15 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "allocator-api2" name = "allocator-api2"
version = "0.2.21" version = "0.2.21"
@@ -32,6 +41,16 @@ version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
[[package]]
name = "cc"
version = "1.2.54"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.4" version = "1.0.4"
@@ -47,6 +66,12 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "find-msvc-tools"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
[[package]] [[package]]
name = "getopts" name = "getopts"
version = "0.2.24" version = "0.2.24"
@@ -93,6 +118,12 @@ version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]] [[package]]
name = "maud" name = "maud"
version = "0.26.0" version = "0.26.0"
@@ -128,7 +159,11 @@ dependencies = [
"gray_matter", "gray_matter",
"maud", "maud",
"pulldown-cmark", "pulldown-cmark",
"thiserror", "thiserror 2.0.18",
"tree-sitter-bash",
"tree-sitter-highlight",
"tree-sitter-json",
"tree-sitter-rust",
"walkdir", "walkdir",
] ]
@@ -198,6 +233,35 @@ dependencies = [
"proc-macro2", "proc-macro2",
] ]
[[package]]
name = "regex"
version = "1.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
[[package]] [[package]]
name = "same-file" name = "same-file"
version = "1.0.6" version = "1.0.6"
@@ -250,6 +314,18 @@ dependencies = [
"zmij", "zmij",
] ]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.114" version = "2.0.114"
@@ -261,13 +337,33 @@ dependencies = [
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.18" version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl 2.0.18",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn",
] ]
[[package]] [[package]]
@@ -290,6 +386,68 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "tree-sitter"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
dependencies = [
"cc",
"regex",
"regex-syntax",
"streaming-iterator",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-bash"
version = "0.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6411813e4a9ebc87d391b98b0f3ce65d5361cd80c54de8651d8b85b555ea5d95"
dependencies = [
"lazy_static",
"regex",
"streaming-iterator",
"thiserror 1.0.69",
"tree-sitter",
]
[[package]]
name = "tree-sitter-json"
version = "0.24.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d727acca406c0020cffc6cf35516764f36c8e3dc4408e5ebe2cb35a947ec471"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-language"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
[[package]]
name = "tree-sitter-rust"
version = "0.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]] [[package]]
name = "unicase" name = "unicase"
version = "2.9.0" version = "2.9.0"

View File

@@ -11,3 +11,9 @@ maud = "0.26"
pulldown-cmark = "0.12" pulldown-cmark = "0.12"
thiserror = "2" thiserror = "2"
walkdir = "2" walkdir = "2"
# Syntax highlighting
tree-sitter-bash = "0.23"
tree-sitter-highlight = "0.24"
tree-sitter-json = "0.24"
tree-sitter-rust = "0.23"

172
src/highlight.rs Normal file
View File

@@ -0,0 +1,172 @@
//! Syntax highlighting via tree-sitter.
use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer};
/// Highlight capture names registered with each `HighlightConfiguration`
/// (see `get_config`, which passes this list to `configure`).
///
/// Order matters: `highlight_code` takes the numeric index carried by each
/// highlight and looks up the entry at the same position in `HTML_ATTRS`, so
/// this list and `HTML_ATTRS` must keep the same length and ordering.
/// Dotted names (e.g. `constant.builtin`) correspond to the dashed CSS class
/// (`hl-constant-builtin`) at the same position in `HTML_ATTRS`.
const HIGHLIGHT_NAMES: &[&str] = &[
"attribute",
"comment",
"constant",
"constant.builtin",
"constructor",
"function",
"function.builtin",
"keyword",
"number",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"string",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
];
/// Static HTML attribute strings, one per entry of `HIGHLIGHT_NAMES` and in
/// the same order.
///
/// `HtmlRenderer::render` writes the `<span` prefix and the closing `>`
/// itself and splices the callback's bytes in between, so every entry here
/// must be bare attribute text (`class="hl-…"`), never a complete opening
/// tag. Storing whole tags would produce `<span <span class="…">>`.
///
/// Kept as static byte strings so the render callback can return borrowed
/// slices without allocating.
const HTML_ATTRS: &[&[u8]] = &[
    b"class=\"hl-attribute\"",
    b"class=\"hl-comment\"",
    b"class=\"hl-constant\"",
    b"class=\"hl-constant-builtin\"",
    b"class=\"hl-constructor\"",
    b"class=\"hl-function\"",
    b"class=\"hl-function-builtin\"",
    b"class=\"hl-keyword\"",
    b"class=\"hl-number\"",
    b"class=\"hl-operator\"",
    b"class=\"hl-property\"",
    b"class=\"hl-punctuation\"",
    b"class=\"hl-punctuation-bracket\"",
    b"class=\"hl-punctuation-delimiter\"",
    b"class=\"hl-string\"",
    b"class=\"hl-type\"",
    b"class=\"hl-type-builtin\"",
    b"class=\"hl-variable\"",
    b"class=\"hl-variable-builtin\"",
    b"class=\"hl-variable-parameter\"",
];
/// Supported languages for syntax highlighting.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust source, selected by `rust` or `rs`.
    Rust,
    /// Shell scripts, selected by `bash`, `sh`, `shell` or `zsh`.
    Bash,
    /// JSON documents, selected by `json`.
    Json,
}

impl Language {
    /// Parse a language identifier from a code fence info string.
    ///
    /// Only the first word of the info string is considered, so trailing
    /// attributes such as `rust,ignore` or `sh title="x"` still resolve to
    /// their language. Surrounding whitespace is ignored and matching is
    /// ASCII-case-insensitive.
    ///
    /// Returns `None` for an empty string or an unsupported language.
    pub fn from_fence(lang: &str) -> Option<Self> {
        /// Accepted fence tags and the language each one selects.
        const ALIASES: &[(&str, Language)] = &[
            ("rust", Language::Rust),
            ("rs", Language::Rust),
            ("bash", Language::Bash),
            ("sh", Language::Bash),
            ("shell", Language::Bash),
            ("zsh", Language::Bash),
            ("json", Language::Json),
        ];

        // `split` always yields at least one (possibly empty) item; an empty
        // tag simply matches no alias below.
        let tag = lang
            .trim()
            .split(|c: char| c.is_whitespace() || c == ',')
            .next()?;

        // Compare in place instead of allocating a lowercased copy.
        ALIASES
            .iter()
            .find(|(alias, _)| alias.eq_ignore_ascii_case(tag))
            .map(|&(_, language)| language)
    }
}
/// Build the tree-sitter highlight configuration for `lang`.
///
/// Picks the grammar and the highlights query bundled with the matching
/// grammar crate, passes empty strings for the two remaining query
/// arguments, and registers `HIGHLIGHT_NAMES` on the result.
///
/// # Panics
///
/// Panics if `HighlightConfiguration::new` rejects the bundled query.
fn get_config(lang: Language) -> HighlightConfiguration {
    let (grammar, grammar_name, highlights_query) = match lang {
        Language::Rust => (
            tree_sitter_rust::LANGUAGE.into(),
            "rust",
            tree_sitter_rust::HIGHLIGHTS_QUERY,
        ),
        Language::Bash => (
            tree_sitter_bash::LANGUAGE.into(),
            "bash",
            tree_sitter_bash::HIGHLIGHT_QUERY,
        ),
        Language::Json => (
            tree_sitter_json::LANGUAGE.into(),
            "json",
            tree_sitter_json::HIGHLIGHTS_QUERY,
        ),
    };

    let built = HighlightConfiguration::new(grammar, grammar_name, highlights_query, "", "");
    let mut configuration = built.expect("highlight query should be valid");
    configuration.configure(HIGHLIGHT_NAMES);
    configuration
}
/// Highlight source code and return HTML with span elements.
///
/// Builds the configuration for `lang`, runs the highlighter over `source`
/// and renders the resulting events to HTML.
///
/// If either highlighting or rendering reports an error, the function falls
/// back to returning `source` HTML-escaped and without any spans, so callers
/// always receive text that is safe to embed.
pub fn highlight_code(lang: Language, source: &str) -> String {
    let mut highlighter = TSHighlighter::new();
    let config = get_config(lang);
    // No language injections are supported, hence the `|_| None` callback.
    let highlights = match highlighter.highlight(&config, source.as_bytes(), None, |_| None) {
        Ok(h) => h,
        Err(_) => return html_escape(source),
    };

    let mut renderer = HtmlRenderer::new();
    // The renderer writes `<span`, then a space plus the returned bytes when
    // they are non-empty, then `>`. The callback must therefore return
    // attribute text only; an index with no table entry yields an empty
    // string, which renders as a plain `<span>`.
    let result = renderer.render(highlights, source.as_bytes(), &|highlight| {
        HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"")
    });

    match result {
        Ok(()) => String::from_utf8_lossy(&renderer.html).into_owned(),
        Err(_) => html_escape(source),
    }
}
/// HTML-escape `s` for the unhighlighted fallback path.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with entity references in a single
/// pass. Escaping both quote characters keeps the result safe inside quoted
/// attribute values as well as in element content. All other characters are
/// copied through unchanged.
fn html_escape(s: &str) -> String {
    // At least as long as the input; grows only when something is escaped.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every documented fence alias resolves, matching ignores case, and
    /// unknown or empty tags are rejected.
    #[test]
    fn test_language_from_fence() {
        assert_eq!(Language::from_fence("rust"), Some(Language::Rust));
        assert_eq!(Language::from_fence("rs"), Some(Language::Rust));
        assert_eq!(Language::from_fence("bash"), Some(Language::Bash));
        assert_eq!(Language::from_fence("sh"), Some(Language::Bash));
        assert_eq!(Language::from_fence("shell"), Some(Language::Bash));
        assert_eq!(Language::from_fence("zsh"), Some(Language::Bash));
        assert_eq!(Language::from_fence("json"), Some(Language::Json));
        assert_eq!(Language::from_fence("RUST"), Some(Language::Rust));
        assert_eq!(Language::from_fence("Json"), Some(Language::Json));
        assert_eq!(Language::from_fence("unknown"), None);
        assert_eq!(Language::from_fence(""), None);
    }

    #[test]
    fn test_highlight_rust_code() {
        let code = "fn main() { println!(\"hello\"); }";
        let html = highlight_code(Language::Rust, code);
        // Should contain span elements with highlight classes
        assert!(html.contains("<span class=\"hl-"));
        // Should contain the keyword "fn"
        assert!(html.contains("fn"));
        // Should contain the string
        assert!(html.contains("hello"));
    }

    #[test]
    fn test_highlight_bash_code() {
        let code = "#!/bin/bash\necho \"hello world\"";
        let html = highlight_code(Language::Bash, code);
        assert!(html.contains("<span class=\"hl-"));
        assert!(html.contains("echo"));
    }

    /// JSON is a supported language, so it gets the same smoke test as the
    /// other grammars.
    #[test]
    fn test_highlight_json_code() {
        let code = "{\"name\": \"demo\", \"count\": 1}";
        let html = highlight_code(Language::Json, code);
        assert!(html.contains("<span class=\"hl-"));
        assert!(html.contains("count"));
    }

    /// Markup characters in the source must never reach the output raw.
    #[test]
    fn test_highlight_escapes_markup() {
        let html = highlight_code(Language::Rust, "let a = 1 < 2;");
        assert!(html.contains("&lt;"));
        assert!(!html.contains("< 2"));
    }

    #[test]
    fn test_html_escape_fallback() {
        let escaped = html_escape("<script>alert('xss')</script>");
        assert!(!escaped.contains('<'));
        assert!(!escaped.contains('>'));
        assert!(escaped.contains("&lt;"));
        assert!(escaped.contains("&gt;"));
        // Ampersands are escaped too, and already-escaped text is re-escaped
        // rather than passed through.
        assert_eq!(html_escape("a & b > c"), "a &amp; b &gt; c");
        assert_eq!(html_escape("&lt;"), "&amp;lt;");
    }
}

View File

@@ -4,6 +4,7 @@
mod content; mod content;
mod error; mod error;
mod highlight;
mod render; mod render;
mod templates; mod templates;