feat: add tree-sitter syntax highlighting module

- Cargo.toml: Add tree-sitter-highlight + grammar crates
  (rust, bash, json). TOML dropped due to API incompatibility.
- src/highlight.rs: Language enum, highlight_code() function,
  4 unit tests covering parsing and HTML generation.
- Uses static HTML_ATTRS array for zero-allocation rendering.
This commit is contained in:
Timothy DeHerrera
2026-01-24 20:38:02 -07:00
parent 5317da94c4
commit ba5e97dfb7
4 changed files with 339 additions and 2 deletions

172
src/highlight.rs Normal file
View File

@@ -0,0 +1,172 @@
//! Syntax highlighting via tree-sitter.
use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer};
/// Recognized highlight names mapped to CSS classes.
/// Order matters: index becomes the class name suffix.
const HIGHLIGHT_NAMES: &[&str] = &[
"attribute",
"comment",
"constant",
"constant.builtin",
"constructor",
"function",
"function.builtin",
"keyword",
"number",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"string",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
];
/// Static HTML attributes for each highlight class.
/// Pre-computed to avoid allocations in the render loop.
const HTML_ATTRS: &[&[u8]] = &[
b"<span class=\"hl-attribute\">",
b"<span class=\"hl-comment\">",
b"<span class=\"hl-constant\">",
b"<span class=\"hl-constant-builtin\">",
b"<span class=\"hl-constructor\">",
b"<span class=\"hl-function\">",
b"<span class=\"hl-function-builtin\">",
b"<span class=\"hl-keyword\">",
b"<span class=\"hl-number\">",
b"<span class=\"hl-operator\">",
b"<span class=\"hl-property\">",
b"<span class=\"hl-punctuation\">",
b"<span class=\"hl-punctuation-bracket\">",
b"<span class=\"hl-punctuation-delimiter\">",
b"<span class=\"hl-string\">",
b"<span class=\"hl-type\">",
b"<span class=\"hl-type-builtin\">",
b"<span class=\"hl-variable\">",
b"<span class=\"hl-variable-builtin\">",
b"<span class=\"hl-variable-parameter\">",
];
/// Supported languages for syntax highlighting.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Language {
Rust,
Bash,
Json,
}
impl Language {
/// Parse a language identifier from a code fence.
pub fn from_fence(lang: &str) -> Option<Self> {
match lang.to_lowercase().as_str() {
"rust" | "rs" => Some(Language::Rust),
"bash" | "sh" | "shell" | "zsh" => Some(Language::Bash),
"json" => Some(Language::Json),
_ => None,
}
}
}
/// Get highlight configuration for a language.
fn get_config(lang: Language) -> HighlightConfiguration {
let (language, name, highlights) = match lang {
Language::Rust => (
tree_sitter_rust::LANGUAGE.into(),
"rust",
tree_sitter_rust::HIGHLIGHTS_QUERY,
),
Language::Bash => (
tree_sitter_bash::LANGUAGE.into(),
"bash",
tree_sitter_bash::HIGHLIGHT_QUERY,
),
Language::Json => (
tree_sitter_json::LANGUAGE.into(),
"json",
tree_sitter_json::HIGHLIGHTS_QUERY,
),
};
let mut config = HighlightConfiguration::new(language, name, highlights, "", "")
.expect("highlight query should be valid");
config.configure(HIGHLIGHT_NAMES);
config
}
/// Highlight source code and return HTML with span elements.
pub fn highlight_code(lang: Language, source: &str) -> String {
let mut highlighter = TSHighlighter::new();
let config = get_config(lang);
let highlights = match highlighter.highlight(&config, source.as_bytes(), None, |_| None) {
Ok(h) => h,
Err(_) => return html_escape(source),
};
let mut renderer = HtmlRenderer::new();
let result = renderer.render(highlights, source.as_bytes(), &|highlight| {
HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"<span>")
});
match result {
Ok(()) => String::from_utf8_lossy(&renderer.html).into_owned(),
Err(_) => html_escape(source),
}
}
/// Simple HTML escape for fallback.
fn html_escape(s: &str) -> String {
s.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_language_from_fence() {
assert_eq!(Language::from_fence("rust"), Some(Language::Rust));
assert_eq!(Language::from_fence("rs"), Some(Language::Rust));
assert_eq!(Language::from_fence("bash"), Some(Language::Bash));
assert_eq!(Language::from_fence("sh"), Some(Language::Bash));
assert_eq!(Language::from_fence("json"), Some(Language::Json));
assert_eq!(Language::from_fence("unknown"), None);
}
#[test]
fn test_highlight_rust_code() {
let code = "fn main() { println!(\"hello\"); }";
let html = highlight_code(Language::Rust, code);
// Should contain span elements with highlight classes
assert!(html.contains("<span class=\"hl-"));
// Should contain the keyword "fn"
assert!(html.contains("fn"));
// Should contain the string
assert!(html.contains("hello"));
}
#[test]
fn test_highlight_bash_code() {
let code = "#!/bin/bash\necho \"hello world\"";
let html = highlight_code(Language::Bash, code);
assert!(html.contains("<span class=\"hl-"));
assert!(html.contains("echo"));
}
#[test]
fn test_html_escape_fallback() {
let escaped = html_escape("<script>alert('xss')</script>");
assert!(!escaped.contains('<'));
assert!(escaped.contains("&lt;"));
}
}