From ba5e97dfb7821ba773e510d75ac6088ee6aab04e Mon Sep 17 00:00:00 2001 From: Timothy DeHerrera Date: Sat, 24 Jan 2026 20:38:02 -0700 Subject: [PATCH] feat: add tree-sitter syntax highlighting module - Cargo.toml: Add tree-sitter-highlight + grammar crates (rust, bash, json). TOML dropped due to API incompatibility. - src/highlight.rs: Language enum, highlight_code() function, 4 unit tests covering parsing and HTML generation. - Uses static HTML_ATTRS array for zero-allocation rendering. --- Cargo.lock | 162 +++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 6 ++ src/highlight.rs | 172 +++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + 4 files changed, 339 insertions(+), 2 deletions(-) create mode 100644 src/highlight.rs diff --git a/Cargo.lock b/Cargo.lock index b255a40..918e2f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.21" @@ -32,6 +41,16 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "cc" +version = "1.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" @@ -47,6 +66,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db" + [[package]] name = "getopts" version = "0.2.24" @@ -93,6 +118,12 @@ version = 
"1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "maud" version = "0.26.0" @@ -128,7 +159,11 @@ dependencies = [ "gray_matter", "maud", "pulldown-cmark", - "thiserror", + "thiserror 2.0.18", + "tree-sitter-bash", + "tree-sitter-highlight", + "tree-sitter-json", + "tree-sitter-rust", "walkdir", ] @@ -198,6 +233,35 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + [[package]] name = "same-file" version = "1.0.6" @@ -250,6 +314,18 @@ dependencies = [ "zmij", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "syn" version = "2.0.114" @@ -261,13 +337,33 
@@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -290,6 +386,68 @@ dependencies = [ "serde", ] +[[package]] +name = "tree-sitter" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-bash" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6411813e4a9ebc87d391b98b0f3ce65d5361cd80c54de8651d8b85b555ea5d95" +dependencies = [ + "lazy_static", + "regex", + "streaming-iterator", + "thiserror 1.0.69", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-json" +version = "0.24.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d727acca406c0020cffc6cf35516764f36c8e3dc4408e5ebe2cb35a947ec471" +dependencies = [ + "cc", + 
"tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" + +[[package]] +name = "tree-sitter-rust" +version = "0.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "unicase" version = "2.9.0" diff --git a/Cargo.toml b/Cargo.toml index 555c4c7..065f000 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,9 @@ maud = "0.26" pulldown-cmark = "0.12" thiserror = "2" walkdir = "2" + +# Syntax highlighting +tree-sitter-bash = "0.23" +tree-sitter-highlight = "0.24" +tree-sitter-json = "0.24" +tree-sitter-rust = "0.23" diff --git a/src/highlight.rs b/src/highlight.rs new file mode 100644 index 0000000..0309fe8 --- /dev/null +++ b/src/highlight.rs @@ -0,0 +1,172 @@ +//! Syntax highlighting via tree-sitter. + +use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer}; + +/// Recognized highlight names mapped to CSS classes. +/// Order matters: index becomes the class name suffix. +const HIGHLIGHT_NAMES: &[&str] = &[ + "attribute", + "comment", + "constant", + "constant.builtin", + "constructor", + "function", + "function.builtin", + "keyword", + "number", + "operator", + "property", + "punctuation", + "punctuation.bracket", + "punctuation.delimiter", + "string", + "type", + "type.builtin", + "variable", + "variable.builtin", + "variable.parameter", +]; + +/// Static HTML attributes for each highlight class. +/// Pre-computed to avoid allocations in the render loop. 
+const HTML_ATTRS: &[&[u8]] = &[ + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", + b"", +]; + +/// Supported languages for syntax highlighting. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Language { + Rust, + Bash, + Json, +} + +impl Language { + /// Parse a language identifier from a code fence. + pub fn from_fence(lang: &str) -> Option<Self> { + match lang.to_lowercase().as_str() { + "rust" | "rs" => Some(Language::Rust), + "bash" | "sh" | "shell" | "zsh" => Some(Language::Bash), + "json" => Some(Language::Json), + _ => None, + } + } +} + +/// Get highlight configuration for a language. +fn get_config(lang: Language) -> HighlightConfiguration { + let (language, name, highlights) = match lang { + Language::Rust => ( + tree_sitter_rust::LANGUAGE.into(), + "rust", + tree_sitter_rust::HIGHLIGHTS_QUERY, + ), + Language::Bash => ( + tree_sitter_bash::LANGUAGE.into(), + "bash", + tree_sitter_bash::HIGHLIGHT_QUERY, + ), + Language::Json => ( + tree_sitter_json::LANGUAGE.into(), + "json", + tree_sitter_json::HIGHLIGHTS_QUERY, + ), + }; + + let mut config = HighlightConfiguration::new(language, name, highlights, "", "") + .expect("highlight query should be valid"); + + config.configure(HIGHLIGHT_NAMES); + config +} + +/// Highlight source code and return HTML with span elements.
+pub fn highlight_code(lang: Language, source: &str) -> String { + let mut highlighter = TSHighlighter::new(); + let config = get_config(lang); + + let highlights = match highlighter.highlight(&config, source.as_bytes(), None, |_| None) { + Ok(h) => h, + Err(_) => return html_escape(source), + }; + + let mut renderer = HtmlRenderer::new(); + let result = renderer.render(highlights, source.as_bytes(), &|highlight| { + HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"") + }); + + match result { + Ok(()) => String::from_utf8_lossy(&renderer.html).into_owned(), + Err(_) => html_escape(source), + } +} + +/// Simple HTML escape for fallback. +fn html_escape(s: &str) -> String { + s.replace('&', "&amp;") + .replace('<', "&lt;") + .replace('>', "&gt;") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_language_from_fence() { + assert_eq!(Language::from_fence("rust"), Some(Language::Rust)); + assert_eq!(Language::from_fence("rs"), Some(Language::Rust)); + assert_eq!(Language::from_fence("bash"), Some(Language::Bash)); + assert_eq!(Language::from_fence("sh"), Some(Language::Bash)); + assert_eq!(Language::from_fence("json"), Some(Language::Json)); + assert_eq!(Language::from_fence("unknown"), None); + } + + #[test] + fn test_highlight_rust_code() { + let code = "fn main() { println!(\"hello\"); }"; + let html = highlight_code(Language::Rust, code); + + // Should contain span elements with highlight classes + assert!(html.contains("<span")); + } + + #[test] + fn test_html_escape() { + let escaped = html_escape("<script>alert('xss')</script>"); + assert!(!escaped.contains('<')); + assert!(escaped.contains("&lt;")); + } +} diff --git a/src/main.rs b/src/main.rs index e4c8811..5df0c19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod content; mod error; +mod highlight; mod render; mod templates;