feat: add tree-sitter syntax highlighting module
- Cargo.toml: Add tree-sitter-highlight + grammar crates (rust, bash, json). TOML dropped due to API incompatibility.
- src/highlight.rs: Language enum, highlight_code() function, 4 unit tests covering parsing and HTML generation.
- Uses static HTML_ATTRS array for zero-allocation rendering.
This commit is contained in:
162
Cargo.lock
generated
162
Cargo.lock
generated
@@ -14,6 +14,15 @@ dependencies = [
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "allocator-api2"
|
||||
version = "0.2.21"
|
||||
@@ -32,6 +41,16 @@ version = "2.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.54"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6354c81bbfd62d9cfa9cb3c773c2b7b2a3a482d569de977fd0e961f6e7c00583"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"shlex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.4"
|
||||
@@ -47,6 +66,12 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "find-msvc-tools"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8591b0bcc8a98a64310a2fae1bb3e9b8564dd10e381e6e28010fde8e8e8568db"
|
||||
|
||||
[[package]]
|
||||
name = "getopts"
|
||||
version = "0.2.24"
|
||||
@@ -93,6 +118,12 @@ version = "1.0.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
|
||||
|
||||
[[package]]
|
||||
name = "maud"
|
||||
version = "0.26.0"
|
||||
@@ -128,7 +159,11 @@ dependencies = [
|
||||
"gray_matter",
|
||||
"maud",
|
||||
"pulldown-cmark",
|
||||
"thiserror",
|
||||
"thiserror 2.0.18",
|
||||
"tree-sitter-bash",
|
||||
"tree-sitter-highlight",
|
||||
"tree-sitter-json",
|
||||
"tree-sitter-rust",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
@@ -198,6 +233,35 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.12.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
@@ -250,6 +314,18 @@ dependencies = [
|
||||
"zmij",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "streaming-iterator"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.114"
|
||||
@@ -261,13 +337,33 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||
dependencies = [
|
||||
"thiserror-impl 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "2.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
"thiserror-impl 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -290,6 +386,68 @@ dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter"
|
||||
version = "0.24.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"streaming-iterator",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-bash"
|
||||
version = "0.23.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "329a4d48623ac337d42b1df84e81a1c9dbb2946907c102ca72db158c1964a52e"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-highlight"
|
||||
version = "0.24.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6411813e4a9ebc87d391b98b0f3ce65d5361cd80c54de8651d8b85b555ea5d95"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"regex",
|
||||
"streaming-iterator",
|
||||
"thiserror 1.0.69",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-json"
|
||||
version = "0.24.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d727acca406c0020cffc6cf35516764f36c8e3dc4408e5ebe2cb35a947ec471"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-language"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-rust"
|
||||
version = "0.23.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.9.0"
|
||||
|
||||
@@ -11,3 +11,9 @@ maud = "0.26"
|
||||
pulldown-cmark = "0.12"
|
||||
thiserror = "2"
|
||||
walkdir = "2"
|
||||
|
||||
# Syntax highlighting
|
||||
tree-sitter-bash = "0.23"
|
||||
tree-sitter-highlight = "0.24"
|
||||
tree-sitter-json = "0.24"
|
||||
tree-sitter-rust = "0.23"
|
||||
|
||||
172
src/highlight.rs
Normal file
172
src/highlight.rs
Normal file
@@ -0,0 +1,172 @@
|
||||
//! Syntax highlighting via tree-sitter.
|
||||
|
||||
use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer};
|
||||
|
||||
/// Highlight capture names recognized by the highlighter.
///
/// This slice is handed to `HighlightConfiguration::configure`. The position of
/// a name here is the index used to look up its entry in `HTML_ATTRS`, so the
/// two tables must list their entries in the same order.
const HIGHLIGHT_NAMES: &[&str] = &[
    "attribute",
    "comment",
    "constant",
    "constant.builtin",
    "constructor",
    "function",
    "function.builtin",
    "keyword",
    "number",
    "operator",
    "property",
    "punctuation",
    "punctuation.bracket",
    "punctuation.delimiter",
    "string",
    "type",
    "type.builtin",
    "variable",
    "variable.builtin",
    "variable.parameter",
];

/// HTML attribute text for each entry of `HIGHLIGHT_NAMES`, in the same order.
///
/// Only the attribute itself is stored (`class="hl-…"`). `HtmlRenderer` emits
/// the surrounding `<span ` and `>` on its own, so a complete tag here would
/// end up nested inside another tag. Dots in a capture name become dashes in
/// the class name. Entries are static byte strings so the render callback can
/// hand them out without allocating.
const HTML_ATTRS: &[&[u8]] = &[
    b"class=\"hl-attribute\"",
    b"class=\"hl-comment\"",
    b"class=\"hl-constant\"",
    b"class=\"hl-constant-builtin\"",
    b"class=\"hl-constructor\"",
    b"class=\"hl-function\"",
    b"class=\"hl-function-builtin\"",
    b"class=\"hl-keyword\"",
    b"class=\"hl-number\"",
    b"class=\"hl-operator\"",
    b"class=\"hl-property\"",
    b"class=\"hl-punctuation\"",
    b"class=\"hl-punctuation-bracket\"",
    b"class=\"hl-punctuation-delimiter\"",
    b"class=\"hl-string\"",
    b"class=\"hl-type\"",
    b"class=\"hl-type-builtin\"",
    b"class=\"hl-variable\"",
    b"class=\"hl-variable-builtin\"",
    b"class=\"hl-variable-parameter\"",
];

// Compile-time guard: both tables are indexed by the same highlight id.
const _: () = assert!(HIGHLIGHT_NAMES.len() == HTML_ATTRS.len());
|
||||
|
||||
/// Supported languages for syntax highlighting.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Language {
    /// Rust source code.
    Rust,
    /// Bash and other shell scripts.
    Bash,
    /// JSON documents.
    Json,
}

impl Language {
    /// Lowercase fence identifiers accepted by `from_fence`, with the language
    /// each one selects.
    const FENCE_ALIASES: &'static [(&'static str, Language)] = &[
        ("rust", Language::Rust),
        ("rs", Language::Rust),
        ("bash", Language::Bash),
        ("sh", Language::Bash),
        ("shell", Language::Bash),
        ("zsh", Language::Bash),
        ("json", Language::Json),
    ];

    /// Parse a language identifier from a code fence info string.
    ///
    /// Only the first token of `lang` is considered: surrounding whitespace is
    /// ignored and anything after the first whitespace character or comma
    /// (for example the `no_run` in `rust,no_run`) is dropped. Matching is
    /// ASCII case-insensitive and does not allocate.
    ///
    /// Returns `None` when the token is empty or not a known alias.
    pub fn from_fence(lang: &str) -> Option<Self> {
        // `split` always yields at least one item, so the fallback is only a
        // formality that keeps this free of `unwrap`.
        let token = lang
            .trim()
            .split(|c: char| c.is_whitespace() || c == ',')
            .next()
            .unwrap_or("");

        Self::FENCE_ALIASES
            .iter()
            .find(|(alias, _)| alias.eq_ignore_ascii_case(token))
            .map(|&(_, language)| language)
    }
}
|
||||
|
||||
/// Get highlight configuration for a language.
|
||||
fn get_config(lang: Language) -> HighlightConfiguration {
|
||||
let (language, name, highlights) = match lang {
|
||||
Language::Rust => (
|
||||
tree_sitter_rust::LANGUAGE.into(),
|
||||
"rust",
|
||||
tree_sitter_rust::HIGHLIGHTS_QUERY,
|
||||
),
|
||||
Language::Bash => (
|
||||
tree_sitter_bash::LANGUAGE.into(),
|
||||
"bash",
|
||||
tree_sitter_bash::HIGHLIGHT_QUERY,
|
||||
),
|
||||
Language::Json => (
|
||||
tree_sitter_json::LANGUAGE.into(),
|
||||
"json",
|
||||
tree_sitter_json::HIGHLIGHTS_QUERY,
|
||||
),
|
||||
};
|
||||
|
||||
let mut config = HighlightConfiguration::new(language, name, highlights, "", "")
|
||||
.expect("highlight query should be valid");
|
||||
|
||||
config.configure(HIGHLIGHT_NAMES);
|
||||
config
|
||||
}
|
||||
|
||||
/// Highlight source code and return HTML with span elements.
|
||||
pub fn highlight_code(lang: Language, source: &str) -> String {
|
||||
let mut highlighter = TSHighlighter::new();
|
||||
let config = get_config(lang);
|
||||
|
||||
let highlights = match highlighter.highlight(&config, source.as_bytes(), None, |_| None) {
|
||||
Ok(h) => h,
|
||||
Err(_) => return html_escape(source),
|
||||
};
|
||||
|
||||
let mut renderer = HtmlRenderer::new();
|
||||
let result = renderer.render(highlights, source.as_bytes(), &|highlight| {
|
||||
HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"<span>")
|
||||
});
|
||||
|
||||
match result {
|
||||
Ok(()) => String::from_utf8_lossy(&renderer.html).into_owned(),
|
||||
Err(_) => html_escape(source),
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape text for safe inclusion in HTML; used as the fallback when
/// highlighting fails.
///
/// Replaces `&`, `<`, `>`, `"` and `'` with `&amp;`, `&lt;`, `&gt;`, `&quot;`
/// and `&#39;`. All other characters are copied unchanged. The input is
/// scanned once, so an ampersand produced by one replacement is never escaped
/// a second time.
fn html_escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#39;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Known fence identifiers map to their language; anything else is `None`.
    #[test]
    fn test_language_from_fence() {
        assert_eq!(Language::from_fence("rust"), Some(Language::Rust));
        assert_eq!(Language::from_fence("rs"), Some(Language::Rust));
        assert_eq!(Language::from_fence("bash"), Some(Language::Bash));
        assert_eq!(Language::from_fence("sh"), Some(Language::Bash));
        assert_eq!(Language::from_fence("json"), Some(Language::Json));
        assert_eq!(Language::from_fence("unknown"), None);
    }

    /// Rust input yields highlight spans and keeps the source text.
    #[test]
    fn test_highlight_rust_code() {
        let code = "fn main() { println!(\"hello\"); }";
        let html = highlight_code(Language::Rust, code);

        // Should contain span elements with highlight classes
        assert!(html.contains("<span class=\"hl-"));
        // Should contain the keyword "fn"
        assert!(html.contains("fn"));
        // Should contain the string
        assert!(html.contains("hello"));
    }

    /// Bash input yields highlight spans and keeps the command name.
    #[test]
    fn test_highlight_bash_code() {
        let code = "#!/bin/bash\necho \"hello world\"";
        let html = highlight_code(Language::Bash, code);

        assert!(html.contains("<span class=\"hl-"));
        assert!(html.contains("echo"));
    }

    /// The fallback escaper must leave no raw angle brackets behind.
    #[test]
    fn test_html_escape_fallback() {
        let escaped = html_escape("<script>alert('xss')</script>");
        assert!(!escaped.contains('<'));
        assert!(!escaped.contains('>'));
        // The bracket must survive as an entity, not be dropped.
        assert!(escaped.contains("&lt;"));
    }
}
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
mod content;
|
||||
mod error;
|
||||
mod highlight;
|
||||
mod render;
|
||||
mod templates;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user