feat: add syntax highlighting for 9 additional languages

- Cargo.toml: Add tree-sitter grammars for Nix, Python, JavaScript,
  TypeScript, Go, C, CSS, HTML, YAML. Upgrade tree-sitter-highlight
  to 0.26 for language version 15 compatibility.

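- src/highlight.rs: Add Language enum variants, from_fence()
  identifiers, and get_config() match arms for all new languages.
  Update the render() callback for the 0.26 API (it now appends the
  attributes to a buffer instead of returning them). Add highlighting
  tests for Nix and Python and extend the from_fence() test to cover
  the new identifiers.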

TOML is excluded because its grammar is built against the tree-sitter
0.20 API, which is incompatible with the 0.26 API used here.
This commit is contained in:
Timothy DeHerrera
2026-01-25 17:20:00 -07:00
parent a73359098e
commit acb0ff3e15
3 changed files with 234 additions and 45 deletions

138
Cargo.lock generated
View File

@@ -511,12 +511,21 @@ dependencies = [
"maud", "maud",
"pulldown-cmark", "pulldown-cmark",
"serde", "serde",
"thiserror 2.0.18", "thiserror",
"toml 0.8.23", "toml 0.8.23",
"tree-sitter-bash", "tree-sitter-bash",
"tree-sitter-c",
"tree-sitter-css",
"tree-sitter-go",
"tree-sitter-highlight", "tree-sitter-highlight",
"tree-sitter-html",
"tree-sitter-javascript",
"tree-sitter-json", "tree-sitter-json",
"tree-sitter-nix",
"tree-sitter-python",
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-typescript",
"tree-sitter-yaml",
"walkdir", "walkdir",
] ]
@@ -926,6 +935,7 @@ version = "1.0.149"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
dependencies = [ dependencies = [
"indexmap",
"itoa", "itoa",
"memchr", "memchr",
"serde", "serde",
@@ -1009,33 +1019,13 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
[[package]]
name = "thiserror"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
dependencies = [
"thiserror-impl 1.0.69",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "2.0.18" version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
dependencies = [ dependencies = [
"thiserror-impl 2.0.18", "thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.114",
] ]
[[package]] [[package]]
@@ -1116,13 +1106,14 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]] [[package]]
name = "tree-sitter" name = "tree-sitter"
version = "0.24.7" version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" checksum = "974d205cc395652cfa8b37daa053fe56eebd429acf8dc055503fee648dae981e"
dependencies = [ dependencies = [
"cc", "cc",
"regex", "regex",
"regex-syntax", "regex-syntax",
"serde_json",
"streaming-iterator", "streaming-iterator",
"tree-sitter-language", "tree-sitter-language",
] ]
@@ -1138,18 +1129,67 @@ dependencies = [
] ]
[[package]] [[package]]
name = "tree-sitter-highlight" name = "tree-sitter-c"
version = "0.24.7" version = "0.24.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6411813e4a9ebc87d391b98b0f3ce65d5361cd80c54de8651d8b85b555ea5d95" checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-css"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-go"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.26.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb0636662a03005d9289649e0b4a89ff37b75df5033e8d4a16398740ae6496d2"
dependencies = [ dependencies = [
"lazy_static",
"regex", "regex",
"streaming-iterator", "streaming-iterator",
"thiserror 1.0.69", "thiserror",
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "tree-sitter-html"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]] [[package]]
name = "tree-sitter-json" name = "tree-sitter-json"
version = "0.24.8" version = "0.24.8"
@@ -1166,6 +1206,26 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
[[package]]
name = "tree-sitter-nix"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4952a9733f3a98f6683a0ccd1035d84ab7a52f7e84eeed58548d86765ad92de3"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-python"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]] [[package]]
name = "tree-sitter-rust" name = "tree-sitter-rust"
version = "0.23.3" version = "0.23.3"
@@ -1176,6 +1236,26 @@ dependencies = [
"tree-sitter-language", "tree-sitter-language",
] ]
[[package]]
name = "tree-sitter-typescript"
version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]]
name = "tree-sitter-yaml"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]] [[package]]
name = "unicase" name = "unicase"
version = "2.9.0" version = "2.9.0"

View File

@@ -13,10 +13,19 @@ thiserror = "2"
walkdir = "2" walkdir = "2"
# Syntax highlighting # Syntax highlighting
tree-sitter-bash = "0.23" tree-sitter-bash = "0.23"
tree-sitter-highlight = "0.24" tree-sitter-c = "0.24"
tree-sitter-json = "0.24" tree-sitter-css = "0.25"
tree-sitter-rust = "0.23" tree-sitter-go = "0.25"
tree-sitter-highlight = "0.26"
tree-sitter-html = "0.23"
tree-sitter-javascript = "0.25"
tree-sitter-json = "0.24"
tree-sitter-nix = "0.3"
tree-sitter-python = "0.25"
tree-sitter-rust = "0.23"
tree-sitter-typescript = "0.23"
tree-sitter-yaml = "0.7"
# CSS processing # CSS processing
lightningcss = "1.0.0-alpha.70" lightningcss = "1.0.0-alpha.70"

View File

@@ -56,18 +56,36 @@ const HTML_ATTRS: &[&[u8]] = &[
/// Supported languages for syntax highlighting. /// Supported languages for syntax highlighting.
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub enum Language { pub enum Language {
Rust,
Bash, Bash,
C,
Css,
Go,
Html,
JavaScript,
Json, Json,
Nix,
Python,
Rust,
TypeScript,
Yaml,
} }
impl Language { impl Language {
/// Parse a language identifier from a code fence. /// Parse a language identifier from a code fence.
pub fn from_fence(lang: &str) -> Option<Self> { pub fn from_fence(lang: &str) -> Option<Self> {
match lang.to_lowercase().as_str() { match lang.to_lowercase().as_str() {
"rust" | "rs" => Some(Language::Rust),
"bash" | "sh" | "shell" | "zsh" => Some(Language::Bash), "bash" | "sh" | "shell" | "zsh" => Some(Language::Bash),
"c" => Some(Language::C),
"css" => Some(Language::Css),
"go" | "golang" => Some(Language::Go),
"html" => Some(Language::Html),
"javascript" | "js" => Some(Language::JavaScript),
"json" => Some(Language::Json), "json" => Some(Language::Json),
"nix" => Some(Language::Nix),
"python" | "py" => Some(Language::Python),
"rust" | "rs" => Some(Language::Rust),
"typescript" | "ts" | "tsx" => Some(Language::TypeScript),
"yaml" | "yml" => Some(Language::Yaml),
_ => None, _ => None,
} }
} }
@@ -76,21 +94,66 @@ impl Language {
/// Get highlight configuration for a language. /// Get highlight configuration for a language.
fn get_config(lang: Language) -> HighlightConfiguration { fn get_config(lang: Language) -> HighlightConfiguration {
let (language, name, highlights) = match lang { let (language, name, highlights) = match lang {
Language::Rust => (
tree_sitter_rust::LANGUAGE.into(),
"rust",
tree_sitter_rust::HIGHLIGHTS_QUERY,
),
Language::Bash => ( Language::Bash => (
tree_sitter_bash::LANGUAGE.into(), tree_sitter_bash::LANGUAGE.into(),
"bash", "bash",
tree_sitter_bash::HIGHLIGHT_QUERY, tree_sitter_bash::HIGHLIGHT_QUERY,
), ),
Language::C => (
tree_sitter_c::LANGUAGE.into(),
"c",
tree_sitter_c::HIGHLIGHT_QUERY,
),
Language::Css => (
tree_sitter_css::LANGUAGE.into(),
"css",
tree_sitter_css::HIGHLIGHTS_QUERY,
),
Language::Go => (
tree_sitter_go::LANGUAGE.into(),
"go",
tree_sitter_go::HIGHLIGHTS_QUERY,
),
Language::Html => (
tree_sitter_html::LANGUAGE.into(),
"html",
tree_sitter_html::HIGHLIGHTS_QUERY,
),
Language::JavaScript => (
tree_sitter_javascript::LANGUAGE.into(),
"javascript",
tree_sitter_javascript::HIGHLIGHT_QUERY,
),
Language::Json => ( Language::Json => (
tree_sitter_json::LANGUAGE.into(), tree_sitter_json::LANGUAGE.into(),
"json", "json",
tree_sitter_json::HIGHLIGHTS_QUERY, tree_sitter_json::HIGHLIGHTS_QUERY,
), ),
Language::Nix => (
tree_sitter_nix::LANGUAGE.into(),
"nix",
tree_sitter_nix::HIGHLIGHTS_QUERY,
),
Language::Python => (
tree_sitter_python::LANGUAGE.into(),
"python",
tree_sitter_python::HIGHLIGHTS_QUERY,
),
Language::Rust => (
tree_sitter_rust::LANGUAGE.into(),
"rust",
tree_sitter_rust::HIGHLIGHTS_QUERY,
),
Language::TypeScript => (
tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(),
"typescript",
tree_sitter_typescript::HIGHLIGHTS_QUERY,
),
Language::Yaml => (
tree_sitter_yaml::LANGUAGE.into(),
"yaml",
tree_sitter_yaml::HIGHLIGHTS_QUERY,
),
}; };
let mut config = HighlightConfiguration::new(language, name, highlights, "", "") let mut config = HighlightConfiguration::new(language, name, highlights, "", "")
@@ -111,8 +174,9 @@ pub fn highlight_code(lang: Language, source: &str) -> String {
}; };
let mut renderer = HtmlRenderer::new(); let mut renderer = HtmlRenderer::new();
let result = renderer.render(highlights, source.as_bytes(), &|highlight| { let result = renderer.render(highlights, source.as_bytes(), &|highlight, buf| {
HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"<span>") let attrs = HTML_ATTRS.get(highlight.0).copied().unwrap_or(b"");
buf.extend_from_slice(attrs);
}); });
match result { match result {
@@ -139,6 +203,27 @@ mod tests {
assert_eq!(Language::from_fence("bash"), Some(Language::Bash)); assert_eq!(Language::from_fence("bash"), Some(Language::Bash));
assert_eq!(Language::from_fence("sh"), Some(Language::Bash)); assert_eq!(Language::from_fence("sh"), Some(Language::Bash));
assert_eq!(Language::from_fence("json"), Some(Language::Json)); assert_eq!(Language::from_fence("json"), Some(Language::Json));
assert_eq!(Language::from_fence("nix"), Some(Language::Nix));
assert_eq!(Language::from_fence("python"), Some(Language::Python));
assert_eq!(Language::from_fence("py"), Some(Language::Python));
assert_eq!(
Language::from_fence("javascript"),
Some(Language::JavaScript)
);
assert_eq!(Language::from_fence("js"), Some(Language::JavaScript));
assert_eq!(
Language::from_fence("typescript"),
Some(Language::TypeScript)
);
assert_eq!(Language::from_fence("ts"), Some(Language::TypeScript));
assert_eq!(Language::from_fence("tsx"), Some(Language::TypeScript));
assert_eq!(Language::from_fence("go"), Some(Language::Go));
assert_eq!(Language::from_fence("golang"), Some(Language::Go));
assert_eq!(Language::from_fence("c"), Some(Language::C));
assert_eq!(Language::from_fence("yaml"), Some(Language::Yaml));
assert_eq!(Language::from_fence("yml"), Some(Language::Yaml));
assert_eq!(Language::from_fence("css"), Some(Language::Css));
assert_eq!(Language::from_fence("html"), Some(Language::Html));
assert_eq!(Language::from_fence("unknown"), None); assert_eq!(Language::from_fence("unknown"), None);
} }
@@ -147,11 +232,8 @@ mod tests {
let code = "fn main() { println!(\"hello\"); }"; let code = "fn main() { println!(\"hello\"); }";
let html = highlight_code(Language::Rust, code); let html = highlight_code(Language::Rust, code);
// Should contain span elements with highlight classes
assert!(html.contains("class=\"hl-")); assert!(html.contains("class=\"hl-"));
// Should contain the keyword "fn"
assert!(html.contains("fn")); assert!(html.contains("fn"));
// Should contain the string
assert!(html.contains("hello")); assert!(html.contains("hello"));
} }
@@ -164,6 +246,24 @@ mod tests {
assert!(html.contains("echo")); assert!(html.contains("echo"));
} }
#[test]
fn test_highlight_nix_code() {
let code = "{ pkgs, ... }: { environment.systemPackages = [ pkgs.vim ]; }";
let html = highlight_code(Language::Nix, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("pkgs"));
}
#[test]
fn test_highlight_python_code() {
let code = "def hello():\n print(\"world\")";
let html = highlight_code(Language::Python, code);
assert!(html.contains("class=\"hl-"));
assert!(html.contains("def"));
}
#[test] #[test]
fn test_html_escape_fallback() { fn test_html_escape_fallback() {
let escaped = html_escape("<script>alert('xss')</script>"); let escaped = html_escape("<script>alert('xss')</script>");