feat(highlight): add markdown syntax highlighting with injection

Add tree-sitter-md for markdown parsing with injection support for
fenced code blocks. Code inside markdown code fences (```rust, ```bash,
etc.) is now fully syntax highlighted.

Key fix: Use `#set! injection.include-children` directive in the
injection query to override tree-sitter-md's internal tokenization
of code_fence_content, allowing proper language injection.

- Add tree-sitter-md v0.5.2 dependency
- Add Markdown variant to Language enum (md, markdown aliases)
- Create queries/md-highlights.scm (minimal markdown highlights)
- Create queries/md-injections.scm (with include-children directive)
- Add test: test_markdown_injection_rust
This commit is contained in:
Timothy DeHerrera
2026-01-31 16:41:11 -07:00
parent 69cd81621f
commit 8c54882118
6 changed files with 92 additions and 1 deletions

11
Cargo.lock generated
View File

@@ -1531,6 +1531,7 @@ dependencies = [
"tree-sitter-html", "tree-sitter-html",
"tree-sitter-javascript", "tree-sitter-javascript",
"tree-sitter-json", "tree-sitter-json",
"tree-sitter-md",
"tree-sitter-nix", "tree-sitter-nix",
"tree-sitter-python", "tree-sitter-python",
"tree-sitter-rust", "tree-sitter-rust",
@@ -1809,6 +1810,16 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
[[package]]
name = "tree-sitter-md"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c96068626225a758ddb1f7cfb82c7c1fab4e093dd3bde464e2a44e8341f58f5"
dependencies = [
"cc",
"tree-sitter-language",
]
[[package]] [[package]]
name = "tree-sitter-nix" name = "tree-sitter-nix"
version = "0.3.0" version = "0.3.0"

View File

@@ -38,6 +38,7 @@ toml = "0.8"
# Diagram rendering # Diagram rendering
mermaid-rs-renderer = { version = "0.1", default-features = false } mermaid-rs-renderer = { version = "0.1", default-features = false }
tree-sitter-md = "0.5.2"
# Patch dagre_rust to fix unwrap on None bug # Patch dagre_rust to fix unwrap on None bug
[patch.crates-io] [patch.crates-io]

View File

@@ -12,7 +12,7 @@ sukr highlights code blocks at build time using Tree-sitter. No client-side Java
Use fenced code blocks with a language identifier: Use fenced code blocks with a language identifier:
````markdown ````md
```rust ```rust
fn main() { fn main() {
println!("Hello, world!"); println!("Hello, world!");

35
queries/md-highlights.scm Normal file
View File

@@ -0,0 +1,35 @@
; Minimal block-level highlight query for markdown.
; The patterns are intended to capture only structure outside fenced code
; content, so nothing here should match inside code_fence_content.
; Headings: the inline text of ATX headings and the paragraph of setext headings.
(atx_heading (inline) @text.title)
(setext_heading (paragraph) @text.title)
; Heading markers: ATX markers for levels 1-6 and both setext underlines.
[
(atx_h1_marker)
(atx_h2_marker)
(atx_h3_marker)
(atx_h4_marker)
(atx_h5_marker)
(atx_h6_marker)
(setext_h1_underline)
(setext_h2_underline)
] @punctuation.special
; Fenced code blocks: the fence delimiters and the language tag of the info string.
(fenced_code_block_delimiter) @punctuation.delimiter
(info_string (language) @string)
; Link components: title, destination and label.
(link_title) @string
(link_destination) @text.uri
(link_label) @text.reference
; List markers (all five marker kinds) and thematic breaks.
[
(list_marker_plus)
(list_marker_minus)
(list_marker_star)
(list_marker_dot)
(list_marker_parenthesis)
(thematic_break)
] @punctuation.special
; Block quote markers.
(block_quote_marker) @punctuation.special
; Backslash escape sequences.
(backslash_escape) @string.escape

View File

@@ -0,0 +1,7 @@
; Markdown injection query for fenced code blocks.
; The language named in the info string is captured as the injection language
; and the fence content is captured as the text to inject into.
; injection.include-children is set so that the whole code_fence_content node,
; child nodes included, is handed to the injected language instead of being
; left to the markdown grammar's own tokenization of that content.
(fenced_code_block
(info_string
(language) @injection.language)
(code_fence_content) @injection.content
(#set! injection.include-children))

View File

@@ -77,6 +77,7 @@ pub enum Language {
Html, Html,
JavaScript, JavaScript,
Json, Json,
Markdown,
Nix, Nix,
Python, Python,
Rust, Rust,
@@ -95,6 +96,7 @@ impl Language {
"html" => Some(Language::Html), "html" => Some(Language::Html),
"javascript" | "js" => Some(Language::JavaScript), "javascript" | "js" => Some(Language::JavaScript),
"json" => Some(Language::Json), "json" => Some(Language::Json),
"markdown" | "md" => Some(Language::Markdown),
"nix" => Some(Language::Nix), "nix" => Some(Language::Nix),
"python" | "py" => Some(Language::Python), "python" | "py" => Some(Language::Python),
"rust" | "rs" => Some(Language::Rust), "rust" | "rs" => Some(Language::Rust),
@@ -183,6 +185,15 @@ static JSON_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
) )
}); });
/// Highlight configuration for Markdown, built on first access.
///
/// Combines the `tree_sitter_md` grammar with the two query files embedded at
/// compile time: `md-highlights.scm` and `md-injections.scm`.
static MARKDOWN_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
    // Both query sources are baked into the binary; no file I/O happens here.
    let highlights_query = include_str!("../queries/md-highlights.scm");
    let injections_query = include_str!("../queries/md-injections.scm");

    make_config(
        tree_sitter_md::LANGUAGE.into(),
        "markdown",
        highlights_query,
        injections_query,
    )
});
static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| { static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
make_config( make_config(
tree_sitter_nix::LANGUAGE.into(), tree_sitter_nix::LANGUAGE.into(),
@@ -238,6 +249,7 @@ fn get_config(lang: Language) -> &'static HighlightConfiguration {
Language::Html => &HTML_CONFIG, Language::Html => &HTML_CONFIG,
Language::JavaScript => &JAVASCRIPT_CONFIG, Language::JavaScript => &JAVASCRIPT_CONFIG,
Language::Json => &JSON_CONFIG, Language::Json => &JSON_CONFIG,
Language::Markdown => &MARKDOWN_CONFIG,
Language::Nix => &NIX_CONFIG, Language::Nix => &NIX_CONFIG,
Language::Python => &PYTHON_CONFIG, Language::Python => &PYTHON_CONFIG,
Language::Rust => &RUST_CONFIG, Language::Rust => &RUST_CONFIG,
@@ -387,4 +399,29 @@ pkgs.stdenv.mkDerivation {
// String content should be present // String content should be present
assert!(html.contains("Hello from bash")); assert!(html.contains("Hello from bash"));
} }
/// Highlighting a Markdown document that contains a fenced Rust block should
/// produce Rust highlight classes for the code inside the fence.
#[test]
fn test_markdown_injection_rust() {
    // A Markdown source made of a single ```rust fence around a tiny program.
    let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```";
    let html = highlight_code(Language::Markdown, md);

    // CSS class expected in the output, paired with the failure message.
    // NOTE(review): the markdown highlight query also captures the fence's
    // language tag as @string, so the `hl-string` check may pass without any
    // injection taking place — confirm against the rendered output.
    let expected_classes = [
        ("hl-keyword", "fn should be highlighted as keyword"),
        (
            "hl-function",
            "main/println should be highlighted as function",
        ),
        ("hl-string", "string literal should be highlighted"),
        ("hl-punctuation-bracket", "brackets should be highlighted"),
    ];

    for (class, reason) in expected_classes {
        assert!(html.contains(class), "{reason}");
    }
}
} }