From 8c54882118c7e583f84f920d251c57e9c17afc60 Mon Sep 17 00:00:00 2001 From: Timothy DeHerrera Date: Sat, 31 Jan 2026 16:41:11 -0700 Subject: [PATCH] feat(highlight): add markdown syntax highlighting with injection Add tree-sitter-md for markdown parsing with injection support for fenced code blocks. Code inside markdown code fences (```rust, ```bash, etc.) is now fully syntax highlighted. Key fix: Use `#set! injection.include-children` directive in the injection query to override tree-sitter-md's internal tokenization of code_fence_content, allowing proper language injection. - Add tree-sitter-md v0.5.2 dependency - Add Markdown variant to Language enum (md, markdown aliases) - Create queries/md-highlights.scm (minimal markdown highlights) - Create queries/md-injections.scm (with include-children directive) - Add test: test_markdown_injection_rust --- Cargo.lock | 11 ++++++ Cargo.toml | 1 + docs/content/features/syntax-highlighting.md | 2 +- queries/md-highlights.scm | 35 ++++++++++++++++++ queries/md-injections.scm | 7 ++++ src/highlight.rs | 37 ++++++++++++++++++++ 6 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 queries/md-highlights.scm create mode 100644 queries/md-injections.scm diff --git a/Cargo.lock b/Cargo.lock index a2188a0..6907576 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1531,6 +1531,7 @@ dependencies = [ "tree-sitter-html", "tree-sitter-javascript", "tree-sitter-json", + "tree-sitter-md", "tree-sitter-nix", "tree-sitter-python", "tree-sitter-rust", @@ -1809,6 +1810,16 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce" +[[package]] +name = "tree-sitter-md" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c96068626225a758ddb1f7cfb82c7c1fab4e093dd3bde464e2a44e8341f58f5" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-nix" version = "0.3.0" diff --git a/Cargo.toml b/Cargo.toml index b20e4f2..b5b80ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,7 @@ toml = "0.8" # Diagram rendering mermaid-rs-renderer = { version = "0.1", default-features = false } +tree-sitter-md = "0.5.2" # Patch dagre_rust to fix unwrap on None bug [patch.crates-io] diff --git a/docs/content/features/syntax-highlighting.md b/docs/content/features/syntax-highlighting.md index 6dcc821..9bb9b4e 100644 --- a/docs/content/features/syntax-highlighting.md +++ b/docs/content/features/syntax-highlighting.md @@ -12,7 +12,7 @@ sukr highlights code blocks at build time using Tree-sitter. No client-side Java Use fenced code blocks with a language identifier: -````markdown +````md ```rust fn main() { println!("Hello, world!"); diff --git a/queries/md-highlights.scm b/queries/md-highlights.scm new file mode 100644 index 0000000..a1d693c --- /dev/null +++ b/queries/md-highlights.scm @@ -0,0 +1,35 @@ +; Minimal markdown block highlighting - only capture outside code blocks +; Removed all captures that could match inside code_fence_content + +(atx_heading (inline) @text.title) +(setext_heading (paragraph) @text.title) + +[ + (atx_h1_marker) + (atx_h2_marker) + (atx_h3_marker) + (atx_h4_marker) + (atx_h5_marker) + (atx_h6_marker) + (setext_h1_underline) + (setext_h2_underline) +] @punctuation.special + +(fenced_code_block_delimiter) @punctuation.delimiter +(info_string (language) @string) + +(link_title) @string +(link_destination) @text.uri +(link_label) @text.reference + +[ + (list_marker_plus) + (list_marker_minus) + (list_marker_star) + (list_marker_dot) + (list_marker_parenthesis) + (thematic_break) +] @punctuation.special + +(block_quote_marker) @punctuation.special +(backslash_escape) @string.escape diff --git a/queries/md-injections.scm b/queries/md-injections.scm new file mode 100644 index 0000000..b819b84 --- /dev/null +++ b/queries/md-injections.scm @@ -0,0 +1,7 @@ +; Markdown injection queries - testing include-children directive + +(fenced_code_block + (info_string + (language) @injection.language) + (code_fence_content) @injection.content + (#set! injection.include-children)) diff --git a/src/highlight.rs b/src/highlight.rs index 1ca808d..125f97d 100644 --- a/src/highlight.rs +++ b/src/highlight.rs @@ -77,6 +77,7 @@ pub enum Language { Html, JavaScript, Json, + Markdown, Nix, Python, Rust, @@ -95,6 +96,7 @@ impl Language { "html" => Some(Language::Html), "javascript" | "js" => Some(Language::JavaScript), "json" => Some(Language::Json), + "markdown" | "md" => Some(Language::Markdown), "nix" => Some(Language::Nix), "python" | "py" => Some(Language::Python), "rust" | "rs" => Some(Language::Rust), @@ -183,6 +185,15 @@ static JSON_CONFIG: LazyLock = LazyLock::new(|| { ) }); +static MARKDOWN_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_md::LANGUAGE.into(), + "markdown", + include_str!("../queries/md-highlights.scm"), + include_str!("../queries/md-injections.scm"), + ) +}); + static NIX_CONFIG: LazyLock = LazyLock::new(|| { make_config( tree_sitter_nix::LANGUAGE.into(), @@ -238,6 +249,7 @@ fn get_config(lang: Language) -> &'static HighlightConfiguration { Language::Html => &HTML_CONFIG, Language::JavaScript => &JAVASCRIPT_CONFIG, Language::Json => &JSON_CONFIG, + Language::Markdown => &MARKDOWN_CONFIG, Language::Nix => &NIX_CONFIG, Language::Python => &PYTHON_CONFIG, Language::Rust => &RUST_CONFIG, @@ -387,4 +399,29 @@ pkgs.stdenv.mkDerivation { // String content should be present assert!(html.contains("Hello from bash")); } + + #[test] + fn test_markdown_injection_rust() { + // Markdown code block with embedded Rust should have full Rust highlighting + let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```"; + let html = highlight_code(Language::Markdown, md); + + // All Rust tokens should be highlighted + assert!( + html.contains("hl-keyword"), + "fn should be highlighted as keyword" + ); + assert!( + html.contains("hl-function"), + "main/println should be highlighted as function" + ); + assert!( + html.contains("hl-string"), + "string literal should be highlighted" + ); + assert!( + html.contains("hl-punctuation-bracket"), + "brackets should be highlighted" + ); + } }