feat(highlight): add markdown syntax highlighting with injection
Add tree-sitter-md for markdown parsing with injection support for fenced code blocks. Code inside markdown code fences (```rust, ```bash, etc.) is now fully syntax highlighted.

Key fix: Use the `#set! injection.include-children` directive in the injection query to override tree-sitter-md's internal tokenization of code_fence_content, allowing proper language injection.

- Add tree-sitter-md v0.5.2 dependency
- Add Markdown variant to Language enum (md, markdown aliases)
- Create queries/md-highlights.scm (minimal markdown highlights)
- Create queries/md-injections.scm (with include-children directive)
- Add test: test_markdown_injection_rust
This commit is contained in:
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -1531,6 +1531,7 @@ dependencies = [
|
||||
"tree-sitter-html",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-json",
|
||||
"tree-sitter-md",
|
||||
"tree-sitter-nix",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-rust",
|
||||
@@ -1809,6 +1810,16 @@ version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-md"
|
||||
version = "0.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c96068626225a758ddb1f7cfb82c7c1fab4e093dd3bde464e2a44e8341f58f5"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter-language",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-nix"
|
||||
version = "0.3.0"
|
||||
|
||||
@@ -38,6 +38,7 @@ toml = "0.8"
|
||||
|
||||
# Diagram rendering
|
||||
mermaid-rs-renderer = { version = "0.1", default-features = false }
|
||||
tree-sitter-md = "0.5.2"
|
||||
|
||||
# Patch dagre_rust to fix unwrap on None bug
|
||||
[patch.crates-io]
|
||||
|
||||
@@ -12,7 +12,7 @@ sukr highlights code blocks at build time using Tree-sitter. No client-side Java
|
||||
|
||||
Use fenced code blocks with a language identifier:
|
||||
|
||||
````markdown
|
||||
````md
|
||||
```rust
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
|
||||
35
queries/md-highlights.scm
Normal file
35
queries/md-highlights.scm
Normal file
@@ -0,0 +1,35 @@
|
||||
; Minimal markdown block highlighting - only capture outside code blocks
|
||||
; Deliberately omits any capture that could match inside code_fence_content
|
||||
|
||||
(atx_heading (inline) @text.title)
|
||||
(setext_heading (paragraph) @text.title)
|
||||
|
||||
[
|
||||
(atx_h1_marker)
|
||||
(atx_h2_marker)
|
||||
(atx_h3_marker)
|
||||
(atx_h4_marker)
|
||||
(atx_h5_marker)
|
||||
(atx_h6_marker)
|
||||
(setext_h1_underline)
|
||||
(setext_h2_underline)
|
||||
] @punctuation.special
|
||||
|
||||
(fenced_code_block_delimiter) @punctuation.delimiter
|
||||
(info_string (language) @string)
|
||||
|
||||
(link_title) @string
|
||||
(link_destination) @text.uri
|
||||
(link_label) @text.reference
|
||||
|
||||
[
|
||||
(list_marker_plus)
|
||||
(list_marker_minus)
|
||||
(list_marker_star)
|
||||
(list_marker_dot)
|
||||
(list_marker_parenthesis)
|
||||
(thematic_break)
|
||||
] @punctuation.special
|
||||
|
||||
(block_quote_marker) @punctuation.special
|
||||
(backslash_escape) @string.escape
|
||||
7
queries/md-injections.scm
Normal file
7
queries/md-injections.scm
Normal file
@@ -0,0 +1,7 @@
|
||||
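; Markdown injection query: highlight fenced code content as the language named in the info string; include-children makes the injected range cover the content node's child nodes too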
|
||||
|
||||
(fenced_code_block
|
||||
(info_string
|
||||
(language) @injection.language)
|
||||
(code_fence_content) @injection.content
|
||||
(#set! injection.include-children))
|
||||
@@ -77,6 +77,7 @@ pub enum Language {
|
||||
Html,
|
||||
JavaScript,
|
||||
Json,
|
||||
Markdown,
|
||||
Nix,
|
||||
Python,
|
||||
Rust,
|
||||
@@ -95,6 +96,7 @@ impl Language {
|
||||
"html" => Some(Language::Html),
|
||||
"javascript" | "js" => Some(Language::JavaScript),
|
||||
"json" => Some(Language::Json),
|
||||
"markdown" | "md" => Some(Language::Markdown),
|
||||
"nix" => Some(Language::Nix),
|
||||
"python" | "py" => Some(Language::Python),
|
||||
"rust" | "rs" => Some(Language::Rust),
|
||||
@@ -183,6 +185,15 @@ static JSON_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
||||
)
|
||||
});
|
||||
|
||||
/// Highlight configuration for Markdown, built on first access via `LazyLock`.
///
/// Hands `make_config` the `tree_sitter_md` grammar, the name "markdown", and
/// the two query files embedded at compile time with `include_str!`
/// (`md-highlights.scm` and `md-injections.scm`).
// NOTE(review): presumably the last two arguments are the highlights and
// injections queries, in that order — confirm against `make_config`.
static MARKDOWN_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
||||
make_config(
|
||||
tree_sitter_md::LANGUAGE.into(),
|
||||
"markdown",
|
||||
include_str!("../queries/md-highlights.scm"),
|
||||
include_str!("../queries/md-injections.scm"),
|
||||
)
|
||||
});
|
||||
|
||||
static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
||||
make_config(
|
||||
tree_sitter_nix::LANGUAGE.into(),
|
||||
@@ -238,6 +249,7 @@ fn get_config(lang: Language) -> &'static HighlightConfiguration {
|
||||
Language::Html => &HTML_CONFIG,
|
||||
Language::JavaScript => &JAVASCRIPT_CONFIG,
|
||||
Language::Json => &JSON_CONFIG,
|
||||
Language::Markdown => &MARKDOWN_CONFIG,
|
||||
Language::Nix => &NIX_CONFIG,
|
||||
Language::Python => &PYTHON_CONFIG,
|
||||
Language::Rust => &RUST_CONFIG,
|
||||
@@ -387,4 +399,29 @@ pkgs.stdenv.mkDerivation {
|
||||
// String content should be present
|
||||
assert!(html.contains("Hello from bash"));
|
||||
}
|
||||
|
||||
// Checks that highlighting a Markdown document containing a fenced ```rust
// block produces output carrying Rust highlight classes.
#[test]
|
||||
fn test_markdown_injection_rust() {
|
||||
// Input is a single fenced code block tagged `rust`; it is highlighted as
// Markdown, so any Rust-specific classes must come from language injection.
|
||||
let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```";
|
||||
let html = highlight_code(Language::Markdown, md);
|
||||
|
||||
// Each assertion looks for one highlight class name somewhere in the output.
|
||||
assert!(
|
||||
html.contains("hl-keyword"),
|
||||
"fn should be highlighted as keyword"
|
||||
);
|
||||
assert!(
|
||||
html.contains("hl-function"),
|
||||
"main/println should be highlighted as function"
|
||||
);
|
||||
// NOTE(review): md-highlights.scm also captures the fence's language tag
// (`(info_string (language) @string)`), so this check may pass even when
// injection is not working — confirm, and consider asserting on "Hello".
assert!(
|
||||
html.contains("hl-string"),
|
||||
"string literal should be highlighted"
|
||||
);
|
||||
assert!(
|
||||
html.contains("hl-punctuation-bracket"),
|
||||
"brackets should be highlighted"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user