feat(highlight): add markdown syntax highlighting with injection
Add tree-sitter-md for markdown parsing with injection support for fenced code blocks. Code inside markdown code fences (```rust, ```bash, etc.) is now fully syntax highlighted. Key fix: Use `#set! injection.include-children` directive in the injection query to override tree-sitter-md's internal tokenization of code_fence_content, allowing proper language injection. - Add tree-sitter-md v0.5.2 dependency - Add Markdown variant to Language enum (md, markdown aliases) - Create queries/md-highlights.scm (minimal markdown highlights) - Create queries/md-injections.scm (with include-children directive) - Add test: test_markdown_injection_rust
This commit is contained in:
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -1531,6 +1531,7 @@ dependencies = [
|
|||||||
"tree-sitter-html",
|
"tree-sitter-html",
|
||||||
"tree-sitter-javascript",
|
"tree-sitter-javascript",
|
||||||
"tree-sitter-json",
|
"tree-sitter-json",
|
||||||
|
"tree-sitter-md",
|
||||||
"tree-sitter-nix",
|
"tree-sitter-nix",
|
||||||
"tree-sitter-python",
|
"tree-sitter-python",
|
||||||
"tree-sitter-rust",
|
"tree-sitter-rust",
|
||||||
@@ -1809,6 +1810,16 @@ version = "0.1.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
|
checksum = "4ae62f7eae5eb549c71b76658648b72cc6111f2d87d24a1e31fa907f4943e3ce"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tree-sitter-md"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2c96068626225a758ddb1f7cfb82c7c1fab4e093dd3bde464e2a44e8341f58f5"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"tree-sitter-language",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tree-sitter-nix"
|
name = "tree-sitter-nix"
|
||||||
version = "0.3.0"
|
version = "0.3.0"
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ toml = "0.8"
|
|||||||
|
|
||||||
# Diagram rendering
|
# Diagram rendering
|
||||||
mermaid-rs-renderer = { version = "0.1", default-features = false }
|
mermaid-rs-renderer = { version = "0.1", default-features = false }
|
||||||
|
tree-sitter-md = "0.5.2"
|
||||||
|
|
||||||
# Patch dagre_rust to fix unwrap on None bug
|
# Patch dagre_rust to fix unwrap on None bug
|
||||||
[patch.crates-io]
|
[patch.crates-io]
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ sukr highlights code blocks at build time using Tree-sitter. No client-side Java
|
|||||||
|
|
||||||
Use fenced code blocks with a language identifier:
|
Use fenced code blocks with a language identifier:
|
||||||
|
|
||||||
````markdown
|
````md
|
||||||
```rust
|
```rust
|
||||||
fn main() {
|
fn main() {
|
||||||
println!("Hello, world!");
|
println!("Hello, world!");
|
||||||
|
|||||||
35
queries/md-highlights.scm
Normal file
35
queries/md-highlights.scm
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
; Minimal markdown block highlighting - only capture outside code blocks
|
||||||
|
; Removed all captures that could match inside code_fence_content
|
||||||
|
|
||||||
|
(atx_heading (inline) @text.title)
|
||||||
|
(setext_heading (paragraph) @text.title)
|
||||||
|
|
||||||
|
[
|
||||||
|
(atx_h1_marker)
|
||||||
|
(atx_h2_marker)
|
||||||
|
(atx_h3_marker)
|
||||||
|
(atx_h4_marker)
|
||||||
|
(atx_h5_marker)
|
||||||
|
(atx_h6_marker)
|
||||||
|
(setext_h1_underline)
|
||||||
|
(setext_h2_underline)
|
||||||
|
] @punctuation.special
|
||||||
|
|
||||||
|
(fenced_code_block_delimiter) @punctuation.delimiter
|
||||||
|
(info_string (language) @string)
|
||||||
|
|
||||||
|
(link_title) @string
|
||||||
|
(link_destination) @text.uri
|
||||||
|
(link_label) @text.reference
|
||||||
|
|
||||||
|
[
|
||||||
|
(list_marker_plus)
|
||||||
|
(list_marker_minus)
|
||||||
|
(list_marker_star)
|
||||||
|
(list_marker_dot)
|
||||||
|
(list_marker_parenthesis)
|
||||||
|
(thematic_break)
|
||||||
|
] @punctuation.special
|
||||||
|
|
||||||
|
(block_quote_marker) @punctuation.special
|
||||||
|
(backslash_escape) @string.escape
|
||||||
7
queries/md-injections.scm
Normal file
7
queries/md-injections.scm
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
; Markdown injection queries - testing include-children directive
|
||||||
|
|
||||||
|
(fenced_code_block
|
||||||
|
(info_string
|
||||||
|
(language) @injection.language)
|
||||||
|
(code_fence_content) @injection.content
|
||||||
|
(#set! injection.include-children))
|
||||||
@@ -77,6 +77,7 @@ pub enum Language {
|
|||||||
Html,
|
Html,
|
||||||
JavaScript,
|
JavaScript,
|
||||||
Json,
|
Json,
|
||||||
|
Markdown,
|
||||||
Nix,
|
Nix,
|
||||||
Python,
|
Python,
|
||||||
Rust,
|
Rust,
|
||||||
@@ -95,6 +96,7 @@ impl Language {
|
|||||||
"html" => Some(Language::Html),
|
"html" => Some(Language::Html),
|
||||||
"javascript" | "js" => Some(Language::JavaScript),
|
"javascript" | "js" => Some(Language::JavaScript),
|
||||||
"json" => Some(Language::Json),
|
"json" => Some(Language::Json),
|
||||||
|
"markdown" | "md" => Some(Language::Markdown),
|
||||||
"nix" => Some(Language::Nix),
|
"nix" => Some(Language::Nix),
|
||||||
"python" | "py" => Some(Language::Python),
|
"python" | "py" => Some(Language::Python),
|
||||||
"rust" | "rs" => Some(Language::Rust),
|
"rust" | "rs" => Some(Language::Rust),
|
||||||
@@ -183,6 +185,15 @@ static JSON_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
|||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
static MARKDOWN_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
||||||
|
make_config(
|
||||||
|
tree_sitter_md::LANGUAGE.into(),
|
||||||
|
"markdown",
|
||||||
|
include_str!("../queries/md-highlights.scm"),
|
||||||
|
include_str!("../queries/md-injections.scm"),
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
static NIX_CONFIG: LazyLock<HighlightConfiguration> = LazyLock::new(|| {
|
||||||
make_config(
|
make_config(
|
||||||
tree_sitter_nix::LANGUAGE.into(),
|
tree_sitter_nix::LANGUAGE.into(),
|
||||||
@@ -238,6 +249,7 @@ fn get_config(lang: Language) -> &'static HighlightConfiguration {
|
|||||||
Language::Html => &HTML_CONFIG,
|
Language::Html => &HTML_CONFIG,
|
||||||
Language::JavaScript => &JAVASCRIPT_CONFIG,
|
Language::JavaScript => &JAVASCRIPT_CONFIG,
|
||||||
Language::Json => &JSON_CONFIG,
|
Language::Json => &JSON_CONFIG,
|
||||||
|
Language::Markdown => &MARKDOWN_CONFIG,
|
||||||
Language::Nix => &NIX_CONFIG,
|
Language::Nix => &NIX_CONFIG,
|
||||||
Language::Python => &PYTHON_CONFIG,
|
Language::Python => &PYTHON_CONFIG,
|
||||||
Language::Rust => &RUST_CONFIG,
|
Language::Rust => &RUST_CONFIG,
|
||||||
@@ -387,4 +399,29 @@ pkgs.stdenv.mkDerivation {
|
|||||||
// String content should be present
|
// String content should be present
|
||||||
assert!(html.contains("Hello from bash"));
|
assert!(html.contains("Hello from bash"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_markdown_injection_rust() {
|
||||||
|
// Markdown code block with embedded Rust should have full Rust highlighting
|
||||||
|
let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```";
|
||||||
|
let html = highlight_code(Language::Markdown, md);
|
||||||
|
|
||||||
|
// All Rust tokens should be highlighted
|
||||||
|
assert!(
|
||||||
|
html.contains("hl-keyword"),
|
||||||
|
"fn should be highlighted as keyword"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
html.contains("hl-function"),
|
||||||
|
"main/println should be highlighted as function"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
html.contains("hl-string"),
|
||||||
|
"string literal should be highlighted"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
html.contains("hl-punctuation-bracket"),
|
||||||
|
"brackets should be highlighted"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user