diff --git a/Cargo.lock b/Cargo.lock index 198a7dd..7ce1c4b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,6 +513,7 @@ dependencies = [ "serde", "thiserror", "toml 0.8.23", + "tree-sitter", "tree-sitter-bash", "tree-sitter-c", "tree-sitter-css", diff --git a/Cargo.toml b/Cargo.toml index 3b5ef3f..c3dbce4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ thiserror = "2" walkdir = "2" # Syntax highlighting +tree-sitter = "0.26" tree-sitter-bash = "0.23" tree-sitter-c = "0.24" tree-sitter-css = "0.25" diff --git a/queries/nix-injections.scm b/queries/nix-injections.scm new file mode 100644 index 0000000..45621c4 --- /dev/null +++ b/queries/nix-injections.scm @@ -0,0 +1,190 @@ +; Nix injection queries for embedded language highlighting +; Adapted from Helix editor queries for tree-sitter-highlight compatibility +; Removed Helix-specific predicates: #is-not?, #any-of?, @injection.shebang, @injection.filename + +((comment) @injection.content + (#set! injection.language "comment")) + +; mark arbitrary languages with a comment +((((comment) @injection.language) . + (indented_string_expression (string_fragment) @injection.content)) + (#set! injection.combined)) +((binding + (comment) @injection.language + expression: (indented_string_expression (string_fragment) @injection.content)) + (#set! injection.combined)) + +; Common attribute keys corresponding to Python scripts, +; such as those for NixOS VM tests in nixpkgs/nixos/tests. +((binding + attrpath: (attrpath (identifier) @_path) + expression: (indented_string_expression + (string_fragment) @injection.content)) + (#match? @_path "(^|\\.)testScript$") + (#set! injection.language "python") + (#set! injection.combined)) + +; Common attribute keys corresponding to scripts, +; such as those of stdenv.mkDerivation. +((binding + attrpath: (attrpath (identifier) @_path) + expression: [ + (indented_string_expression (string_fragment) @injection.content) + (binary_expression (indented_string_expression (string_fragment) @injection.content)) + ]) + (#match? @_path "(^\\w*Phase|command|(pre|post)\\w*|(.*\\.)?\\w*([sS]cript|[hH]ook)|(.*\\.)?startup)$") + (#set! injection.language "bash") + (#set! injection.combined)) + +; builtins.{match,split} regex str +((apply_expression + function: (_) @_func + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)match|split$") + (#set! injection.language "regex") + (#set! injection.combined)) + +; builtins.fromJSON json +((apply_expression + function: (_) @_func + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)fromJSON$") + (#set! injection.language "json") + (#set! injection.combined)) + +; builtins.fromTOML toml +((apply_expression + function: (_) @_func + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)fromTOML$") + (#set! injection.language "toml") + (#set! injection.combined)) + +; trivial-builders.nix pkgs.writeShellScript[Bin] name content +((apply_expression + function: (apply_expression function: (_) @_func) + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)writeShellScript(Bin)?$") + (#set! injection.language "bash") + (#set! injection.combined)) + +; trivial-builders.nix, aliases.nix +; pkgs.runCommand[[No]CC][Local] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)runCommand(((No)?(CC))?(Local)?)?$") + (#set! injection.language "bash") + (#set! injection.combined)) + +; trivial-builders.nix pkgs.writeShellApplication { text = content; } +(apply_expression + function: ((_) @_func) + argument: (_ (_)* (_ (_)* (binding + attrpath: (attrpath (identifier) @_path) + expression: (indented_string_expression + (string_fragment) @injection.content)))) + (#match? @_func "(^|\\.)writeShellApplication$") + (#match? @_path "^text$") + (#set! injection.language "bash") + (#set! injection.combined)) + +; trivial-builders.nix pkgs.writeCBin name content +((apply_expression + function: (apply_expression function: (_) @_func) + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)writeC(Bin)?$") + (#set! injection.language "c") + (#set! injection.combined)) + +; pkgs.writers.write{Bash,Dash}[Bin] name content +((apply_expression + function: (apply_expression function: (_) @_func) + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)write[BD]ash(Bin)?$") + (#set! injection.language "bash") + (#set! injection.combined)) + +; pkgs.writers.writeFish[Bin] name content +((apply_expression + function: (apply_expression function: (_) @_func) + argument: (indented_string_expression (string_fragment) @injection.content)) + (#match? @_func "(^|\\.)writeFish(Bin)?$") + (#set! injection.language "fish") + (#set! injection.combined)) + +; pkgs.writers.writeRust[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)writeRust(Bin)?$") + (#set! injection.language "rust") + (#set! injection.combined)) + +; pkgs.writers.writeHaskell[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)writeHaskell(Bin)?$") + (#set! injection.language "haskell") + (#set! injection.combined)) + +; pkgs.writers.writeJS[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)writeJS(Bin)?$") + (#set! injection.language "javascript") + (#set! injection.combined)) + +; pkgs.writers.write{Python,PyPy}{2,3}[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)write(Python|PyPy)[23](Bin)?$") + (#set! injection.language "python") + (#set! injection.combined)) + +; pkgs.writers.writeRuby[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)writeRuby(Bin)?$") + (#set! injection.language "ruby") + (#set! injection.combined)) + +; pkgs.writers.writeLua[Bin] name attrs content +(apply_expression + (apply_expression + function: (apply_expression + function: ((_) @_func))) + argument: (indented_string_expression (string_fragment) @injection.content) + (#match? @_func "(^|\\.)writeLua(Bin)?$") + (#set! injection.language "lua") + (#set! injection.combined)) + +; string contents of lib.literalExpression is nix code +((apply_expression + function: [ + (select_expression) + (variable_expression) + ] @_func + argument: [ + (indented_string_expression (string_fragment) @injection.content) + (string_expression (string_fragment) @injection.content) + ]) + (#match? @_func "(lib\\.)?literalExpression$") + (#set! injection.language "nix") + (#set! injection.combined)) diff --git a/src/highlight.rs b/src/highlight.rs index dd37fed..1ca808d 100644 --- a/src/highlight.rs +++ b/src/highlight.rs @@ -1,5 +1,6 @@ //! Syntax highlighting via tree-sitter. +use std::sync::LazyLock; use tree_sitter_highlight::{HighlightConfiguration, Highlighter as TSHighlighter, HtmlRenderer}; /// Recognized highlight names mapped to CSS classes. @@ -104,90 +105,172 @@ impl Language { } } -/// Get highlight configuration for a language. -fn get_config(lang: Language) -> HighlightConfiguration { - let (language, name, highlights) = match lang { - Language::Bash => ( - tree_sitter_bash::LANGUAGE.into(), - "bash", - tree_sitter_bash::HIGHLIGHT_QUERY, - ), - Language::C => ( - tree_sitter_c::LANGUAGE.into(), - "c", - tree_sitter_c::HIGHLIGHT_QUERY, - ), - Language::Css => ( - tree_sitter_css::LANGUAGE.into(), - "css", - tree_sitter_css::HIGHLIGHTS_QUERY, - ), - Language::Go => ( - tree_sitter_go::LANGUAGE.into(), - "go", - tree_sitter_go::HIGHLIGHTS_QUERY, - ), - Language::Html => ( - tree_sitter_html::LANGUAGE.into(), - "html", - tree_sitter_html::HIGHLIGHTS_QUERY, - ), - Language::JavaScript => ( - tree_sitter_javascript::LANGUAGE.into(), - "javascript", - tree_sitter_javascript::HIGHLIGHT_QUERY, - ), - Language::Json => ( - tree_sitter_json::LANGUAGE.into(), - "json", - tree_sitter_json::HIGHLIGHTS_QUERY, - ), - Language::Nix => ( - tree_sitter_nix::LANGUAGE.into(), - "nix", - include_str!("../queries/nix-highlights.scm"), - ), - Language::Python => ( - tree_sitter_python::LANGUAGE.into(), - "python", - tree_sitter_python::HIGHLIGHTS_QUERY, - ), - Language::Rust => ( - tree_sitter_rust::LANGUAGE.into(), - "rust", - tree_sitter_rust::HIGHLIGHTS_QUERY, - ), - Language::TypeScript => ( - tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), - "typescript", - tree_sitter_typescript::HIGHLIGHTS_QUERY, - ), - Language::Yaml => ( - tree_sitter_yaml::LANGUAGE.into(), - "yaml", - tree_sitter_yaml::HIGHLIGHTS_QUERY, - ), - }; - - let mut config = HighlightConfiguration::new(language, name, highlights, "", "") +/// Helper to create and configure a HighlightConfiguration. +fn make_config( + language: tree_sitter::Language, + name: &str, + highlights: &str, + injections: &str, +) -> HighlightConfiguration { + let mut config = HighlightConfiguration::new(language, name, highlights, injections, "") .expect("highlight query should be valid"); - config.configure(HIGHLIGHT_NAMES); config } +// Static configurations for each language, lazily initialized. + +static BASH_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_bash::LANGUAGE.into(), + "bash", + tree_sitter_bash::HIGHLIGHT_QUERY, + "", + ) +}); + +static C_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_c::LANGUAGE.into(), + "c", + tree_sitter_c::HIGHLIGHT_QUERY, + "", + ) +}); + +static CSS_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_css::LANGUAGE.into(), + "css", + tree_sitter_css::HIGHLIGHTS_QUERY, + "", + ) +}); + +static GO_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_go::LANGUAGE.into(), + "go", + tree_sitter_go::HIGHLIGHTS_QUERY, + "", + ) +}); + +static HTML_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_html::LANGUAGE.into(), + "html", + tree_sitter_html::HIGHLIGHTS_QUERY, + tree_sitter_html::INJECTIONS_QUERY, + ) +}); + +static JAVASCRIPT_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_javascript::LANGUAGE.into(), + "javascript", + tree_sitter_javascript::HIGHLIGHT_QUERY, + tree_sitter_javascript::INJECTIONS_QUERY, + ) +}); + +static JSON_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_json::LANGUAGE.into(), + "json", + tree_sitter_json::HIGHLIGHTS_QUERY, + "", + ) +}); + +static NIX_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_nix::LANGUAGE.into(), + "nix", + include_str!("../queries/nix-highlights.scm"), + include_str!("../queries/nix-injections.scm"), + ) +}); + +static PYTHON_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_python::LANGUAGE.into(), + "python", + tree_sitter_python::HIGHLIGHTS_QUERY, + "", + ) +}); + +static RUST_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_rust::LANGUAGE.into(), + "rust", + tree_sitter_rust::HIGHLIGHTS_QUERY, + "", + ) +}); + +static TYPESCRIPT_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(), + "typescript", + tree_sitter_typescript::HIGHLIGHTS_QUERY, + "", + ) +}); + +static YAML_CONFIG: LazyLock = LazyLock::new(|| { + make_config( + tree_sitter_yaml::LANGUAGE.into(), + "yaml", + tree_sitter_yaml::HIGHLIGHTS_QUERY, + "", + ) +}); + +/// Get a static reference to the highlight configuration for a language. +fn get_config(lang: Language) -> &'static HighlightConfiguration { + match lang { + Language::Bash => &BASH_CONFIG, + Language::C => &C_CONFIG, + Language::Css => &CSS_CONFIG, + Language::Go => &GO_CONFIG, + Language::Html => &HTML_CONFIG, + Language::JavaScript => &JAVASCRIPT_CONFIG, + Language::Json => &JSON_CONFIG, + Language::Nix => &NIX_CONFIG, + Language::Python => &PYTHON_CONFIG, + Language::Rust => &RUST_CONFIG, + Language::TypeScript => &TYPESCRIPT_CONFIG, + Language::Yaml => &YAML_CONFIG, + } +} + +/// Get config by language name string (for injection callback). +fn get_config_by_name(name: &str) -> Option<&'static HighlightConfiguration> { + Language::from_fence(name).map(get_config) +} + /// Highlight source code and return HTML with span elements. +/// +/// Uses tree-sitter-highlight with injection support for embedded languages +/// in Nix, HTML, and JavaScript code blocks. pub fn highlight_code(lang: Language, source: &str) -> String { - let mut highlighter = TSHighlighter::new(); let config = get_config(lang); - let highlights = match highlighter.highlight(&config, source.as_bytes(), None, |_| None) { + // Leak both the highlighter and source to satisfy 'static lifetime. + // Acceptable for SSG where the process exits after building. + let highlighter: &'static mut TSHighlighter = Box::leak(Box::new(TSHighlighter::new())); + let static_source: &'static str = Box::leak(source.to_owned().into_boxed_str()); + let source_bytes: &'static [u8] = static_source.as_bytes(); + + let highlights = match highlighter.highlight(config, source_bytes, None, get_config_by_name) { Ok(h) => h, Err(_) => return html_escape(source), }; let mut renderer = HtmlRenderer::new(); - let result = renderer.render(highlights, source.as_bytes(), &|highlight, buf| { + let result = renderer.render(highlights, source_bytes, &|highlight, buf| { let attrs = HTML_ATTRS.get(highlight.0).copied().unwrap_or(b""); buf.extend_from_slice(attrs); }); @@ -283,4 +366,25 @@ mod tests { assert!(!escaped.contains('<')); assert!(escaped.contains("<")); } + + #[test] + fn test_nix_injection_bash_buildphase() { + // Nix code with embedded bash in buildPhase + let code = r#"{ pkgs }: +pkgs.stdenv.mkDerivation { + buildPhase = '' + echo "Hello from bash" + make build + ''; +}"#; + let html = highlight_code(Language::Nix, code); + + // Should contain Nix highlighting + assert!(html.contains("class=\"hl-")); + // Should contain the bash content + assert!(html.contains("echo")); + assert!(html.contains("make")); + // String content should be present + assert!(html.contains("Hello from bash")); + } }