//! Markdown to HTML rendering via pulldown-cmark with syntax highlighting. use crate::highlight::{highlight_code, Language}; use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd}; use serde::Serialize; /// A heading anchor extracted from markdown content. #[derive(Debug, Clone, Serialize)] pub struct Anchor { /// Heading ID attribute (slug) pub id: String, /// Heading text content pub label: String, /// Heading level (2-6, h1 excluded) pub level: u8, } /// Render markdown content to HTML with syntax highlighting. /// Returns the HTML output and a list of extracted heading anchors. pub fn markdown_to_html(markdown: &str) -> (String, Vec) { let options = Options::ENABLE_TABLES | Options::ENABLE_FOOTNOTES | Options::ENABLE_STRIKETHROUGH | Options::ENABLE_TASKLISTS | Options::ENABLE_MATH; let parser = Parser::new_ext(markdown, options); let mut html_output = String::new(); let mut anchors = Vec::new(); let mut code_block_lang: Option = None; let mut code_block_content = String::new(); // Image alt text accumulation state let mut image_alt_content: Option = None; let mut image_attrs: Option<(String, String)> = None; // (src, title) // Heading accumulation state let mut heading_level: Option = None; let mut heading_text = String::new(); for event in parser { match event { Event::Start(Tag::CodeBlock(kind)) => { // Extract language from code fence code_block_lang = match kind { CodeBlockKind::Fenced(lang) => { let lang_str = lang.as_ref().split_whitespace().next().unwrap_or(""); if lang_str.is_empty() { None } else { Some(lang_str.to_string()) } } CodeBlockKind::Indented => None, }; code_block_content.clear(); } Event::Text(text) if code_block_lang.is_some() => { // Accumulate code block content code_block_content.push_str(&text); } Event::Text(text) if image_alt_content.is_some() => { // Accumulate image alt text if let Some(ref mut alt) = image_alt_content { alt.push_str(&text); } } Event::End(TagEnd::CodeBlock) => { // Render the code block with highlighting let lang_str = code_block_lang.as_deref().unwrap_or(""); // Mermaid diagrams: render to SVG if lang_str == "mermaid" { match crate::mermaid::render_diagram(&code_block_content) { Ok(svg) => { html_output.push_str("
\n"); html_output.push_str(&svg); html_output.push_str("\n
\n"); } Err(e) => { eprintln!("mermaid render error: {e}"); html_output.push_str("
");
                            html_output.push_str(&html_escape(&code_block_content));
                            html_output.push_str("
\n"); } } } else { // Code blocks: syntax highlighting html_output.push_str("
", lang_str));
                        html_output.push_str(&highlight_code(lang, &code_block_content));
                    } else {
                        // Unsupported language: render as plain escaped text
                        if !lang_str.is_empty() {
                            html_output.push_str(&format!(" class=\"language-{}\">", lang_str));
                        } else {
                            html_output.push('>');
                        }
                        html_output.push_str(&html_escape(&code_block_content));
                    }

                    html_output.push_str("
\n"); } code_block_lang = None; code_block_content.clear(); } Event::Text(text) if heading_level.is_some() => { // Accumulate heading text heading_text.push_str(&text); html_output.push_str(&html_escape(&text)); } Event::Text(text) => { // Regular text outside code blocks html_output.push_str(&html_escape(&text)); } Event::Code(text) => { // Inline code html_output.push_str(""); html_output.push_str(&html_escape(&text)); html_output.push_str(""); } Event::Start(Tag::Image { dest_url, title, .. }) => { // Begin accumulating alt text; defer rendering to End event image_alt_content = Some(String::new()); image_attrs = Some((dest_url.to_string(), title.to_string())); } Event::Start(Tag::Heading { level, .. }) => { // Begin accumulating heading text heading_level = Some(level); heading_text.clear(); let level_num = level as u8; html_output.push_str(&format!(" { html_output.push_str(&start_tag_to_html(&tag)); } Event::End(TagEnd::Image) => { // Render image with accumulated alt text let alt = image_alt_content.take().unwrap_or_default(); if let Some((src, title)) = image_attrs.take() { if title.is_empty() { html_output.push_str(&format!( "\"{}\"", src, html_escape(&alt) )); } else { html_output.push_str(&format!( "\"{}\"", src, html_escape(&alt), html_escape(&title) )); } } } Event::End(TagEnd::Heading(level)) => { // Generate slug ID from heading text let id = slugify(&heading_text); let level_num = level as u8; // We need to go back and insert the id attribute and close the tag // The heading was opened as " if let Some(pos) = html_output.rfind(&format!("", id)); } // Add pilcrow anchor link for deep-linking (hover-reveal via CSS) html_output.push_str(&format!( "\n", id, level_num )); // Extract anchor for h2-h6 (skip h1) if level_num >= 2 { anchors.push(Anchor { id, label: heading_text.clone(), level: level_num, }); } heading_level = None; heading_text.clear(); } Event::End(tag) => { html_output.push_str(&end_tag_to_html(&tag)); } Event::SoftBreak => { html_output.push('\n'); } Event::HardBreak => { html_output.push_str("
\n"); } Event::Rule => { html_output.push_str("
\n"); } Event::Html(html) | Event::InlineHtml(html) => { html_output.push_str(&html); } Event::FootnoteReference(name) => { html_output.push_str(&format!( "{}", name, name )); } Event::TaskListMarker(checked) => { let checkbox = if checked { "" } else { "" }; html_output.push_str(checkbox); } Event::InlineMath(latex) => match crate::math::render_math(&latex, false) { Ok(rendered) => html_output.push_str(&rendered), Err(e) => { eprintln!("math render error: {e}"); html_output.push_str(""); html_output.push_str(&html_escape(&latex)); html_output.push_str(""); } }, Event::DisplayMath(latex) => match crate::math::render_math(&latex, true) { Ok(rendered) => { html_output.push_str("
\n"); html_output.push_str(&rendered); html_output.push_str("\n
\n"); } Err(e) => { eprintln!("math render error: {e}"); html_output.push_str("
");
                    html_output.push_str(&html_escape(&latex));
                    html_output.push_str("
\n"); } }, } } (html_output, anchors) } fn html_escape(s: &str) -> String { s.replace('&', "&") .replace('<', "<") .replace('>', ">") .replace('"', """) } /// Convert heading text to a URL-friendly slug ID. fn slugify(text: &str) -> String { text.to_lowercase() .chars() .map(|c| if c.is_alphanumeric() { c } else { '-' }) .collect::() .split('-') .filter(|s| !s.is_empty()) .collect::>() .join("-") } fn start_tag_to_html(tag: &Tag) -> String { match tag { Tag::Paragraph => "

".to_string(), Tag::Heading { level, .. } => format!("", *level as u8), Tag::BlockQuote(_) => "

\n".to_string(), Tag::CodeBlock(_) => String::new(), // Handled separately Tag::List(Some(start)) => format!("
    \n", start), Tag::List(None) => "
      \n".to_string(), Tag::Item => "
    • ".to_string(), Tag::FootnoteDefinition(name) => { format!("
      ", name) } Tag::Table(_) => "\n".to_string(), Tag::TableHead => "\n\n".to_string(), Tag::TableRow => "\n".to_string(), Tag::TableCell => "
      ".to_string(), Tag::Emphasis => "".to_string(), Tag::Strong => "".to_string(), Tag::Strikethrough => "".to_string(), Tag::Link { dest_url, title, .. } => { if title.is_empty() { format!("", dest_url) } else { format!("", dest_url, title) } } Tag::Image { .. } => String::new(), // Handled separately in main loop Tag::HtmlBlock => String::new(), Tag::MetadataBlock(_) => String::new(), Tag::DefinitionListTitle => "
      ".to_string(), Tag::DefinitionListDefinition => "
      ".to_string(), Tag::DefinitionList => "
      ".to_string(), } } fn end_tag_to_html(tag: &TagEnd) -> String { match tag { TagEnd::Paragraph => "

      \n".to_string(), TagEnd::Heading(level) => format!("\n", *level as u8), TagEnd::BlockQuote(_) => "\n".to_string(), TagEnd::CodeBlock => String::new(), // Handled separately TagEnd::List(ordered) => { if *ordered { "\n".to_string() } else { "\n".to_string() } } TagEnd::Item => "\n".to_string(), TagEnd::FootnoteDefinition => "\n".to_string(), TagEnd::Table => "
      \n".to_string(), TagEnd::TableHead => "\n\n".to_string(), TagEnd::TableRow => "\n".to_string(), TagEnd::TableCell => "\n".to_string(), TagEnd::Emphasis => "".to_string(), TagEnd::Strong => "".to_string(), TagEnd::Strikethrough => "".to_string(), TagEnd::Link => "".to_string(), TagEnd::Image => String::new(), // Handled separately in main loop TagEnd::HtmlBlock => String::new(), TagEnd::MetadataBlock(_) => String::new(), TagEnd::DefinitionListTitle => "\n".to_string(), TagEnd::DefinitionListDefinition => "\n".to_string(), TagEnd::DefinitionList => "\n".to_string(), } } #[cfg(test)] mod tests { use super::*; #[test] fn test_basic_markdown() { let md = "# Hello\n\nThis is a *test*."; let (html, _) = markdown_to_html(md); // Heading includes pilcrow anchor for deep-linking assert!(html.contains( "

      Hello

      " )); assert!(html.contains("test")); } #[test] fn test_code_block_highlighting() { let md = "```rust\nfn main() {}\n```"; let (html, _) = markdown_to_html(md); // Should contain highlighted code assert!(html.contains("
      cargo run"));
          }
      
          #[test]
          fn test_image_alt_text() {
              let md = "![Beautiful sunset](sunset.jpg \"Evening sky\")";
              let (html, _) = markdown_to_html(md);
      
              assert!(html.contains("alt=\"Beautiful sunset\""));
              assert!(html.contains("title=\"Evening sky\""));
              assert!(html.contains("src=\"sunset.jpg\""));
          }
      
          #[test]
          fn test_image_alt_text_no_title() {
              let md = "![Logo image](logo.png)";
              let (html, _) = markdown_to_html(md);
      
              assert!(html.contains("alt=\"Logo image\""));
              assert!(html.contains("src=\"logo.png\""));
              assert!(!html.contains("title="));
          }
      
          #[test]
          fn test_anchor_extraction() {
              let md = r#"# Page Title
      ## Getting Started
      Some intro text.
      ### Installation
      Install steps.
      ## Configuration
      Config details.
      #### Deep Heading
      "#;
              let (html, anchors) = markdown_to_html(md);
      
              // h1 should NOT be extracted (page title, not TOC)
              assert!(anchors.iter().all(|a| a.level >= 2));
      
              // Should have 4 anchors: h2, h3, h2, h4
              assert_eq!(anchors.len(), 4);
      
              // Check first anchor
              assert_eq!(anchors[0].id, "getting-started");
              assert_eq!(anchors[0].label, "Getting Started");
              assert_eq!(anchors[0].level, 2);
      
              // Check h3
              assert_eq!(anchors[1].id, "installation");
              assert_eq!(anchors[1].level, 3);
      
              // Check second h2
              assert_eq!(anchors[2].id, "configuration");
              assert_eq!(anchors[2].level, 2);
      
              // Check h4
              assert_eq!(anchors[3].id, "deep-heading");
              assert_eq!(anchors[3].level, 4);
      
              // Verify IDs are in HTML
              assert!(html.contains("id=\"getting-started\""));
              assert!(html.contains("id=\"installation\""));
          }
      
          #[test]
          fn test_slugify_edge_cases() {
              // Basic case
              assert_eq!(slugify("Hello World"), "hello-world");
      
              // Multiple spaces → single hyphen
              assert_eq!(slugify("Hello   World"), "hello-world");
      
              // Special characters → hyphen (apostrophe becomes hyphen)
              assert_eq!(slugify("What's New?"), "what-s-new");
      
              // Numbers preserved, dot becomes hyphen
              assert_eq!(slugify("Version 2.0"), "version-2-0");
      
              // Leading/trailing spaces trimmed
              assert_eq!(slugify("  Padded  "), "padded");
      
              // Mixed case → lowercase
              assert_eq!(slugify("CamelCase"), "camelcase");
      
              // Consecutive special chars → single hyphen
              assert_eq!(slugify("A -- B"), "a-b");
          }
      }