fix(render): fix code block rendering and quote escaping
Two issues fixed:
1. Language-less code blocks (``` without lang) were not accumulating text content. The guard `code_block_lang.is_some()` was false for them, so content fell through to regular text rendering. Fix: add an `in_code_block` flag to track code block state separately from language presence.
2. Single quotes in code blocks were being HTML-escaped as a character entity (e.g. `&#39;`), breaking CSP headers like 'self' in documentation. Fix: create `code_escape`/`code_escape_into` in escape.rs that only escape `<`, `>`, `&` (required to prevent HTML tag injection) but preserve quotes (safe inside `<pre><code>` content).
Rationale for `code_escape`:
- `<` and `>` MUST be escaped to prevent the browser from interpreting code as HTML
- `&` MUST be escaped to prevent HTML entity interpretation
- Quotes are safe inside element content (no attribute context)
Also:
- Add test for unlabeled code block quote preservation
All 71 tests pass.
This commit is contained in:
@@ -25,6 +25,27 @@ pub fn html_escape_into(out: &mut String, s: &str) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape characters for safe embedding in code blocks.
|
||||
///
|
||||
/// Only escapes `&`, `<`, `>` — quotes are safe inside `<pre><code>`.
|
||||
pub fn code_escape(s: &str) -> String {
|
||||
let mut result = String::with_capacity(s.len());
|
||||
code_escape_into(&mut result, s);
|
||||
result
|
||||
}
|
||||
|
||||
/// Append `s` to `out`, escaping the characters that are unsafe in code blocks.
///
/// `&`, `<` and `>` are written as `&amp;`, `&lt;` and `&gt;`. Every other
/// character, including `'` and `"`, is appended unchanged. Existing content
/// of `out` is left untouched.
pub fn code_escape_into(out: &mut String, s: &str) {
    // Escaping never shrinks the text, so the input length is a safe lower bound.
    out.reserve(s.len());
    for c in s.chars() {
        match c {
            // `&` must be rewritten so the text cannot form a character reference.
            '&' => out.push_str("&amp;"),
            // `<` and `>` must be rewritten so the text cannot form a tag.
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            _ => out.push(c),
        }
    }
}
|
||||
|
||||
/// Escape XML special characters for safe embedding in XML documents.
|
||||
///
|
||||
/// Escapes: `&`, `<`, `>`, `"`, `'`
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::collections::HashMap;
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::escape::{html_escape, html_escape_into};
|
||||
use crate::escape::{code_escape, code_escape_into};
|
||||
use ropey::RopeSlice;
|
||||
use tree_house::highlighter::{Highlight, HighlightEvent, Highlighter};
|
||||
use tree_house::{
|
||||
@@ -565,14 +565,14 @@ pub fn highlight_code(lang: Language, source: &str) -> String {
|
||||
|
||||
// Check if we have a config for this language
|
||||
if !loader.configs.contains_key(&lang) {
|
||||
return html_escape(source);
|
||||
return code_escape(source);
|
||||
}
|
||||
|
||||
// Parse the syntax tree
|
||||
let rope = RopeSlice::from(source);
|
||||
let syntax = match Syntax::new(rope, lang.to_th_language(), Duration::from_secs(5), loader) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return html_escape(source),
|
||||
Err(_) => return code_escape(source),
|
||||
};
|
||||
|
||||
// Create highlighter and render
|
||||
@@ -595,7 +595,7 @@ fn render_html<'a>(source: &str, mut highlighter: Highlighter<'a, 'a, SukrLoader
|
||||
let end = next_pos as usize;
|
||||
if start < source.len() {
|
||||
let text = &source[start..end.min(source.len())];
|
||||
html_escape_into(&mut html, text);
|
||||
code_escape_into(&mut html, text);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -672,7 +672,7 @@ mod tests {
|
||||
|
||||
#[test]
fn test_html_escape() {
    // Markup in highlighted source must never reach the output as a raw tag.
    let escaped = code_escape("<script>alert('xss')</script>");
    // No literal `<` may survive...
    assert!(!escaped.contains('<'));
    // ...because each one is replaced by its entity form.
    assert!(escaped.contains("&lt;"));
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Markdown to HTML rendering via pulldown-cmark with syntax highlighting.
|
||||
|
||||
use crate::escape::html_escape;
|
||||
use crate::escape::{code_escape, html_escape};
|
||||
use crate::highlight::{highlight_code, Language};
|
||||
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
|
||||
use serde::Serialize;
|
||||
@@ -30,6 +30,7 @@ pub fn markdown_to_html(markdown: &str) -> (String, Vec<Anchor>) {
|
||||
let mut anchors = Vec::new();
|
||||
let mut code_block_lang: Option<String> = None;
|
||||
let mut code_block_content = String::new();
|
||||
let mut in_code_block = false;
|
||||
|
||||
// Image alt text accumulation state
|
||||
let mut image_alt_content: Option<String> = None;
|
||||
@@ -54,9 +55,10 @@ pub fn markdown_to_html(markdown: &str) -> (String, Vec<Anchor>) {
|
||||
}
|
||||
CodeBlockKind::Indented => None,
|
||||
};
|
||||
in_code_block = true;
|
||||
code_block_content.clear();
|
||||
}
|
||||
Event::Text(text) if code_block_lang.is_some() => {
|
||||
Event::Text(text) if in_code_block => {
|
||||
// Accumulate code block content
|
||||
code_block_content.push_str(&text);
|
||||
}
|
||||
@@ -100,13 +102,14 @@ pub fn markdown_to_html(markdown: &str) -> (String, Vec<Anchor>) {
|
||||
} else {
|
||||
html_output.push('>');
|
||||
}
|
||||
html_output.push_str(&html_escape(&code_block_content));
|
||||
html_output.push_str(&code_escape(&code_block_content));
|
||||
}
|
||||
|
||||
html_output.push_str("</code></pre>\n");
|
||||
}
|
||||
|
||||
code_block_lang = None;
|
||||
in_code_block = false;
|
||||
code_block_content.clear();
|
||||
}
|
||||
Event::Text(text) if heading_level.is_some() => {
|
||||
@@ -283,12 +286,12 @@ fn start_tag_to_html(tag: &Tag) -> String {
|
||||
dest_url, title, ..
|
||||
} => {
|
||||
if title.is_empty() {
|
||||
format!("<a href=\"{}\">", html_escape(&dest_url))
|
||||
format!("<a href=\"{}\">", html_escape(dest_url))
|
||||
} else {
|
||||
format!(
|
||||
"<a href=\"{}\" title=\"{}\">",
|
||||
html_escape(&dest_url),
|
||||
html_escape(&title)
|
||||
html_escape(dest_url),
|
||||
html_escape(title)
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -496,4 +499,24 @@ Config details.
|
||||
"special chars in src should be escaped"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_unlabeled_code_block_preserves_quotes() {
    // Code block without language specifier should preserve quotes
    let md = "```\nContent-Security-Policy: default-src 'self';\n```";
    let (html, _) = markdown_to_html(md);

    // Should be inside <pre><code>
    assert!(html.contains("<pre><code>"), "should have code block");
    // Quotes should NOT be escaped (only <, >, & need escaping in code)
    assert!(
        html.contains("'self'"),
        "single quotes should be preserved in code blocks"
    );
    // Should NOT have escaped quotes: reject both common numeric spellings of
    // the apostrophe entity so the check holds whichever one the escaper emits.
    assert!(
        !html.contains("&#39;") && !html.contains("&#x27;"),
        "quotes should not be HTML-escaped in code blocks"
    );
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user