diff --git a/flake.nix b/flake.nix
index b01b930..719cff7 100644
--- a/flake.nix
+++ b/flake.nix
@@ -46,6 +46,7 @@
             pkgs.taplo
             pkgs.pkg-config
             pkgs.nixfmt
+            pkgs.nodePackages.prettier
             pkgs.miniserve # Dev server for testing
           ]
           ++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
diff --git a/src/main.rs b/src/main.rs
index 80be117..8fe3d17 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,6 +11,7 @@ mod highlight;
 mod math;
 mod mermaid;
 mod render;
+mod sitemap;
 mod template_engine;
 
 use crate::content::{discover_nav, discover_sections, Content, ContentKind, NavItem};
diff --git a/src/sitemap.rs b/src/sitemap.rs
new file mode 100644
index 0000000..0bd7990
--- /dev/null
+++ b/src/sitemap.rs
@@ -0,0 +1,218 @@
+//! XML sitemap generation for SEO.
+
+use crate::config::SiteConfig;
+use crate::content::{Content, Section};
+use std::path::{Path, MAIN_SEPARATOR};
+
+/// A URL entry for the sitemap.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct SitemapEntry {
+    /// Absolute URL (e.g., "https://example.com/blog/post.html")
+    pub loc: String,
+    /// Optional last modification date in W3C format (YYYY-MM-DD)
+    pub lastmod: Option<String>,
+}
+
+/// Generate an XML sitemap from discovered content.
+///
+/// Includes:
+/// - Homepage
+/// - Section indices
+/// - Section items (posts, projects, etc.)
+/// - Standalone pages
+///
+/// `config.base_url` is used without its trailing slashes. Items of a section
+/// whose `collect_items()` call returns an error are left out of the sitemap.
+pub fn generate_sitemap(
+    sections: &[Section],
+    pages: &[Content],
+    config: &SiteConfig,
+    content_root: &Path,
+) -> String {
+    let base_url = config.base_url.trim_end_matches('/');
+    let mut entries = Vec::new();
+
+    // Homepage
+    entries.push(SitemapEntry {
+        loc: format!("{}/index.html", base_url),
+        lastmod: None,
+    });
+
+    // Sections and their items
+    for section in sections {
+        // Section index
+        entries.push(SitemapEntry {
+            loc: format!("{}/{}/index.html", base_url, section.name),
+            lastmod: section.index.frontmatter.date.clone(),
+        });
+
+        // Section items; a failed `collect_items()` skips this section's items.
+        if let Ok(items) = section.collect_items() {
+            for item in items {
+                let relative_path = url_path(item.output_path(content_root));
+                entries.push(SitemapEntry {
+                    loc: format!("{}/{}", base_url, relative_path),
+                    lastmod: item.frontmatter.date.clone(),
+                });
+            }
+        }
+    }
+
+    // Standalone pages
+    for page in pages {
+        let relative_path = url_path(page.output_path(content_root));
+        entries.push(SitemapEntry {
+            loc: format!("{}/{}", base_url, relative_path),
+            lastmod: page.frontmatter.date.clone(),
+        });
+    }
+
+    build_sitemap_xml(&entries)
+}
+
+/// Render a filesystem path as a URL path.
+///
+/// The platform path separator is replaced with `/`, so backslash-separated
+/// paths on Windows do not leak into sitemap URLs.
+fn url_path(path: impl AsRef<Path>) -> String {
+    path.as_ref()
+        .display()
+        .to_string()
+        .replace(MAIN_SEPARATOR, "/")
+}
+
+/// Build the XML sitemap string from entries.
+///
+/// Each `loc` and `lastmod` value is passed through [`xml_escape`] before it is
+/// written into its element.
+fn build_sitemap_xml(entries: &[SitemapEntry]) -> String {
+    let mut urls = String::new();
+
+    for entry in entries {
+        urls.push_str("  <url>\n");
+        urls.push_str(&format!("    <loc>{}</loc>\n", xml_escape(&entry.loc)));
+        if let Some(date) = &entry.lastmod {
+            urls.push_str(&format!("    <lastmod>{}</lastmod>\n", xml_escape(date)));
+        }
+        urls.push_str("  </url>\n");
+    }
+
+    format!(
+        r#"<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+{}
+</urlset>
+"#,
+        urls.trim_end()
+    )
+}
+
+/// Escape XML special characters.
+///
+/// Replaces `&`, `<`, `>`, `"` and `'` with their predefined XML entities in a
+/// single pass, so an `&` produced by one replacement is never escaped again.
+fn xml_escape(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len());
+    for ch in s.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&apos;"),
+            _ => escaped.push(ch),
+        }
+    }
+    escaped
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_xml_escape() {
+        assert_eq!(xml_escape("Hello & World"), "Hello &amp; World");
+        assert_eq!(xml_escape("<tag>"), "&lt;tag&gt;");
+        assert_eq!(xml_escape("\"quoted\""), "&quot;quoted&quot;");
+        assert_eq!(xml_escape("it's"), "it&apos;s");
+    }
+
+    #[test]
+    fn test_url_path_uses_forward_slashes() {
+        let path = Path::new("blog").join("post.html");
+        assert_eq!(url_path(&path), "blog/post.html");
+    }
+
+    #[test]
+    fn test_build_sitemap_xml_single_entry() {
+        let entries = vec![SitemapEntry {
+            loc: "https://example.com/index.html".to_string(),
+            lastmod: None,
+        }];
+
+        let xml = build_sitemap_xml(&entries);
+
+        assert!(xml.starts_with(r#"<?xml version="1.0" encoding="UTF-8"?>"#));
+        assert!(xml.contains(r#"<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">"#));
+        assert!(xml.contains("<loc>https://example.com/index.html</loc>"));
+        assert!(xml.contains("</urlset>"));
+        assert!(!xml.contains("<lastmod>")); // No lastmod when None
+    }
+
+    #[test]
+    fn test_build_sitemap_xml_with_lastmod() {
+        let entries = vec![SitemapEntry {
+            loc: "https://example.com/blog/post.html".to_string(),
+            lastmod: Some("2026-01-31".to_string()),
+        }];
+
+        let xml = build_sitemap_xml(&entries);
+
+        assert!(xml.contains("<loc>https://example.com/blog/post.html</loc>"));
+        assert!(xml.contains("<lastmod>2026-01-31</lastmod>"));
+    }
+
+    #[test]
+    fn test_build_sitemap_xml_multiple_entries() {
+        let entries = vec![
+            SitemapEntry {
+                loc: "https://example.com/index.html".to_string(),
+                lastmod: None,
+            },
+            SitemapEntry {
+                loc: "https://example.com/about.html".to_string(),
+                lastmod: Some("2026-01-15".to_string()),
+            },
+            SitemapEntry {
+                loc: "https://example.com/blog/index.html".to_string(),
+                lastmod: None,
+            },
+        ];
+
+        let xml = build_sitemap_xml(&entries);
+
+        // Count url elements
+        let url_count = xml.matches("<url>").count();
+        assert_eq!(url_count, 3);
+
+        // Verify all URLs present
+        assert!(xml.contains("<loc>https://example.com/index.html</loc>"));
+        assert!(xml.contains("<loc>https://example.com/about.html</loc>"));
+        assert!(xml.contains("<loc>https://example.com/blog/index.html</loc>"));
+    }
+
+    #[test]
+    fn test_build_sitemap_xml_escapes_special_chars() {
+        let entries = vec![SitemapEntry {
+            loc: "https://example.com/search?q=foo&bar=baz".to_string(),
+            lastmod: None,
+        }];
+
+        let xml = build_sitemap_xml(&entries);
+
+        // & should be escaped
+        assert!(xml.contains("&amp;"));
+        assert!(!xml.contains("?q=foo&bar")); // Raw & should not appear
+    }
+}