feat(sitemap): add XML sitemap generation module
Implement src/sitemap.rs with generate_sitemap() for SEO-compliant XML sitemap generation. Follows the feed.rs pattern:
- SitemapEntry struct for URL metadata
- build_sitemap_xml() for XML construction
- xml_escape() for special character handling
- 5 unit tests covering single/multiple entries, lastmod, escaping

Module declared in main.rs but not yet integrated into pipeline.
This commit is contained in:
@@ -46,6 +46,7 @@
|
|||||||
pkgs.taplo
|
pkgs.taplo
|
||||||
pkgs.pkg-config
|
pkgs.pkg-config
|
||||||
pkgs.nixfmt
|
pkgs.nixfmt
|
||||||
|
pkgs.nodePackages.prettier
|
||||||
pkgs.miniserve # Dev server for testing
|
pkgs.miniserve # Dev server for testing
|
||||||
]
|
]
|
||||||
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
|
++ pkgs.lib.optionals pkgs.stdenv.isDarwin [
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ mod highlight;
|
|||||||
mod math;
|
mod math;
|
||||||
mod mermaid;
|
mod mermaid;
|
||||||
mod render;
|
mod render;
|
||||||
|
mod sitemap;
|
||||||
mod template_engine;
|
mod template_engine;
|
||||||
|
|
||||||
use crate::content::{discover_nav, discover_sections, Content, ContentKind, NavItem};
|
use crate::content::{discover_nav, discover_sections, Content, ContentKind, NavItem};
|
||||||
|
|||||||
185
src/sitemap.rs
Normal file
185
src/sitemap.rs
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
//! XML sitemap generation for SEO.
|
||||||
|
|
||||||
|
use crate::config::SiteConfig;
|
||||||
|
use crate::content::{Content, Section};
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
/// A URL entry for the sitemap.
///
/// Each entry becomes one `<url>` element in the generated document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SitemapEntry {
    /// Absolute URL (e.g., "https://example.com/blog/post.html")
    pub loc: String,
    /// Optional last modification date in W3C format (YYYY-MM-DD).
    /// When `None`, no `<lastmod>` element is written for this entry.
    pub lastmod: Option<String>,
}
|
||||||
|
|
||||||
|
/// Generate an XML sitemap from discovered content.
|
||||||
|
///
|
||||||
|
/// Includes:
|
||||||
|
/// - Homepage
|
||||||
|
/// - Section indices
|
||||||
|
/// - Section items (posts, projects, etc.)
|
||||||
|
/// - Standalone pages
|
||||||
|
pub fn generate_sitemap(
|
||||||
|
sections: &[Section],
|
||||||
|
pages: &[Content],
|
||||||
|
config: &SiteConfig,
|
||||||
|
content_root: &Path,
|
||||||
|
) -> String {
|
||||||
|
let base_url = config.base_url.trim_end_matches('/');
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
|
||||||
|
// Homepage
|
||||||
|
entries.push(SitemapEntry {
|
||||||
|
loc: format!("{}/index.html", base_url),
|
||||||
|
lastmod: None,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Sections and their items
|
||||||
|
for section in sections {
|
||||||
|
// Section index
|
||||||
|
entries.push(SitemapEntry {
|
||||||
|
loc: format!("{}/{}/index.html", base_url, section.name),
|
||||||
|
lastmod: section.index.frontmatter.date.clone(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Section items
|
||||||
|
if let Ok(items) = section.collect_items() {
|
||||||
|
for item in items {
|
||||||
|
let relative_path = item.output_path(content_root);
|
||||||
|
entries.push(SitemapEntry {
|
||||||
|
loc: format!("{}/{}", base_url, relative_path.display()),
|
||||||
|
lastmod: item.frontmatter.date.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Standalone pages
|
||||||
|
for page in pages {
|
||||||
|
let relative_path = page.output_path(content_root);
|
||||||
|
entries.push(SitemapEntry {
|
||||||
|
loc: format!("{}/{}", base_url, relative_path.display()),
|
||||||
|
lastmod: page.frontmatter.date.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
build_sitemap_xml(&entries)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the XML sitemap string from entries.
|
||||||
|
fn build_sitemap_xml(entries: &[SitemapEntry]) -> String {
|
||||||
|
let mut urls = String::new();
|
||||||
|
|
||||||
|
for entry in entries {
|
||||||
|
urls.push_str(" <url>\n");
|
||||||
|
urls.push_str(&format!(" <loc>{}</loc>\n", xml_escape(&entry.loc)));
|
||||||
|
if let Some(ref date) = entry.lastmod {
|
||||||
|
urls.push_str(&format!(" <lastmod>{}</lastmod>\n", xml_escape(date)));
|
||||||
|
}
|
||||||
|
urls.push_str(" </url>\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
format!(
|
||||||
|
r#"<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
{}
|
||||||
|
</urlset>
|
||||||
|
"#,
|
||||||
|
urls.trim_end()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Escape XML special characters.
///
/// Replaces the five characters with predefined XML entities:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&apos;`.
/// All other characters are copied through unchanged.
///
/// The input is processed in a single pass, so an ampersand that is already
/// part of an entity in the input (e.g. `&lt;`) is escaped again (`&amp;lt;`)
/// and ampersands produced by this function are never re-escaped.
fn xml_escape(s: &str) -> String {
    // Most inputs contain few special characters, so the input length is a
    // good starting capacity.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Each of the five special characters maps to its predefined entity.
    #[test]
    fn test_xml_escape() {
        assert_eq!(xml_escape("Hello & World"), "Hello &amp; World");
        assert_eq!(xml_escape("<tag>"), "&lt;tag&gt;");
        assert_eq!(xml_escape("\"quoted\""), "&quot;quoted&quot;");
        assert_eq!(xml_escape("it's"), "it&apos;s");
    }

    /// A single undated entry yields a well-formed document without `<lastmod>`.
    #[test]
    fn test_build_sitemap_xml_single_entry() {
        let entries = vec![SitemapEntry {
            loc: "https://example.com/index.html".to_string(),
            lastmod: None,
        }];

        let xml = build_sitemap_xml(&entries);

        assert!(xml.starts_with(r#"<?xml version="1.0" encoding="utf-8"?>"#));
        assert!(xml.contains("<urlset xmlns="));
        assert!(xml.contains("<url>"));
        assert!(xml.contains("<loc>https://example.com/index.html</loc>"));
        assert!(xml.contains("</urlset>"));
        assert!(!xml.contains("<lastmod>")); // No lastmod when None
    }

    /// A dated entry emits its date inside `<lastmod>`.
    #[test]
    fn test_build_sitemap_xml_with_lastmod() {
        let entries = vec![SitemapEntry {
            loc: "https://example.com/blog/post.html".to_string(),
            lastmod: Some("2026-01-31".to_string()),
        }];

        let xml = build_sitemap_xml(&entries);

        assert!(xml.contains("<loc>https://example.com/blog/post.html</loc>"));
        assert!(xml.contains("<lastmod>2026-01-31</lastmod>"));
    }

    /// Every entry produces exactly one `<url>` element.
    #[test]
    fn test_build_sitemap_xml_multiple_entries() {
        let entries = vec![
            SitemapEntry {
                loc: "https://example.com/index.html".to_string(),
                lastmod: None,
            },
            SitemapEntry {
                loc: "https://example.com/about.html".to_string(),
                lastmod: Some("2026-01-15".to_string()),
            },
            SitemapEntry {
                loc: "https://example.com/blog/index.html".to_string(),
                lastmod: None,
            },
        ];

        let xml = build_sitemap_xml(&entries);

        // Count url elements
        let url_count = xml.matches("<url>").count();
        assert_eq!(url_count, 3);

        // Verify all URLs present
        assert!(xml.contains("https://example.com/index.html"));
        assert!(xml.contains("https://example.com/about.html"));
        assert!(xml.contains("https://example.com/blog/index.html"));
    }

    /// Special characters in a URL are escaped before being written to `<loc>`.
    #[test]
    fn test_build_sitemap_xml_escapes_special_chars() {
        let entries = vec![SitemapEntry {
            loc: "https://example.com/search?q=foo&bar=baz".to_string(),
            lastmod: None,
        }];

        let xml = build_sitemap_xml(&entries);

        // & should be escaped
        assert!(xml.contains("&amp;"));
        assert!(xml.contains("?q=foo&amp;bar=baz"));
        assert!(!xml.contains("?q=foo&bar")); // Raw & should not appear
    }
}
|
||||||
Reference in New Issue
Block a user