Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions crates/mdbook-html/src/html/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,28 @@ if (3 < 5 > 10)
assert_eq!(output, script);
}

/// An `<?xml ...?>` declaration placed inside an inline `<svg>` element must
/// not cause `parse_html` to emit any `Token::ParseError`.
#[test]
fn parse_html_svg_with_xml_decl() {
    let html = r#"<svg xmlns="http://www.w3.org/2000/svg"><?xml version="1.0"?><rect/></svg>"#;
    let tokens = parse_html(html);
    // Look for the first parse error, if any, and fail the test on it.
    let first_error = tokens.iter().find_map(|token| match token {
        Token::ParseError(e) => Some(e),
        _ => None,
    });
    if let Some(e) = first_error {
        panic!("unexpected parse error: {e:?}");
    }
}

/// An `<?xml ...?>` declaration preceding an inline `<svg>` inside a `<pre>`
/// element must not cause `parse_html` to emit any `Token::ParseError`.
#[test]
fn parse_html_pre_with_svg_xml_decl() {
    let html = r#"<pre><?xml version="1.0" encoding="utf-8"?><svg xmlns="http://www.w3.org/2000/svg"><rect/></svg></pre>"#;
    let tokens = parse_html(html);
    // Look for the first parse error, if any, and fail the test on it.
    let first_error = tokens.iter().find_map(|token| match token {
        Token::ParseError(e) => Some(e),
        _ => None,
    });
    if let Some(e) = first_error {
        panic!("unexpected parse error: {e:?}");
    }
}

// What happens if a script doesn't end.
#[test]
fn parse_html_script_unclosed() {
Expand Down
34 changes: 34 additions & 0 deletions crates/mdbook-html/src/html/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use html5ever::tokenizer::states::RawKind;
use html5ever::tokenizer::{
BufferQueue, TagKind, Token, TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts,
};
use std::borrow::Cow;
use std::cell::RefCell;

/// Collector for HTML tokens.
Expand Down Expand Up @@ -45,6 +46,15 @@ impl TokenSink for TokenCollector {
TagKind::EndTag => {}
}
}
if tag_name == b"svg" {
match tag.kind {
TagKind::StartTag => {
self.tokens.borrow_mut().push(token);
return TokenSinkResult::RawData(RawKind::Rawtext);
}
TagKind::EndTag => {}
}
}
self.tokens.borrow_mut().push(token);
}
Token::CommentToken(_) => {
Expand All @@ -63,8 +73,32 @@ impl TokenSink for TokenCollector {
}
}

/// Strips XML processing instructions (e.g. `<?xml ...?>`) that are invalid in HTML
/// but commonly appear in inline SVG emitted by preprocessors.
///
/// Every span that starts with `<?` and ends at the next `?>` is removed. A
/// `<?` with no following `?>` is left in the output untouched, together with
/// everything after it.
///
/// Returns `Cow::Borrowed(html)` when nothing was removed, so the common case
/// of input without processing instructions does not allocate. An owned
/// string is returned only when at least one instruction was stripped.
fn strip_xml_processing_instructions(html: &str) -> Cow<'_, str> {
    // Allocated lazily, on the first instruction that is actually removed.
    let mut stripped: Option<String> = None;
    let mut rest = html;
    while let Some(start) = rest.find("<?") {
        let after_open = &rest[start + 2..];
        let Some(end) = after_open.find("?>") else {
            // Unterminated `<?`: keep the remainder verbatim.
            break;
        };
        // The output can never be longer than the input, so one reservation suffices.
        let buf = stripped.get_or_insert_with(|| String::with_capacity(html.len()));
        buf.push_str(&rest[..start]);
        rest = &after_open[end + 2..];
    }
    match stripped {
        None => Cow::Borrowed(html),
        Some(mut buf) => {
            buf.push_str(rest);
            Cow::Owned(buf)
        }
    }
}

/// Parse HTML into tokens.
pub(crate) fn parse_html(html: &str) -> Vec<Token> {
let html = strip_xml_processing_instructions(html);
let tendril: ByteTendril = html.as_bytes().into();
let mut queue = BufferQueue::default();
queue.push_back(tendril.try_reinterpret().unwrap());
Expand Down
2 changes: 1 addition & 1 deletion crates/mdbook-html/src/html/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ where
/// Adds an open HTML tag.
fn start_html_tag(&mut self, tag: html5ever::tokenizer::Tag, is_raw: &mut bool) {
let is_closed = is_void_element(&tag.name) || tag.self_closing;
*is_raw = matches!(&*tag.name, "script" | "style");
*is_raw = matches!(&*tag.name, "script" | "style" | "svg");
let name = QualName::new(None, html5ever::ns!(html), tag.name);
let attrs = tag
.attrs
Expand Down
Loading