Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e60a49f
[bugfix] Fix serialization parameter handling for W3C compliance
joewiz Apr 4, 2026
7897966
[feature] Improve XML serialization for W3C compliance
joewiz Apr 4, 2026
734455b
[feature] Improve XHTML serialization for W3C compliance
joewiz Apr 4, 2026
0204da6
[feature] Fix HTML5/XHTML5 fragment and DOCTYPE serialization
joewiz Apr 4, 2026
7661d8c
[feature] Improve JSON and adaptive serialization for W3C compliance
joewiz Apr 4, 2026
ce52e24
[feature] Improve XQuerySerializer for W3C serialization compliance
joewiz Apr 4, 2026
062371f
[feature] Support XML 1.1 namespace undeclaration in element construc…
joewiz Apr 4, 2026
1145d00
[feature] Implement parameter-document serialization parameter
joewiz Apr 4, 2026
b5b3e2e
[bugfix] Fix URL rewrite view pipeline for XHTML-serialized HTML docu…
joewiz Mar 31, 2026
55d0a6c
[test] Add URL rewrite view pipeline regression test
joewiz Mar 31, 2026
d7d7000
[bugfix] Fix xmlns="" undeclaration via proper namespace stack in XML…
joewiz Apr 8, 2026
e226fc9
[bugfix] Fix fn:xml-to-json namespace validation and resolve Codacy i…
joewiz Apr 22, 2026
e231851
[bugfix] Fix Integer reference comparison in FunXmlToJson map stack
joewiz Apr 26, 2026
0a94a26
[optimize] Bulk-write HTML/XML serialization and raw-text fast path
joewiz Apr 26, 2026
fb3fb03
[optimize] Attribute prefix coalescence in XMLWriter
joewiz Apr 26, 2026
5d8f9ec
[refactor] Java modernization per review: switch expressions, text bl…
joewiz Apr 27, 2026
6f39a35
[refactor] Suppress NPath warnings on serializer methods
joewiz Apr 28, 2026
49c4c03
[refactor] Remove proactive PMD.NPathComplexity suppressions
joewiz Apr 29, 2026
7eb995d
[refactor] Deduplicate writeCharSeq, document charBuffer thread-safety
joewiz Apr 29, 2026
2945ddf
[refactor] Convert XMLWriter switches to arrow syntax per review
joewiz Apr 30, 2026
47c0859
[bugfix] Spec-compliant DOCTYPE for XHTML/HTML serialization
joewiz Apr 30, 2026
964d131
[bugfix] Resolve cdata-section-elements prefixes via static namespaces
joewiz Apr 30, 2026
38a9130
[bugfix] HTML method PI serialization per W3C 3.1 and HTML5 PR2372
joewiz Apr 30, 2026
7a0b81b
[bugfix] Suppress duplicate Content-Type/charset meta in HTML/XHTML head
joewiz Apr 30, 2026
59c9eb2
[bugfix] HTML method-html serialization fixes for QT4 conformance
joewiz Apr 30, 2026
7e0c412
[refactor] Address PMD warnings in HTML5Writer and XHTMLWriter
joewiz Apr 30, 2026
b6c8d4f
[test] Update HTML5 serialize asserts to use short <meta charset>
joewiz May 11, 2026
ece2857
[refactor] FunXmlToJson: convert two switch blocks to arrow syntax
joewiz May 11, 2026
f114d0b
[test] HtmlSerializerBenchmark: document no-op writer/stream stubs
joewiz May 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions exist-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,7 @@ The BaseX Team. The original license statement is also included below.]]></pream
<log4j.configurationFile>${project.build.testOutputDirectory}/log4j2.xml</log4j.configurationFile>
</systemPropertyVariables>

<forkedProcessTimeoutInSeconds>180</forkedProcessTimeoutInSeconds>
<excludes>

<!-- NOTE: these can still exhibit deadlocks
Expand All @@ -1224,6 +1225,14 @@ The BaseX Team. The original license statement is also included below.]]></pream
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<configuration>
<forkedProcessTimeoutInSeconds>180</forkedProcessTimeoutInSeconds>
<excludes>
<!-- Pre-existing deadlocks during BrokerPool initialization -->
<!-- see https://github.com/eXist-db/exist/issues/4140 -->
<!-- see https://github.com/eXist-db/exist/issues/3685 -->
<exclude>org.exist.storage.lock.DeadlockIT</exclude>
<exclude>org.exist.xmldb.RemoveCollectionIT</exclude>
</excludes>
<argLine>@{jacocoArgLine} --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true</argLine>
<systemPropertyVariables>
<jetty.home>${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty</jetty.home>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ public class EXistOutputKeys {
*/
public static final String ITEM_SEPARATOR = "item-separator";

// --- QT4 Serialization 4.0 parameters ---
public static final String CANONICAL = "canonical";
public static final String ESCAPE_SOLIDUS = "escape-solidus";
public static final String JSON_LINES = "json-lines";

public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration";

public static final String OUTPUT_DOCTYPE = "output-doctype";
Expand Down
13 changes: 13 additions & 0 deletions exist-core/src/main/java/org/exist/util/CharSlice.java
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,19 @@ public void copyTo(final char[] destination, final int destOffset) {
public void write(final Writer writer) throws java.io.IOException {
writer.write(array, offset, len);
}

/**
 * Write a sub-range of this slice to a writer using a single bulk
 * {@link Writer#write(char[], int, int)} call.
 *
 * The requested range is validated against the bounds of this slice
 * (not merely the backing array), so a caller can never emit characters
 * that lie outside the slice.
 *
 * @param writer the writer
 * @param start the start index within this slice (inclusive)
 * @param length the number of characters to write
 * @throws java.io.IOException if an error occurs whilst writing
 * @throws IndexOutOfBoundsException if {@code start} or {@code length} is
 *         negative, or {@code start + length} exceeds the length of this slice
 */
public void write(final Writer writer, final int start, final int length) throws java.io.IOException {
    // The backing array may extend beyond this slice on either side; without
    // this check an out-of-slice range would be written silently.
    java.util.Objects.checkFromIndexSize(start, length, len);
    writer.write(array, offset + start, length);
}
}

//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() {
public void setOutput(Writer writer, Properties properties) {
outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties));
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0");

// For html/xhtml methods, determine HTML version:
// 1. Use html-version if explicitly set
// 2. Otherwise use version (W3C spec: version controls HTML version for html method)
// 3. Default to 5.0
double htmlVersion;
try {
htmlVersion = Double.parseDouble(htmlVersionProp);
} catch (NumberFormatException e) {
htmlVersion = 1.0;
final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION);
if (explicitHtmlVersion != null) {
try {
htmlVersion = Double.parseDouble(explicitHtmlVersion);
} catch (NumberFormatException e) {
htmlVersion = 5.0;
}
} else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method))
&& outputProperties.getProperty(OutputKeys.VERSION) != null) {
try {
htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION));
} catch (NumberFormatException e) {
htmlVersion = 5.0;
}
} else {
htmlVersion = 5.0;
}

final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP
}

/**
 * Writes a double value as text.
 *
 * Infinite and NaN values are written using the item's own string value.
 * All other values are formatted in scientific notation with a lower-case
 * {@code e} exponent separator, using US locale symbols.
 *
 * @param item the double value to write
 * @throws SAXException declared for the writeText calls — NOTE(review): confirm against writeText's signature
 */
private void writeDouble(final DoubleValue item) throws SAXException {
    final double d = item.getDouble();
    if (Double.isInfinite(d) || Double.isNaN(d)) {
        // The scientific-notation pattern below is only meaningful for finite
        // numbers, so special values are emitted via their string value.
        writeText(item.getStringValue());
        return;
    }
    final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
    symbols.setExponentSeparator("e");
    final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
    // Write exactly once; formatting is done on the primitive already read above.
    writeText(df.format(d));
}

private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException {
Expand All @@ -215,9 +220,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti

private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException {
try {
writer.write("map");
addSpaceIfIndent();
writer.write('{');
writer.write("map{");
addIndent();
indent();
for (final Iterator<IEntry<AtomicValue, Sequence>> i = map.iterator(); i.hasNext(); ) {
Expand Down
160 changes: 146 additions & 14 deletions exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ public class HTML5Writer extends XHTML5Writer {
BOOLEAN_ATTRIBUTE_NAMES.add("willValidate");
}

/**
 * Lower-cased (locale-independent) copy of {@link #BOOLEAN_ATTRIBUTE_NAMES},
 * used for case-insensitive membership tests.
 */
private static final ObjectSet<String> BOOLEAN_ATTRIBUTE_NAMES_LOWER = new ObjectOpenHashSet<>(BOOLEAN_ATTRIBUTE_NAMES.size());
static {
    BOOLEAN_ATTRIBUTE_NAMES.forEach(attrName ->
            BOOLEAN_ATTRIBUTE_NAMES_LOWER.add(attrName.toLowerCase(java.util.Locale.ROOT)));
}

private static final ObjectSet<String> EMPTY_TAGS = new ObjectOpenHashSet<>(31);
static {
EMPTY_TAGS.add("area");
Expand Down Expand Up @@ -156,8 +163,15 @@ public void endElement(QName qname) throws TransformerException {
if (!isEmptyTag(qname.getLocalPart())) {
super.endElement(qname);
} else {
// HTML5 omits the close tag for void elements; we still need to
// honor the meta-in-head dedup that XHTMLWriter sets up at startElement
// time. Capture the buffered-meta flag before closeStartTag flips state.
final boolean wasBufferedMeta = isBufferedMeta(qname.getLocalPart());
closeStartTag(true);
endIndent(qname.getNamespaceURI(), qname.getLocalPart());
if (wasBufferedMeta) {
endMetaBuffer();
}
}
}

Expand All @@ -166,24 +180,33 @@ public void endElement(String namespaceURI, String localName, String qname) thro
if (!isEmptyTag(localName)) {
super.endElement(namespaceURI, localName, qname);
} else {
final boolean wasBufferedMeta = isBufferedMeta(localName);
closeStartTag(true);
endIndent(namespaceURI, localName);
if (wasBufferedMeta) {
endMetaBuffer();
}
}
}

@Override
public void attribute(String qname, CharSequence value) throws TransformerException {
// Strip prefix for the meta-dedup redundancy check
final int colon = qname.indexOf(':');
final String localName = colon < 0 ? qname : qname.substring(colon + 1);
noteMetaAttribute(localName, value);
final CharSequence effectiveValue = maybeEscapeUriHtml5(localName, value);
try {
if(!tagIsOpen) {
characters(value);
characters(effectiveValue);
return;
}
final Writer writer = getWriter();
writer.write(' ');
writer.write(qname);
if (!(BOOLEAN_ATTRIBUTE_NAMES.contains(qname) && qname.contentEquals(value))) {
if (!isBooleanAttributeMatch(qname, effectiveValue)) {
writer.write("=\"");
writeChars(value, true);
writeChars(effectiveValue, true);
writer.write('"');
}
} catch(final IOException ioe) {
Expand All @@ -193,9 +216,12 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept

@Override
public void attribute(QName qname, CharSequence value) throws TransformerException {
noteMetaAttribute(qname.getLocalPart(), value);
final String localPart = qname.getLocalPart();
final CharSequence effectiveValue = maybeEscapeUriHtml5(localPart, value);
try {
if(!tagIsOpen) {
characters(value);
characters(effectiveValue);
return;
// throw new TransformerException("Found an attribute outside an
// element");
Expand All @@ -206,38 +232,87 @@ public void attribute(QName qname, CharSequence value) throws TransformerExcepti
writer.write(qname.getPrefix());
writer.write(':');
}
final String localPart = qname.getLocalPart();
writer.write(localPart);
if (!(BOOLEAN_ATTRIBUTE_NAMES.contains(localPart) && localPart.contentEquals(value))) {
if (!isBooleanAttributeMatch(localPart, effectiveValue)) {
writer.write("=\"");
writeChars(value, true);
writeChars(effectiveValue, true);
writer.write('"');
}
} catch(final IOException ioe) {
throw new TransformerException(ioe.getMessage(), ioe);
}
}

/**
 * Applies URI-attribute escaping for the HTML5 writer.
 *
 * The element half of the (element, attribute) lookup done by
 * {@link XHTMLWriter#shouldEscapeUriAttribute(String, String)} is the local
 * name of {@link #currentTag}: any {@code prefix:} is stripped first.
 *
 * @param attrLocal the local name of the attribute being written
 * @param value the attribute value as supplied by the caller
 * @return the escaped value when the (element, attribute) pair calls for
 *         URI escaping; otherwise {@code value} unchanged (also when no
 *         current tag is set)
 */
private CharSequence maybeEscapeUriHtml5(final String attrLocal, final CharSequence value) {
    if (currentTag != null) {
        final int colonIdx = currentTag.indexOf(':');
        final String elementLocal = colonIdx >= 0
                ? currentTag.substring(colonIdx + 1)
                : currentTag;
        if (shouldEscapeUriAttribute(elementLocal, attrLocal)) {
            return escapeUriAttribute(value);
        }
    }
    return value;
}

/**
 * Decides whether an attribute qualifies for HTML5 boolean attribute
 * minimization, i.e. whether only the bare attribute name should be emitted
 * (per W3C XSLT/XQuery Serialization 3.1, section 7.2.2).
 *
 * @param name the attribute name, compared case-insensitively against the
 *             known boolean attribute names
 * @param value the attribute value; may be {@code null}
 * @return {@code true} when {@code name} is a known boolean attribute and
 *         {@code value} is {@code null}, empty, or equal to {@code name}
 *         ignoring case; {@code false} otherwise
 */
private static boolean isBooleanAttributeMatch(final String name, final CharSequence value) {
    final String lowerName = name.toLowerCase(java.util.Locale.ROOT);
    if (BOOLEAN_ATTRIBUTE_NAMES_LOWER.contains(lowerName)) {
        return value == null
                || value.length() == 0
                || name.equalsIgnoreCase(value.toString());
    }
    return false;
}

/**
 * Emits a namespace declaration only for foreign content.
 *
 * A {@code null} or empty namespace URI and the XHTML namespace are both
 * dropped, since HTML5 elements are placed in the HTML namespace implicitly
 * by the parser. Every other namespace (e.g. SVG, MathML, custom XML) is
 * passed on to the superclass so the receiver can re-parse it as XML.
 */
@Override
public void namespace(String prefix, String nsURI) throws TransformerException {
    final boolean isForeignNamespace = nsURI != null
            && !nsURI.isEmpty()
            && !org.exist.Namespaces.XHTML_NS.equals(nsURI);
    if (isForeignNamespace) {
        super.namespace(prefix, nsURI);
    }
}

@Override
protected void closeStartTag(boolean isEmpty) throws TransformerException {
try {
if (tagIsOpen) {
final Writer w = getWriter();
if (isEmpty) {
if (isEmptyTag(currentTag)) {
getWriter().write(">");
w.write('>');
} else if (isForeignContent()) {
// Foreign content (SVG, MathML, custom XML namespace)
// embedded in HTML5 is serialized with XML self-close
// syntax so the receiver can re-parse it as XML.
w.write("/>");
} else {
getWriter().write('>');
getWriter().write("</");
getWriter().write(currentTag);
getWriter().write('>');
// Coalesce ">", "</", tag, ">" into 2 writer calls instead of 4
w.write("></");
w.write(currentTag);
w.write('>');
}
} else {
getWriter().write('>');
w.write('>');
}
tagIsOpen = false;
}
Expand All @@ -246,11 +321,68 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException {
}
}

/**
 * Tells whether the current element is "foreign content".
 *
 * An element is foreign when it has a non-empty namespace URI other than
 * the XHTML namespace. This is the trigger for XML-style self-closing per
 * HTML5's foreign-content serialization rule.
 *
 * @return {@code true} if the current element's namespace is set, non-empty
 *         and not the XHTML namespace
 */
private boolean isForeignContent() {
    final String elementNs = currentElementNamespaceURI();
    if (elementNs == null || elementNs.isEmpty()) {
        return false;
    }
    return !org.exist.Namespaces.XHTML_NS.equals(elementNs);
}

/**
 * Renders a processing instruction as an HTML comment of the form
 * {@code <!--?target data?-->}.
 *
 * QT4 PR2372: HTML5 has no PI syntax, so this mirrors the HTML5 parser's
 * coercion of {@code <?...?>} content. A pending start tag is closed first.
 *
 * @param target the PI target
 * @param data the PI data; omitted (together with its leading space) when
 *             {@code null} or empty
 * @throws TransformerException if the underlying writer fails
 */
@Override
public void processingInstruction(final String target, final String data) throws TransformerException {
    final boolean hasData = data != null && !data.isEmpty();
    try {
        if (tagIsOpen) {
            closeStartTag(false);
        }
        final Writer out = getWriter();
        out.write("<!--?");
        out.write(target);
        if (hasData) {
            out.write(' ');
            out.write(data);
        }
        out.write("?-->");
    } catch (final IOException ioe) {
        throw new TransformerException(ioe.getMessage(), ioe);
    }
}

/**
 * Per-character escape decision: nothing is escaped while the current tag
 * is a raw text element; otherwise the superclass decides.
 */
@Override
protected boolean needsEscape(char ch) {
    return !RAW_TEXT_ELEMENTS.contains(currentTag) && super.needsEscape(ch);
}

/**
 * Context-aware per-character escape decision.
 *
 * Attribute values always report {@code true}, even on raw text elements,
 * which bypasses the single-argument override. Text content inside a raw
 * text element (script, style) reports {@code false}. Everything else is
 * delegated to the superclass.
 */
@Override
protected boolean needsEscape(final char ch, final boolean inAttribute) {
    if (inAttribute) {
        // Attribute values are never treated as raw text.
        return true;
    }
    if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
        // Text content of script/style is emitted verbatim.
        return false;
    }
    return super.needsEscape(ch, inAttribute);
}

/**
 * Block-level counterpart of the per-character rule: attribute values may
 * need escaping, whereas text content inside a raw text element
 * (script, style) never does. Per the original note, this lets writeChars()
 * stream such a block in one Writer.write() call.
 */
@Override
protected boolean needsEscaping(final boolean inAttribute) {
    if (inAttribute) {
        return true;
    }
    return !RAW_TEXT_ELEMENTS.contains(currentTag);
}

}
Loading
Loading