processIntraDocumentXPointer(String xpointer) throws SAXException {
+ if (document == null) {
+ return Optional.of(new ResourceError("No current document for intra-document XPointer reference"));
+ }
+
+ // Convert element() scheme to XPath if needed
+ xpointer = convertXPointerToXPath(xpointer);
+
+ // For absolute XPath expressions (from element() scheme), wrap with doc()
+ // to ensure the document context is properly set
+ final String docUri = document.getURI().toString();
+ if (xpointer.startsWith("/")) {
+ xpointer = "doc('" + docUri + "')" + xpointer;
+ } else if (xpointer.startsWith("id(")) {
+ // id() needs the document context — wrap: doc('...')/id('...')
+ xpointer = "doc('" + docUri + "')/" + xpointer;
+ }
+
+ final XQueryPool pool = serializer.broker.getBrokerPool().getXQueryPool();
+ CompiledXQuery compiled = null;
+ Source source = null;
+ try {
+ xpointer = checkNamespaces(xpointer);
+ source = new StringSource(xpointer);
+ final XQuery xquery = serializer.broker.getBrokerPool().getXQueryService();
+ XQueryContext context;
+ compiled = pool.borrowCompiledXQuery(serializer.broker, source);
+ if (compiled == null) {
+ context = new XQueryContext(serializer.broker.getBrokerPool());
+ } else {
+ context = compiled.getContext();
+ context.prepareForReuse();
+ }
+ if (namespaces != null) {
+ context.declareNamespaces(namespaces);
+ }
+ context.declareNamespace("xinclude", Namespaces.XINCLUDE_NS);
+ // Set the current document as the statically known document
+ context.setStaticallyKnownDocuments(new XmldbURI[]{document.getURI()});
+
+ if (compiled == null) {
+ compiled = xquery.compile(context, source, true);
+ } else {
+ compiled.getContext().updateContext(context);
+ context.getWatchDog().reset();
+ }
+
+ try {
+ final Sequence seq = xquery.execute(serializer.broker, compiled, null);
+ if (Type.subTypeOf(seq.getItemType(), Type.NODE)) {
+ NodeValue node;
+ for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) {
+ node = (NodeValue) i.nextItem();
+ serializer.serializeToReceiver(node, false);
+ }
+ } else {
+ for (int i = 0; i < seq.getItemCount(); i++) {
+ characters(seq.itemAt(i).getStringValue());
+ }
+ }
+ } finally {
+ context.runCleanupTasks();
+ }
+ return Optional.empty();
+ } catch (final XPathException | PermissionDeniedException e) {
+ LOG.warn("intra-document xpointer error", e);
+ throw new SAXException("Error while processing intra-document XPointer: " + e.getMessage(), e);
+ } catch (final IOException e) {
+ throw new SAXException("I/O error while reading intra-document XPointer query: " + e.getMessage(), e);
+ } finally {
+ if (compiled != null) {
+ pool.returnCompiledXQuery(source, compiled);
+ }
+ }
+ }
+
+    /**
+     * Convert an XPointer that uses the element() scheme into an XPath expression.
+     * Pointers using the xpointer() scheme are returned unchanged (after trimming);
+     * per the original note, they are left for the ANTLR parser's xpointer() rule.
+     *
+     * Child-sequence steps select elements by position, so they are emitted as
+     * {@code *[N]} by {@code convertElementSchemeToXPath}:
+     * <pre>
+     *   element(/1)          -> /*[1]
+     *   element(/1/2/3)      -> /*[1]/*[2]/*[3]
+     *   element(myid)        -> id('myid')
+     *   element(myid/2/3)    -> id('myid')/*[2]/*[3]
+     *   xpointer(expr)       -> xpointer(expr)   (unchanged)
+     *   xmlns(...)element()  -> leading xmlns() parts are skipped, element() is converted
+     * </pre>
+     *
+     * @param xpointer the raw XPointer string; surrounding whitespace is ignored
+     * @return the converted XPath, or the trimmed input when no element() pointer
+     *         could be isolated
+     */
+    private static String convertXPointerToXPath(String xpointer) {
+        xpointer = xpointer.trim();
+
+        // xpointer() scheme: hand it back untouched
+        if (xpointer.startsWith("xpointer(")) {
+            return xpointer;
+        }
+
+        final String elementScheme = "element(";
+
+        // A single element(...) pointer part: convert its scheme data
+        if (xpointer.startsWith(elementScheme) && xpointer.endsWith(")")) {
+            final String content =
+                    xpointer.substring(elementScheme.length(), xpointer.length() - 1).trim();
+            return convertElementSchemeToXPath(content);
+        }
+
+        // Several pointer parts, e.g. xmlns(...)element(...): step over the leading,
+        // directly adjacent xmlns(...) parts and convert whatever follows them.
+        if (xpointer.contains(elementScheme)) {
+            int idx = 0;
+            while (xpointer.startsWith("xmlns(", idx)) {
+                final int close = xpointer.indexOf(')', idx);
+                if (close < 0) {
+                    // unterminated xmlns( part: stop skipping
+                    break;
+                }
+                idx = close + 1;
+            }
+            // Recurse only when something was skipped and something is left; the
+            // recursive call trims again, which also absorbs whitespace between parts.
+            if (idx > 0 && idx < xpointer.length()) {
+                return convertXPointerToXPath(xpointer.substring(idx));
+            }
+        }
+
+        return xpointer;
+    }
+
+    /**
+     * Translate the scheme data of an element() pointer into XPath.
+     *
+     * Three shapes are recognised:
+     * <pre>
+     *   /1/2/3     -> /*[1]/*[2]/*[3]          (child sequence only)
+     *   myid/2/3   -> id('myid')/*[2]/*[3]     (ID followed by a child sequence)
+     *   myid       -> id('myid')               (bare ID)
+     * </pre>
+     * Each child-sequence step becomes {@code *[N]} (element position) rather than
+     * {@code node()[N]}. Steps and the ID are trimmed but otherwise copied verbatim.
+     *
+     * @param content the text between {@code element(} and the closing {@code )}
+     * @return the equivalent XPath expression
+     */
+    private static String convertElementSchemeToXPath(final String content) {
+        final boolean childSequenceOnly = content.startsWith("/");
+
+        // Bare ID: no slash anywhere
+        if (!childSequenceOnly && !content.contains("/")) {
+            return "id('" + content.trim() + "')";
+        }
+
+        // For a pure child sequence the leading slash is dropped before splitting
+        final String[] segments = (childSequenceOnly ? content.substring(1) : content).split("/");
+        final StringBuilder result = new StringBuilder();
+
+        int firstStep = 0;
+        if (!childSequenceOnly) {
+            // The first segment is the ID, the remaining ones are positional steps
+            result.append("id('").append(segments[0].trim()).append("')");
+            firstStep = 1;
+        }
+
+        for (int s = firstStep; s < segments.length; s++) {
+            result.append("/*[").append(segments[s].trim()).append(']');
+        }
+        return result.toString();
+    }
+
+    /**
+     * Read a resource as text for parse="text" inclusion.
+     *
+     * Per the XInclude spec, when parse="text", the resource is read as plain text
+     * and included as character data. XML special characters in the included text are
+     * preserved as-is (they will be escaped during serialization).
+     *
+     * The source is chosen in this order:
+     * a binary database document (raw bytes decoded with the charset), any other
+     * database document (serialized without XML declaration or indentation), an
+     * in-memory document (serialized the same way), and finally {@code href}
+     * opened as an external URL.
+     *
+     * Architectural note: BaseX delegates XInclude entirely to Java's built-in
+     * SAXParserFactory.setXIncludeAware(true), which handles parse="text" at document
+     * import time. eXist's approach (serialization-time XIncludeFilter) is more powerful
+     * (works on stored documents) but requires implementing each XInclude feature
+     * explicitly. A complementary parse-time XInclude option (like BaseX) could be
+     * added as a future enhancement.
+     *
+     * @param doc        the stored document to include, or null
+     * @param memtreeDoc the in-memory document to include when {@code doc} is null, or null
+     * @param docUri     URI of {@code doc}; only used in log messages
+     * @param href       the include target; opened as a URL when both documents are null
+     * @param encoding   charset name used to decode bytes; null, unknown or illegal
+     *                   names result in UTF-8
+     * @return the text to include, or null if the resource could not be read
+     */
+    private @Nullable String readResourceAsText(@Nullable final DocumentImpl doc,
+            @Nullable final org.exist.dom.memtree.DocumentImpl memtreeDoc,
+            @Nullable final XmldbURI docUri,
+            final String href,
+            @Nullable final String encoding) {
+        // Charset.forName throws IllegalCharsetNameException for syntactically illegal
+        // names and UnsupportedCharsetException for unknown ones; both fall back to UTF-8.
+        java.nio.charset.Charset resolved = UTF_8;
+        if (encoding != null) {
+            try {
+                resolved = java.nio.charset.Charset.forName(encoding);
+            } catch (final java.nio.charset.IllegalCharsetNameException
+                    | java.nio.charset.UnsupportedCharsetException e) {
+                LOG.warn("Unsupported encoding '{}' for text inclusion, falling back to UTF-8", encoding);
+            }
+        }
+        final java.nio.charset.Charset charset = resolved;
+
+        // Case 1: Binary document in database — read raw bytes
+        if (doc != null && doc.getResourceType() == DocumentImpl.BINARY_FILE) {
+            try (final InputStream is = serializer.broker.getBinaryResource((BinaryDocument) doc)) {
+                return new String(is.readAllBytes(), charset);
+            } catch (final IOException e) {
+                LOG.warn("Error reading binary resource as text: {}", docUri, e);
+                return null;
+            }
+        }
+
+        // Case 2: XML document in database — serialize to string (text representation)
+        if (doc != null) {
+            // For XML documents with parse="text", we serialize the document to its
+            // XML text representation and include that as character data.
+            // Per XInclude spec, the XML declaration is NOT part of the text inclusion.
+            try {
+                final Serializer tempSerializer = serializer.broker.borrowSerializer();
+                try {
+                    tempSerializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes");
+                    tempSerializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no");
+                    return tempSerializer.serialize(doc);
+                } finally {
+                    // always hand the borrowed serializer back to the broker
+                    serializer.broker.returnSerializer(tempSerializer);
+                }
+            } catch (final Exception e) {
+                LOG.warn("Error serializing XML document as text: {}", docUri, e);
+                return null;
+            }
+        }
+
+        // Case 3: In-memory document
+        if (memtreeDoc != null) {
+            try {
+                final Serializer tempSerializer = serializer.broker.borrowSerializer();
+                try {
+                    tempSerializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes");
+                    tempSerializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no");
+                    return tempSerializer.serialize(memtreeDoc);
+                } finally {
+                    serializer.broker.returnSerializer(tempSerializer);
+                }
+            } catch (final Exception e) {
+                LOG.warn("Error serializing in-memory document as text: {}", href, e);
+                return null;
+            }
+        }
+
+        // Case 4: External URI — read from URL
+        try {
+            final URI externalUri = new URI(href);
+            final URLConnection con = externalUri.toURL().openConnection();
+            try (final InputStream is = con.getInputStream()) {
+                return new String(is.readAllBytes(), charset);
+            }
+        } catch (final Exception e) {
+            // covers malformed/relative hrefs as well as I/O failures
+            LOG.warn("Error reading external resource as text: {}", href, e);
+            return null;
+        }
+    }
+
private Either parseExternal(final URI externalUri) throws ParserConfigurationException, SAXException {
try {
final URLConnection con = externalUri.toURL().openConnection();
diff --git a/exist-core/src/main/java/org/exist/util/CharSlice.java b/exist-core/src/main/java/org/exist/util/CharSlice.java
index ff2ac84d8da..c6b578b7387 100644
--- a/exist-core/src/main/java/org/exist/util/CharSlice.java
+++ b/exist-core/src/main/java/org/exist/util/CharSlice.java
@@ -196,6 +196,19 @@ public void copyTo(final char[] destination, final int destOffset) {
public void write(final Writer writer) throws java.io.IOException {
writer.write(array, offset, len);
}
+
+ /**
+ * Write a sub-range of this slice to a writer using a single bulk
+ * {@link Writer#write(char[], int, int)} call.
+ *
+ * The range is handed to the writer as {@code (array, offset + start, length)};
+ * this method itself does not validate it against the slice's own length.
+ *
+ * @param writer the writer
+ * @param start the start index within this slice (inclusive)
+ * @param length the number of characters to write
+ * @throws java.io.IOException if an error occurs whilst writing
+ */
+ public void write(final Writer writer, final int start, final int length) throws java.io.IOException {
+ // translate the slice-relative start into an index of the backing array
+ writer.write(array, offset + start, length);
+ }
}
//
diff --git a/exist-core/src/main/java/org/exist/util/Collations.java b/exist-core/src/main/java/org/exist/util/Collations.java
index 2d03138a291..64183619e30 100644
--- a/exist-core/src/main/java/org/exist/util/Collations.java
+++ b/exist-core/src/main/java/org/exist/util/Collations.java
@@ -75,6 +75,11 @@ public class Collations {
*/
public final static String HTML_ASCII_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/html-ascii-case-insensitive";
+ /**
+ * The Unicode Case-Insensitive Collation as defined by XPath F&O 4.0.
+ */
+ public final static String UNICODE_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/unicode-case-insensitive";
+
/**
* The XQTS ASCII Case-blind Collation as defined by the XQTS 3.1.
*/
@@ -90,6 +95,11 @@ public class Collations {
*/
private final static AtomicReference htmlAsciiCaseInsensitiveCollator = new AtomicReference<>();
+ /**
+ * Lazy-initialized singleton Unicode Case Insensitive Collator
+ */
+ private final static AtomicReference unicodeCaseInsensitiveCollator = new AtomicReference<>();
+
/**
* Lazy-initialized singleton XQTS Case Blind Collator
*/
@@ -276,6 +286,12 @@ public class Collations {
} catch (final Exception e) {
throw new XPathException(expression, "Unable to instantiate HTML ASCII Case Insensitive Collator: " + e.getMessage(), e);
}
+ } else if(UNICODE_CASE_INSENSITIVE_COLLATION_URI.equals(uri)) {
+ try {
+ collator = getUnicodeCaseInsensitiveCollator();
+ } catch (final Exception e) {
+ throw new XPathException(expression, "Unable to instantiate Unicode Case Insensitive Collator: " + e.getMessage(), e);
+ }
} else if(XQTS_ASCII_CASE_BLIND_COLLATION_URI.equals(uri)) {
try {
collator = getXqtsAsciiCaseBlindCollator();
@@ -344,14 +360,43 @@ public static boolean equals(@Nullable final Collator collator, final String s1,
*
* @throws UnsupportedOperationException if ICU4J does not support collation
*/
- public static int compare(@Nullable final Collator collator, final String s1,final String s2) {
+ public static int compare(@Nullable final Collator collator, final String s1, final String s2) {
if (collator == null) {
- return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2);
+ if (s1 == null) {
+ return s2 == null ? 0 : -1;
+ }
+ return compareByCodepoint(s1, s2);
} else {
return collator.compare(s1, s2);
}
}
+    /**
+     * Orders two strings by Unicode codepoint instead of by UTF-16 code unit.
+     * {@link String#compareTo(String)} works on {@code char} values, so characters
+     * at U+10000 and above (stored as surrogate pairs) sort incorrectly with it;
+     * this method decodes full codepoints before comparing.
+     *
+     * @param a the first string to compare.
+     * @param b the second string to compare.
+     * @return the difference of the first pair of differing codepoints, or, when one
+     *         string is a prefix of the other, the difference in remaining length;
+     *         zero when both strings are equal.
+     */
+    private static int compareByCodepoint(final String a, final String b) {
+        final int lenA = a.length();
+        final int lenB = b.length();
+        int posA = 0;
+        int posB = 0;
+
+        while (posA < lenA && posB < lenB) {
+            final int cpA = a.codePointAt(posA);
+            final int cpB = b.codePointAt(posB);
+            final int diff = cpA - cpB;
+            if (diff != 0) {
+                return diff;
+            }
+            // advance by one or two chars depending on the codepoint just read
+            posA += Character.charCount(cpA);
+            posB += Character.charCount(cpB);
+        }
+
+        // One side is exhausted: whichever has characters left is the greater one
+        final int remainingA = lenA - posA;
+        final int remainingB = lenB - posB;
+        return remainingA - remainingB;
+    }
+
+
/**
* Determines if one string starts with another with regards to a Collation.
*
@@ -371,10 +416,16 @@ public static boolean startsWith(@Nullable final Collator collator, final String
return true;
} else if (s1.isEmpty()) {
return false;
- } else {
+ } else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
- new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
+ new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first() == 0;
+ } else {
+ // Fallback for non-RuleBasedCollator (e.g., HtmlAsciiCaseInsensitiveCollator)
+ if (s1.length() >= s2.length()) {
+ return collator.compare(s1.substring(0, s2.length()), s2) == 0;
+ }
+ return false;
}
}
}
@@ -398,9 +449,9 @@ public static boolean endsWith(@Nullable final Collator collator, final String s
return true;
} else if (s1.isEmpty()) {
return false;
- } else {
+ } else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
- new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
+ new StringSearch(s2, new StringCharacterIterator(s1), rbc);
int lastPos = SearchIterator.DONE;
int lastLen = 0;
for (int pos = searchIterator.first(); pos != SearchIterator.DONE;
@@ -410,6 +461,12 @@ public static boolean endsWith(@Nullable final Collator collator, final String s
}
return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length();
+ } else {
+ // Fallback for non-RuleBasedCollator
+ if (s1.length() >= s2.length()) {
+ return collator.compare(s1.substring(s1.length() - s2.length()), s2) == 0;
+ }
+ return false;
}
}
}
@@ -433,10 +490,18 @@ public static boolean contains(@Nullable final Collator collator, final String s
return true;
} else if (s1.isEmpty()) {
return false;
- } else {
+ } else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
- new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
+ new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first() >= 0;
+ } else {
+ // Fallback for non-RuleBasedCollator
+ for (int i = 0; i <= s1.length() - s2.length(); i++) {
+ if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) {
+ return true;
+ }
+ }
+ return false;
}
}
}
@@ -459,10 +524,18 @@ public static int indexOf(@Nullable final Collator collator, final String s1, fi
return 0;
} else if (s1.isEmpty()) {
return -1;
- } else {
+ } else if (collator instanceof RuleBasedCollator rbc) {
final SearchIterator searchIterator =
- new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator);
+ new StringSearch(s2, new StringCharacterIterator(s1), rbc);
return searchIterator.first();
+ } else {
+ // Fallback for non-RuleBasedCollator
+ for (int i = 0; i <= s1.length() - s2.length(); i++) {
+ if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) {
+ return i;
+ }
+ }
+ return -1;
}
}
}
@@ -809,21 +882,119 @@ private static Collator getSamiskCollator() throws Exception {
return collator;
}
- private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception {
+ private static Collator getHtmlAsciiCaseInsensitiveCollator() {
Collator collator = htmlAsciiCaseInsensitiveCollator.get();
if (collator == null) {
- collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H "
- + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T "
- + "&u=U &v=V &w=W &x=X &y=Y &z=Z");
- collator.setStrength(Collator.PRIMARY);
+ // XQ4 html-ascii-case-insensitive: ASCII letters A-Z fold to a-z,
+ // all other characters compare by Unicode codepoint order.
+ // Cannot use RuleBasedCollator with PRIMARY strength because that
+ // makes ALL case/accent differences irrelevant, not just ASCII.
htmlAsciiCaseInsensitiveCollator.compareAndSet(null,
- collator.freeze());
+ new HtmlAsciiCaseInsensitiveCollator());
collator = htmlAsciiCaseInsensitiveCollator.get();
}
return collator;
}
+    /**
+     * Returns the shared collator for the Unicode case-insensitive collation,
+     * creating it on first use.
+     *
+     * The collator is built for the root locale, so results do not depend on the
+     * JVM's default locale, and is frozen before being published because the same
+     * instance is handed to every caller.
+     *
+     * @return the shared, frozen collator
+     */
+    private static Collator getUnicodeCaseInsensitiveCollator() {
+        Collator collator = unicodeCaseInsensitiveCollator.get();
+        if (collator == null) {
+            // Unicode case-insensitive: UCA with SECONDARY strength
+            // ignores case differences but respects accents and other distinctions
+            final Collator uca = Collator.getInstance(com.ibm.icu.util.ULocale.ROOT);
+            uca.setStrength(Collator.SECONDARY);
+            // If another thread won the race, its instance is kept and ours is dropped
+            unicodeCaseInsensitiveCollator.compareAndSet(null, uca.freeze());
+            collator = unicodeCaseInsensitiveCollator.get();
+        }
+
+        return collator;
+    }
+
+
+    /**
+     * Collator implementing HTML ASCII case-insensitive comparison.
+     * Only the ASCII letters A-Z are folded (to a-z); every other character keeps
+     * its own codepoint, and the folded codepoints are compared numerically.
+     * Collation keys are not supported, and the instance reports itself as frozen.
+     */
+    private static final class HtmlAsciiCaseInsensitiveCollator extends Collator {
+
+        /** Distance between an ASCII upper-case letter and its lower-case form (32). */
+        private static final int ASCII_CASE_OFFSET = 'a' - 'A';
+
+        /** Maps A-Z onto a-z and leaves any other codepoint untouched. */
+        private static int foldAscii(final int codePoint) {
+            final boolean asciiUpper = codePoint >= 'A' && codePoint <= 'Z';
+            return asciiUpper ? codePoint + ASCII_CASE_OFFSET : codePoint;
+        }
+
+        @Override
+        public int compare(final String source, final String target) {
+            final int sourceLen = source.length();
+            final int targetLen = target.length();
+            int s = 0;
+            int t = 0;
+            while (s < sourceLen && t < targetLen) {
+                final int left = foldAscii(source.codePointAt(s));
+                final int right = foldAscii(target.codePointAt(t));
+                if (left != right) {
+                    return left - right;
+                }
+                // folding never leaves the ASCII range, so the char count is unaffected
+                s += Character.charCount(left);
+                t += Character.charCount(right);
+            }
+            // a string that is a proper prefix of the other sorts first
+            return (sourceLen - s) - (targetLen - t);
+        }
+
+        @Override
+        public CollationKey getCollationKey(final String source) {
+            throw new UnsupportedOperationException("CollationKey not supported for HTML ASCII case-insensitive collation");
+        }
+
+        @Override
+        public RawCollationKey getRawCollationKey(final String source, final RawCollationKey key) {
+            throw new UnsupportedOperationException("RawCollationKey not supported for HTML ASCII case-insensitive collation");
+        }
+
+        // Variable-top handling has no effect for this collation: the setters ignore
+        // their argument and zero is reported.
+
+        @Override
+        public int setVariableTop(final String varTop) {
+            return 0;
+        }
+
+        @Override
+        public void setVariableTop(final int varTop) {
+            // intentionally empty
+        }
+
+        @Override
+        public int getVariableTop() {
+            return 0;
+        }
+
+        @Override
+        public VersionInfo getVersion() {
+            return VersionInfo.getInstance(1);
+        }
+
+        @Override
+        public VersionInfo getUCAVersion() {
+            return VersionInfo.getInstance(1);
+        }
+
+        @Override
+        public int hashCode() {
+            // all instances behave identically, so they share one hash
+            return HtmlAsciiCaseInsensitiveCollator.class.hashCode();
+        }
+
+        @Override
+        public boolean isFrozen() {
+            return true;
+        }
+
+        @Override
+        public Collator freeze() {
+            return this;
+        }
+
+        @Override
+        public Collator cloneAsThawed() {
+            return new HtmlAsciiCaseInsensitiveCollator();
+        }
+    }
+
+
private static Collator getXqtsAsciiCaseBlindCollator() throws Exception {
Collator collator = xqtsAsciiCaseBlindCollator.get();
if (collator == null) {
diff --git a/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java b/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java
new file mode 100644
index 00000000000..47e364d09cb
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java
@@ -0,0 +1,106 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.util;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+/**
+ * A SAX ContentHandler wrapper that filters events before passing them to a delegate.
+ * Saxon 12's LinkedTreeBuilder does not tolerate receiving startDocument more than once,
+ * which can happen when eXist's Serializer sends document events that overlap with
+ * explicitly-called startDocument/endDocument in the XSLT compilation pipeline.
+ *
+ * Filtering rules:
+ * <ul>
+ *   <li>only the first {@code startDocument} is forwarded;</li>
+ *   <li>{@code endDocument} is never forwarded; the caller is expected to end the
+ *       document on the delegate itself;</li>
+ *   <li>prefix mappings for the {@code xml} prefix or the XML namespace URI are
+ *       dropped, together with their matching {@code endPrefixMapping}.</li>
+ * </ul>
+ * All other events are forwarded unchanged.
+ */
+public class XMLBackwardsCompatHandler implements ContentHandler {
+
+    private final ContentHandler delegate;
+    private boolean documentStarted = false;
+
+    /**
+     * Number of suppressed, not yet ended prefix mappings per prefix, so that the
+     * corresponding end events can be suppressed as well.
+     */
+    private final java.util.Map<String, Integer> suppressedMappings = new java.util.HashMap<>();
+
+    /**
+     * @param delegate the handler receiving the filtered events
+     */
+    public XMLBackwardsCompatHandler(final ContentHandler delegate) {
+        this.delegate = delegate;
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+        if (!documentStarted) {
+            documentStarted = true;
+            delegate.startDocument();
+        }
+    }
+
+    @Override
+    public void endDocument() throws SAXException {
+        // Suppress — the caller will call endDocument on the delegate directly
+    }
+
+    @Override
+    public void setDocumentLocator(final Locator locator) {
+        delegate.setDocumentLocator(locator);
+    }
+
+    @Override
+    public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
+        // Saxon 12 rejects any namespace declaration involving the XML namespace URI
+        // (http://www.w3.org/XML/1998/namespace) — the xml prefix is always implicitly bound
+        if ("xml".equals(prefix) || javax.xml.XMLConstants.XML_NS_URI.equals(uri)) {
+            // remember it so the paired endPrefixMapping is dropped too
+            suppressedMappings.merge(prefix, 1, Integer::sum);
+            return;
+        }
+        delegate.startPrefixMapping(prefix, uri);
+    }
+
+    @Override
+    public void endPrefixMapping(final String prefix) throws SAXException {
+        // The delegate never saw the start of a suppressed mapping, so it must not
+        // see its end either.
+        final Integer pending = suppressedMappings.get(prefix);
+        if (pending != null) {
+            if (pending <= 1) {
+                suppressedMappings.remove(prefix);
+            } else {
+                suppressedMappings.put(prefix, pending - 1);
+            }
+            return;
+        }
+        delegate.endPrefixMapping(prefix);
+    }
+
+    @Override
+    public void startElement(final String uri, final String localName, final String qName, final Attributes atts) throws SAXException {
+        delegate.startElement(uri, localName, qName, atts);
+    }
+
+    @Override
+    public void endElement(final String uri, final String localName, final String qName) throws SAXException {
+        delegate.endElement(uri, localName, qName);
+    }
+
+    @Override
+    public void characters(final char[] ch, final int start, final int length) throws SAXException {
+        delegate.characters(ch, start, length);
+    }
+
+    @Override
+    public void ignorableWhitespace(final char[] ch, final int start, final int length) throws SAXException {
+        delegate.ignorableWhitespace(ch, start, length);
+    }
+
+    @Override
+    public void processingInstruction(final String target, final String data) throws SAXException {
+        delegate.processingInstruction(target, data);
+    }
+
+    @Override
+    public void skippedEntity(final String name) throws SAXException {
+        delegate.skippedEntity(name);
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
index 758ccee130a..a1b7c9890b3 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java
@@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() {
public void setOutput(Writer writer, Properties properties) {
outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties));
final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
- final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0");
-
+ // For html/xhtml methods, determine HTML version:
+ // 1. Use html-version if explicitly set
+ // 2. Otherwise use version (W3C spec: version controls HTML version for html method)
+ // 3. Default to 5.0
double htmlVersion;
- try {
- htmlVersion = Double.parseDouble(htmlVersionProp);
- } catch (NumberFormatException e) {
- htmlVersion = 1.0;
+ final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION);
+ if (explicitHtmlVersion != null) {
+ try {
+ htmlVersion = Double.parseDouble(explicitHtmlVersion);
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method))
+ && outputProperties.getProperty(OutputKeys.VERSION) != null) {
+ try {
+ htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION));
+ } catch (NumberFormatException e) {
+ htmlVersion = 5.0;
+ }
+ } else {
+ htmlVersion = 5.0;
}
final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion);
diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
index 22ab6dfca23..59fc8af3dfb 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java
@@ -152,6 +152,17 @@ public void write(final Sequence sequence, final String itemSep, final boolean e
case Type.FUNCTION:
writeFunctionItem((FunctionReference) item);
break;
+ // XQuery 4.0 JNode types — serialize as their underlying JSON structure
+ case Type.JSON_NODE:
+ case Type.JSON_OBJECT:
+ case Type.JSON_ARRAY:
+ case Type.JSON_STRING:
+ case Type.JSON_NUMBER:
+ case Type.JSON_BOOLEAN:
+ case Type.JSON_NULL:
+ case Type.JSON_MEMBER:
+ writeJNode((org.exist.xquery.value.jnode.JNode) item);
+ break;
default:
writeAtomic(item.atomize());
break;
@@ -190,10 +201,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP
}
private void writeDouble(final DoubleValue item) throws SAXException {
- final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
- symbols.setExponentSeparator("e");
- final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
- writeText(df.format(item.getDouble()));
+ final double d = item.getDouble();
+ if (Double.isInfinite(d) || Double.isNaN(d)) {
+ writeText(item.getStringValue());
+ } else {
+ final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US);
+ symbols.setExponentSeparator("e");
+ final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols);
+ writeText(df.format(d));
+ }
}
private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException {
@@ -215,9 +231,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti
private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException {
try {
- writer.write("map");
- addSpaceIfIndent();
- writer.write('{');
+ writer.write("map{");
addIndent();
indent();
for (final Iterator> i = map.iterator(); i.hasNext(); ) {
@@ -297,4 +311,23 @@ private void writeXML(final Item item) throws SAXException {
broker.returnSerializer(serializer);
}
}
+
+    /**
+     * Writes a JNode in adaptive mode by serializing the value it wraps.
+     * A map or array value is written through the map/array writers, an empty
+     * value is written as the text {@code null}, and anything else is passed to
+     * the general sequence writer with {@code ", "} as item separator.
+     *
+     * @param jnode the JNode whose value is serialized
+     */
+    private void writeJNode(final org.exist.xquery.value.jnode.JNode jnode) throws SAXException, XPathException, TransformerException {
+        final Sequence underlying = jnode.getValue();
+
+        if (underlying instanceof AbstractMapType) {
+            writeMap((AbstractMapType) underlying);
+            return;
+        }
+
+        if (underlying instanceof ArrayType) {
+            writeArray((ArrayType) underlying);
+            return;
+        }
+
+        final boolean noValue = underlying == Sequence.EMPTY_SEQUENCE || underlying.isEmpty();
+        if (noValue) {
+            writeText("null");
+            return;
+        }
+
+        // Delegate to the normal write loop for the underlying value
+        write(underlying, ", ", false);
+    }
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java
new file mode 100644
index 00000000000..98c599fc582
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java
@@ -0,0 +1,295 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.util.serializer;
+
+import io.lacuna.bifurcan.IEntry;
+import org.exist.storage.serializers.EXistOutputKeys;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.functions.array.ArrayType;
+import org.exist.xquery.functions.map.AbstractMapType;
+import org.exist.xquery.value.*;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.*;
+
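+/**
+ * Serializes XDM sequences as RFC 4180 CSV output.
+ *
+ * <p>Accepts the following input formats:</p>
+ * <ul>
+ *   <li>Array of arrays (or a sequence of arrays): each inner array is a row</li>
+ *   <li>Sequence of maps: the keys of the first map become the header, values become rows</li>
+ *   <li>XML table: {@code <csv><record><field>...</field></record></csv>}</li>
+ *   <li>Sequence of atomic values: written as a single row</li>
+ * </ul>
+ */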
+public class CSVSerializer {
+
+ private final String fieldDelimiter;
+ private final String rowDelimiter;
+ private final char quoteChar;
+ private final boolean alwaysQuote;
+ private final boolean includeHeader;
+
+ /**
+  * Creates a serializer configured from the given output properties.
+  *
+  * Defaults: field delimiter {@code ","}, row delimiter {@code "\n"}, quote
+  * character {@code '"'}, quoting of every field enabled, header row disabled.
+  * Only the first character of the quote-character property is used.
+  *
+  * @param outputProperties the serialization properties to read the CSV settings from
+  */
+ public CSVSerializer(final Properties outputProperties) {
+     this.fieldDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_FIELD_DELIMITER, ",");
+     this.rowDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_ROW_DELIMITER, "\n");
+
+     final String quoteSetting = outputProperties.getProperty(EXistOutputKeys.CSV_QUOTE_CHARACTER, "\"");
+     if (quoteSetting.isEmpty()) {
+         this.quoteChar = '"';
+     } else {
+         this.quoteChar = quoteSetting.charAt(0);
+     }
+
+     // Anything other than an explicit "no" keeps quoting switched on
+     final String quotesSetting = outputProperties.getProperty(EXistOutputKeys.CSV_QUOTES, "yes");
+     this.alwaysQuote = !"no".equals(quotesSetting);
+
+     // The header row is written only for an explicit "yes"
+     final String headerSetting = outputProperties.getProperty(EXistOutputKeys.CSV_HEADER, "no");
+     this.includeHeader = "yes".equals(headerSetting);
+ }
+
+ /**
+  * Serializes the sequence as CSV to the given writer.
+  *
+  * The type of the first item selects the layout: arrays, maps, nodes, or
+  * (for everything else) a single row of atomic values. An empty sequence
+  * produces no output.
+  *
+  * @param sequence the sequence to serialize
+  * @param writer the destination of the CSV text
+  * @throws SAXException wrapping any IOException or XPathException raised while writing
+  */
+ public void serialize(final Sequence sequence, final Writer writer) throws SAXException {
+     try {
+         if (sequence.isEmpty()) {
+             return;
+         }
+
+         final Item head = sequence.itemAt(0);
+         final int headType = head.getType();
+
+         if (headType == Type.ARRAY_ITEM) {
+             if (sequence.hasOne()) {
+                 // A lone array is read as an array of rows
+                 serializeArrayOfArrays((ArrayType) head, writer);
+             } else {
+                 // Several arrays: one row per array
+                 serializeSequenceOfArrays(sequence, writer);
+             }
+             return;
+         }
+         if (headType == Type.MAP_ITEM) {
+             serializeSequenceOfMaps(sequence, writer);
+             return;
+         }
+         if (Type.subTypeOf(headType, Type.NODE)) {
+             serializeXmlTable(sequence, writer);
+             return;
+         }
+         // Atomic value(s): everything goes into one row
+         serializeAtomicSequence(sequence, writer);
+     } catch (final IOException | XPathException e) {
+         throw new SAXException(e.getMessage(), e);
+     }
+ }
+
+ private void serializeArrayOfArrays(final ArrayType outerArray, final Writer writer) throws IOException, XPathException {
+ for (int i = 0; i < outerArray.getSize(); i++) {
+ final Sequence member = outerArray.get(i);
+ if (member.getItemCount() == 1 && member.itemAt(0).getType() == Type.ARRAY_ITEM) {
+ writeRow((ArrayType) member.itemAt(0), writer);
+ } else {
+ writeSequenceRow(member, writer);
+ }
+ writer.write(rowDelimiter);
+ }
+ }
+
+ /**
+  * Writes one row per item of the sequence.
+  *
+  * Array items become a delimited row; any other item becomes a row holding
+  * its string value as the only field. Every row is followed by the row
+  * delimiter.
+  */
+ private void serializeSequenceOfArrays(final Sequence sequence, final Writer writer) throws IOException, XPathException {
+     final SequenceIterator it = sequence.iterate();
+     while (it.hasNext()) {
+         final Item current = it.nextItem();
+         if (current.getType() != Type.ARRAY_ITEM) {
+             writer.write(quoteField(current.getStringValue()));
+         } else {
+             writeRow((ArrayType) current, writer);
+         }
+         writer.write(rowDelimiter);
+     }
+ }
+
+ /**
+  * Writes a sequence of maps as rows, using the keys of the first map as columns.
+  *
+  * Columns are ordered by the string value of their key. Each column keeps the
+  * original atomic key, so lookups in later maps also match keys that are not
+  * strings (looking a key up again through its string form would miss them).
+  * A map that lacks a key contributes an empty field. An item that is not a map
+  * contributes a row without fields.
+  *
+  * @param sequence the sequence whose first item is a map
+  * @param writer the destination of the CSV text
+  * @throws IOException if writing fails
+  * @throws XPathException if a key or value cannot be converted to a string
+  */
+ private void serializeSequenceOfMaps(final Sequence sequence, final Writer writer) throws IOException, XPathException {
+     // Collect the columns from the first map: header text paired with the original key
+     final AbstractMapType firstMap = (AbstractMapType) sequence.itemAt(0);
+     final List<Map.Entry<String, AtomicValue>> columns = new ArrayList<>();
+     for (final IEntry<AtomicValue, Sequence> entry : firstMap) {
+         final AtomicValue key = entry.key();
+         columns.add(new AbstractMap.SimpleImmutableEntry<>(key.getStringValue(), key));
+     }
+     // Stable sort by header text, matching the previous string ordering
+     columns.sort(Map.Entry.comparingByKey());
+
+     // Write header
+     if (includeHeader) {
+         final List<String> header = new ArrayList<>(columns.size());
+         for (final Map.Entry<String, AtomicValue> column : columns) {
+             header.add(column.getKey());
+         }
+         writeFields(header, writer);
+         writer.write(rowDelimiter);
+     }
+
+     // Write rows
+     for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) {
+         final Item item = i.nextItem();
+         if (item.getType() == Type.MAP_ITEM) {
+             final AbstractMapType map = (AbstractMapType) item;
+             boolean first = true;
+             for (final Map.Entry<String, AtomicValue> column : columns) {
+                 if (!first) {
+                     writer.write(fieldDelimiter);
+                 }
+                 final Sequence value = map.get(column.getValue());
+                 writer.write(quoteField(value.isEmpty() ? "" : value.getStringValue()));
+                 first = false;
+             }
+         }
+         writer.write(rowDelimiter);
+     }
+ }
+
+ /**
+  * Serializes every element item of the sequence as an XML table, e.g.
+  * {@code <csv><record><field>value</field></record></csv>}.
+  * Items that are not elements are skipped.
+  */
+ private void serializeXmlTable(final Sequence sequence, final Writer writer) throws IOException, XPathException {
+     final SequenceIterator it = sequence.iterate();
+     while (it.hasNext()) {
+         final Item candidate = it.nextItem();
+         if (!Type.subTypeOf(candidate.getType(), Type.ELEMENT)) {
+             continue;
+         }
+         final NodeValue nodeValue = (NodeValue) candidate;
+         final org.w3c.dom.Element tableRoot = (org.w3c.dom.Element) nodeValue.getNode();
+         serializeXmlElement(tableRoot, writer);
+     }
+ }
+
+ /**
+  * Serializes one element of an XML table.
+  *
+  * <ul>
+  *   <li>An element without child elements is written as a single field
+  *       (no delimiter is added).</li>
+  *   <li>A child that itself has child elements is a record: its child elements
+  *       are written as the fields of one row, followed by the row delimiter.</li>
+  *   <li>Children without child elements are direct fields: consecutive ones are
+  *       collected into one row, which is terminated by the row delimiter when a
+  *       record follows or when the children are exhausted.</li>
+  * </ul>
+  *
+  * @param element the table, record or field element
+  * @param writer the destination of the CSV text
+  * @throws IOException if writing fails
+  */
+ private void serializeXmlElement(final org.w3c.dom.Element element, final Writer writer) throws IOException {
+     final org.w3c.dom.NodeList children = element.getChildNodes();
+
+     if (!hasElementChild(children)) {
+         // Leaf element — output as a field value
+         writer.write(quoteField(element.getTextContent()));
+         return;
+     }
+
+     // true while a row made of direct field elements has been started but not terminated
+     boolean rowOpen = false;
+     for (int i = 0; i < children.getLength(); i++) {
+         final org.w3c.dom.Node child = children.item(i);
+         if (child.getNodeType() != org.w3c.dom.Node.ELEMENT_NODE) {
+             continue;
+         }
+         final org.w3c.dom.NodeList grandchildren = child.getChildNodes();
+
+         if (hasElementChild(grandchildren)) {
+             // Record element: finish a pending row of direct fields first, so the
+             // record neither continues that row nor starts with a stray delimiter
+             if (rowOpen) {
+                 writer.write(rowDelimiter);
+                 rowOpen = false;
+             }
+             boolean firstField = true;
+             for (int j = 0; j < grandchildren.getLength(); j++) {
+                 final org.w3c.dom.Node field = grandchildren.item(j);
+                 if (field.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+                     if (!firstField) {
+                         writer.write(fieldDelimiter);
+                     }
+                     writer.write(quoteField(field.getTextContent()));
+                     firstField = false;
+                 }
+             }
+             writer.write(rowDelimiter);
+         } else {
+             // Direct field element — accumulate as part of a single row
+             if (rowOpen) {
+                 writer.write(fieldDelimiter);
+             }
+             writer.write(quoteField(child.getTextContent()));
+             rowOpen = true;
+         }
+     }
+
+     // Terminate a trailing row of direct fields like every other row
+     if (rowOpen) {
+         writer.write(rowDelimiter);
+     }
+ }
+
+ /** Returns true if the node list contains at least one element node. */
+ private static boolean hasElementChild(final org.w3c.dom.NodeList nodes) {
+     for (int i = 0; i < nodes.getLength(); i++) {
+         if (nodes.item(i).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+  * Writes all items of the sequence as the fields of a single row,
+  * followed by the row delimiter.
+  */
+ private void serializeAtomicSequence(final Sequence sequence, final Writer writer) throws IOException, XPathException {
+     boolean needsDelimiter = false;
+     final SequenceIterator it = sequence.iterate();
+     while (it.hasNext()) {
+         if (needsDelimiter) {
+             writer.write(fieldDelimiter);
+         }
+         final String text = it.nextItem().getStringValue();
+         writer.write(quoteField(text));
+         needsDelimiter = true;
+     }
+     writer.write(rowDelimiter);
+ }
+
+ /**
+  * Writes the members of the array as delimited fields, without a trailing
+  * row delimiter. An empty member is written as an empty field.
+  */
+ private void writeRow(final ArrayType array, final Writer writer) throws IOException, XPathException {
+     for (int col = 0; col < array.getSize(); col++) {
+         if (col != 0) {
+             writer.write(fieldDelimiter);
+         }
+         final Sequence cell = array.get(col);
+         final String text;
+         if (cell.isEmpty()) {
+             text = "";
+         } else {
+             text = cell.getStringValue();
+         }
+         writer.write(quoteField(text));
+     }
+ }
+
+ /**
+  * Writes the items of the sequence as delimited fields, without a trailing
+  * row delimiter.
+  */
+ private void writeSequenceRow(final Sequence sequence, final Writer writer) throws IOException, XPathException {
+     boolean needsDelimiter = false;
+     final SequenceIterator it = sequence.iterate();
+     while (it.hasNext()) {
+         if (needsDelimiter) {
+             writer.write(fieldDelimiter);
+         }
+         final String text = it.nextItem().getStringValue();
+         writer.write(quoteField(text));
+         needsDelimiter = true;
+     }
+ }
+
+ /**
+  * Writes the given strings as delimited, quoted fields, without a trailing
+  * row delimiter.
+  *
+  * @param fields the field values in output order
+  * @param writer the destination of the CSV text
+  * @throws IOException if writing fails
+  */
+ private void writeFields(final List<String> fields, final Writer writer) throws IOException {
+     // The element type is declared so that iterating as String type-checks
+     boolean first = true;
+     for (final String field : fields) {
+         if (!first) {
+             writer.write(fieldDelimiter);
+         }
+         writer.write(quoteField(field));
+         first = false;
+     }
+ }
+
+ /**
+  * Quote a field value per RFC 4180.
+  * If alwaysQuote is true, all fields are quoted.
+  * If false, only fields containing the field delimiter, the configured row
+  * delimiter, the quote char, or a CR/LF are quoted.
+  * Quote characters within the value are escaped by doubling.
+  *
+  * @param value the raw field text
+  * @return the field text as it should be written to the output
+  */
+ private String quoteField(final String value) {
+     final boolean needsQuoting = alwaysQuote
+             || value.contains(fieldDelimiter)
+             // a custom row delimiter inside an unquoted field would split the record
+             || (!rowDelimiter.isEmpty() && value.contains(rowDelimiter))
+             || value.indexOf(quoteChar) >= 0
+             || value.contains("\n")
+             || value.contains("\r");
+
+     if (!needsQuoting) {
+         return value;
+     }
+
+     final StringBuilder sb = new StringBuilder(value.length() + 2);
+     sb.append(quoteChar);
+     for (int i = 0; i < value.length(); i++) {
+         final char c = value.charAt(i);
+         if (c == quoteChar) {
+             sb.append(quoteChar); // escape by doubling
+         }
+         sb.append(c);
+     }
+     sb.append(quoteChar);
+     return sb.toString();
+ }
+}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
index 1dffc3029b7..fa0a368bb9a 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java
@@ -118,6 +118,13 @@ public class HTML5Writer extends XHTML5Writer {
BOOLEAN_ATTRIBUTE_NAMES.add("willValidate");
}
+ /**
+  * Lower-cased copy of BOOLEAN_ATTRIBUTE_NAMES, filled once by the static
+  * initializer that follows. Lower-casing uses Locale.ROOT so the result does
+  * not depend on the default locale.
+  */
+ private static final ObjectSet BOOLEAN_ATTRIBUTE_NAMES_LOWER = new ObjectOpenHashSet<>(BOOLEAN_ATTRIBUTE_NAMES.size());
+ static {
+ for (final String n : BOOLEAN_ATTRIBUTE_NAMES) {
+ BOOLEAN_ATTRIBUTE_NAMES_LOWER.add(n.toLowerCase(java.util.Locale.ROOT));
+ }
+ }
+
private static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31);
static {
EMPTY_TAGS.add("area");
@@ -156,8 +163,15 @@ public void endElement(QName qname) throws TransformerException {
if (!isEmptyTag(qname.getLocalPart())) {
super.endElement(qname);
} else {
+ // HTML5 omits the close tag for void elements; we still need to
+ // honor the meta-in-head dedup that XHTMLWriter sets up at startElement
+ // time. Capture the buffered-meta flag before closeStartTag flips state.
+ final boolean wasBufferedMeta = isBufferedMeta(qname.getLocalPart());
closeStartTag(true);
endIndent(qname.getNamespaceURI(), qname.getLocalPart());
+ if (wasBufferedMeta) {
+ endMetaBuffer();
+ }
}
}
@@ -166,24 +180,33 @@ public void endElement(String namespaceURI, String localName, String qname) thro
if (!isEmptyTag(localName)) {
super.endElement(namespaceURI, localName, qname);
} else {
+ final boolean wasBufferedMeta = isBufferedMeta(localName);
closeStartTag(true);
endIndent(namespaceURI, localName);
+ if (wasBufferedMeta) {
+ endMetaBuffer();
+ }
}
}
@Override
public void attribute(String qname, CharSequence value) throws TransformerException {
+ // Strip prefix for the meta-dedup redundancy check
+ final int colon = qname.indexOf(':');
+ final String localName = colon < 0 ? qname : qname.substring(colon + 1);
+ noteMetaAttribute(localName, value);
+ final CharSequence effectiveValue = maybeEscapeUriHtml5(localName, value);
try {
if(!tagIsOpen) {
- characters(value);
+ characters(effectiveValue);
return;
}
final Writer writer = getWriter();
writer.write(' ');
writer.write(qname);
- if (!(BOOLEAN_ATTRIBUTE_NAMES.contains(qname) && qname.contentEquals(value))) {
+ if (!isBooleanAttributeMatch(qname, effectiveValue)) {
writer.write("=\"");
- writeChars(value, true);
+ writeChars(effectiveValue, true);
writer.write('"');
}
} catch(final IOException ioe) {
@@ -193,9 +216,12 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept
@Override
public void attribute(QName qname, CharSequence value) throws TransformerException {
+ noteMetaAttribute(qname.getLocalPart(), value);
+ final String localPart = qname.getLocalPart();
+ final CharSequence effectiveValue = maybeEscapeUriHtml5(localPart, value);
try {
if(!tagIsOpen) {
- characters(value);
+ characters(effectiveValue);
return;
// throw new TransformerException("Found an attribute outside an
// element");
@@ -206,11 +232,10 @@ public void attribute(QName qname, CharSequence value) throws TransformerExcepti
writer.write(qname.getPrefix());
writer.write(':');
}
- final String localPart = qname.getLocalPart();
writer.write(localPart);
- if (!(BOOLEAN_ATTRIBUTE_NAMES.contains(localPart) && localPart.contentEquals(value))) {
+ if (!isBooleanAttributeMatch(localPart, effectiveValue)) {
writer.write("=\"");
- writeChars(value, true);
+ writeChars(effectiveValue, true);
writer.write('"');
}
} catch(final IOException ioe) {
@@ -218,26 +243,76 @@ public void attribute(QName qname, CharSequence value) throws TransformerExcepti
}
}
+ /**
+  * Returns the attribute value, URI-escaped when the current element and the
+  * attribute call for it. Mirrors
+  * {@link XHTMLWriter#shouldEscapeUriAttribute(String, String)} but strips a
+  * prefix from {@link #currentTag} first, so the (element, attribute) lookup
+  * uses local names only. The value is returned unchanged when there is no
+  * current tag.
+  */
+ private CharSequence maybeEscapeUriHtml5(final String attrLocal, final CharSequence value) {
+     if (currentTag == null) {
+         return value;
+     }
+     final int colonAt = currentTag.indexOf(':');
+     final String elementLocal = colonAt >= 0 ? currentTag.substring(colonAt + 1) : currentTag;
+     if (shouldEscapeUriAttribute(elementLocal, attrLocal)) {
+         return escapeUriAttribute(value);
+     }
+     return value;
+ }
+
+ /**
+ * HTML5 boolean attribute minimization: emit just the bare name when the
+ * value is empty or matches the attribute name case-insensitively
+ * (per W3C XSLT/XQuery Serialization 3.1, section 7.2.2).
+ */
+ private static boolean isBooleanAttributeMatch(final String name, final CharSequence value) {
+ if (!BOOLEAN_ATTRIBUTE_NAMES_LOWER.contains(name.toLowerCase(java.util.Locale.ROOT))) {
+ return false;
+ }
+ if (value == null || value.length() == 0) {
+ return true;
+ }
+ return name.equalsIgnoreCase(value.toString());
+ }
+
@Override
public void namespace(String prefix, String nsURI) throws TransformerException {
- // no namespaces allowed in HTML5
+ // HTML5 elements never carry an explicit xmlns since the parser puts
+ // them in the HTML namespace implicitly. Foreign content (anything
+ // outside the XHTML namespace, e.g. SVG, MathML, custom XML) keeps
+ // its namespace declarations so the receiver can re-parse it as XML.
+ if (nsURI == null || nsURI.isEmpty()) {
+ return;
+ }
+ if (org.exist.Namespaces.XHTML_NS.equals(nsURI)) {
+ return;
+ }
+ super.namespace(prefix, nsURI);
}
@Override
protected void closeStartTag(boolean isEmpty) throws TransformerException {
try {
if (tagIsOpen) {
+ final Writer w = getWriter();
if (isEmpty) {
if (isEmptyTag(currentTag)) {
- getWriter().write(">");
+ w.write('>');
+ } else if (isForeignContent()) {
+ // Foreign content (SVG, MathML, custom XML namespace)
+ // embedded in HTML5 is serialized with XML self-close
+ // syntax so the receiver can re-parse it as XML.
+ w.write("/>");
} else {
- getWriter().write('>');
- getWriter().write("");
- getWriter().write(currentTag);
- getWriter().write('>');
+ // Coalesce ">", "", tag, ">" into 2 writer calls instead of 4
+ w.write(">");
+ w.write(currentTag);
+ w.write('>');
}
} else {
- getWriter().write('>');
+ w.write('>');
}
tagIsOpen = false;
}
@@ -246,6 +321,39 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException {
}
}
+ /**
+  * Reports whether the current element is "foreign content": its namespace
+  * is set, non-empty, and not the XHTML namespace. Foreign content is what
+  * triggers XML-style self-closing per HTML5's foreign-content
+  * serialization rule.
+  */
+ private boolean isForeignContent() {
+     final String namespace = currentElementNamespaceURI();
+     if (namespace == null || namespace.isEmpty()) {
+         return false;
+     }
+     return !org.exist.Namespaces.XHTML_NS.equals(namespace);
+ }
+
+ /**
+  * Writes a processing instruction as an HTML comment.
+  *
+  * QT4 PR2372: HTML5 has no PI syntax, so the serializer renders processing
+  * instructions as comments of the form {@code <!--?target data?-->},
+  * matching the HTML5 parser's coercion of {@code <?...?>} content.
+  * A start tag that is still open is closed first.
+  *
+  * @param target the processing instruction target
+  * @param data the processing instruction data; null or empty writes the target only
+  * @throws TransformerException wrapping any IOException raised by the writer
+  */
+ @Override
+ public void processingInstruction(final String target, final String data) throws TransformerException {
+     try {
+         if (tagIsOpen) {
+             closeStartTag(false);
+         }
+         final Writer writer = getWriter();
+         writer.write("<!--?");
+         writer.write(target);
+         if (data != null && !data.isEmpty()) {
+             writer.write(' ');
+             writer.write(data);
+         }
+         // NOTE(review): data containing "-->" would end the comment early — confirm whether callers can pass such data
+         writer.write("?-->");
+     } catch (final IOException e) {
+         throw new TransformerException(e.getMessage(), e);
+     }
+ }
+
@Override
protected boolean needsEscape(char ch) {
if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
@@ -253,4 +361,31 @@ protected boolean needsEscape(char ch) {
}
return super.needsEscape(ch);
}
+
+ /**
+  * Decides whether a character needs escaping.
+  *
+  * Attribute values are always reported as needing escaping, including on
+  * raw text elements (script, style). Text content inside a raw text element
+  * is never escaped; all other text defers to the superclass.
+  */
+ @Override
+ protected boolean needsEscape(final char ch, final boolean inAttribute) {
+     if (inAttribute) {
+         // Never consult the raw-text rule for attribute values
+         return true;
+     }
+     if (RAW_TEXT_ELEMENTS.contains(currentTag)) {
+         return false;
+     }
+     return super.needsEscape(ch, inAttribute);
+ }
+
+ /**
+  * Coarse counterpart of the per-character rule: returns false only for text
+  * content inside a raw text element (script, style), which lets
+  * writeChars() stream such a block without per-character checks.
+  */
+ @Override
+ protected boolean needsEscaping(final boolean inAttribute) {
+     return inAttribute || !RAW_TEXT_ELEMENTS.contains(currentTag);
+ }
+
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
index c336d8b2943..99df54c3e19 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java
@@ -25,7 +25,9 @@
import java.io.Writer;
import java.util.ArrayDeque;
import java.util.Deque;
+import java.util.HashSet;
import java.util.Properties;
+import java.util.Set;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
@@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter {
private boolean sameline = false;
private boolean whitespacePreserve = false;
private final Deque whitespacePreserveStack = new ArrayDeque<>();
+ private Set suppressIndentation = null;
+ private int suppressIndentDepth = 0;
public IndentingXMLWriter() {
super();
@@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina
indent();
}
super.startElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName)) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException {
indent();
}
super.startElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart())) {
+ suppressIndentDepth++;
+ }
addIndent();
afterTag = true;
sameline = true;
@@ -95,6 +105,9 @@ public void startElement(final QName qname) throws TransformerException {
public void endElement(final String namespaceURI, final String localName, final String qname) throws TransformerException {
endIndent(namespaceURI, localName);
super.endElement(namespaceURI, localName, qname);
+ if (isSuppressIndentation(localName) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(namespaceURI, localName);
afterTag = true;
@@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final
public void endElement(final QName qname) throws TransformerException {
endIndent(qname.getNamespaceURI(), qname.getLocalPart());
super.endElement(qname);
+ if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) {
+ suppressIndentDepth--;
+ }
popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element
sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart());
afterTag = true;
@@ -164,7 +180,29 @@ public void setOutputProperties(final Properties properties) {
} catch (final NumberFormatException e) {
LOG.warn("Invalid indentation value: '{}'", option);
}
- indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"));
+ final String indentValue = outputProperties.getProperty(OutputKeys.INDENT, "no").trim();
+ indent = "yes".equals(indentValue) || "true".equals(indentValue) || "1".equals(indentValue);
+ final String suppressProp = outputProperties.getProperty("suppress-indentation");
+ if (suppressProp != null && !suppressProp.isEmpty()) {
+ suppressIndentation = new HashSet<>();
+ for (final String name : suppressProp.split("\\s+")) {
+ if (!name.isEmpty()) {
+ // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part
+ if (name.startsWith("Q{") || name.startsWith("{")) {
+ final int closeBrace = name.indexOf('}');
+ if (closeBrace > 0 && closeBrace < name.length() - 1) {
+ suppressIndentation.add(name.substring(closeBrace + 1));
+ } else {
+ suppressIndentation.add(name);
+ }
+ } else {
+ suppressIndentation.add(name);
+ }
+ }
+ }
+ } else {
+ suppressIndentation = null;
+ }
}
@Override
@@ -220,8 +258,12 @@ protected void addSpaceIfIndent() throws IOException {
writer.write(' ');
}
+ /**
+  * Reports whether the given local name is listed in the configured
+  * suppress-indentation set; false when no set is configured.
+  */
+ private boolean isSuppressIndentation(final String localName) {
+     if (suppressIndentation == null) {
+         return false;
+     }
+     return suppressIndentation.contains(localName);
+ }
+
protected void indent() throws TransformerException {
- if (!indent || whitespacePreserve) {
+ if (!indent || whitespacePreserve || suppressIndentDepth > 0) {
return;
}
final int spaces = indentAmount * level;
diff --git a/exist-core/src/main/java/org/exist/util/serializer/TEXTWriter.java b/exist-core/src/main/java/org/exist/util/serializer/TEXTWriter.java
index 85c5c4cf5a6..5ea206a25da 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/TEXTWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/TEXTWriter.java
@@ -206,16 +206,10 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio
@Override
protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException {
- final int len = s.length();
- writeCharSeq(s, 0, len);
+ writeCharSeq(s, 0, s.length());
}
-
- private void writeCharSeq(final CharSequence ch, final int start, final int end) throws IOException {
- for (int i = start; i < end; i++) {
- writer.write(ch.charAt(i));
- }
- }
-
+
+
@Override
protected void writeCharacterReference(final char charval) throws IOException {
int o = 0;
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
index e89e7119d19..bc4990eb5eb 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java
@@ -22,7 +22,6 @@
package org.exist.util.serializer;
import java.io.Writer;
-import javax.xml.transform.TransformerException;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectSet;
@@ -121,14 +120,4 @@ public XHTML5Writer(ObjectSet emptyTags, ObjectSet inlineTags) {
public XHTML5Writer(Writer writer, ObjectSet emptyTags, ObjectSet inlineTags) {
super(writer, emptyTags, inlineTags);
}
-
- @Override
- protected void writeDoctype(String rootElement) throws TransformerException {
- if (doctypeWritten) {
- return;
- }
-
- documentType("html", null, null);
- doctypeWritten = true;
- }
}
diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
index b0006f7f51c..216ef6f59b4 100644
--- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
+++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java
@@ -23,6 +23,7 @@
import java.io.IOException;
import java.io.Writer;
+import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
@@ -36,12 +37,176 @@
*/
public class XHTMLWriter extends IndentingXMLWriter {
+ /**
+ * HTML boolean attributes per HTML 4.01 and HTML5 spec.
+ * When method="html" and the attribute value equals the attribute name
+ * (case-insensitive), the attribute is minimized to just the name.
+ */
+ protected static final ObjectSet