diff --git a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java index 926e4da9faf..4e57c98b2f5 100644 --- a/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java +++ b/exist-core/src/main/java/org/exist/xquery/functions/fn/FunXmlToJson.java @@ -36,7 +36,9 @@ import java.io.StringWriter; import java.io.Writer; import java.math.BigDecimal; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import static org.exist.xquery.FunctionDSL.*; @@ -106,6 +108,8 @@ private void nodeValueToJson(final NodeValue nodeValue, final Writer writer) thr final Integer stackSeparator = 0; //use ArrayList to store String type keys and non-string type separators final ArrayList mapkeyArrayList = new ArrayList<>(); + //track parent element local names so we can validate child structure (F&O 3.1 §17.4.2 / §17.5.4) + final Deque elementStack = new ArrayDeque<>(); boolean elementKeyIsEscaped = false; boolean elementValueIsEscaped = false; XMLStreamReader reader = null; @@ -127,6 +131,7 @@ private void nodeValueToJson(final NodeValue nodeValue, final Writer writer) thr "Invalid XML representation of JSON. 
Element '" + reader.getLocalName() + "' is not in the required namespace '" + Namespaces.XPATH_FUNCTIONS_NS + "'."); } + validateStartElement(reader, elementStack); final String elementAttributeEscapedValue = reader.getAttributeValue(null, "escaped"); elementValueIsEscaped = "true".equals(elementAttributeEscapedValue); final String elementAttributeEscapedKeyValue = reader.getAttributeValue(null, "escaped-key"); @@ -149,23 +154,25 @@ private void nodeValueToJson(final NodeValue nodeValue, final Writer writer) thr } } switch (reader.getLocalName()) { - case "array": - jsonGenerator.writeStartArray(); - break; - case "map": + case "array" -> jsonGenerator.writeStartArray(); + case "map" -> { mapkeyArrayList.add(stackSeparator); jsonGenerator.writeStartObject(); - break; - default: - break; + } + default -> { /* other valid JSON element kinds emit only at END_ELEMENT */ } } break; case XMLStreamReader.CHARACTERS: case XMLStreamReader.CDATA: - tempStringBuilder.append(reader.getText()); + final String charText = reader.getText(); + validateTextInContext(charText, elementStack.peek()); + tempStringBuilder.append(charText); break; case XMLStreamReader.END_ELEMENT: final String tempString = tempStringBuilder.toString(); + if (!elementStack.isEmpty()) { + elementStack.pop(); + } switch (reader.getLocalName()) { case "array": jsonGenerator.writeEndArray(); @@ -252,4 +259,122 @@ private String unescapeEscapedJsonString(final String escapedJsonString) throws unescapedJsonString = unescapedJsonStringBuilder.toString(); return unescapedJsonString; } + + /** + * Validate the current START_ELEMENT against the F&O 3.1 §17.4.2 / §17.5.4 structural rules + * and, on success, push the element's local name onto the parent-tracking stack. 
+ */ + private void validateStartElement(final XMLStreamReader reader, final Deque elementStack) throws XPathException { + final String localName = reader.getLocalName(); + if (!isJsonElementName(localName)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Element '" + localName + + "' is not one of [map, array, null, boolean, number, string]."); + } + final String parentLocalName = elementStack.peek(); + if (parentLocalName != null && isLeafElementName(parentLocalName)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Element '" + parentLocalName + + "' must not have element children."); + } + validateAttributes(reader, localName); + elementStack.push(localName); + } + + /** + * Reject non-whitespace text node children of {@code map} and {@code array} per F&O 3.1 §17.4.2. + */ + private void validateTextInContext(final String text, final String parentLocalName) throws XPathException { + if (parentLocalName == null) { + return; + } + if (!"map".equals(parentLocalName) && !"array".equals(parentLocalName)) { + return; + } + if (!isXmlWhitespace(text)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. 
Element '" + parentLocalName + + "' must not have non-whitespace text content."); + } + } + + private static boolean isJsonElementName(final String name) { + return switch (name) { + case "map", "array", "string", "number", "boolean", "null" -> true; + default -> false; + }; + } + + private static boolean isLeafElementName(final String name) { + return switch (name) { + case "string", "number", "boolean", "null" -> true; + default -> false; + }; + } + + private static boolean isXmlWhitespace(final String text) { + for (int i = 0; i < text.length(); i++) { + final char c = text.charAt(i); + if (c != ' ' && c != '\t' && c != '\n' && c != '\r') { + return false; + } + } + return true; + } + + /** + * Validate that the attributes on the current element conform to F&O 3.1 §17.4.2 (the schema for JSON). + *

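<p>
+ * Per the schema (Appendix C.2), the only allowed no-namespace attributes are:
+ * <ul>
+ *   <li>{@code key} and {@code escaped-key} on any of the six elements (when child of map; allowed at top-level too)</li>
+ *   <li>{@code escaped} on {@code string} only (this implementation nevertheless tolerates {@code escaped} on the other elements as a no-op and only enforces its lexical value)</li>
+ * </ul>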
+ * Attributes in the {@code http://www.w3.org/2005/xpath-functions} namespace are disallowed + * ({@code anyAttribute namespace="##other"}); attributes in any other namespace are ignored. + * The {@code escaped} and {@code escaped-key} attributes must hold a valid {@code xs:boolean} value. + */ + private void validateAttributes(final XMLStreamReader reader, final String localName) throws XPathException { + for (int i = 0; i < reader.getAttributeCount(); i++) { + final String attrNs = reader.getAttributeNamespace(i); + final String attrName = reader.getAttributeLocalName(i); + if (Namespaces.XPATH_FUNCTIONS_NS.equals(attrNs)) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Attribute '" + attrName + + "' must not be in the namespace '" + Namespaces.XPATH_FUNCTIONS_NS + "'."); + } + if (attrNs != null && !attrNs.isEmpty()) { + continue; + } + switch (attrName) { + case "key", "escaped-key" -> { + if ("escaped-key".equals(attrName) && !isValidXsBoolean(reader.getAttributeValue(i))) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Attribute 'escaped-key' must have a valid xs:boolean value, but got '" + + reader.getAttributeValue(i) + "'."); + } + } + case "escaped" -> { + // Per W3C bug 29917 / qt3tests xml-to-json-065, 'escaped' is tolerated on + // non-string elements as a no-op; only the lexical value is enforced. + if (!isValidXsBoolean(reader.getAttributeValue(i))) { + throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. Attribute 'escaped' must have a valid xs:boolean value, but got '" + + reader.getAttributeValue(i) + "'."); + } + } + default -> throw new XPathException(this, ErrorCodes.FOJS0006, + "Invalid XML representation of JSON. 
Attribute '" + attrName + + "' is not allowed on element '" + localName + "'."); + } + } + } + + private static boolean isValidXsBoolean(final String value) { + if (value == null) { + return false; + } + final String trimmed = value.trim(); + return "true".equals(trimmed) || "false".equals(trimmed) || "1".equals(trimmed) || "0".equals(trimmed); + } } diff --git a/exist-core/src/test/xquery/xquery3/xml-to-json.xql b/exist-core/src/test/xquery/xquery3/xml-to-json.xql index 59bf0086084..ba06094c366 100644 --- a/exist-core/src/test/xquery/xquery3/xml-to-json.xql +++ b/exist-core/src/test/xquery/xquery3/xml-to-json.xql @@ -479,6 +479,143 @@ function xtj:xmlmap-to-json-for-exponent($int as xs:string) as xs:string { ) }; +(: =========================================================== + F&O 3.1 §17.4.2 / §17.5.4 — structural validation tests + (parity with XQTS HEAD xml-to-json-{033,040,042,043,062,063,069,081,082}) + =========================================================== :) + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-text-child-of-map() { + fn:xml-to-json( + + tabblubberundo + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-text-child-of-array() { + fn:xml-to-json( + + tabblubberundo + + ) +}; + +declare + %test:assertEquals('{"a":null,"b":null}') +function xtj:xml-to-json-whitespace-between-map-children-allowed() { + fn:xml-to-json( + + + + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-disallowed-no-ns-attribute() { + fn:xml-to-json( + + + + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-attribute-in-json-namespace() { + fn:xml-to-json( + + tab + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-invalid-escaped-key-value() { + fn:xml-to-json( + + tab + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-invalid-escaped-value() { + fn:xml-to-json( + + tab + + ) +}; + +declare + 
%test:assertEquals('{"\\t":"tab"}') +function xtj:xml-to-json-escaped-on-map-tolerated() { + fn:xml-to-json( + + tab + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-element-child-of-string() { + fn:xml-to-json( + ok + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-element-child-of-boolean() { + fn:xml-to-json( + trueqq + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-element-child-of-null() { + fn:xml-to-json( + + ) +}; + +declare + %test:assertError('FOJS0006') +function xtj:xml-to-json-element-child-of-number() { + fn:xml-to-json( + 1 + ) +}; + +declare + %test:assertEquals('"ok"') +function xtj:xml-to-json-foreign-ns-attribute-ignored() { + fn:xml-to-json( + ok + ) +}; + +declare + %test:assertEquals('"ok"') +function xtj:xml-to-json-escaped-numeric-boolean() { + fn:xml-to-json( + ok + ) +}; + declare %test:arg("int", "1E9") %test:assertXPath('$result/fn:map/fn:number = ''1E9''') %test:arg("int", "1E+9") %test:assertXPath('$result/fn:map/fn:number = ''1E+9''')