diff --git a/.codacy/codacy.yaml b/.codacy/codacy.yaml new file mode 100644 index 00000000000..e723e1332d2 --- /dev/null +++ b/.codacy/codacy.yaml @@ -0,0 +1,15 @@ +runtimes: + - dart@3.7.2 + - go@1.22.3 + - java@17.0.10 + - node@22.2.0 + - python@3.11.11 +tools: + - dartanalyzer@3.7.2 + - eslint@8.57.0 + - lizard@1.17.31 + - opengrep@1.16.2 + - pmd@7.11.0 + - pylint@3.3.6 + - revive@1.7.0 + - trivy@0.69.3 diff --git a/.gitignore b/.gitignore index 2f96c7e51c2..8ba285c898a 100644 --- a/.gitignore +++ b/.gitignore @@ -25,7 +25,4 @@ work/ # Claude planning files plans/ -.codacy/ - -# Debug logs (e.g. from reindex investigation) -reindex-dbg.log +.xqts-runner/ diff --git a/exist-ant/src/test/resources-filtered/conf.xml b/exist-ant/src/test/resources-filtered/conf.xml index 52cac5dde3f..cd3d1cd2aa5 100644 --- a/exist-ant/src/test/resources-filtered/conf.xml +++ b/exist-ant/src/test/resources-filtered/conf.xml @@ -753,6 +753,8 @@ + + diff --git a/exist-core/pom.xml b/exist-core/pom.xml index c1b7f369c72..15be3d187d3 100644 --- a/exist-core/pom.xml +++ b/exist-core/pom.xml @@ -169,13 +169,13 @@ org.bouncycastle bcprov-jdk18on - 1.84 + 1.83 at.yawk.lz4 lz4-java - 1.11.0 + 1.10.4 @@ -324,6 +324,12 @@ + + nu.validator + htmlparser + 1.4.16 + + org.apache.ws.commons.util ws-commons-util @@ -392,16 +398,8 @@ - org.exist-db - exist-saxon-regex - 9.4.0-9.e1 - - - - net.sf.saxon - Saxon-HE - - + de.bottlecaps + markup-blitz @@ -431,37 +429,16 @@ ${aspectj.version} - - org.eclipse.jetty - jetty-jaas - ${jetty.version} - runtime - - - - org.apache.servicemix.bundles - org.apache.servicemix.bundles.antlr - - - - - - - org.apache.mina - mina-core - 2.2.5 - + org.eclipse.jetty jetty-http - ${jetty.version} org.eclipse.jetty jetty-security - ${jetty.version} runtime @@ -532,13 +509,13 @@ org.jgrapht jgrapht-core - 1.5.3 + 1.5.2 org.jgrapht jgrapht-opt - 1.5.3 + 1.5.2 @@ -637,21 +614,44 @@ removed. Unfortunately, at this time, it is required for Monex's Remote Console to function. 
--> - org.eclipse.jetty - jetty-annotations + org.eclipse.jetty.ee10 + jetty-ee10-annotations - org.eclipse.jetty - jetty-servlet + org.eclipse.jetty.ee10 + jetty-ee10-servlet - org.eclipse.jetty - jetty-webapp + org.eclipse.jetty.ee10 + jetty-ee10-webapp org.eclipse.jetty jetty-xml + + + jakarta.websocket + jakarta.websocket-client-api + provided + + + jakarta.websocket + jakarta.websocket-api + provided + + + + org.eclipse.jetty.ee10.websocket + jetty-ee10-websocket-jakarta-server + + + + org.eclipse.jetty.ee10.websocket + jetty-ee10-websocket-jakarta-client + ${jetty.version} + test + org.apache.httpcomponents httpcore @@ -839,6 +839,12 @@ src/main/java/org/exist/util/io/TemporaryFileManager.java src/test/java/org/exist/util/io/CachingFilterInputStreamNonMarkableByteArrayInputStreamTest.java src/main/java/org/exist/xquery/functions/fn/FnFormatNumbers.java + + + src/test/resources/xinclude-test-suite/** @@ -1037,13 +1043,14 @@ The BaseX Team. The original license statement is also included below.]]>org.xmlresolver:xmlresolver:jar:${xmlresolver.version} org.exist-db.thirdparty.org.eclipse.wst.xml:xpath2:jar:1.2.0 edu.princeton.cup:java-cup:jar:10k - org.eclipse.jetty:jetty-jaas:jar:${jetty.version} + org.eclipse.jetty:jetty-deploy:jar:${jetty.version} org.eclipse.jetty:jetty-jmx:jar:${jetty.version} - org.eclipse.jetty:jetty-annotations:jar:${jetty.version} + org.eclipse.jetty.ee10:jetty-ee10-annotations:jar:${jetty.version} org.eclipse.jetty:jetty-security:jar:${jetty.version} ${project.groupId}:exist-jetty-config:jar:${project.version} org.apache.mina:mina-core + org.eclipse.jetty.ee10.websocket:jetty-ee10-websocket-jakarta-client:jar:${jetty.version} @@ -1201,6 +1208,7 @@ The BaseX Team. 
The original license statement is also included below.]]>${project.build.testOutputDirectory}/log4j2.xml + 180 + + + org.exist.storage.lock.DeadlockIT + org.exist.xmldb.RemoveCollectionIT + @{jacocoArgLine} --add-modules jdk.incubator.vector --enable-native-access=ALL-UNNAMED -Dfile.encoding=${project.build.sourceEncoding} -Dexist.recovery.progressbar.hide=true ${project.basedir}/../exist-jetty-config/target/classes/org/exist/jetty diff --git a/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g b/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g index d852d700444..62743725b6e 100644 --- a/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g +++ b/exist-core/src/main/antlr/org/exist/xquery/parser/XQuery.g @@ -83,6 +83,7 @@ options { protected Deque> globalStack = new ArrayDeque<>(); protected Deque elementStack = new ArrayDeque<>(); protected XQueryLexer lexer; + protected boolean xq4Enabled = false; public XQueryParser(XQueryLexer lexer) { this((TokenStream)lexer); @@ -90,6 +91,8 @@ options { setASTNodeClass("org.exist.xquery.parser.XQueryAST"); } + public boolean isXQ4() { return xq4Enabled; } + public boolean foundErrors() { return foundError; } @@ -161,6 +164,7 @@ imaginaryTokenDefinitions MAP MAP_TEST LOOKUP + FILTER_AM ARRAY ARRAY_TEST PROLOG @@ -192,6 +196,77 @@ imaginaryTokenDefinitions PREVIOUS_ITEM NEXT_ITEM WINDOW_VARS + // Full Text (W3C XQuery and XPath Full Text 3.0) + FT_CONTAINS + FT_SELECTION + FT_OR + FT_AND + FT_MILD_NOT + FT_UNARY_NOT + FT_PRIMARY_WITH_OPTIONS + FT_WORDS + FT_ANYALL_OPTION + FT_TIMES + FT_RANGE + FT_ORDER + FT_WINDOW + FT_DISTANCE + FT_SCOPE + FT_CONTENT + FT_MATCH_OPTION + FT_CASE_OPTION + FT_DIACRITICS_OPTION + FT_STEM_OPTION + FT_THESAURUS_OPTION + FT_THESAURUS_ID + FT_STOP_WORD_OPTION + FT_STOP_WORDS + FT_STOP_WORDS_EXCEPT + FT_LANGUAGE_OPTION + FT_WILDCARD_OPTION + FT_EXTENSION_OPTION + FT_EXTENSION_SELECTION + FT_IGNORE_OPTION + FT_WEIGHT + FT_SCORE_VAR + FT_OPTION_DECL + // XQuery 4.0 Parser 
Extensions + FOCUS_FUNCTION + KEYWORD_ARG + FOR_MEMBER + STRING_TEMPLATE + FOR_KEY + FOR_VALUE + FOR_KEY_VALUE + VALUE_VAR + SWITCH_BOOLEAN + MAPPING_ARROW + FILTER_AM + QNAME_LITERAL + PARAM_DEFAULT + CHOICE_TYPE + ENUM_TYPE + TERNARY + SEQ_DESTRUCTURE + ARRAY_DESTRUCTURE + MAP_DESTRUCTURE + DESTRUCTURE_VAR_TYPE + RECORD_TEST + RECORD_FIELD + // Decimal Format Declarations + DECIMAL_FORMAT_DECL + DEF_DECIMAL_FORMAT_DECL + // XQuery 4.0 JNode Kind Tests + JSON_NODE_TEST + JSON_OBJECT_TEST + JSON_ARRAY_TEST + JSON_STRING_TEST + JSON_NUMBER_TEST + JSON_BOOLEAN_TEST + JSON_NULL_TEST + JSON_MEMBER_TEST + // === XQuery 4.0 Map Comprehensions (PR2094) === + MAP_MERGE ; // === XPointer === @@ -256,11 +331,21 @@ prolog throws XPathException ( importDecl | - ( "declare" ( "default" | "boundary-space" | "ordering" | "construction" | "base-uri" | "copy-namespaces" | "namespace" ) ) => + ( "declare" ( "default" | "boundary-space" | "ordering" | "construction" | "base-uri" | "copy-namespaces" | "namespace" | "decimal-format" | "revalidation" ) ) => s:setter { if(!inSetters) - throw new XPathException(#s, "Default declarations have to come first"); + throw new XPathException(#s, ErrorCodes.XPST0003, "Default declarations have to come first"); + } + | + ( "declare" "ft-option" ) + => fto:ftOptionDecl + { + // XQFT 3.0 §2.6: FTOptionDecl is in the first section of the prolog + // (same level as setters and imports), not the second section. 
+ if (!inSetters) + throw new XPathException(#fto, ErrorCodes.XPST0003, + "'declare ft-option' must appear before variable and function declarations"); } | ( "declare" "option" ) @@ -269,10 +354,13 @@ prolog throws XPathException ( "declare" "function" ) => functionDeclUp { inSetters = false; } | + ( "declare" "updating" "function" ) + => updatingFunctionDeclUp { inSetters = false; } + | ( "declare" "variable" ) => varDeclUp { inSetters = false; } | - ( "declare" "context" "item" ) + ( "declare" "context" ("item" | "value") ) => contextItemDeclUp { inSetters = false; } | ( "declare" MOD ) @@ -292,7 +380,12 @@ importDecl throws XPathException versionDecl throws XPathException : "xquery" "version" v:STRING_LITERAL ( "encoding"! enc:STRING_LITERAL )? - { #versionDecl = #(#[VERSION_DECL, v.getText()], enc); } + { + #versionDecl = #(#[VERSION_DECL, v.getText()], enc); + if ("4.0".equals(v.getText())) { + xq4Enabled = true; + } + } ; setter @@ -311,6 +404,9 @@ setter { #setter= #(#[DEF_FUNCTION_NS_DECL, "defaultFunctionNSDecl"], deff); } | "order"^ "empty"! ( "greatest" | "least" ) + | + "decimal-format"! ( dfDefProperty )* + { #setter = #(#[DEF_DECIMAL_FORMAT_DECL, "defaultDecimalFormatDecl"], #setter); } ) | ( "declare" "boundary-space" ) => @@ -325,14 +421,39 @@ setter ( "declare" "construction" ) => "declare"! "construction"^ ( "preserve" | "strip" ) | + // === W3C XQuery Update Facility 3.0 - Revalidation Declaration === + ( "declare" "revalidation" ) => + "declare"! "revalidation"^ ( "strict" | "lax" | "skip" ) + | ( "declare" "copy-namespaces" ) => "declare"! "copy-namespaces"^ preserveMode COMMA! inheritMode | ( "declare" "namespace" ) => namespaceDecl + | + ( "declare" "decimal-format" ) => + decimalFormatDecl ) ; +decimalFormatDecl +{ String eq = null; } +: + decl:"declare"! "decimal-format"! eq=eqName! 
( dfDefProperty )* + { + #decimalFormatDecl = #(#[DECIMAL_FORMAT_DECL, eq], #decimalFormatDecl); + #decimalFormatDecl.copyLexInfo(#decl); + } + ; + +dfDefProperty +: + ( "decimal-separator"^ | "grouping-separator"^ | "infinity"^ | "minus-sign"^ + | "NaN"^ | "percent"^ | "per-mille"^ | "zero-digit"^ | "digit"^ + | "pattern-separator"^ | "exponent-separator"^ ) + EQ! STRING_LITERAL + ; + preserveMode : ( "preserve" | "no-preserve" ) @@ -428,7 +549,7 @@ annotateDecl! throws XPathException : decl:"declare"! ann:annotations! ( - ("function") => f:functionDecl[#ann] { #annotateDecl = #f; } + ("function") => f:functionDecl[#ann, false] { #annotateDecl = #f; } | ("variable") => v:varDecl[#decl, #ann] { #annotateDecl = #v; } ) @@ -441,7 +562,7 @@ contextItemDeclUp! throws XPathException contextItemDecl [XQueryAST decl] throws XPathException : - "context"! "item"! ( typeDeclaration )? + "context"! ( "item"! | "value"! ) ( typeDeclaration )? ( COLON! EQ! e1:expr | @@ -464,10 +585,22 @@ annotation String name= null; } : - MOD! name=eqName! (LPAREN! literal (COMMA! literal)* RPAREN!)? + MOD! name=eqName! (LPAREN! annotationLiteral (COMMA! annotationLiteral)* RPAREN!)? { #annotation= #(#[ANNOT_DECL, name], #annotation); } ; +// XQ4: annotation parameters support literals, true(), false(), and negated numeric literals +// Note: true()/false() must be matched via NCNAME + semantic predicate, NOT as "true"/"false" keywords. +// Using quoted keyword syntax would register them in testLiterals, breaking true()/false() function +// calls throughout the grammar (ANTLR 2 converts all NCNAMEs matching keywords to LITERAL_xxx tokens). +annotationLiteral +: + literal + | ( { LT(1).getText().equals("true") || LT(1).getText().equals("false") }? b:NCNAME LPAREN! RPAREN! + { #annotationLiteral = #[STRING_LITERAL, #b.getText()]; #b = null; } ) + | MINUS! 
n:numericLiteral { #n.setText("-" + #n.getText()); #annotationLiteral = #n; } + ; + eqName returns [String name] { name= null; } : @@ -496,10 +629,15 @@ bracedUriLiteral returns [String uri] functionDeclUp! throws XPathException : - "declare"! f:functionDecl[null] { #functionDeclUp = #f; } + "declare"! f:functionDecl[null, false] { #functionDeclUp = #f; } + ; + +updatingFunctionDeclUp! throws XPathException +: + "declare"! "updating"! f:functionDecl[null, true] { #updatingFunctionDeclUp = #f; } ; -functionDecl [XQueryAST ann] throws XPathException +functionDecl [XQueryAST ann, boolean updating] throws XPathException { String name= null; } : "function"! name=eqName! lp:LPAREN! ( paramList )? @@ -509,6 +647,9 @@ functionDecl [XQueryAST ann] throws XPathException #functionDecl= #(#[FUNCTION_DECL, name, org.exist.xquery.parser.XQueryFunctionAST.class.getName()], #ann, #functionDecl); #functionDecl.copyLexInfo(#lp); #functionDecl.setDoc(getXQDoc()); + if (updating) { + ((XQueryFunctionAST) #functionDecl).setUpdating(true); + } } exception catch [RecognitionException e] { @@ -550,7 +691,10 @@ param throws XPathException { String varName= null; } : DOLLAR! varName=eqName ( t:typeDeclaration )? - { #param= #(#[VARIABLE_BINDING, varName], #t); } + ( ( { xq4Enabled }? COLON EQ ) => COLON! EQ! pd:exprSingle! + { #pd = #(#[PARAM_DEFAULT, "param-default"], #pd); } + )? + { #param= #(#[VARIABLE_BINDING, varName], #t, #pd); } ; uriList throws XPathException @@ -588,10 +732,16 @@ itemType throws XPathException | ( "function" LPAREN ) => functionTest | + ( "fn" LPAREN ) => fnShorthandFunctionTest + | ( "map" LPAREN ) => mapType | ( "array" LPAREN ) => arrayType | + ( "record" LPAREN ) => recordType + | + ( "enum" LPAREN ) => enumType + | ( LPAREN ) => parenthesizedItemType | ( . LPAREN ) => kindTest @@ -600,13 +750,51 @@ itemType throws XPathException ; parenthesizedItemType throws XPathException +{ int count = 0; } +: + LPAREN! itemType { count++; } ( UNION! 
itemType { count++; } )* RPAREN! + { + if (count > 1) { + #parenthesizedItemType = #(#[CHOICE_TYPE, "choice-type"], #parenthesizedItemType); + } + } + ; + +enumType throws XPathException +{ List enumValues = new ArrayList(); } : - LPAREN! itemType RPAREN! + e:"enum"! LPAREN! + s1:STRING_LITERAL! { enumValues.add(s1.getText()); } + ( COMMA! s2:STRING_LITERAL! { enumValues.add(s2.getText()); } )* + RPAREN! + { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < enumValues.size(); i++) { + if (i > 0) sb.append(","); + sb.append(enumValues.get(i)); + } + #enumType = #(#[ENUM_TYPE, sb.toString()]); + #enumType.copyLexInfo(#e); + } ; singleType throws XPathException +{ int count = 0; } : - atomicType ( QUESTION )? + ( + ( "enum" LPAREN ) => enumType ( QUESTION )? + | + ( LPAREN ) => + LPAREN! atomicType { count++; } ( UNION! atomicType { count++; } )* RPAREN! + { + if (count > 1) { + #singleType = #(#[CHOICE_TYPE, "choice-type"], #singleType); + } + } + ( QUESTION )? + | + atomicType ( QUESTION )? + ) ; atomicType throws XPathException @@ -634,10 +822,38 @@ anyFunctionTest throws XPathException typedFunctionTest throws XPathException : - "function"! LPAREN! (sequenceType (COMMA! sequenceType)*)? RPAREN! "as" sequenceType + "function"! LPAREN! (fnShorthandParam (COMMA! fnShorthandParam)*)? RPAREN! "as" sequenceType { #typedFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #typedFunctionTest); } ; +// XQ4: fn(...) as shorthand for function(...) in type positions +fnShorthandFunctionTest throws XPathException +: + ( "fn" LPAREN STAR RPAREN) => fnShorthandAnyFunctionTest + | + fnShorthandTypedFunctionTest + ; + +fnShorthandAnyFunctionTest throws XPathException +: + "fn"! LPAREN! s2:STAR RPAREN! + { #fnShorthandAnyFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #s2); } + ; + +fnShorthandTypedFunctionTest throws XPathException +: + "fn"! LPAREN! (fnShorthandParam (COMMA! fnShorthandParam)*)? RPAREN! 
"as" sequenceType + { #fnShorthandTypedFunctionTest = #(#[FUNCTION_TEST, "anyFunction"], #fnShorthandTypedFunctionTest); } + ; + +// XQ4: fn() type parameters can optionally have names: fn($name as type, ...) +fnShorthandParam throws XPathException +: + ( DOLLAR ) => DOLLAR! eqName! "as"! sequenceType + | + sequenceType + ; + mapType throws XPathException : ( "map" LPAREN STAR ) => anyMapTypeTest @@ -686,6 +902,42 @@ arrayTypeTest throws XPathException } ; +recordType throws XPathException +: + ( "record" LPAREN RPAREN ) => emptyRecordTypeTest + | + recordTypeTest + ; + +// NOTE: anyRecordTypeTest removed - XQ4 PR2413 removed extensible records. +// record(*) and record(field, *) must raise XPST0003. + +emptyRecordTypeTest throws XPathException +: + m:"record"! LPAREN! RPAREN! + { + #emptyRecordTypeTest = #(#[RECORD_TEST, "record"]); + #emptyRecordTypeTest.copyLexInfo(#m); + } + ; + +recordTypeTest throws XPathException +: + m:"record"! LPAREN! recordFieldDecl ( COMMA! recordFieldDecl )* RPAREN! + { + #recordTypeTest = #(#[RECORD_TEST, "record"], #recordTypeTest); + } + ; + +recordFieldDecl throws XPathException +{ String fieldName = null; } +: + fieldName=ncnameOrKeyword! ( QUESTION )? ( "as"! sequenceType )? 
+ { + #recordFieldDecl = #(#[RECORD_FIELD, fieldName], #recordFieldDecl); + } + ; + // === Expressions === queryBody throws XPathException: expr ; @@ -702,17 +954,26 @@ expr throws XPathException exprSingle throws XPathException : - ( ( "for" | "let" ) ("tumbling" | "sliding" | DOLLAR ) ) => flworExpr + ( ( "for" | "let" ) ("tumbling" | "sliding" | "score" | "member" | "key" | "value" | DOLLAR) ) => flworExpr | ( "try" LCURLY ) => tryCatchExpr | ( ( "some" | "every" ) DOLLAR ) => quantifiedExpr | ( "if" LPAREN ) => ifExpr | ( "switch" LPAREN ) => switchExpr | ( "typeswitch" LPAREN ) => typeswitchExpr + // === Legacy update (DEPRECATED - use W3C XQuery Update Facility 3.0 syntax instead) === | ( "update" ( "replace" | "value" | "insert" | "delete" | "rename" )) => updateExpr + // === W3C XQuery Update Facility 3.0 === + | ( "insert" ( "node" | "nodes" ) ) => xqufInsertExpr + | ( "delete" ( "node" | "nodes" ) ) => xqufDeleteExpr + | ( "replace" ( "node" | "value" ) ) => xqufReplaceExpr + | ( "rename" "node" ) => xqufRenameExpr + | ( "copy" DOLLAR ) => xqufTransformExpr | orExpr ; -// === Xupdate === +// === Legacy update (DEPRECATED - use W3C XQuery Update Facility 3.0 syntax instead) === +// To remove legacy update support, delete this section and the updateExpr +// alternative in exprSingle above. updateExpr throws XPathException : @@ -752,11 +1013,65 @@ renameExpr throws XPathException "rename" exprSingle "as"! exprSingle ; -// === try/catch === +// === W3C XQuery Update Facility 3.0 === + +xqufInsertExpr throws XPathException +: + "insert"^ ( "node"! | "nodes"! ) exprSingle + ( + ( "as" "first" "into" ) => "as"! "first" "into"! exprSingle + | ( "as" "last" "into" ) => "as"! "last" "into"! exprSingle + | "into" exprSingle + | "before" exprSingle + | "after" exprSingle + ) + ; + +xqufDeleteExpr throws XPathException +: + "delete"^ ( "node"! | "nodes"! 
) exprSingle + ; + +xqufReplaceExpr throws XPathException +: + "replace"^ + ( + ( "value" "of" "node" ) => "value" "of"! "node"! exprSingle "with"! exprSingle + | "node"! exprSingle "with"! exprSingle + ) + ; + +xqufRenameExpr throws XPathException +: + "rename"^ "node"! exprSingle "as"! exprSingle + ; + +xqufTransformExpr throws XPathException +: + "copy"^ + xqufCopyBinding ( COMMA! xqufCopyBinding )* + "modify"! exprSingle + "return"! exprSingle + ; + +xqufCopyBinding throws XPathException +{ String varName; } +: + DOLLAR! varName=v:varName! COLON! EQ! exprSingle + { + #xqufCopyBinding = #(#[VARIABLE_BINDING, varName], #xqufCopyBinding); + #xqufCopyBinding.copyLexInfo(#v); + } + ; + +// === try/catch/finally === tryCatchExpr throws XPathException : "try"^ LCURLY! tryTargetExpr RCURLY! - (catchClause)+ + ( + (catchClause)+ ( { xq4Enabled }? finallyClause )? + | { xq4Enabled }? finallyClause + ) ; tryTargetExpr throws XPathException @@ -769,6 +1084,11 @@ catchClause throws XPathException "catch"^ catchErrorList (catchVars)? LCURLY! expr RCURLY! ; +finallyClause throws XPathException +: + "finally"^ LCURLY! (expr)? RCURLY! + ; + catchErrorList throws XPathException : nameTest (UNION! nameTest)* @@ -809,14 +1129,14 @@ flworExpr throws XPathException initialClause throws XPathException : - ( ( "for" DOLLAR ) => forClause + ( ( "for" ( "member" | "key" | "value" | DOLLAR ) ) => forClause | ( "for" ( "tumbling" | "sliding" ) ) => windowClause | letClause ) ; intermediateClause throws XPathException : - ( initialClause | whereClause | groupByClause | orderByClause | countClause ) + ( initialClause | whereClause | whileClause | groupByClause | orderByClause | countClause ) ; whereClause throws XPathException @@ -824,6 +1144,11 @@ whereClause throws XPathException "where"^ exprSingle ; +whileClause throws XPathException +: + { xq4Enabled }? 
"while"^ exprSingle + ; + countClause throws XPathException { String varName; } : @@ -833,12 +1158,83 @@ countClause throws XPathException forClause throws XPathException : - "for"^ inVarBinding ( COMMA! inVarBinding )* + "for"^ forBinding ( COMMA! forBinding )* + ; + +forBinding throws XPathException +: + ( { xq4Enabled }? "member" ) => memberVarBinding + | ( { xq4Enabled }? "key" ) => keyVarBinding + | ( { xq4Enabled }? "value" ) => valueVarBinding + | inVarBinding + ; + +memberVarBinding throws XPathException +{ String varName; } +: + "member"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( positionalVar )? + "in"! exprSingle + { + #memberVarBinding= #(#[VARIABLE_BINDING, varName], #memberVarBinding); + #memberVarBinding.copyLexInfo(#v); + #memberVarBinding= #(#[FOR_MEMBER, null], #memberVarBinding); + } + ; + +keyVarBinding throws XPathException +{ String varName; } +: + "key"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( + ( "value" DOLLAR ) => keyValueVarPart + )? + ( positionalVar )? + "in"! exprSingle + { + #keyVarBinding= #(#[VARIABLE_BINDING, varName], #keyVarBinding); + #keyVarBinding.copyLexInfo(#v); + // Check if we have a value variable (keyValueVarPart was matched) + boolean hasValueVar = false; + AST child = #keyVarBinding.getFirstChild(); + while (child != null) { + if (child.getType() == VALUE_VAR) { hasValueVar = true; break; } + child = child.getNextSibling(); + } + if (hasValueVar) { + #keyVarBinding= #(#[FOR_KEY_VALUE, null], #keyVarBinding); + } else { + #keyVarBinding= #(#[FOR_KEY, null], #keyVarBinding); + } + } + ; + +keyValueVarPart throws XPathException +{ String valueVarName; } +: + "value"! DOLLAR! valueVarName=varName! ( typeDeclaration )? + { + #keyValueVarPart = #(#[VALUE_VAR, valueVarName], #keyValueVarPart); + } + ; + +valueVarBinding throws XPathException +{ String varName; } +: + "value"! DOLLAR! varName=v:varName! ( typeDeclaration )? + ( positionalVar )? + "in"! 
exprSingle + { + #valueVarBinding= #(#[VARIABLE_BINDING, varName], #valueVarBinding); + #valueVarBinding.copyLexInfo(#v); + #valueVarBinding= #(#[FOR_VALUE, null], #valueVarBinding); + } ; letClause throws XPathException : - "let"^ letVarBinding ( COMMA! letVarBinding )* + "let"^ ( ( "score" ) => ftScoreVarBinding | letVarBinding ) + ( COMMA! ( ( "score" ) => ftScoreVarBinding | letVarBinding ) )* ; windowClause throws XPathException @@ -851,6 +1247,7 @@ inVarBinding throws XPathException : DOLLAR! varName=v:varName! ( typeDeclaration )? ( allowingEmpty )? ( positionalVar )? + ( ftScoreVar )? "in"! exprSingle { #inVarBinding= #(#[VARIABLE_BINDING, varName], #inVarBinding); @@ -904,6 +1301,16 @@ windowVars throws XPathException letVarBinding throws XPathException { String varName; } : + // XQ4: sequence destructuring - let $($x, $y) := expr + ( DOLLAR LPAREN ) => letDestructureSeq + | + // XQ4: array destructuring - let $[$x, $y] := expr + ( DOLLAR LPPAREN ) => letDestructureArray + | + // XQ4: map destructuring - let ${$x, $y} := expr + ( DOLLAR LCURLY ) => letDestructureMap + | + // Standard let binding DOLLAR! varName=v:varName! ( typeDeclaration )? COLON! EQ! exprSingle { @@ -912,6 +1319,86 @@ letVarBinding throws XPathException } ; +// XQFT 3.0: FTScoreVar in for binding - "score" "$" VarName +ftScoreVar +{ String varName; } +: + "score" DOLLAR! varName=varName + { #ftScoreVar= #[FT_SCORE_VAR, varName]; } + ; + +// XQFT 3.0: FTScoreVar as let clause - "score" "$" VarName ":=" ExprSingle +ftScoreVarBinding throws XPathException +{ String varName; } +: + "score"! DOLLAR! varName=v:varName! COLON! EQ! 
exprSingle + { + #ftScoreVarBinding= #(#[VARIABLE_BINDING, varName], #[FT_SCORE_VAR, "score"], #ftScoreVarBinding); + #ftScoreVarBinding.copyLexInfo(#v); + } + ; + +// XQ4: Per-variable type annotations: "x+,y" means $x has a DESTRUCTURE_VAR_TYPE child, $y does not +letDestructureSeq throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LPAREN! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RPAREN! ( typeDeclaration )? + COLON! EQ! exprSingle + { + #letDestructureSeq = #(#[SEQ_DESTRUCTURE, sb.toString()], #letDestructureSeq); + #letDestructureSeq.copyLexInfo(#d); + } + ; + +letDestructureArray throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LPPAREN! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RPPAREN! ( typeDeclaration )? + COLON! EQ! exprSingle + { + #letDestructureArray = #(#[ARRAY_DESTRUCTURE, sb.toString()], #letDestructureArray); + #letDestructureArray.copyLexInfo(#d); + } + ; + +letDestructureMap throws XPathException +{ String vn; + StringBuilder sb = new StringBuilder(); } +: + d:DOLLAR! LCURLY! + DOLLAR! vn=varName! { sb.append(vn); } + ( destructureVarType { sb.append("+"); } )? + ( COMMA! DOLLAR! vn=varName! { sb.append(",").append(vn); } + ( destructureVarType { sb.append("+"); } )? )* + RCURLY! ( typeDeclaration )? + COLON! EQ! 
exprSingle + { + #letDestructureMap = #(#[MAP_DESTRUCTURE, sb.toString()], #letDestructureMap); + #letDestructureMap.copyLexInfo(#d); + } + ; + +// Helper: wraps typeDeclaration in DESTRUCTURE_VAR_TYPE imaginary token +destructureVarType throws XPathException +: + td:typeDeclaration + { + #destructureVarType = #(#[DESTRUCTURE_VAR_TYPE, "vartype"], #td); + } + ; + orderByClause throws XPathException : ( "order"! "by"! | "stable"! "order"! "by"! ) orderSpecList @@ -973,9 +1460,26 @@ quantifiedInVarBinding throws XPathException switchExpr throws XPathException : - "switch"^ LPAREN! expr RPAREN! - ( switchCaseClause )+ - "default" "return"! exprSingle + "switch"^ LPAREN! + ( + // XQ4 omitted comparand - boolean mode: switch () { case boolExpr return ... } + ( RPAREN ) => + RPAREN! switchBooleanMarker + | + expr RPAREN! + ) + ( + // XQ4 braced syntax: switch (...) { case ... default ... } + ( LCURLY "case" ) => + LCURLY! ( switchCaseClause )+ "default" "return"! exprSingle RCURLY! + | + ( switchCaseClause )+ "default" "return"! exprSingle + ) + ; + +switchBooleanMarker +: + { #switchBooleanMarker = #(#[SWITCH_BOOLEAN, "switch-boolean"]); } ; switchCaseClause throws XPathException @@ -988,8 +1492,13 @@ typeswitchExpr throws XPathException { String varName; } : "typeswitch"^ LPAREN! expr RPAREN! - ( caseClause )+ - "default" ( defaultVar )? "return"! exprSingle + ( + // XQ4 braced syntax: typeswitch (...) { case ... default ... } + ( LCURLY "case" ) => + LCURLY! ( caseClause )+ "default" ( defaultVar )? "return"! exprSingle RCURLY! + | + ( caseClause )+ "default" ( defaultVar )? "return"! exprSingle + ) ; caseClause throws XPathException @@ -1024,12 +1533,28 @@ defaultVar throws XPathException ; ifExpr throws XPathException +{ + org.exist.xquery.parser.XQueryAST emptyNode = null; +} : - "if"^ LPAREN! expr RPAREN! t:"then"! thenExpr:exprSingle e:"else"! elseExpr:exprSingle - { - #thenExpr.copyLexInfo(#t); - #elseExpr.copyLexInfo(#e); - } + "if"^ LPAREN! expr RPAREN! 
+ ( + // Traditional: if (cond) then expr else expr + ( "then" ) => + t:"then"! thenExpr:exprSingle e:"else"! elseExpr:exprSingle + { + #thenExpr.copyLexInfo(#t); + #elseExpr.copyLexInfo(#e); + } + | + // XQ4 Braced: if (cond) { expr } (no else clause; returns empty sequence if false) + LCURLY! bracedThenExpr:expr RCURLY! + { + // Synthesize empty sequence as implicit else branch + emptyNode = (org.exist.xquery.parser.XQueryAST) #(#[PARENTHESIZED, "()"]); + #ifExpr.addChild(emptyNode); + } + ) ; // === Logical === @@ -1037,6 +1562,12 @@ ifExpr throws XPathException orExpr throws XPathException : andExpr ( "or"^ andExpr )* + ( + { xq4Enabled }? DOUBLE_QUESTION! exprSingle DOUBLE_BANG! exprSingle + { + #orExpr = #(#[TERNARY, "ternary"], #orExpr); + } + )? ; andExpr throws XPathException @@ -1061,23 +1592,51 @@ castableExpr throws XPathException castExpr throws XPathException : - arrowExpr ( "cast"^ "as"! singleType )? + pipelineExpr ( "cast"^ "as"! singleType )? + ; + +pipelineExpr throws XPathException +: + arrowExpr ( { xq4Enabled }? PIPELINE_OP^ arrowExpr )* ; comparisonExpr throws XPathException : - r1:stringConcatExpr ( - ( BEFORE ) => BEFORE^ stringConcatExpr + r1:ftContainsExpr ( + ( BEFORE ) => BEFORE^ ftContainsExpr | - ( AFTER ) => AFTER^ stringConcatExpr - | ( ( "eq"^ | "ne"^ | "lt"^ | "le"^ | "gt"^ | "ge"^ ) stringConcatExpr ) - | ( GT EQ ) => GT^ EQ^ r2:rangeExpr + ( AFTER ) => AFTER^ ftContainsExpr + | ( ( "eq"^ | "ne"^ | "lt"^ | "le"^ | "gt"^ | "ge"^ ) ftContainsExpr ) + | ( GT EQ ) => GT^ EQ^ r2:ftContainsExpr { #comparisonExpr = #(#[GTEQ, ">="], #r1, #r2); } - | ( ( EQ^ | NEQ^ | GT^ | LT^ | LTEQ^ ) stringConcatExpr ) - | ( ( "is"^ | "isnot"^ ) stringConcatExpr ) + | ( ( EQ^ | NEQ^ | GT^ | LT^ | LTEQ^ ) ftContainsExpr ) + | ( ( "is"^ | "isnot"^ | "is-not"^ | "follows-or-is"^ | "precedes-or-is"^ ) ftContainsExpr ) + )? 
+ ; + +// XQFT 3.0: FTContainsExpr sits between ComparisonExpr and OtherwiseExpr +ftContainsExpr throws XPathException +: + r1:otherwiseExpr ( + ( "contains" "text" ) => "contains"! "text"! ft:ftSelection ( ( "without" ) => fti:ftIgnoreOption )? + { + // Break auto-tree sibling links to prevent circular refs in ASTFactory.make() + #r1.setNextSibling(null); + #ft.setNextSibling(null); + if (#fti != null) { + #ftContainsExpr = #(#[FT_CONTAINS, "contains text"], #r1, #ft, #fti); + } else { + #ftContainsExpr = #(#[FT_CONTAINS, "contains text"], #r1, #ft); + } + } )? ; +otherwiseExpr throws XPathException +: + stringConcatExpr ( { xq4Enabled }? "otherwise"^ stringConcatExpr )* + ; + stringConcatExpr throws XPathException { boolean isConcat = false; } : @@ -1222,13 +1781,15 @@ stepExpr throws XPathException | ( ( "element" | "attribute" | "text" | "document" | "comment" | "namespace-node" | "processing-instruction" | "namespace" | "ordered" | - "unordered" | "map" | "array" ) LCURLY ) => + "unordered" | "map" | "array" | "fn" | "function" ) LCURLY ) => postfixExpr | ( ( "element" | "attribute" | "processing-instruction" | "namespace" ) eqName LCURLY ) => postfixExpr | + ( "fn" LPAREN ) => postfixExpr + | ( MOD | DOLLAR | ( eqName ( LPAREN | HASH ) ) | SELF | LPAREN | literal | XML_COMMENT | LT | - XML_PI | QUESTION | LPPAREN | STRING_CONSTRUCTOR_START ) + XML_PI | QUESTION | LPPAREN | STRING_CONSTRUCTOR_START | STRING_TEMPLATE_START | LCURLY | HASH ) => postfixExpr | axisStep @@ -1271,14 +1832,17 @@ forwardAxis : forwardAxisSpecifier COLON! COLON! ; forwardAxisSpecifier : "child" | "self" | "attribute" | "descendant" | "descendant-or-self" - | "following-sibling" | "following" + | "following-sibling-or-self" | "following-sibling" + | "following-or-self" | "following" ; reverseAxis : reverseAxisSpecifier COLON! COLON! 
; reverseAxisSpecifier : - "parent" | "ancestor" | "ancestor-or-self" | "preceding-sibling" | "preceding" + "parent" | "ancestor" | "ancestor-or-self" + | "preceding-sibling-or-self" | "preceding-sibling" + | "preceding-or-self" | "preceding" ; nodeTest throws XPathException @@ -1326,18 +1890,53 @@ postfixExpr throws XPathException | (LPAREN) => dynamicFunCall | + // XQuery 4.0: FilterExprAM - must check before lookup + (QUESTION LPPAREN) => filterExprAM + | (QUESTION) => lookup )* ; +// XQuery 4.0: Array/Map Filter Expression +filterExprAM throws XPathException +{ } +: + q:QUESTION! LPPAREN! expr:exprSingle RPPAREN! + { + #filterExprAM = #(#[FILTER_AM, "?["], #expr); + #filterExprAM.copyLexInfo(#q); + } + ; + arrowExpr throws XPathException : - unaryExpr ( ARROW_OP^ arrowFunctionSpecifier argumentList )* + unaryExpr ( + ARROW_OP^ arrowFunctionSpecifier argumentList + | + { xq4Enabled }? MAPPING_ARROW_OP^ arrowFunctionSpecifier argumentList + | + { xq4Enabled }? METHOD_CALL_OP^ NCNAME argumentList + )* ; arrowFunctionSpecifier throws XPathException { String name= null; } : + // XQ4: inline/focus function expression + ( MOD | ( ("function" | "fn") (LPAREN | LCURLY) ) ) => inlineOrFocusFunctionExpr + | + // XQ4: named function reference (eqName '#' arity) + ( eqName HASH ) => namedFunctionRef + | + // XQ4: map constructor as function + ( "map" LCURLY ) => mapConstructor + | + // XQ4: bare map constructor as function + ( LCURLY ) => bareMapConstructor + | + // XQ4: array constructor as function + ( LPPAREN | ("array" LCURLY) ) => arrayConstructor + | name=n:eqName { #arrowFunctionSpecifier= #[EQNAME, name]; @@ -1350,7 +1949,7 @@ arrowFunctionSpecifier throws XPathException ; lookup throws XPathException -{ String name= null; } +{ String name= null; String varName= null; } : q:QUESTION! ( @@ -1360,18 +1959,59 @@ lookup throws XPathException #lookup.copyLexInfo(#q); } | + // XQ4: decimal and double literals as key selectors (?1.2, ?1.2e0) + { xq4Enabled }? 
dbl:DOUBLE_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #dbl); + #lookup.copyLexInfo(#q); + } + | + { xq4Enabled }? dec:DECIMAL_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #dec); + #lookup.copyLexInfo(#q); + } + | pos:INTEGER_LITERAL { #lookup = #(#[LOOKUP, "?"], #pos); #lookup.copyLexInfo(#q); } | + // XQ4: string literal as key selector (?"first value") + str:STRING_LITERAL + { + #lookup = #(#[LOOKUP, "?"], #str); + #lookup.copyLexInfo(#q); + } + | paren:parenthesizedExpr { #lookup = #(#[LOOKUP, "?"], #paren); #lookup.copyLexInfo(#q); } | + // XQ4: variable reference as key selector (?$var) + DOLLAR! varName=v:varName + { + #lookup = #(#[LOOKUP, "?"], #[VARIABLE_REF, varName]); + #lookup.copyLexInfo(#q); + } + | + // XQ4: context item as key selector (?.) + dot:SELF + { + #lookup = #(#[LOOKUP, "?"], #dot); + #lookup.copyLexInfo(#q); + } + | + // XQ4: QName literal as key selector (?#name) + qnl:qnameLiteral + { + #lookup = #(#[LOOKUP, "?"], #qnl); + #lookup.copyLexInfo(#q); + } + | STAR { #lookup = #(#[LOOKUP, "?*"]); @@ -1423,9 +2063,18 @@ primaryExpr throws XPathException | ( "map" LCURLY ) => mapConstructor | + ( LCURLY RCURLY ) => bareMapConstructor + | + ( LCURLY exprSingle COLON ) => bareMapConstructor + | directConstructor | - ( MOD | "function" LPAREN | eqName HASH ) => functionItemExpr + ( { xq4Enabled }? ( "fn" | "function" ) LCURLY ) => focusFunctionExpr + | + // XQ4: QName literal (#local, #prefix:local, #Q{uri}local) + ( { xq4Enabled }? HASH ) => qnameLiteral + | + ( MOD | ( "fn" | "function" ) LPAREN | eqName HASH ) => functionItemExpr | ( eqName LPAREN ) => functionCall | @@ -1433,6 +2082,8 @@ primaryExpr throws XPathException | ( STRING_CONSTRUCTOR_START ) => stringConstructor | + ( { xq4Enabled }? 
STRING_TEMPLATE_START ) => stringTemplate + | contextItemExpr | parenthesizedExpr @@ -1459,10 +2110,32 @@ stringConstructorContent throws XPathException stringConstructorInterpolation throws XPathException : STRING_CONSTRUCTOR_INTERPOLATION_START^ - { lexer.inStringConstructor = false; } + { lexer.inStringConstructor = false; lexer.stringConstructorInterpolationDepth++; } ( expr )? STRING_CONSTRUCTOR_INTERPOLATION_END! - { lexer.inStringConstructor = true; } + { lexer.stringConstructorInterpolationDepth--; lexer.inStringConstructor = true; } + ; + +stringTemplate throws XPathException +: + st:STRING_TEMPLATE_START! + { lexer.inStringTemplate = true; } + ( STRING_TEMPLATE_CONTENT | stringTemplateInterpolation )* + STRING_TEMPLATE_END! + { lexer.inStringTemplate = false; } + { + #stringTemplate = #(#[STRING_TEMPLATE, null], #stringTemplate); + #stringTemplate.copyLexInfo(#st); + } + ; + +stringTemplateInterpolation throws XPathException +: + lc:LCURLY! + { lexer.inStringTemplate = false; lexer.stringTemplateDepth++; } + ( expr )? + RCURLY! + { lexer.stringTemplateDepth--; lexer.inStringTemplate = true; } ; mapConstructor throws XPathException @@ -1474,6 +2147,15 @@ mapConstructor throws XPathException } ; +bareMapConstructor throws XPathException +: + lc:LCURLY! ( mapAssignment ( COMMA! mapAssignment )* )? RCURLY! + { + #bareMapConstructor = #(#[MAP, "map"], #bareMapConstructor); + #bareMapConstructor.copyLexInfo(#lc); + } + ; + mapAssignment throws XPathException : (exprSingle COLON! EQ!) => exprSingle COLON^ eq:EQ^ exprSingle @@ -1482,7 +2164,15 @@ mapAssignment throws XPathException "The ':=' notation is no longer accepted in map expressions: use ':' instead."); } | - exprSingle COLON^ exprSingle + // === XQuery 4.0 Map Comprehensions (PR2094) === + // MapConstructorEntry ::= ExprSingle (":" ExprSingle)? 
+ // When ":" is present, it's a key:value pair; otherwise a merge entry (must evaluate to a map) + (exprSingle COLON) => exprSingle COLON^ exprSingle + | + exprSingle + { + #mapAssignment = #(#[MAP_MERGE, "merge"], #mapAssignment); + } ; arrayConstructor throws XPathException @@ -1525,6 +2215,16 @@ literal STRING_LITERAL^ | numericLiteral ; +qnameLiteral throws XPathException +{ String name = null; } +: + h:HASH! name=eqName + { + #qnameLiteral = #(#[QNAME_LITERAL, name]); + #qnameLiteral.copyLexInfo(#h); + } + ; + numericLiteral : DOUBLE_LITERAL^ | DECIMAL_LITERAL^ | INTEGER_LITERAL^ @@ -1539,7 +2239,7 @@ parenthesizedExpr throws XPathException functionItemExpr throws XPathException : - ( MOD | "function" ) => inlineFunctionExpr + ( MOD | "function" | "fn" ) => inlineOrFocusFunctionExpr | namedFunctionRef ; @@ -1553,24 +2253,36 @@ namedFunctionRef throws XPathException } ; -inlineFunctionExpr throws XPathException +inlineOrFocusFunctionExpr throws XPathException : - ann:annotations! "function"! lp:LPAREN! ( paramList )? - RPAREN! ( returnType )? - functionBody + ann:annotations! ( "function"! | "fn"! ) + ( + (LPAREN) => lp:LPAREN! ( paramList )? + RPAREN! ( returnType )? + functionBody + { + #inlineOrFocusFunctionExpr = #(#[INLINE_FUNCTION_DECL, null], #ann, #inlineOrFocusFunctionExpr); + #inlineOrFocusFunctionExpr.copyLexInfo(#lp); + } + | + lc:LCURLY! ( expr )? RCURLY! + { + #inlineOrFocusFunctionExpr = #(#[FOCUS_FUNCTION, null], #inlineOrFocusFunctionExpr); + #inlineOrFocusFunctionExpr.copyLexInfo(#lc); + } + ) + exception catch [RecognitionException e] { - #inlineFunctionExpr = #(#[INLINE_FUNCTION_DECL, null], null, #inlineFunctionExpr); - #inlineFunctionExpr.copyLexInfo(#lp); + throw new XPathException(e.getLine(), e.getColumn(), ErrorCodes.XPST0003, "Syntax error within inline function: " + e.getMessage()); } - exception catch [RecognitionException e] + ; + +focusFunctionExpr throws XPathException +: + ( "fn"! | "function"! ) lc:LCURLY! ( expr )? RCURLY! 
{ - if (#lp == null) { - throw new XPathException(e.getLine(), e.getColumn(), ErrorCodes.XPST0003, "Syntax error within inline function: " + e.getMessage()); - } else { - #lp.setLine(e.getLine()); - #lp.setColumn(e.getColumn()); - throw new XPathException(#lp, ErrorCodes.XPST0003, "Syntax error within user defined function: " + e.getMessage()); - } + #focusFunctionExpr = #(#[FOCUS_FUNCTION, null], #focusFunctionExpr); + #focusFunctionExpr.copyLexInfo(#lc); } ; @@ -1595,8 +2307,34 @@ argumentList throws XPathException argument throws XPathException : - (QUESTION! ( NCNAME | INTEGER_LITERAL | LPAREN | STAR )) => lookup + (QUESTION ( ncnameOrKeyword | INTEGER_LITERAL | DECIMAL_LITERAL | DOUBLE_LITERAL | STRING_LITERAL | LPAREN | DOLLAR | SELF | HASH | STAR )) => unaryLookup | argumentPlaceholder + | ( { xq4Enabled }? ncnameOrKeyword COLON ( EQ | ncnameOrKeyword COLON EQ ) ) => keywordArgument + | exprSingle + ; + +// XQ4: keyword arguments - name := value, or prefix:name := value +keywordArgument throws XPathException +{ String kwName = null; String prefix = null; String local = null; } +: + // Prefixed keyword: prefix:name := value + ( ( ncnameOrKeyword COLON ncnameOrKeyword COLON EQ ) => + prefix=ncnameOrKeyword! COLON! local=ncnameOrKeyword! COLON! EQ! keywordArgumentValue + { kwName = prefix + ":" + local; } + | + // Simple keyword: name := value + kwName=ncnameOrKeyword! COLON! EQ! keywordArgumentValue + ) + { + #keywordArgument = #(#[KEYWORD_ARG, kwName], #keywordArgument); + } + ; + +// XQ4: keyword argument value can be an expression or argument placeholder (?) +// Use lookahead to distinguish bare ? 
(placeholder) from ?key (unary lookup) +keywordArgumentValue throws XPathException +: + ( QUESTION ( RPAREN | COMMA ) ) => argumentPlaceholder | exprSingle ; @@ -1606,8 +2344,12 @@ contextItemExpr : SELF ; kindTest : - textTest | anyKindTest | elementTest | attributeTest | - commentTest | namespaceNodeTest | piTest | documentTest + textTest | anyKindTest | gnodeTest | elementTest | attributeTest | + commentTest | namespaceNodeTest | piTest | documentTest | + // === XQuery 4.0 JNode Kind Tests === + jsonNodeTest | jsonObjectTest | jsonArrayTest | jsonStringTest | + jsonNumberTest | jsonBooleanTest | jsonNullTest | jsonMemberTest | + jnodeTest ; textTest @@ -1620,6 +2362,13 @@ anyKindTest "node"^ LPAREN! RPAREN! ; +// XQ4: gnode() is a synonym for node() +gnodeTest +: + "gnode"! LPAREN! RPAREN! + { #gnodeTest = #[LITERAL_node, "node"]; } + ; + elementTest : "element"^ LPAREN! @@ -1684,6 +2433,76 @@ documentTest schemaElementTest : "schema-element"^ LPAREN! eqName RPAREN! ; +// === XQuery 4.0 JNode Kind Tests === + +jsonNodeTest +: + "json-node"! LPAREN! RPAREN! + { #jsonNodeTest = #[JSON_NODE_TEST, "json-node()"]; } + ; + +jsonObjectTest +: + "object-node"! LPAREN! RPAREN! + { #jsonObjectTest = #[JSON_OBJECT_TEST, "object-node()"]; } + ; + +jsonArrayTest +: + "array-node"! LPAREN! RPAREN! + { #jsonArrayTest = #[JSON_ARRAY_TEST, "array-node()"]; } + ; + +jsonStringTest +: + "string-node"! LPAREN! RPAREN! + { #jsonStringTest = #[JSON_STRING_TEST, "string-node()"]; } + ; + +jsonNumberTest +: + "number-node"! LPAREN! RPAREN! + { #jsonNumberTest = #[JSON_NUMBER_TEST, "number-node()"]; } + ; + +jsonBooleanTest +: + "boolean-node"! LPAREN! RPAREN! + { #jsonBooleanTest = #[JSON_BOOLEAN_TEST, "boolean-node()"]; } + ; + +jsonNullTest +: + "null-node"! LPAREN! RPAREN! + { #jsonNullTest = #[JSON_NULL_TEST, "null-node()"]; } + ; + +jsonMemberTest +: + "member-node"! LPAREN! RPAREN! + { #jsonMemberTest = #[JSON_MEMBER_TEST, "member-node()"]; } + ; + +jnodeTest +: + "jnode"! 
LPAREN! + // Skip balanced parenthesized content (handles nested parens in record(), map(), etc.) + jnodeTestArgs + RPAREN! + { #jnodeTest = #[JSON_NODE_TEST, "json-node()"]; } + ; + +// Helper rule: skip balanced parenthesized content for jnode() arguments +jnodeTestArgs +: + ( options { greedy = true; } : + LPAREN jnodeTestArgs RPAREN + | ~(LPAREN | RPAREN) + )* + ; + +// === End XQuery 4.0 JNode Kind Tests === + qName returns [String name] { name= null; @@ -2062,6 +2881,397 @@ attributeEnclosedExpr throws XPathException } ; +// === Full Text (W3C XQuery and XPath Full Text 3.0) === +// Spec: https://www.w3.org/TR/xpath-full-text-30/ + +ftSelection throws XPathException +: + ftOr + ( + ( "ordered" | "window" | "distance" | "same" | "different" | "entire" | "at" ( "start" | "end" ) ) => + ftPosFilter + )* + { #ftSelection = #(#[FT_SELECTION, "FTSelection"], #ftSelection); } + ; + +ftOr throws XPathException +{ boolean hasOr = false; } +: + ftAnd ( "ftor"! ftAnd { hasOr = true; } )* + { + if (hasOr) + #ftOr = #(#[FT_OR, "ftor"], #ftOr); + } + ; + +ftAnd throws XPathException +{ boolean hasAnd = false; } +: + ftMildNot ( "ftand"! ftMildNot { hasAnd = true; } )* + { + if (hasAnd) + #ftAnd = #(#[FT_AND, "ftand"], #ftAnd); + } + ; + +ftMildNot throws XPathException +{ boolean hasMildNot = false; } +: + ftUnaryNot ( ( "not" "in" ) => "not"! "in"! ftUnaryNot { hasMildNot = true; } )* + { + if (hasMildNot) + #ftMildNot = #(#[FT_MILD_NOT, "not in"], #ftMildNot); + } + ; + +ftUnaryNot throws XPathException +{ boolean negated = false; } +: + ( "ftnot"! { negated = true; } )? ftPrimaryWithOptions + { + if (negated) + #ftUnaryNot = #(#[FT_UNARY_NOT, "ftnot"], #ftUnaryNot); + } + ; + +ftPrimaryWithOptions throws XPathException +{ boolean hasOptions = false; } +: + ftPrimary + ( ( "using" ) => ftMatchOptions { hasOptions = true; } )? + ( ( "weight" LCURLY ) => ftWeight { hasOptions = true; } )? 
+ { + if (hasOptions) + #ftPrimaryWithOptions = #(#[FT_PRIMARY_WITH_OPTIONS, "FTPrimaryWithOptions"], #ftPrimaryWithOptions); + } + ; + +ftPrimary throws XPathException +: + ftWords + | + LPAREN! ftSelection RPAREN! + | + ftExtensionSelection + ; + +// XQFT 3.0 §3.4.8: FTExtensionSelection ::= Pragma+ "{" FTSelection? "}" +// Pragmas are parsed but ignored (no FT-specific pragma support). +// If all pragmas are unrecognized and the body is empty, XQST0079 applies. +ftExtensionSelection throws XPathException +{ boolean hasBody = false; } +: + ( pragma )+ LCURLY! ( ftSelection { hasBody = true; } )? RCURLY! + { + #ftExtensionSelection = #(#[FT_EXTENSION_SELECTION, "FTExtensionSelection"], #ftExtensionSelection); + } + ; + +ftWords throws XPathException +: + ftWordsValue ( ftAnyallOption )? ( ( "occurs" ) => ftTimes )? + { #ftWords = #(#[FT_WORDS, "FTWords"], #ftWords); } + ; + +ftWordsValue throws XPathException +: + STRING_LITERAL + | + LCURLY! expr RCURLY! + ; + +ftAnyallOption +: + ( "any" "word" ) => "any"! "word"! + { #ftAnyallOption = #[FT_ANYALL_OPTION, "any word"]; } + | + "any"! + { #ftAnyallOption = #[FT_ANYALL_OPTION, "any"]; } + | + ( "all" "words" ) => "all"! "words"! + { #ftAnyallOption = #[FT_ANYALL_OPTION, "all words"]; } + | + "all"! + { #ftAnyallOption = #[FT_ANYALL_OPTION, "all"]; } + | + "phrase"! + { #ftAnyallOption = #[FT_ANYALL_OPTION, "phrase"]; } + ; + +ftTimes throws XPathException +: + "occurs"! ftRange "times"! + { #ftTimes = #(#[FT_TIMES, "FTTimes"], #ftTimes); } + ; + +ftRange throws XPathException +: + ( "exactly" ) => "exactly"! additiveExpr + { #ftRange = #(#[FT_RANGE, "exactly"], #ftRange); } + | + ( "at" "least" ) => "at"! "least"! additiveExpr + { #ftRange = #(#[FT_RANGE, "at least"], #ftRange); } + | + ( "at" "most" ) => "at"! "most"! additiveExpr + { #ftRange = #(#[FT_RANGE, "at most"], #ftRange); } + | + "from"! additiveExpr "to"! 
additiveExpr + { #ftRange = #(#[FT_RANGE, "from"], #ftRange); } + ; + +ftPosFilter throws XPathException +: + ( "ordered" ) => ftOrder + | + ( "window" ) => ftWindow + | + ( "distance" ) => ftDistance + | + ( "same" ) => ftScope + | + ( "different" ) => ftScope + | + ( "at" "start" ) => ftContent + | + ( "at" "end" ) => ftContent + | + ( "entire" ) => ftContent + ; + +ftOrder +: + "ordered"! + { #ftOrder = #[FT_ORDER, "ordered"]; } + ; + +ftWindow throws XPathException +: + "window"! additiveExpr ftUnit + { #ftWindow = #(#[FT_WINDOW, "window"], #ftWindow); } + ; + +ftDistance throws XPathException +: + "distance"! ftRange ftUnit + { #ftDistance = #(#[FT_DISTANCE, "distance"], #ftDistance); } + ; + +ftScope +: + ( "same" "sentence" ) => "same"! "sentence"! + { #ftScope = #[FT_SCOPE, "same sentence"]; } + | + ( "same" "paragraph" ) => "same"! "paragraph"! + { #ftScope = #[FT_SCOPE, "same paragraph"]; } + | + ( "different" "sentence" ) => "different"! "sentence"! + { #ftScope = #[FT_SCOPE, "different sentence"]; } + | + "different"! "paragraph"! + { #ftScope = #[FT_SCOPE, "different paragraph"]; } + ; + +ftContent +: + ( "at" "start" ) => "at"! "start"! + { #ftContent = #[FT_CONTENT, "at start"]; } + | + ( "at" "end" ) => "at"! "end"! + { #ftContent = #[FT_CONTENT, "at end"]; } + | + "entire"! "content"! + { #ftContent = #[FT_CONTENT, "entire content"]; } + ; + +ftUnit +: + "words" | "sentences" | "paragraphs" + ; + +// === Full Text Option Declaration (prolog) === +// XQFT 3.0 §5.2: declare ft-option using + +ftOptionDecl throws XPathException +: + "declare"! "ft-option"! ftMatchOptions + { #ftOptionDecl = #(#[FT_OPTION_DECL, "ft-option"], #ftOptionDecl); } + ; + +// === Full Text Match Options === + +ftMatchOptions throws XPathException +: + ( "using"! 
ftMatchOption )+ + ; + +ftMatchOption throws XPathException +: + ( "case" ) => ftCaseOption + | + ( "lowercase" ) => ftCaseOption + | + ( "uppercase" ) => ftCaseOption + | + ( "diacritics" ) => ftDiacriticsOption + | + ( "stemming" ) => ftStemOption + | + ( "no" "stemming" ) => ftStemOption + | + ( "thesaurus" ) => ftThesaurusOption + | + ( "no" "thesaurus" ) => ftThesaurusOption + | + ( "stop" ) => ftStopWordOption + | + ( "no" "stop" ) => ftStopWordOption + | + ( "language" ) => ftLanguageOption + | + ( "wildcards" ) => ftWildCardOption + | + ( "no" "wildcards" ) => ftWildCardOption + | + ftExtensionOption + ; + +ftCaseOption +: + ( "case" "insensitive" ) => "case"! "insensitive"! + { #ftCaseOption = #[FT_CASE_OPTION, "insensitive"]; } + | + ( "case" "sensitive" ) => "case"! "sensitive"! + { #ftCaseOption = #[FT_CASE_OPTION, "sensitive"]; } + | + "lowercase"! + { #ftCaseOption = #[FT_CASE_OPTION, "lowercase"]; } + | + "uppercase"! + { #ftCaseOption = #[FT_CASE_OPTION, "uppercase"]; } + ; + +ftDiacriticsOption +: + ( "diacritics" "insensitive" ) => "diacritics"! "insensitive"! + { #ftDiacriticsOption = #[FT_DIACRITICS_OPTION, "insensitive"]; } + | + "diacritics"! "sensitive"! + { #ftDiacriticsOption = #[FT_DIACRITICS_OPTION, "sensitive"]; } + ; + +ftStemOption +: + "stemming"! + { #ftStemOption = #[FT_STEM_OPTION, "stemming"]; } + | + "no"! "stemming"! + { #ftStemOption = #[FT_STEM_OPTION, "no stemming"]; } + ; + +ftThesaurusOption throws XPathException +: + ( "no" "thesaurus" ) => "no"! "thesaurus"! + { #ftThesaurusOption = #[FT_THESAURUS_OPTION, "no thesaurus"]; } + | + ( "thesaurus" LPAREN ) => "thesaurus"! LPAREN! ftThesaurusIDOrDefault ( COMMA! ftThesaurusID )* RPAREN! + { #ftThesaurusOption = #(#[FT_THESAURUS_OPTION, "thesaurus list"], #ftThesaurusOption); } + | + "thesaurus"! 
ftThesaurusIDOrDefault + { #ftThesaurusOption = #(#[FT_THESAURUS_OPTION, "thesaurus"], #ftThesaurusOption); } + ; + +ftThesaurusIDOrDefault throws XPathException +: + ( "default" ) => "default"! + { #ftThesaurusIDOrDefault = #[FT_THESAURUS_ID, "default"]; } + | + ftThesaurusID + ; + +ftThesaurusID throws XPathException +: + "at"! STRING_LITERAL ( "relationship"! STRING_LITERAL )? ( ftLiteralRange "levels"! )? + { #ftThesaurusID = #(#[FT_THESAURUS_ID, "at"], #ftThesaurusID); } + ; + +ftLiteralRange +: + ( "exactly" ) => "exactly"! INTEGER_LITERAL + { #ftLiteralRange = #(#[FT_RANGE, "exactly"], #ftLiteralRange); } + | + ( "at" "least" ) => "at"! "least"! INTEGER_LITERAL + { #ftLiteralRange = #(#[FT_RANGE, "at least"], #ftLiteralRange); } + | + ( "at" "most" ) => "at"! "most"! INTEGER_LITERAL + { #ftLiteralRange = #(#[FT_RANGE, "at most"], #ftLiteralRange); } + | + "from"! INTEGER_LITERAL "to"! INTEGER_LITERAL + { #ftLiteralRange = #(#[FT_RANGE, "from"], #ftLiteralRange); } + ; + +ftStopWordOption throws XPathException +: + ( "no" "stop" ) => "no"! "stop"! "words"! + { #ftStopWordOption = #[FT_STOP_WORD_OPTION, "no stop words"]; } + | + ( "stop" "words" "default" ) => "stop"! "words"! "default"! ( ftStopWordsInclExcl )* + { #ftStopWordOption = #(#[FT_STOP_WORD_OPTION, "stop words default"], #ftStopWordOption); } + | + "stop"! "words"! ftStopWords ( ftStopWordsInclExcl )* + { #ftStopWordOption = #(#[FT_STOP_WORD_OPTION, "stop words"], #ftStopWordOption); } + ; + +ftStopWords +: + ( "at" ) => "at"! STRING_LITERAL + { #ftStopWords = #(#[FT_STOP_WORDS, "at"], #ftStopWords); } + | + LPAREN! STRING_LITERAL ( COMMA! STRING_LITERAL )* RPAREN! + { #ftStopWords = #(#[FT_STOP_WORDS, "list"], #ftStopWords); } + ; + +ftStopWordsInclExcl +: + "union"! ftStopWords + | + "except"! ftStopWords + { #ftStopWordsInclExcl = #(#[FT_STOP_WORDS_EXCEPT, "except"], #ftStopWordsInclExcl); } + ; + +ftLanguageOption +: + "language"! 
STRING_LITERAL + { #ftLanguageOption = #(#[FT_LANGUAGE_OPTION, "language"], #ftLanguageOption); } + ; + +ftWildCardOption +: + "wildcards"! + { #ftWildCardOption = #[FT_WILDCARD_OPTION, "wildcards"]; } + | + "no"! "wildcards"! + { #ftWildCardOption = #[FT_WILDCARD_OPTION, "no wildcards"]; } + ; + +ftExtensionOption throws XPathException +{ String name; } +: + "option"! name=eqName STRING_LITERAL + { #ftExtensionOption = #(#[FT_EXTENSION_OPTION, name], #ftExtensionOption); } + ; + +ftWeight throws XPathException +: + "weight"! LCURLY! expr RCURLY! + { #ftWeight = #(#[FT_WEIGHT, "weight"], #ftWeight); } + ; + +ftIgnoreOption throws XPathException +: + "without"! "content"! unionExpr + { #ftIgnoreOption = #(#[FT_IGNORE_OPTION, "without content"], #ftIgnoreOption); } + ; + /* All of the literals used in this grammar can also be * part of a valid QName. We thus have to test for each * of them below. @@ -2074,8 +3284,23 @@ ncnameOrKeyword returns [String name] name=reservedKeywords ; +/** + * Top-level dispatcher for reserved keywords usable as NCNames. + * Split into feature-area sub-rules to reduce merge conflicts on the + * next integration branch. Each feature branch owns its sub-rule; + * merging adds a single alternative here instead of interleaving 80+ lines. 
+ */ reservedKeywords returns [String name] { name= null; } +: + name=coreReservedKeywords + | + name=xq4Keywords + ; + +// ---- Core reserved keywords (XQuery 3.1 + eXist-db extensions) ---- +coreReservedKeywords returns [String name] +{ name= null; } : "element" { name = "element"; } | @@ -2117,14 +3342,23 @@ reservedKeywords returns [String name] | "ancestor-or-self" { name= "ancestor-or-self"; } | + // "-or-self" axis names (see forwardAxisSpecifier / reverseAxisSpecifier); each literal is listed exactly once because a repeated alternative can never be selected + "preceding-sibling-or-self" { name= "preceding-sibling-or-self"; } + | "preceding-sibling" { name= "preceding-sibling"; } | + "following-sibling-or-self" { name= "following-sibling-or-self"; } + | "following-sibling" { name= "following-sibling"; } | + "following-or-self" { name = "following-or-self"; } + | "following" { name = "following"; } | + "preceding-or-self" { name = "preceding-or-self"; } + | "preceding" { name = "preceding"; } | "item" { name= "item"; } | "empty" { name= "empty"; } @@ -2137,8 +3378,8 @@ reservedKeywords returns [String name] | "namespace-node" { name= "namespace-node"; } | - "namespace" { name= "namespace"; } - | + "namespace" { name= "namespace"; } + | "if" { name= "if"; } | "then" { name= "then"; } @@ -2177,8 +3418,8 @@ reservedKeywords returns [String name] | "by" { name = "by"; } | - "group" { name = "group"; } - | + "group" { name = "group"; } + | "some" { name = "some"; } | "every" { name = "every"; } @@ -2229,8 +3470,12 @@ reservedKeywords returns [String name] | "base-uri" { name = "base-uri"; } | + // Legacy update keyword (DEPRECATED - only "update" is legacy-only; + // the others below are shared with W3C XQUF 3.0). + // To remove: delete "update" and keep the rest. 
"update" { name = "update"; } | + // Shared by legacy update and W3C XQUF 3.0 "replace" { name = "replace"; } | "delete" { name = "delete"; } @@ -2289,7 +3534,7 @@ reservedKeywords returns [String name] | "tumbling" { name = "tumbling"; } | - "sliding" { name = "sliding"; } + "sliding" { name = "sliding"; } | "window" { name = "window"; } | @@ -2304,6 +3549,180 @@ reservedKeywords returns [String name] "next" { name = "next"; } | "when" { name = "when"; } + | + // W3C XQuery Update Facility 3.0 keywords + "copy" { name = "copy"; } + | + "modify" { name = "modify"; } + | + "nodes" { name = "nodes"; } + | + "before" { name = "before"; } + | + "after" { name = "after"; } + | + "first" { name = "first"; } + | + "last" { name = "last"; } + | + "updating" { name = "updating"; } + | + "ascending" { name = "ascending"; } + | + "descending" { name = "descending"; } + | + "greatest" { name = "greatest"; } + | + "least" { name = "least"; } + | + "satisfies" { name = "satisfies"; } + | + "schema-attribute" { name = "schema-attribute"; } + | + "revalidation" { name = "revalidation"; } + | + "skip" { name = "skip"; } + | + "strict" { name = "strict"; } + | + "lax" { name = "lax"; } + | + "castable" { name = "castable"; } + | + "idiv" { name = "idiv"; } + | + "processing-instruction" { name = "processing-instruction"; } + | + // Full Text keywords + "contains" { name = "contains"; } + | + "score" { name = "score"; } + | + "content" { name = "content"; } + | + "ftor" { name = "ftor"; } + | + "ftand" { name = "ftand"; } + | + "ftnot" { name = "ftnot"; } + | + "stemming" { name = "stemming"; } + | + "thesaurus" { name = "thesaurus"; } + | + "diacritics" { name = "diacritics"; } + | + "sensitive" { name = "sensitive"; } + | + "insensitive" { name = "insensitive"; } + | + "language" { name = "language"; } + | + "wildcards" { name = "wildcards"; } + | + "lowercase" { name = "lowercase"; } + | + "uppercase" { name = "uppercase"; } + | + "distance" { name = "distance"; } + | + "entire" { 
name = "entire"; } + | + "words" { name = "words"; } + | + "sentences" { name = "sentences"; } + | + "paragraphs" { name = "paragraphs"; } + | + "sentence" { name = "sentence"; } + | + "paragraph" { name = "paragraph"; } + | + "occurs" { name = "occurs"; } + | + "times" { name = "times"; } + | + "weight" { name = "weight"; } + | + "without" { name = "without"; } + | + "same" { name = "same"; } + | + "different" { name = "different"; } + | + "relationship" { name = "relationship"; } + | + "levels" { name = "levels"; } + | + "stop" { name = "stop"; } + | + "most" { name = "most"; } + | + "exactly" { name = "exactly"; } + | + "no" { name = "no"; } + | + "not" { name = "not"; } + | + "all" { name = "all"; } + | + "any" { name = "any"; } + | + "word" { name = "word"; } + | + "phrase" { name = "phrase"; } + | + "using" { name = "using"; } + | + "from" { name = "from"; } + | + "allowing" { name = "allowing"; } + | + // Decimal format property keywords + "decimal-format" { name = "decimal-format"; } + | + "decimal-separator" { name = "decimal-separator"; } + | + "grouping-separator" { name = "grouping-separator"; } + | + "infinity" { name = "infinity"; } + | + "minus-sign" { name = "minus-sign"; } + | + "NaN" { name = "NaN"; } + | + "percent" { name = "percent"; } + | + "per-mille" { name = "per-mille"; } + | + "zero-digit" { name = "zero-digit"; } + | + "digit" { name = "digit"; } + | + "pattern-separator" { name = "pattern-separator"; } + | + "exponent-separator" { name = "exponent-separator"; } + ; + +// ---- XQuery 4.0 keywords (feature/xquery-4.0-parser) ---- +xq4Keywords returns [String name] +{ name= null; } +: + "fn" { name = "fn"; } + | + "member" { name = "member"; } + | + "otherwise" { name = "otherwise"; } + | + "key" { name = "key"; } + | + "while" { name = "while"; } + | + "finally" { name = "finally"; } + | + "record" { name = "record"; } + | + "gnode" { name = "gnode"; } ; @@ -2324,6 +3743,9 @@ options { protected boolean wsExplicit= false; protected 
boolean parseStringLiterals= true; protected boolean inStringConstructor = false; + protected boolean inStringTemplate = false; + protected int stringTemplateDepth = 0; + protected int stringConstructorInterpolationDepth = 0; protected boolean inElementContent= false; protected boolean inAttributeContent= false; protected boolean inFunctionBody= false; @@ -2352,11 +3774,35 @@ options { newline(); } } + + /** + * Disambiguate (# as pragma vs ( + #QName literal. + * Scans past (# and the QName. Returns true (pragma) if the QName + * is followed by whitespace or #). Returns false (QName literal) + * if followed by , or ). + */ + private boolean isPragmaContext() throws CharStreamException { + // LA(1)='(' LA(2)='#' -- start scanning from LA(3) + int i = 3; + // Skip the QName (letters, digits, -, ., _, :) + while (Character.isLetterOrDigit(LA(i)) || LA(i) == '-' || LA(i) == '.' || LA(i) == '_' || LA(i) == ':') { + i++; + } + char afterQName = LA(i); + // If followed by , or ) it's a QName literal argument + if (afterQName == ',' || afterQName == ')') { + return false; + } + // Otherwise it's a pragma (whitespace, #), or other pragma content) + return true; + } } protected SLASH options { paraphrase="single slash '/'"; }: '/' ; protected DSLASH options { paraphrase="double slash '//'"; }: '/' '/' ; protected BANG : '!' ; +protected DOUBLE_BANG options { paraphrase="double bang '!!'"; }: '!' '!' ; +protected DOUBLE_QUESTION options { paraphrase="double question '??'"; }: '?' '?' ; protected MOD : '%' ; protected COLON : ':' ; protected COMMA : ',' ; @@ -2374,7 +3820,10 @@ protected SELF options { paraphrase="."; }: '.' ; protected PARENT options { paraphrase=".."; }: ".." ; protected UNION options { paraphrase="union"; }: '|' ; protected CONCAT options { paraphrase="||"; }: '|' '|'; +protected METHOD_CALL_OP options { paraphrase="method call operator"; }: '=' '?' '>'; +protected MAPPING_ARROW_OP options { paraphrase="mapping arrow operator"; }: '=' '!' 
'>'; protected ARROW_OP options { paraphrase="arrow operator"; }: '=' '>'; +protected PIPELINE_OP options { paraphrase="pipeline operator"; }: '-' '>'; protected AT options { paraphrase="@ char"; }: '@' ; protected DOLLAR options { paraphrase="dollar sign '$'"; }: '$' ; protected EQ options { paraphrase="="; }: '=' ; @@ -2408,12 +3857,17 @@ protected LETTER protected DIGITS : - ( DIGIT )+ + ( DIGIT )+ ( '_' ( DIGIT )+ )* ; protected HEX_DIGITS : - ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ + ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ ( '_' ( '0'..'9' | 'a'..'f' | 'A'..'F' )+ )* + ; + +protected BINARY_DIGITS +: + ( '0' | '1' )+ ( '_' ( '0' | '1' )+ )* ; protected NCNAME @@ -2470,16 +3924,26 @@ protected INTEGER_LITERAL { !(inElementContent || inAttributeContent) }? DIGITS ; +protected HEX_INTEGER_LITERAL +: + { !(inElementContent || inAttributeContent) }? '0' ('x' | 'X') HEX_DIGITS + ; + +protected BINARY_INTEGER_LITERAL +: + { !(inElementContent || inAttributeContent) }? '0' ('b' | 'B') BINARY_DIGITS + ; + protected DOUBLE_LITERAL : { !(inElementContent || inAttributeContent) }? - ( ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGIT )* )? ) ) ( 'e' | 'E' ) ( '+' | '-' )? DIGITS + ( ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGITS )? )? ) ) ( 'e' | 'E' ) ( '+' | '-' )? DIGITS ; protected DECIMAL_LITERAL : { !(inElementContent || inAttributeContent) }? - ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGIT )* )? ) + ( '.' DIGITS ) | ( DIGITS ( '.' ( DIGITS )? )? 
) ; protected PREDEFINED_ENTITY_REF @@ -2520,7 +3984,6 @@ options { : ( ( '\n' ) => '\n' { newline(); } | - ( '&' ) => ( PREDEFINED_ENTITY_REF | CHAR_REF ) | ( ( ']' '`' ) ~ ( '`' ) ) => ( ']' '`' ) | ( ']' ~ ( '`' ) ) => ']' | ( '`' ~ ( '{') ) => '`' | @@ -2528,6 +3991,21 @@ options { )+ ; +protected STRING_TEMPLATE_START options { paraphrase="start of string template"; }: '`'; +protected STRING_TEMPLATE_END options { paraphrase="end of string template"; }: '`'; + +protected STRING_TEMPLATE_CONTENT +options { + testLiterals = false; + paraphrase = "string template content"; +} +: + ( + '\n' { newline(); } | + ~ ( '\n' | '{' | '}' | '`') + )+ + ; + protected BRACED_URI_LITERAL options { paraphrase="braced uri literal"; @@ -2641,6 +4119,46 @@ options { testLiterals = false; } : + { inStringTemplate }? + ( '`' '`' ) => '`' '`' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + ( '{' '{' ) => '{' '{' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + ( '}' '}' ) => '}' '}' { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { inStringTemplate }? + STRING_TEMPLATE_END { + $setType(STRING_TEMPLATE_END); + } + | + { inStringTemplate }? + LCURLY { + $setType(LCURLY); + } + | + { inStringTemplate }? + STRING_TEMPLATE_CONTENT { + $setType(STRING_TEMPLATE_CONTENT); + } + | + { !inStringConstructor && !inStringTemplate }? + ( '`' '`' '[' ) => STRING_CONSTRUCTOR_START { + $setType(STRING_CONSTRUCTOR_START); + } + | + { !inStringConstructor && !inStringTemplate }? + STRING_TEMPLATE_START { + $setType(STRING_TEMPLATE_START); + } + | { !inStringConstructor }? STRING_CONSTRUCTOR_START { $setType(STRING_CONSTRUCTOR_START); @@ -2656,7 +4174,7 @@ options { $setType(STRING_CONSTRUCTOR_INTERPOLATION_START); } | - { !inStringConstructor }? + { !inStringConstructor && stringTemplateDepth == 0 && stringConstructorInterpolationDepth > 0 }? 
STRING_CONSTRUCTOR_INTERPOLATION_END { $setType(STRING_CONSTRUCTOR_INTERPOLATION_END); } @@ -2777,7 +4295,7 @@ options { ( NAME_START_CHAR ) => ncname:NCNAME { $setType(ncname.getType()); } | - { parseStringLiterals && !inElementContent && !inStringConstructor }? + { parseStringLiterals && !inElementContent && !inStringConstructor && !inStringTemplate }? STRING_LITERAL { $setType(STRING_LITERAL); } | BRACED_URI_LITERAL { $setType(BRACED_URI_LITERAL); } @@ -2801,7 +4319,15 @@ options { ( '.' ) => SELF { $setType(SELF); } | - ( INTEGER_LITERAL ( '.' ( INTEGER_LITERAL )? )? ( 'e' | 'E' ) ) + // XQ4: hex integer literals (0xFF, 0xCAFE_BABE) + ( '0' ('x' | 'X') ) + => HEX_INTEGER_LITERAL { $setType(INTEGER_LITERAL); } + | + // XQ4: binary integer literals (0b1010, 0b1111_0000) + ( '0' ('b' | 'B') ) + => BINARY_INTEGER_LITERAL { $setType(INTEGER_LITERAL); } + | + ( INTEGER_LITERAL ( '.' ( DIGITS )? )? ( 'e' | 'E' ) ) => DOUBLE_LITERAL { $setType(DOUBLE_LITERAL); } | @@ -2816,6 +4342,8 @@ options { { !(inAttributeContent || inElementContent) }? 
DSLASH { $setType(DSLASH); } | + ( DOUBLE_BANG ) => DOUBLE_BANG { $setType(DOUBLE_BANG); } + | BANG { $setType(BANG); } | COLON { $setType(COLON); } @@ -2828,10 +4356,17 @@ options { | STAR { $setType(STAR); } | + // XQ4: Unicode multiplication sign (U+00D7) as alternative to * + '\u00D7' { $setType(STAR); } + | + ( DOUBLE_QUESTION ) => DOUBLE_QUESTION { $setType(DOUBLE_QUESTION); } + | QUESTION { $setType(QUESTION); } | PLUS { $setType(PLUS); } | + ( PIPELINE_OP ) => PIPELINE_OP { $setType(PIPELINE_OP); } + | MINUS { $setType(MINUS); } | LPPAREN { $setType(LPPAREN); } @@ -2846,6 +4381,10 @@ options { | DOLLAR { $setType(DOLLAR); } | + ( METHOD_CALL_OP ) => METHOD_CALL_OP { $setType(METHOD_CALL_OP); } + | + ( MAPPING_ARROW_OP ) => MAPPING_ARROW_OP { $setType(MAPPING_ARROW_OP); } + | ARROW_OP { $setType(ARROW_OP); } | EQ { $setType(EQ); } @@ -2863,6 +4402,7 @@ options { | XML_CDATA_END { $setType(XML_CDATA_END); } | + { LA(1) == '(' && LA(2) == '#' && isPragmaContext() }? PRAGMA_START { $setType(PRAGMA_START); diff --git a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g index 3109d357158..5371c6d83b7 100644 --- a/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g +++ b/exist-core/src/main/antlr/org/exist/xquery/parser/XQueryTree.g @@ -53,9 +53,11 @@ header { import org.exist.xquery.value.*; import org.exist.xquery.functions.fn.*; import org.exist.xquery.update.*; + import org.exist.xquery.xquf.*; import org.exist.storage.ElementValue; import org.exist.xquery.functions.map.MapExpr; import org.exist.xquery.functions.array.ArrayConstructor; + import org.exist.xquery.ft.*; import static org.apache.commons.lang3.ArrayUtils.isNotEmpty; } @@ -131,6 +133,8 @@ options { QName varName; SequenceType sequenceType= null; QName posVar = null; + QName scoreVar = null; + boolean isScoreBinding = false; Expression inputSequence; Expression action; FLWORClause.ClauseType type = 
FLWORClause.ClauseType.FOR; @@ -139,6 +143,11 @@ options { List windowConditions = null; WindowExpr.WindowType windowType = null; boolean allowEmpty = false; + QName valueVarName = null; + SequenceType valueSequenceType = null; + // XQ4 destructuring + List destructureVarNames = null; + List destructureVarTypes = null; } /** @@ -148,7 +157,7 @@ options { String ns = qname.getNamespaceURI(); if (ns.equals(Namespaces.XPATH_FUNCTIONS_NS)) { String ln = qname.getLocalPart(); - return ("private".equals(ln) || "public".equals(ln)); + return ("private".equals(ln) || "public".equals(ln) || "updating".equals(ln)); } else { return !(ns.equals(Namespaces.XML_NS) || ns.equals(Namespaces.SCHEMA_NS) @@ -158,6 +167,61 @@ options { } } + /** + * Check for duplicate or conflicting %public/%private annotations. + * @param annots the parsed annotation list + * @param errorCode XQST0106 for functions, XQST0116 for variables + * @param declType "function" or "variable" for error messages + * @param ast the AST node for error location reporting + */ + /** The XQuery annotation namespace (http://www.w3.org/2012/xquery) */ + private static final String XQUERY_ANNOTATION_NS = "http://www.w3.org/2012/xquery"; + + /** Check if a QName refers to a %public or %private visibility annotation */ + private static boolean isVisibilityAnnotation(QName qn) { + String ns = qn.getNamespaceURI(); + String ln = qn.getLocalPart(); + return ("public".equals(ln) || "private".equals(ln)) + && (Namespaces.XPATH_FUNCTIONS_NS.equals(ns) || XQUERY_ANNOTATION_NS.equals(ns)); + } + + private static void checkVisibilityAnnotations(List annots, ErrorCodes.ErrorCode errorCode, String declType, XQueryAST ast) + throws XPathException { + int publicCount = 0; + int privateCount = 0; + for (int i = 0; i < annots.size(); i++) { + List la = (List) annots.get(i); + QName qn = (QName) la.get(0); + if (isVisibilityAnnotation(qn)) { + if ("public".equals(qn.getLocalPart())) { + publicCount++; + } else if 
("private".equals(qn.getLocalPart())) { + privateCount++; + } + } + } + if (publicCount + privateCount > 1) { + throw new XPathException(ast, errorCode, + "A " + declType + " declaration must not contain more than one " + + "%public or %private annotation, and must not contain both."); + } + } + + /** + * Check if any annotation in the list is %private. + */ + private static boolean hasPrivateAnnotation(List annots) { + for (int i = 0; i < annots.size(); i++) { + List la = (List) annots.get(i); + QName qn = (QName) la.get(0); + if (isVisibilityAnnotation(qn) + && "private".equals(qn.getLocalPart())) { + return true; + } + } + return false; + } + private static void processAnnotations(List annots, FunctionSignature signature) { Annotation[] anns = new Annotation[annots.size()]; @@ -185,6 +249,15 @@ options { //set the Annotations on the Function Signature signature.setAnnotations(anns); + + // W3C XQuery Update Facility 3.0: %updating annotation + for (Annotation a : anns) { + if ("updating".equals(a.getName().getLocalPart()) + && Namespaces.XPATH_FUNCTIONS_NS.equals(a.getName().getNamespaceURI())) { + signature.setUpdating(true); + break; + } + } } private static void processParams(List varList, UserDefinedFunction func, FunctionSignature signature) @@ -210,6 +283,122 @@ options { return variableName; } } + + private static String dfRequireSingleChar(final AST node, final String propName, final String value) throws XPathException { + if (value.codePointCount(0, value.length()) != 1) { + throw new XPathException(node.getLine(), node.getColumn(), ErrorCodes.XQST0098, + "The value of decimal-format property '" + propName + "' must be a single character, but got: \"" + value + "\""); + } + return value; + } + + private static void dfValidateZeroDigit(final AST node, final String value) throws XPathException { + final int cp = value.codePointAt(0); + if (Character.getType(cp) != Character.DECIMAL_DIGIT_NUMBER || Character.getNumericValue(cp) != 0) { + throw new 
XPathException(node.getLine(), node.getColumn(), ErrorCodes.XQST0098, + "The value of decimal-format property 'zero-digit' must be a Unicode digit with numeric value zero, but got: \"" + value + "\""); + } + } + + private static void dfValidateDistinctPictureChars(final AST node, final DecimalFormat df) throws XPathException { + // The 8 single-character picture-string properties must all have distinct values + final int[] chars = { df.decimalSeparator, df.groupingSeparator, df.percent, df.perMille, + df.zeroDigit, df.digit, df.patternSeparator, df.exponentSeparator }; + final String[] names = { "decimal-separator", "grouping-separator", "percent", "per-mille", + "zero-digit", "digit", "pattern-separator", "exponent-separator" }; + for (int i = 0; i < chars.length; i++) { + for (int j = i + 1; j < chars.length; j++) { + if (chars[i] == chars[j]) { + throw new XPathException(node.getLine(), node.getColumn(), ErrorCodes.XQST0098, + "Decimal-format properties '" + names[i] + "' and '" + names[j] + + "' must have distinct values, but both are: '" + new String(Character.toChars(chars[i])) + "'"); + } + } + } + } + + private DecimalFormat processDecimalFormatProperties(final AST parentNode) throws XPathException { + // Start with UNNAMED defaults + int decimalSeparator = DecimalFormat.UNNAMED.decimalSeparator; + int exponentSeparator = DecimalFormat.UNNAMED.exponentSeparator; + int groupingSeparator = DecimalFormat.UNNAMED.groupingSeparator; + int percent = DecimalFormat.UNNAMED.percent; + int perMille = DecimalFormat.UNNAMED.perMille; + int zeroDigit = DecimalFormat.UNNAMED.zeroDigit; + int digit = DecimalFormat.UNNAMED.digit; + int patternSeparator = DecimalFormat.UNNAMED.patternSeparator; + String infinity = DecimalFormat.UNNAMED.infinity; + String nan = DecimalFormat.UNNAMED.NaN; + int minusSign = DecimalFormat.UNNAMED.minusSign; + + AST child = parentNode.getFirstChild(); + while (child != null) { + final String propName = child.getText(); + final AST valueNode = 
child.getFirstChild(); + if (valueNode == null) { + child = child.getNextSibling(); + continue; + } + final String value = valueNode.getText(); + + switch (propName) { + case "decimal-separator": + dfRequireSingleChar(child, propName, value); + decimalSeparator = value.codePointAt(0); + break; + case "grouping-separator": + dfRequireSingleChar(child, propName, value); + groupingSeparator = value.codePointAt(0); + break; + case "infinity": + infinity = value; + break; + case "minus-sign": + dfRequireSingleChar(child, propName, value); + minusSign = value.codePointAt(0); + break; + case "NaN": + nan = value; + break; + case "percent": + dfRequireSingleChar(child, propName, value); + percent = value.codePointAt(0); + break; + case "per-mille": + dfRequireSingleChar(child, propName, value); + perMille = value.codePointAt(0); + break; + case "zero-digit": + dfRequireSingleChar(child, propName, value); + dfValidateZeroDigit(child, value); + zeroDigit = value.codePointAt(0); + break; + case "digit": + dfRequireSingleChar(child, propName, value); + digit = value.codePointAt(0); + break; + case "pattern-separator": + dfRequireSingleChar(child, propName, value); + patternSeparator = value.codePointAt(0); + break; + case "exponent-separator": + dfRequireSingleChar(child, propName, value); + exponentSeparator = value.codePointAt(0); + break; + default: + break; + } + child = child.getNextSibling(); + } + + final DecimalFormat df = new DecimalFormat( + decimalSeparator, exponentSeparator, groupingSeparator, + percent, perMille, zeroDigit, digit, + patternSeparator, infinity, nan, minusSign + ); + dfValidateDistinctPictureChars(parentNode, df); + return df; + } } xpointer [PathExpr path] @@ -267,14 +456,24 @@ throws PermissionDeniedException, EXistException, XPathException v:VERSION_DECL { final String version = v.getText(); - if (version.equals("3.1")) { + if (version.equals("4.0")) { + if (!"true".equals(System.getProperty("exist.xquery4.enabled", "true"))) { + throw new 
XPathException(v, ErrorCodes.XPST0003, + "XQuery 4.0 is not enabled. Set system property exist.xquery4.enabled=true to enable."); + } + context.setXQueryVersion(40); + staticContext.setXQueryVersion(40); + } else if (version.equals("3.1")) { context.setXQueryVersion(31); + staticContext.setXQueryVersion(31); } else if (version.equals("3.0")) { context.setXQueryVersion(30); + staticContext.setXQueryVersion(30); } else if (version.equals("1.0")) { context.setXQueryVersion(10); + staticContext.setXQueryVersion(10); } else { - throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0 or 3.1"); + throw new XPathException(v, ErrorCodes.XQST0031, "Wrong XQuery version: require 1.0, 3.0, 3.1, or 4.0"); } } ( enc:STRING_LITERAL )? @@ -337,6 +536,8 @@ throws PermissionDeniedException, EXistException, XPathException boolean baseuri = false; boolean ordering = false; boolean construction = false; + Set declaredDecimalFormats = new HashSet(); + boolean defaultDecimalFormatDeclared = false; }: ( @@ -459,11 +660,22 @@ throws PermissionDeniedException, EXistException, XPathException } ) | + // === W3C XQuery Update Facility 3.0 - Revalidation Declaration === + #( + "revalidation" ( "strict" | "lax" | "skip" ) + { + // eXist does not support schema revalidation; declaration is accepted and ignored + } + ) + | #( DEF_NAMESPACE_DECL defu:STRING_LITERAL - { // Use setDefaultElementNamespace() + { + // Check for duplicate default element namespace first (XQST0066) + context.setDefaultElementNamespace(defu.getText(), null); + staticContext.setDefaultElementNamespace(defu.getText(), null); context.declareNamespace("", defu.getText()); - staticContext.declareNamespace("",defu.getText()); + staticContext.declareNamespace("", defu.getText()); } ) | @@ -510,8 +722,12 @@ throws PermissionDeniedException, EXistException, XPathException } declaredGlobalVars.add(qn); } - { List annots = new ArrayList(); } + { List annots = new ArrayList(); boolean varIsPrivate = 
false; } (annotations [annots] + { + checkVisibilityAnnotations(annots, ErrorCodes.XQST0116, "variable", qname); + varIsPrivate = hasPrivateAnnotation(annots); + } )? ( #( @@ -525,6 +741,7 @@ throws PermissionDeniedException, EXistException, XPathException { final VariableDeclaration decl= new VariableDeclaration(context, qn, enclosed); decl.setSequenceType(type); + decl.setPrivate(varIsPrivate); decl.setASTNode(e); path.add(decl); if(myModule != null) { @@ -554,6 +771,7 @@ throws PermissionDeniedException, EXistException, XPathException final VariableDeclaration decl = new VariableDeclaration(context, qn, defaultValue); decl.setSequenceType(type); + decl.setPrivate(varIsPrivate); decl.setASTNode(ext); if (external == null) { path.add(decl); @@ -632,6 +850,51 @@ throws PermissionDeniedException, EXistException, XPathException ) ) | + // XQFT 3.0 §5.2: declare ft-option using + #( + FT_OPTION_DECL + { + FTMatchOptions ftDefaultOpts = new FTMatchOptions(); + } + ftDefaultOpts=ftMatchOptionsExpr + { + if (ftDefaultOpts.hasConflict()) { + throw new XPathException(ErrorCodes.FTST0019, + ftDefaultOpts.getConflictDescription()); + } + context.setDefaultFTMatchOptions(ftDefaultOpts); + } + ) + | + #( + dfDecl:DECIMAL_FORMAT_DECL (.)* + { + final QName dfQName; + try { + dfQName = QName.parse(staticContext, dfDecl.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(dfDecl.getLine(), dfDecl.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix in decimal format name: " + dfDecl.getText()); + } + final String dfKey = dfQName.getNamespaceURI() + ":" + dfQName.getLocalPart(); + if (declaredDecimalFormats.contains(dfKey)) + throw new XPathException(dfDecl, ErrorCodes.XQST0097, "Duplicate decimal format declaration: " + dfDecl.getText()); + declaredDecimalFormats.add(dfKey); + final DecimalFormat df = processDecimalFormatProperties(dfDecl); + context.setStaticDecimalFormat(dfQName, df); + } + ) + | + #( + 
defDfDecl:DEF_DECIMAL_FORMAT_DECL (.)* + { + if (defaultDecimalFormatDeclared) + throw new XPathException(defDfDecl, ErrorCodes.XQST0097, "Duplicate default decimal format declaration."); + defaultDecimalFormatDeclared = true; + final DecimalFormat df = processDecimalFormatProperties(defDfDecl); + context.setDefaultStaticDecimalFormat(df); + } + ) + | functionDecl [path] | importDecl [path] @@ -828,12 +1091,21 @@ throws PermissionDeniedException, EXistException, XPathException { QName qn= null; try { - qn = QName.parse(staticContext, name.getText(), staticContext.getDefaultFunctionNamespace()); + // XQ4 (PR2200): unprefixed function declarations go into "no namespace" + // instead of the default function namespace (fn:) + if (name.getText() != null && !name.getText().contains(":") && staticContext.getXQueryVersion() >= 40) { + qn = new QName(name.getText(), ""); + } else { + qn = QName.parse(staticContext, name.getText(), staticContext.getDefaultFunctionNamespace()); + } } catch (final IllegalQNameException iqe) { throw new XPathException(name.getLine(), name.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + name.getText()); } FunctionSignature signature= new FunctionSignature(qn); signature.setDescription(name.getDoc()); + if (name instanceof XQueryFunctionAST && ((XQueryFunctionAST) name).isUpdating()) { + signature.setUpdating(true); + } UserDefinedFunction func= new UserDefinedFunction(context, signature); func.setASTNode(name); List varList= new ArrayList(3); @@ -841,6 +1113,7 @@ throws PermissionDeniedException, EXistException, XPathException { List annots = new ArrayList(); } (annotations [annots] { + checkVisibilityAnnotations(annots, ErrorCodes.XQST0106, "function", name); processAnnotations(annots, signature); } )? 
@@ -859,7 +1132,14 @@ throws PermissionDeniedException, EXistException, XPathException "as" { SequenceType type= new SequenceType(); } sequenceType [type] - { signature.setReturnType(type); } + { + signature.setReturnType(type); + // XUST0028: updating functions must not declare a return type + if (signature.isUpdating()) { + throw new XPathException(name.getLine(), name.getColumn(), + ErrorCodes.XUST0028, "An updating function must not declare a return type."); + } + } ) )? ( @@ -903,6 +1183,7 @@ throws PermissionDeniedException, EXistException, XPathException ( annotations [annots] { + checkVisibilityAnnotations(annots, ErrorCodes.XQST0106, "function", name); processAnnotations(annots, signature); } )? @@ -930,11 +1211,46 @@ throws PermissionDeniedException, EXistException, XPathException ) ; +focusFunctionDecl [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ step = null; }: + #( + ff:FOCUS_FUNCTION + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(ff, ErrorCodes.XPST0003, + "Focus functions require xquery version \"4.0\""); + } + PathExpr body = new PathExpr(context); + body.setASTNode(focusFunctionDecl_AST_in); + + // Create a function with a single implicit parameter + FunctionSignature signature = new FunctionSignature(InlineFunction.INLINE_FUNCTION_QNAME); + UserDefinedFunction func = new UserDefinedFunction(context, signature); + func.setASTNode(ff); + + // Add the implicit focus parameter: $(.focus) as item()* + FunctionParameterSequenceType focusParam = new FunctionParameterSequenceType( + FocusFunction.FOCUS_PARAM_NAME, Type.ITEM, Cardinality.ZERO_OR_MORE, + "implicit focus parameter"); + signature.setArgumentTypes(new SequenceType[] { focusParam }); + signature.setReturnType(new SequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE)); + func.addVariable(FocusFunction.FOCUS_PARAM_NAME); + } + ( expr [body] )? 
+ { + func.setFunctionBody(body); + step = new FocusFunction(context, func); + } + ) + ; + /** * Parse params in function declaration. */ paramList [List vars] -throws XPathException +throws PermissionDeniedException, EXistException, XPathException : param [vars] ( param [vars] )* ; @@ -943,7 +1259,7 @@ throws XPathException * Single function param. */ param [List vars] -throws XPathException +throws PermissionDeniedException, EXistException, XPathException : #( varname:VARIABLE_BINDING @@ -959,6 +1275,22 @@ throws XPathException sequenceType [var] ) )? + ( + #( + pd:PARAM_DEFAULT + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(pd, ErrorCodes.XPST0003, + "Default parameter values require xquery version \"4.0\""); + } + PathExpr defaultExpr = new PathExpr(context); + } + expr [defaultExpr] + { + var.setDefaultValue(defaultExpr.simplify()); + } + ) + )? ) ; @@ -1082,8 +1414,6 @@ throws XPathException STAR | ( - // TODO: parameter types are collected, but not used! - // Change SequenceType accordingly. { List paramTypes = new ArrayList(5); } ( { SequenceType paramType = new SequenceType(); } @@ -1092,6 +1422,10 @@ throws XPathException )* { SequenceType returnType = new SequenceType(); } "as" sequenceType [returnType] + { + type.setFunctionParamTypes(paramTypes.toArray(new SequenceType[0])); + type.setFunctionReturnType(returnType); + } ) ) ) @@ -1102,14 +1436,15 @@ throws XPathException STAR | ( - // TODO: parameter types are collected, but not used! - // Change SequenceType accordingly. { List paramTypes = new ArrayList(5); } ( { SequenceType paramType = new SequenceType(); } sequenceType [paramType] { paramTypes.add(paramType); } )* + { + type.setFunctionParamTypes(paramTypes.toArray(new SequenceType[0])); + } ) ) ) @@ -1120,18 +1455,42 @@ throws XPathException STAR | ( - // TODO: parameter types are collected, but not used! - // Change SequenceType accordingly. 
{ List paramTypes = new ArrayList(5); } ( { SequenceType paramType = new SequenceType(); } sequenceType [paramType] { paramTypes.add(paramType); } )* + { + type.setFunctionParamTypes(paramTypes.toArray(new SequenceType[0])); + } ) ) ) | + #( + RECORD_TEST { type.setPrimaryType(Type.RECORD); } + ( + #( + rf:RECORD_FIELD + { + final String fieldName = rf.getText(); + boolean optional = false; + SequenceType fieldType = null; + } + ( QUESTION { optional = true; } )? + ( + { fieldType = new SequenceType(); } + sequenceType [fieldType] + )? + { + type.addRecordField(new SequenceType.RecordField( + fieldName, optional, fieldType)); + } + ) + )* + ) + | #( "item" { type.setPrimaryType(Type.ITEM); } ) @@ -1262,6 +1621,111 @@ throws XPathException #( "schema-element" EQNAME ) )? ) + // === XQuery 4.0 JNode Kind Tests (version-gated) === + | + #( jnt1:JSON_NODE_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt1, ErrorCodes.XPST0003, "json-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_NODE); + } + ) + | + #( jnt2:JSON_OBJECT_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt2, ErrorCodes.XPST0003, "object-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_OBJECT); + } + ) + | + #( jnt3:JSON_ARRAY_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt3, ErrorCodes.XPST0003, "array-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_ARRAY); + } + ) + | + #( jnt4:JSON_STRING_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt4, ErrorCodes.XPST0003, "string-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_STRING); + } + ) + | + #( jnt5:JSON_NUMBER_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt5, ErrorCodes.XPST0003, "number-node() requires xquery version \"4.0\""); + } + 
type.setPrimaryType(Type.JSON_NUMBER); + } + ) + | + #( jnt6:JSON_BOOLEAN_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt6, ErrorCodes.XPST0003, "boolean-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_BOOLEAN); + } + ) + | + #( jnt7:JSON_NULL_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt7, ErrorCodes.XPST0003, "null-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_NULL); + } + ) + | + #( jnt8:JSON_MEMBER_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jnt8, ErrorCodes.XPST0003, "member-node() requires xquery version \"4.0\""); + } + type.setPrimaryType(Type.JSON_MEMBER); + } + ) + // === End XQuery 4.0 JNode Kind Tests === + | + #( + CHOICE_TYPE + { + List alternatives = new ArrayList(); + } + ( + { + SequenceType altType = new SequenceType(); + } + sequenceType [altType] + { + alternatives.add(altType); + } + )+ + { + for (final SequenceType alt : alternatives) { + type.addChoiceAlternative(alt); + } + type.setPrimaryType(Type.ITEM); + } + ) + | + #( + en:ENUM_TYPE + { + String enumText = en.getText(); + String[] enumVals = enumText.split(",", -1); + type.setEnumValues(enumVals); + } + ) ) ( STAR { type.setCardinality(Cardinality.ZERO_OR_MORE); } @@ -1293,6 +1757,14 @@ throws PermissionDeniedException, EXistException, XPathException | step=arrowOp [path] | + step=mappingArrowOp [path] + | + step=pipelineOp [path] + | + step=methodCallOp [path] // XQ4 method call operator =?> + | + step=otherwiseExpr [path] + | step=typeCastExpr [path] | // sequence constructor: @@ -1363,1053 +1835,1616 @@ throws PermissionDeniedException, EXistException, XPathException } ) | - // conditional: + step=exprFlowControl [path] + + | + // treat as: #( - astIf:"if" + "treat" { - PathExpr testExpr= new PathExpr(context); - PathExpr thenExpr= new PathExpr(context); - PathExpr elseExpr= new PathExpr(context); + 
PathExpr expr = new PathExpr(context); + expr.setASTNode(expr_AST_in); + SequenceType type= new SequenceType(); } - step=expr [testExpr] - step=astThen:expr [thenExpr] - step=astElse:expr [elseExpr] + step=expr [expr] + sequenceType [type] { - thenExpr.setASTNode(astThen); - elseExpr.setASTNode(astElse); - ConditionalExpression cond = - new ConditionalExpression(context, testExpr, thenExpr, - new DebuggableExpression(elseExpr)); - cond.setASTNode(astIf); - path.add(cond); - step = cond; + step = new TreatAsExpression(context, expr, type); + step.setASTNode(expr_AST_in); + path.add(step); } ) | - // quantified expression: some + // switch #( - "some" + switchAST:"switch" { - List clauses= new ArrayList(); - PathExpr satisfiesExpr = new PathExpr(context); - satisfiesExpr.setASTNode(expr_AST_in); + PathExpr operand = new PathExpr(context); + operand.setASTNode(expr_AST_in); + boolean booleanMode = false; } ( - #( - someVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - PathExpr inputSequence = new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } - ( - #( - "as" - { SequenceType type= new SequenceType(); } - sequenceType[type] - ) - { clause.sequenceType = type; } - )? 
- step=expr[inputSequence] - { - try { - clause.varName = QName.parse(staticContext, someVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(someVarName.getLine(), someVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + someVarName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )* - step=expr[satisfiesExpr] + SWITCH_BOOLEAN + { booleanMode = true; } + | + step=expr [operand] + ) { - Expression action = satisfiesExpr; - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.SOME); - expr.setASTNode(expr_AST_in); - expr.setVariable(clause.varName); - expr.setSequenceType(clause.sequenceType); - expr.setInputSequence(clause.inputSequence); - expr.setReturnExpression(action); - satisfiesExpr= null; - action= expr; - } - path.add(action); - step = action; - } - ) - | - // quantified expression: every - #( - "every" - { - List clauses= new ArrayList(); - PathExpr satisfiesExpr = new PathExpr(context); - satisfiesExpr.setASTNode(expr_AST_in); + SwitchExpression switchExpr = new SwitchExpression(context, operand); + switchExpr.setBooleanMode(booleanMode); + switchExpr.setASTNode(switchAST); + path.add(switchExpr); } ( - #( - everyVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - PathExpr inputSequence = new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } - ( - #( - "as" - { SequenceType type= new SequenceType(); } - sequenceType[type] - ) - { clause.sequenceType = type; } - )? 
- step=expr[inputSequence] - { - try { - clause.varName = QName.parse(staticContext, everyVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(everyVarName.getLine(), everyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + everyVarName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )* - step=expr[satisfiesExpr] - { - Expression action = satisfiesExpr; - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.EVERY); - expr.setASTNode(expr_AST_in); - expr.setVariable(clause.varName); - expr.setSequenceType(clause.sequenceType); - expr.setInputSequence(clause.inputSequence); - expr.setReturnExpression(action); - satisfiesExpr= null; - action= expr; + { + List caseOperands = new ArrayList(2); + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); } - path.add(action); - step = action; - } + (( + { + PathExpr caseOperand = new PathExpr(context); + caseOperand.setASTNode(expr_AST_in); + } + "case" + expr [caseOperand] + { caseOperands.add(caseOperand); } + )+ + #( + "return" + step= expr [returnExpr] + { switchExpr.addCase(caseOperands, returnExpr); } + )) + )+ + ( + "default" + { + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + } + step=expr [returnExpr] + { + switchExpr.setDefault(returnExpr); + } + ) + { step = switchExpr; } ) | - //try/catch expression + // typeswitch #( - astTry:"try" + "typeswitch" { - PathExpr tryTargetExpr = new PathExpr(context); - tryTargetExpr.setASTNode(expr_AST_in); + PathExpr operand = new PathExpr(context); + operand.setASTNode(expr_AST_in); } - step=expr [tryTargetExpr] + step=expr [operand] { - TryCatchExpression cond = new TryCatchExpression(context, tryTargetExpr); - cond.setASTNode(astTry); - path.add(cond); + 
TypeswitchExpression tswitch = new TypeswitchExpression(context, operand); + tswitch.setASTNode(expr_AST_in); + path.add(tswitch); } ( { - final List catchErrorList = new ArrayList<>(2); - final List catchVars = new ArrayList<>(3); - final PathExpr catchExpr = new PathExpr(context); - catchExpr.setASTNode(expr_AST_in); + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + QName qn = null; + List types = new ArrayList(2); + SequenceType type = new SequenceType(); } #( - astCatch:"catch" - (catchErrorList [catchErrorList]) + "case" ( - { - QName qncode = null; - QName qndesc = null; - QName qnval = null; - } - code:CATCH_ERROR_CODE + var:VARIABLE_BINDING { try { - qncode = QName.parse(staticContext, code.getText()); - catchVars.add(qncode); + qn = QName.parse(staticContext, var.getText()); } catch (final IllegalQNameException iqe) { - throw new XPathException(code.getLine(), code.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + code.getText()); + throw new XPathException(var.getLine(), var.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + var.getText()); } } - ( - desc:CATCH_ERROR_DESC - { - try { - qndesc = QName.parse(staticContext, desc.getText()); - catchVars.add(qndesc); - } catch (final IllegalQNameException iqe) { - throw new XPathException(desc.getLine(), desc.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + desc.getText()); - } - } - - ( - val:CATCH_ERROR_VAL - { - try { - qnval = QName.parse(staticContext, val.getText()); - catchVars.add(qnval); - } catch (final IllegalQNameException iqe) { - throw new XPathException(val.getLine(), val.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + val.getText()); - } - } - - )? - )? )? 
- step= expr [catchExpr] - { - catchExpr.setASTNode(astCatch); - cond.addCatchClause(catchErrorList, catchVars, catchExpr); - } + ( + sequenceType[type] + { + types.add(type); + type = new SequenceType(); + } + )+ + // Need return as root in following to disambiguate + // e.g. ( case a xs:integer ( * 3 3 ) ) + // which gives xs:integer* and no operator left for 3 3 ... + // Now ( case a xs:integer ( return ( + 3 3 ) ) ) /ljo + #( + "return" + step= expr [returnExpr] + { + SequenceType[] atype = new SequenceType[types.size()]; + atype = types.toArray(atype); + tswitch.addCase(atype, qn, returnExpr); + } + ) ) - )+ + )+ + ( + "default" + { + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(expr_AST_in); + QName qn = null; + } + ( + dvar:VARIABLE_BINDING + { + try { + qn = QName.parse(staticContext, dvar.getText()); + } catch (final IllegalQNameException iqe) { + throw new XPathException(dvar.getLine(), dvar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + dvar.getText()); + } + } + )? 
+ step=expr [returnExpr] + { + tswitch.setDefault(qn, returnExpr); + } + ) + { step = tswitch; } + ) + | + // logical operator: or + #( + "or" { - step = cond; + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + } + step=expr [left] + { + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); } + step=expr [right] ) + { + OpOr or= new OpOr(context); + or.addPath(left); + or.addPath(right); + path.addPath(or); + step = or; + } | - // FLWOR expressions: let and for + // logical operator: and #( - r:"return" + "and" { - List clauses= new ArrayList(); - Expression action= new PathExpr(context); - action.setASTNode(r); - PathExpr whereExpr= null; - List orderBy= null; + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); } - ( - #( - f:"for" - ( - #( - varName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - clause.ast = varName; - PathExpr inputSequence= new PathExpr(context); - inputSequence.setASTNode(expr_AST_in);inputSequence.setASTNode(expr_AST_in); - final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); - } - ( - #( - "as" - { clause.sequenceType= new SequenceType(); } - sequenceType [clause.sequenceType] - ) - )? - ( - "empty" - { clause.allowEmpty = true; } - )? - ( - posVar:POSITIONAL_VAR - { - try { - clause.posVar = distinctVariableNames.check(ErrorCodes.XQST0089, posVar, QName.parse(staticContext, posVar.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(posVar.getLine(), posVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + posVar.getText()); - } - } - )? 
- step=expr [inputSequence] - { - try { - clause.varName = distinctVariableNames.check(ErrorCodes.XQST0089, varName, QName.parse(staticContext, varName.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(varName.getLine(), varName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - )+ - ) - | + step=expr [left] + step=expr [right] + ) + { + OpAnd and= new OpAnd(context); + and.addPath(left); + and.addPath(right); + path.addPath(and); + step = and; + } + | + // union expressions: | and union + #( + UNION + { + PathExpr left= new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right= new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + Union union= new Union(context, left, right); + path.add(union); + step = union; + } + | + // intersections: + #( "intersect" + { + PathExpr left = new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right = new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + Intersect intersect = new Intersect(context, left, right); + path.add(intersect); + step = intersect; + } + | + #( "except" + { + PathExpr left = new PathExpr(context); + left.setASTNode(expr_AST_in); + + PathExpr right = new PathExpr(context); + right.setASTNode(expr_AST_in); + } + step=expr [left] + step=expr [right] + ) + { + Except intersect = new Except(context, left, right); + path.add(intersect); + step = intersect; + } + | + // absolute path expression starting with a / + #( + ABSOLUTE_SLASH + { + path.setHasSlash(); + RootNode root= new RootNode(context); + path.add(root); + } + ( step=expr [path] )? 
+ ) + | + // absolute path expression starting with // + #( + ABSOLUTE_DSLASH + { + path.setHasSlash(); + RootNode root= new RootNode(context); + path.add(root); + } + ( + step=expr [path] + { + if (step instanceof LocationStep) { + LocationStep s= (LocationStep) step; + if (s.getAxis() == Constants.ATTRIBUTE_AXIS || + (s.getTest().getType() == Type.ATTRIBUTE && s.getAxis() == Constants.CHILD_AXIS)) + // combines descendant-or-self::node()/attribute:* + s.setAxis(Constants.DESCENDANT_ATTRIBUTE_AXIS); + else if (s.getAxis() <= Constants.PRECEDING_SIBLING_AXIS) { + // Reverse axis: insert explicit descendant-or-self::node() step + LocationStep descStep = new LocationStep(context, Constants.DESCENDANT_SELF_AXIS, new TypeTest(Type.NODE)); + descStep.setAbbreviated(true); + path.replaceLastExpression(descStep); + path.add(step); + } else { + s.setAxis(Constants.DESCENDANT_SELF_AXIS); + s.setAbbreviated(true); + } + } else + step.setPrimaryAxis(Constants.DESCENDANT_SELF_AXIS); + } + )? + ) + | + // range expression: to + #( + "to" + { + PathExpr start= new PathExpr(context); + start.setASTNode(expr_AST_in); + + PathExpr end= new PathExpr(context); + end.setASTNode(expr_AST_in); + + List args= new ArrayList(2); + args.add(start); + args.add(end); + } + step=expr [start] + step=expr [end] + { + RangeExpression range= new RangeExpression(context); + range.setASTNode(expr_AST_in); + range.setArguments(args); + path.addPath(range); + step = range; + } + ) + | + step=generalComp [path] + | + step=valueComp [path] + | + step=nodeComp [path] + | + step=ftContainsExpr [path] + | + step=primaryExpr [path] + | + step=pathExpr [path] + | + step=extensionExpr [path] + | + step=numericExpr [path] + | + // Legacy update (DEPRECATED) + step=updateExpr [path] + | + // W3C XQuery Update Facility 3.0 + step=xqufInsertExpr [path] + | + step=xqufDeleteExpr [path] + | + step=xqufReplaceExpr [path] + | + step=xqufRenameExpr [path] + | + step=xqufTransformExpr [path] + ; + +/** + * Flow control 
expressions extracted from expr to avoid + * Java method size limit (64KB bytecode). + * Handles: conditional, ternary, quantified (some/every), + * try/catch/finally, FLWOR, instance of. + */ +exprFlowControl [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ step = null; } +: + // conditional: + #( + astIf:"if" + { + PathExpr testExpr= new PathExpr(context); + PathExpr thenExpr= new PathExpr(context); + PathExpr elseExpr= new PathExpr(context); + } + step=expr [testExpr] + step=astThen:expr [thenExpr] + step=astElse:expr [elseExpr] + { + thenExpr.setASTNode(astThen); + elseExpr.setASTNode(astElse); + ConditionalExpression cond = + new ConditionalExpression(context, testExpr, thenExpr, + new DebuggableExpression(elseExpr)); + cond.setASTNode(astIf); + path.add(cond); + step = cond; + } + ) + | + // ternary conditional: condition ?? then !! else + #( + astTernary:TERNARY + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(astTernary, ErrorCodes.XPST0003, + "The ternary conditional operator (?? !!) 
requires xquery version \"4.0\""); + } + PathExpr ternTestExpr = new PathExpr(context); + PathExpr ternThenExpr = new PathExpr(context); + PathExpr ternElseExpr = new PathExpr(context); + } + step=expr [ternTestExpr] + step=expr [ternThenExpr] + step=expr [ternElseExpr] + { + ConditionalExpression ternCond = + new ConditionalExpression(context, ternTestExpr, ternThenExpr, + new DebuggableExpression(ternElseExpr)); + ternCond.setASTNode(astTernary); + path.add(ternCond); + step = ternCond; + } + ) + | + // quantified expression: some + #( + "some" + { + List clauses= new ArrayList(); + PathExpr satisfiesExpr = new PathExpr(context); + satisfiesExpr.setASTNode(exprFlowControl_AST_in); + } + ( #( - l:"let" + someVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + PathExpr inputSequence = new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } ( #( - letVarName:VARIABLE_BINDING - { - ForLetClause clause= new ForLetClause(); - clause.ast = letVarName; - clause.type = FLWORClause.ClauseType.LET; - PathExpr inputSequence= new PathExpr(context); - inputSequence.setASTNode(expr_AST_in); - } - ( - #( - "as" - { clause.sequenceType= new SequenceType(); } - sequenceType [clause.sequenceType] - ) - )? 
- step=expr [inputSequence] - { - try { - clause.varName = QName.parse(staticContext, letVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(letVarName.getLine(), letVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + letVarName.getText()); - } - clause.inputSequence= inputSequence; - clauses.add(clause); - } + "as" + { SequenceType type= new SequenceType(); } + sequenceType[type] ) - )+ - ) - | - #( - wc:"window" - { - ForLetClause clause= new ForLetClause(); - clause.type = FLWORClause.ClauseType.WINDOW; - clause.windowConditions = new ArrayList(2); - final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); - } - ( - "tumbling" - { - clause.windowType = WindowExpr.WindowType.TUMBLING_WINDOW; - } - | - "sliding" - { - clause.windowType = WindowExpr.WindowType.SLIDING_WINDOW; - } + { clause.sequenceType = type; } )? - // invarBinding - ( - #( - windowWarName:VARIABLE_BINDING - { - clause.ast = windowWarName; - PathExpr inputSequence= new PathExpr(context); - } - ( - #( - "as" - { clause.sequenceType= new SequenceType(); } - sequenceType [clause.sequenceType] - ) - )? 
- step=expr [inputSequence] - { - try { - clause.varName = distinctVariableNames.check(ErrorCodes.XQST0103, windowWarName, QName.parse(staticContext, windowWarName.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(windowWarName.getLine(), windowWarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + windowWarName.getText()); + step=expr[inputSequence] + { + try { + clause.varName = QName.parse(staticContext, someVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(someVarName.getLine(), someVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + someVarName.getText()); } - clause.inputSequence= inputSequence; - clauses.add(clause); - } - ) - ) - // windowStartCondition - #( - "start" - { - PathExpr whenExpr = new PathExpr(context); - QName currentItemName = null; - QName previousItemName = null; - QName nextItemName = null; - QName windowStartPosVar = null; - } - #( - // WINDOW_VARS - WINDOW_VARS - ( - currentItem:CURRENT_ITEM - { - if (currentItem != null && currentItem.getText() != null) { - try { - currentItemName = distinctVariableNames.check(ErrorCodes.XQST0103, currentItem, QName.parse(staticContext, currentItem.getText())); - } catch (final IllegalQNameException iqe) { - throw new XPathException(currentItem.getLine(), currentItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + currentItem.getText()); - } - } - } - )? - ( - startPosVar:POSITIONAL_VAR - { - try { - windowStartPosVar = distinctVariableNames.check(ErrorCodes.XQST0103, startPosVar, QName.parse(staticContext, startPosVar.getText(), null)); - } catch (final IllegalQNameException iqe) { - throw new XPathException(startPosVar.getLine(), startPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + startPosVar.getText()); - } - } - )? 
- ( - previousItem:PREVIOUS_ITEM - { - if (previousItem != null && previousItem.getText() != null) { - try { - previousItemName = distinctVariableNames.check(ErrorCodes.XQST0103, previousItem, QName.parse(staticContext, previousItem.getText())); - } catch (final IllegalQNameException iqe) { - throw new XPathException(previousItem.getLine(), previousItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + previousItem.getText()); - } - } - } - )? - ( - nextItem:NEXT_ITEM - { - if (nextItem != null && nextItem.getText() != null) { - try { - nextItemName = distinctVariableNames.check(ErrorCodes.XQST0103, nextItem, QName.parse(staticContext, nextItem.getText())); - } catch (final IllegalQNameException iqe) { - throw new XPathException(nextItem.getLine(), nextItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + nextItem.getText()); - } - } - } - )? + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + )* + step=expr[satisfiesExpr] + { + Expression action = satisfiesExpr; + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.SOME); + expr.setASTNode(exprFlowControl_AST_in); + expr.setVariable(clause.varName); + expr.setSequenceType(clause.sequenceType); + expr.setInputSequence(clause.inputSequence); + expr.setReturnExpression(action); + satisfiesExpr= null; + action= expr; + } + path.add(action); + step = action; + } + ) + | + // quantified expression: every + #( + "every" + { + List clauses= new ArrayList(); + PathExpr satisfiesExpr = new PathExpr(context); + satisfiesExpr.setASTNode(exprFlowControl_AST_in); + } + ( + #( + everyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + PathExpr inputSequence = new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } + ( + #( + "as" + { SequenceType type= new SequenceType(); } + sequenceType[type] 
) - "when" - step=expr [whenExpr] + { clause.sequenceType = type; } + )? + step=expr[inputSequence] + { + try { + clause.varName = QName.parse(staticContext, everyVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(everyVarName.getLine(), everyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + everyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + )* + step=expr[satisfiesExpr] + { + Expression action = satisfiesExpr; + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + BindingExpression expr = new QuantifiedExpression(context, QuantifiedExpression.EVERY); + expr.setASTNode(exprFlowControl_AST_in); + expr.setVariable(clause.varName); + expr.setSequenceType(clause.sequenceType); + expr.setInputSequence(clause.inputSequence); + expr.setReturnExpression(action); + satisfiesExpr= null; + action= expr; + } + path.add(action); + step = action; + } + ) + | + //try/catch expression + #( + astTry:"try" + { + PathExpr tryTargetExpr = new PathExpr(context); + tryTargetExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [tryTargetExpr] + { + TryCatchExpression cond = new TryCatchExpression(context, tryTargetExpr); + cond.setASTNode(astTry); + path.add(cond); + } + ( + { + final List catchErrorList = new ArrayList<>(2); + final List catchVars = new ArrayList<>(3); + final PathExpr catchExpr = new PathExpr(context); + catchExpr.setASTNode(exprFlowControl_AST_in); + } + #( + astCatch:"catch" + (catchErrorList [catchErrorList]) + ( { - WindowCondition windowCondition = new WindowCondition( - context, false, currentItemName, windowStartPosVar, previousItemName, nextItemName, whenExpr - ); - clause.windowConditions.add(windowCondition); + QName qncode = null; + QName qndesc = null; + QName qnval = null; } - ) - // windowEndCondition - ( + code:CATCH_ERROR_CODE { - PathExpr endWhenExpr = new PathExpr(context); 
- QName endCurrentItemName = null; - QName endPreviousItemName = null; - QName endNextItemName = null; - QName windowEndPosVar = null; - Boolean only = false; + try { + qncode = QName.parse(staticContext, code.getText()); + catchVars.add(qncode); + } catch (final IllegalQNameException iqe) { + throw new XPathException(code.getLine(), code.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + code.getText()); + } } - #( - "end" + ( + desc:CATCH_ERROR_DESC + { + try { + qndesc = QName.parse(staticContext, desc.getText()); + catchVars.add(qndesc); + } catch (final IllegalQNameException iqe) { + throw new XPathException(desc.getLine(), desc.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + desc.getText()); + } + } + ( - "only" + val:CATCH_ERROR_VAL { - only = true; + try { + qnval = QName.parse(staticContext, val.getText()); + catchVars.add(qnval); + } catch (final IllegalQNameException iqe) { + throw new XPathException(val.getLine(), val.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + val.getText()); + } } + )? + )? + )? + step= expr [catchExpr] + { + catchExpr.setASTNode(astCatch); + cond.addCatchClause(catchErrorList, catchVars, catchExpr); + } + ) + )* + ( + #( + astFinally:"finally" + { + final PathExpr finallyExpr = new PathExpr(context); + finallyExpr.setASTNode(astFinally); + } + (step=expr [finallyExpr])? + { + finallyExpr.setASTNode(astFinally); + cond.setFinallyExpr(finallyExpr); + } + ) + )? 
+ + { + step = cond; + } + ) + | + // FLWOR expressions: let and for + #( + r:"return" + { + List clauses= new ArrayList(); + Expression action= new PathExpr(context); + action.setASTNode(r); + PathExpr whereExpr= null; + List orderBy= null; + } + ( + #( + f:"for" + ( + #( + varName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = varName; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in);inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + ( + "empty" + { clause.allowEmpty = true; } + )? + ( + posVar:POSITIONAL_VAR + { + try { + clause.posVar = distinctVariableNames.check(ErrorCodes.XQST0089, posVar, QName.parse(staticContext, posVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(posVar.getLine(), posVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + posVar.getText()); + } + } + )? + ( + scoreVar:FT_SCORE_VAR + { + try { + clause.scoreVar = distinctVariableNames.check(ErrorCodes.XQST0089, scoreVar, QName.parse(staticContext, scoreVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(scoreVar.getLine(), scoreVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + scoreVar.getText()); + } + } + )? 
+ step=expr [inputSequence] + { + try { + clause.varName = distinctVariableNames.check(ErrorCodes.XQST0089, varName, QName.parse(staticContext, varName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(varName.getLine(), varName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + | + #( + fmAST:FOR_MEMBER + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(fmAST, ErrorCodes.XPST0003, + "The 'for member' clause requires xquery version \"4.0\""); + } + } #( - // WINDOW_VARS - WINDOW_VARS - ( - endCurrentItem:CURRENT_ITEM - { - if (endCurrentItem != null && endCurrentItem.getText() != null) { - try { - endCurrentItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endCurrentItem, QName.parse(staticContext, endCurrentItem.getText())); - } catch (final IllegalQNameException iqe) { - throw new XPathException(endCurrentItem.getLine(), endCurrentItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endCurrentItem.getText()); - } - } - } - )? - ( - endPosVar:POSITIONAL_VAR + memberVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = memberVarName; + clause.type = FLWORClause.ClauseType.FOR_MEMBER; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames memberDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? 
+ ( + memberPosVar:POSITIONAL_VAR { try { - windowEndPosVar = distinctVariableNames.check(ErrorCodes.XQST0103, endPosVar, QName.parse(staticContext, endPosVar.getText(), null)); + clause.posVar = memberDistinctVars.check(ErrorCodes.XQST0089, memberPosVar, QName.parse(staticContext, memberPosVar.getText(), null)); } catch (final IllegalQNameException iqe) { - throw new XPathException(endPosVar.getLine(), endPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endPosVar.getText()); + throw new XPathException(memberPosVar.getLine(), memberPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + memberPosVar.getText()); } } - )? - ( - endPreviousItem:PREVIOUS_ITEM + )? + step=expr [inputSequence] + { + try { + clause.varName = memberDistinctVars.check(ErrorCodes.XQST0089, memberVarName, QName.parse(staticContext, memberVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(memberVarName.getLine(), memberVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + memberVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_KEY + #( + keyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = keyVarName; + clause.type = FLWORClause.ClauseType.FOR_KEY; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames keyDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? 
+ ( + keyPosVar:POSITIONAL_VAR { - if (endPreviousItem != null && endPreviousItem.getText() != null) { - try { - endPreviousItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endPreviousItem, QName.parse(staticContext, endPreviousItem.getText())); - } catch (final IllegalQNameException iqe) { - throw new XPathException(endPreviousItem.getLine(), endPreviousItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endPreviousItem.getText()); - } + try { + clause.posVar = keyDistinctVars.check(ErrorCodes.XQST0089, keyPosVar, QName.parse(staticContext, keyPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(keyPosVar.getLine(), keyPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + keyPosVar.getText()); } } )? + step=expr [inputSequence] + { + try { + clause.varName = keyDistinctVars.check(ErrorCodes.XQST0089, keyVarName, QName.parse(staticContext, keyVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(keyVarName.getLine(), keyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + keyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_VALUE + #( + valueVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = valueVarName; + clause.type = FLWORClause.ClauseType.FOR_VALUE; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames valueDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? 
( - endNextItem:NEXT_ITEM + valuePosVar:POSITIONAL_VAR { - if (endNextItem != null && endNextItem.getText() != null) { + try { + clause.posVar = valueDistinctVars.check(ErrorCodes.XQST0089, valuePosVar, QName.parse(staticContext, valuePosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(valuePosVar.getLine(), valuePosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + valuePosVar.getText()); + } + } + )? + step=expr [inputSequence] + { + try { + clause.varName = valueDistinctVars.check(ErrorCodes.XQST0089, valueVarName, QName.parse(staticContext, valueVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(valueVarName.getLine(), valueVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + valueVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + | + #( + FOR_KEY_VALUE + #( + kvKeyVarName:VARIABLE_BINDING + { + ForLetClause clause= new ForLetClause(); + clause.ast = kvKeyVarName; + clause.type = FLWORClause.ClauseType.FOR_KEY_VALUE; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + final DistinctVariableNames kvDistinctVars = new DistinctVariableNames(); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? 
+ ( + #( + kvValueVar:VALUE_VAR + { try { - endNextItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endNextItem, QName.parse(staticContext, endNextItem.getText())); + clause.valueVarName = kvDistinctVars.check(ErrorCodes.XQST0089, kvValueVar, QName.parse(staticContext, kvValueVar.getText(), null)); } catch (final IllegalQNameException iqe) { - throw new XPathException(endNextItem.getLine(), endNextItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endNextItem.getText()); + throw new XPathException(kvValueVar.getLine(), kvValueVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvValueVar.getText()); } } + ( + #( + "as" + { clause.valueSequenceType = new SequenceType(); } + sequenceType [clause.valueSequenceType] + ) + )? + ) + )? + ( + kvPosVar:POSITIONAL_VAR + { + try { + clause.posVar = kvDistinctVars.check(ErrorCodes.XQST0089, kvPosVar, QName.parse(staticContext, kvPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(kvPosVar.getLine(), kvPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvPosVar.getText()); + } } )? 
+ step=expr [inputSequence] + { + try { + clause.varName = kvDistinctVars.check(ErrorCodes.XQST0089, kvKeyVarName, QName.parse(staticContext, kvKeyVarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(kvKeyVarName.getLine(), kvKeyVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + kvKeyVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } ) - "when" - step=expr [endWhenExpr] + ) + )+ + ) + | + #( + l:"let" + ( + #( + letVarName:VARIABLE_BINDING { - WindowCondition endWindowCondition = new WindowCondition( - context, only, endCurrentItemName, windowEndPosVar, endPreviousItemName, endNextItemName, endWhenExpr - ); - clause.windowConditions.add(endWindowCondition); + ForLetClause clause= new ForLetClause(); + clause.ast = letVarName; + clause.type = FLWORClause.ClauseType.LET; + PathExpr inputSequence= new PathExpr(context); + inputSequence.setASTNode(exprFlowControl_AST_in); + } + ( + letScoreVar:FT_SCORE_VAR + { + try { + clause.scoreVar = QName.parse(staticContext, letScoreVar.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(letScoreVar.getLine(), letScoreVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + letScoreVar.getText()); + } + } + )? + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + step=expr [inputSequence] + { + try { + clause.varName = QName.parse(staticContext, letVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(letVarName.getLine(), letVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + letVarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); } ) - )? 
- ) - | - // XQuery 3.0 group by clause - #( - gb:GROUP_BY - { - ForLetClause clause = new ForLetClause(); - clause.ast = gb; - clause.type = FLWORClause.ClauseType.GROUPBY; - clause.groupSpecs = new ArrayList(4); - clauses.add(clause); - } - ( - #( - groupVarName:VARIABLE_BINDING - { PathExpr groupSpecExpr = null; } - ( - ( - #( - "as" - { clause.sequenceType = new SequenceType(); } - sequenceType [clause.sequenceType] - ) - )? - // optional := exprSingle - ( - { - groupSpecExpr = new PathExpr(context); - groupSpecExpr.setASTNode(expr_AST_in); - } - step=expr [groupSpecExpr] + | + // XQ4: sequence destructuring + #( + seqDestAST:SEQ_DESTRUCTURE + { + ForLetClause seqClause = new ForLetClause(); + seqClause.ast = seqDestAST; + seqClause.type = FLWORClause.ClauseType.LET_SEQ_DESTRUCTURE; + seqClause.destructureVarNames = new ArrayList(); + seqClause.destructureVarTypes = new ArrayList(); + String[] seqVarNames = seqDestAST.getText().split(",", -1); + int seqTypedIdx = 0; + boolean[] seqHasType = new boolean[seqVarNames.length]; + for (int dv = 0; dv < seqVarNames.length; dv++) { + String svn = seqVarNames[dv]; + seqHasType[dv] = svn.endsWith("+"); + if (seqHasType[dv]) svn = svn.substring(0, svn.length() - 1); + try { + seqClause.destructureVarNames.add( + QName.parse(staticContext, svn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(seqDestAST.getLine(), seqDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + svn); + } + seqClause.destructureVarTypes.add(null); + } + PathExpr seqInput = new PathExpr(context); + seqInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType seqVarType = new SequenceType(); + while (seqTypedIdx < seqHasType.length && !seqHasType[seqTypedIdx]) seqTypedIdx++; + } + sequenceType [seqVarType] + { + if (seqTypedIdx < seqClause.destructureVarTypes.size()) { + seqClause.destructureVarTypes.set(seqTypedIdx, seqVarType); + } + 
seqTypedIdx++; + } + ) + ) + )* + ( + #( + "as" + { seqClause.sequenceType = new SequenceType(); } + sequenceType [seqClause.sequenceType] + ) + )? + step=expr [seqInput] + { + seqClause.inputSequence = seqInput; + clauses.add(seqClause); + } ) - )? - { - final QName groupKeyVar; - try { - groupKeyVar = QName.parse(staticContext, groupVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(groupVarName.getLine(), groupVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + groupVarName.getText()); - } - - GroupSpec groupSpec = new GroupSpec(context, groupSpecExpr, groupKeyVar, clause.sequenceType); - clause.groupSpecs.add(groupSpec); - } - ( - "collation" groupCollURI:STRING_LITERAL - { - groupSpec.setCollator(groupCollURI.getText()); - } - )? - ) - )+ - ) - | - #( - ob:ORDER_BY { orderBy = new ArrayList(3); } - ( - { - PathExpr orderSpecExpr= new PathExpr(context); - orderSpecExpr.setASTNode(expr_AST_in); - } - step=expr [orderSpecExpr] - { - OrderSpec orderSpec= new OrderSpec(context, orderSpecExpr); - int modifiers= 0; - boolean orderDescending = false; - orderBy.add(orderSpec); - - if (!context.orderEmptyGreatest()) { - modifiers |= OrderSpec.EMPTY_LEAST; - orderSpec.setModifiers(modifiers); - } - } - ( - ( - - "ascending" - | - "descending" - { - modifiers |= OrderSpec.DESCENDING_ORDER; - orderSpec.setModifiers(modifiers); - orderDescending = true; - } - ) - )? - ( - "empty" - ( - "greatest" - { - if (!context.orderEmptyGreatest()) - modifiers &= OrderSpec.EMPTY_GREATEST; - if (orderDescending) - modifiers |= OrderSpec.DESCENDING_ORDER; - orderSpec.setModifiers(modifiers); - } - | - "least" - { - modifiers |= OrderSpec.EMPTY_LEAST; - orderSpec.setModifiers(modifiers); - } - ) - )? - ( - "collation" collURI:STRING_LITERAL - { - orderSpec.setCollation(collURI.getText()); - } - )? 
- )+ - { - ForLetClause clause= new ForLetClause(); - clause.ast = ob; - clause.type = FLWORClause.ClauseType.ORDERBY; - clause.orderSpecs = orderBy; - clauses.add(clause); - } - ) - | - #( - w:"where" - { - whereExpr= new PathExpr(context); - whereExpr.setASTNode(expr_AST_in); - } - step=expr [whereExpr] - { - ForLetClause clause = new ForLetClause(); - clause.ast = w; - clause.type = FLWORClause.ClauseType.WHERE; - clause.inputSequence = whereExpr; - clauses.add(clause); - } + | + // XQ4: array destructuring + #( + arrDestAST:ARRAY_DESTRUCTURE + { + ForLetClause arrClause = new ForLetClause(); + arrClause.ast = arrDestAST; + arrClause.type = FLWORClause.ClauseType.LET_ARRAY_DESTRUCTURE; + arrClause.destructureVarNames = new ArrayList(); + arrClause.destructureVarTypes = new ArrayList(); + String[] arrVarNames = arrDestAST.getText().split(",", -1); + int arrTypedIdx = 0; + boolean[] arrHasType = new boolean[arrVarNames.length]; + for (int dv = 0; dv < arrVarNames.length; dv++) { + String avn = arrVarNames[dv]; + arrHasType[dv] = avn.endsWith("+"); + if (arrHasType[dv]) avn = avn.substring(0, avn.length() - 1); + try { + arrClause.destructureVarNames.add( + QName.parse(staticContext, avn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(arrDestAST.getLine(), arrDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + avn); + } + arrClause.destructureVarTypes.add(null); + } + PathExpr arrInput = new PathExpr(context); + arrInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType arrVarType = new SequenceType(); + while (arrTypedIdx < arrHasType.length && !arrHasType[arrTypedIdx]) arrTypedIdx++; + } + sequenceType [arrVarType] + { + if (arrTypedIdx < arrClause.destructureVarTypes.size()) { + arrClause.destructureVarTypes.set(arrTypedIdx, arrVarType); + } + arrTypedIdx++; + } + ) + ) + )* + ( + #( + "as" + { arrClause.sequenceType = new SequenceType(); } + 
sequenceType [arrClause.sequenceType] + ) + )? + step=expr [arrInput] + { + arrClause.inputSequence = arrInput; + clauses.add(arrClause); + } + ) + | + // XQ4: map destructuring + #( + mapDestAST:MAP_DESTRUCTURE + { + ForLetClause mapClause = new ForLetClause(); + mapClause.ast = mapDestAST; + mapClause.type = FLWORClause.ClauseType.LET_MAP_DESTRUCTURE; + mapClause.destructureVarNames = new ArrayList(); + mapClause.destructureVarTypes = new ArrayList(); + String[] mapVarNames = mapDestAST.getText().split(",", -1); + int mapTypedIdx = 0; + boolean[] mapHasType = new boolean[mapVarNames.length]; + for (int dv = 0; dv < mapVarNames.length; dv++) { + String mvn = mapVarNames[dv]; + mapHasType[dv] = mvn.endsWith("+"); + if (mapHasType[dv]) mvn = mvn.substring(0, mvn.length() - 1); + try { + mapClause.destructureVarNames.add( + QName.parse(staticContext, mvn, null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(mapDestAST.getLine(), mapDestAST.getColumn(), + ErrorCodes.XPST0081, "No namespace defined for prefix " + mvn); + } + mapClause.destructureVarTypes.add(null); + } + PathExpr mapInput = new PathExpr(context); + mapInput.setASTNode(exprFlowControl_AST_in); + } + ( + #( + DESTRUCTURE_VAR_TYPE + #( + "as" + { + SequenceType mapVarType = new SequenceType(); + while (mapTypedIdx < mapHasType.length && !mapHasType[mapTypedIdx]) mapTypedIdx++; + } + sequenceType [mapVarType] + { + if (mapTypedIdx < mapClause.destructureVarTypes.size()) { + mapClause.destructureVarTypes.set(mapTypedIdx, mapVarType); + } + mapTypedIdx++; + } + ) + ) + )* + ( + #( + "as" + { mapClause.sequenceType = new SequenceType(); } + sequenceType [mapClause.sequenceType] + ) + )? 
+ step=expr [mapInput] + { + mapClause.inputSequence = mapInput; + clauses.add(mapClause); + } + ) + )+ ) | - #( - co:"count" - countVarName:VARIABLE_BINDING - { - ForLetClause clause = new ForLetClause(); - clause.ast = co; - try { - clause.varName = QName.parse(staticContext, countVarName.getText(), null); - } catch (final IllegalQNameException iqe) { - throw new XPathException(countVarName.getLine(), countVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + countVarName.getText()); + #( + wc:"window" + { + ForLetClause clause= new ForLetClause(); + clause.type = FLWORClause.ClauseType.WINDOW; + clause.windowConditions = new ArrayList(2); + final DistinctVariableNames distinctVariableNames = new DistinctVariableNames(); + } + ( + "tumbling" + { + clause.windowType = WindowExpr.WindowType.TUMBLING_WINDOW; } - clause.type = FLWORClause.ClauseType.COUNT; - clause.inputSequence = null; - clauses.add(clause); - } - ) - )+ - step=expr [(PathExpr) action] - { - for (int i= clauses.size() - 1; i >= 0; i--) { - ForLetClause clause= (ForLetClause) clauses.get(i); - FLWORClause expr; - switch (clause.type) { - case LET: - expr = new LetExpr(context); - expr.setASTNode(expr_AST_in); - break; - case GROUPBY: - expr = new GroupByClause(context); - break; - case ORDERBY: - expr = new OrderByClause(context, clause.orderSpecs); - break; - case WHERE: - expr = new WhereClause(context, new DebuggableExpression(clause.inputSequence)); - break; - case COUNT: - expr = new CountClause(context, clause.varName); - break; - case WINDOW: - expr = new WindowExpr(context, clause.windowType, clause.windowConditions.get(0), clause.windowConditions.size() > 1 ? 
clause.windowConditions.get(1) : null); - break; - default: - expr = new ForExpr(context, clause.allowEmpty); - break; - } - expr.setASTNode(clause.ast); - if (clause.type == FLWORClause.ClauseType.FOR || clause.type == FLWORClause.ClauseType.LET - || clause.type == FLWORClause.ClauseType.WINDOW) { - final BindingExpression bind = (BindingExpression)expr; - bind.setVariable(clause.varName); - bind.setSequenceType(clause.sequenceType); - bind.setInputSequence(clause.inputSequence); - if (clause.type == FLWORClause.ClauseType.FOR) { - ((ForExpr) bind).setPositionalVariable(clause.posVar); - } - } else if (clause.type == FLWORClause.ClauseType.GROUPBY) { - if (clause.groupSpecs != null) { - GroupSpec specs[] = new GroupSpec[clause.groupSpecs.size()]; - int k = 0; - for (GroupSpec groupSpec : clause.groupSpecs) { - specs[k++]= groupSpec; - } - ((GroupByClause)expr).setGroupSpecs(specs); - } - } - if (!(action instanceof FLWORClause)) - expr.setReturnExpression(new DebuggableExpression(action)); - else { - expr.setReturnExpression(action); - ((FLWORClause)action).setPreviousClause(expr); - } - - action= expr; - } - - path.add(action); - step = action; - } - ) - | - // instance of: - #( - "instance" - { - PathExpr expr = new PathExpr(context); - expr.setASTNode(expr_AST_in); - SequenceType type= new SequenceType(); - } - step=expr [expr] - sequenceType [type] - { - step = new InstanceOfExpression(context, expr, type); - step.setASTNode(expr_AST_in); - path.add(step); - } - ) - | - // treat as: - #( - "treat" - { - PathExpr expr = new PathExpr(context); - expr.setASTNode(expr_AST_in); - SequenceType type= new SequenceType(); - } - step=expr [expr] - sequenceType [type] - { - step = new TreatAsExpression(context, expr, type); - step.setASTNode(expr_AST_in); - path.add(step); - } - ) - | - // switch - #( - switchAST:"switch" - { - PathExpr operand = new PathExpr(context); - operand.setASTNode(expr_AST_in); - } - step=expr [operand] - { - SwitchExpression switchExpr = new 
SwitchExpression(context, operand); - switchExpr.setASTNode(switchAST); - path.add(switchExpr); - } - ( - { - List caseOperands = new ArrayList(2); - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - } - (( - { - PathExpr caseOperand = new PathExpr(context); - caseOperand.setASTNode(expr_AST_in); - } - "case" - expr [caseOperand] - { caseOperands.add(caseOperand); } - )+ - #( - "return" - step= expr [returnExpr] - { switchExpr.addCase(caseOperands, returnExpr); } - )) - )+ - ( - "default" - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - } - step=expr [returnExpr] - { - switchExpr.setDefault(returnExpr); - } - ) - { step = switchExpr; } - ) - | - // typeswitch - #( - "typeswitch" - { - PathExpr operand = new PathExpr(context); - operand.setASTNode(expr_AST_in); - } - step=expr [operand] - { - TypeswitchExpression tswitch = new TypeswitchExpression(context, operand); - tswitch.setASTNode(expr_AST_in); - path.add(tswitch); - } - ( - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - QName qn = null; - List types = new ArrayList(2); - SequenceType type = new SequenceType(); - } + | + "sliding" + { + clause.windowType = WindowExpr.WindowType.SLIDING_WINDOW; + } + )? + // invarBinding + ( + #( + windowWarName:VARIABLE_BINDING + { + clause.ast = windowWarName; + PathExpr inputSequence= new PathExpr(context); + } + ( + #( + "as" + { clause.sequenceType= new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? 
+ step=expr [inputSequence] + { + try { + clause.varName = distinctVariableNames.check(ErrorCodes.XQST0103, windowWarName, QName.parse(staticContext, windowWarName.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(windowWarName.getLine(), windowWarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + windowWarName.getText()); + } + clause.inputSequence= inputSequence; + clauses.add(clause); + } + ) + ) + // windowStartCondition + #( + "start" + { + PathExpr whenExpr = new PathExpr(context); + QName currentItemName = null; + QName previousItemName = null; + QName nextItemName = null; + QName windowStartPosVar = null; + } + #( + // WINDOW_VARS + WINDOW_VARS + ( + currentItem:CURRENT_ITEM + { + if (currentItem != null && currentItem.getText() != null) { + try { + currentItemName = distinctVariableNames.check(ErrorCodes.XQST0103, currentItem, QName.parse(staticContext, currentItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(currentItem.getLine(), currentItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + currentItem.getText()); + } + } + } + )? + ( + startPosVar:POSITIONAL_VAR + { + try { + windowStartPosVar = distinctVariableNames.check(ErrorCodes.XQST0103, startPosVar, QName.parse(staticContext, startPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(startPosVar.getLine(), startPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + startPosVar.getText()); + } + } + )? 
+ ( + previousItem:PREVIOUS_ITEM + { + if (previousItem != null && previousItem.getText() != null) { + try { + previousItemName = distinctVariableNames.check(ErrorCodes.XQST0103, previousItem, QName.parse(staticContext, previousItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(previousItem.getLine(), previousItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + previousItem.getText()); + } + } + } + )? + ( + nextItem:NEXT_ITEM + { + if (nextItem != null && nextItem.getText() != null) { + try { + nextItemName = distinctVariableNames.check(ErrorCodes.XQST0103, nextItem, QName.parse(staticContext, nextItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(nextItem.getLine(), nextItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + nextItem.getText()); + } + } + } + )? + ) + "when" + step=expr [whenExpr] + { + WindowCondition windowCondition = new WindowCondition( + context, false, currentItemName, windowStartPosVar, previousItemName, nextItemName, whenExpr + ); + clause.windowConditions.add(windowCondition); + } + ) + // windowEndCondition + ( + { + PathExpr endWhenExpr = new PathExpr(context); + QName endCurrentItemName = null; + QName endPreviousItemName = null; + QName endNextItemName = null; + QName windowEndPosVar = null; + Boolean only = false; + } + #( + "end" + ( + "only" + { + only = true; + } + )? + #( + // WINDOW_VARS + WINDOW_VARS + ( + endCurrentItem:CURRENT_ITEM + { + if (endCurrentItem != null && endCurrentItem.getText() != null) { + try { + endCurrentItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endCurrentItem, QName.parse(staticContext, endCurrentItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(endCurrentItem.getLine(), endCurrentItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endCurrentItem.getText()); + } + } + } + )? 
+ ( + endPosVar:POSITIONAL_VAR + { + try { + windowEndPosVar = distinctVariableNames.check(ErrorCodes.XQST0103, endPosVar, QName.parse(staticContext, endPosVar.getText(), null)); + } catch (final IllegalQNameException iqe) { + throw new XPathException(endPosVar.getLine(), endPosVar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endPosVar.getText()); + } + } + )? + ( + endPreviousItem:PREVIOUS_ITEM + { + if (endPreviousItem != null && endPreviousItem.getText() != null) { + try { + endPreviousItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endPreviousItem, QName.parse(staticContext, endPreviousItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(endPreviousItem.getLine(), endPreviousItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endPreviousItem.getText()); + } + } + } + )? + ( + endNextItem:NEXT_ITEM + { + if (endNextItem != null && endNextItem.getText() != null) { + try { + endNextItemName = distinctVariableNames.check(ErrorCodes.XQST0103, endNextItem, QName.parse(staticContext, endNextItem.getText())); + } catch (final IllegalQNameException iqe) { + throw new XPathException(endNextItem.getLine(), endNextItem.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + endNextItem.getText()); + } + } + } + )? + ) + "when" + step=expr [endWhenExpr] + { + WindowCondition endWindowCondition = new WindowCondition( + context, only, endCurrentItemName, windowEndPosVar, endPreviousItemName, endNextItemName, endWhenExpr + ); + clause.windowConditions.add(endWindowCondition); + } + ) + )? 
+ ) + | + // XQuery 3.0 group by clause + #( + gb:GROUP_BY + { + ForLetClause clause = new ForLetClause(); + clause.ast = gb; + clause.type = FLWORClause.ClauseType.GROUPBY; + clause.groupSpecs = new ArrayList(4); + clauses.add(clause); + } + ( + #( + groupVarName:VARIABLE_BINDING + { PathExpr groupSpecExpr = null; } + ( + ( + #( + "as" + { clause.sequenceType = new SequenceType(); } + sequenceType [clause.sequenceType] + ) + )? + // optional := exprSingle + ( + { + groupSpecExpr = new PathExpr(context); + groupSpecExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [groupSpecExpr] + ) + )? + { + final QName groupKeyVar; + try { + groupKeyVar = QName.parse(staticContext, groupVarName.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(groupVarName.getLine(), groupVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + groupVarName.getText()); + } + + GroupSpec groupSpec = new GroupSpec(context, groupSpecExpr, groupKeyVar, clause.sequenceType); + clause.groupSpecs.add(groupSpec); + } + ( + "collation" groupCollURI:STRING_LITERAL + { + groupSpec.setCollator(groupCollURI.getText()); + } + )? + ) + )+ + ) + | + #( + ob:ORDER_BY { orderBy = new ArrayList(3); } + ( + { + PathExpr orderSpecExpr= new PathExpr(context); + orderSpecExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [orderSpecExpr] + { + OrderSpec orderSpec= new OrderSpec(context, orderSpecExpr); + int modifiers= 0; + boolean orderDescending = false; + orderBy.add(orderSpec); + + if (!context.orderEmptyGreatest()) { + modifiers |= OrderSpec.EMPTY_LEAST; + orderSpec.setModifiers(modifiers); + } + } + ( + ( + + "ascending" + | + "descending" + { + modifiers |= OrderSpec.DESCENDING_ORDER; + orderSpec.setModifiers(modifiers); + orderDescending = true; + } + ) + )? 
+ ( + "empty" + ( + "greatest" + { + if (!context.orderEmptyGreatest()) + modifiers &= OrderSpec.EMPTY_GREATEST; + if (orderDescending) + modifiers |= OrderSpec.DESCENDING_ORDER; + orderSpec.setModifiers(modifiers); + } + | + "least" + { + modifiers |= OrderSpec.EMPTY_LEAST; + orderSpec.setModifiers(modifiers); + } + ) + )? + ( + "collation" collURI:STRING_LITERAL + { + orderSpec.setCollation(collURI.getText()); + } + )? + )+ + { + ForLetClause clause= new ForLetClause(); + clause.ast = ob; + clause.type = FLWORClause.ClauseType.ORDERBY; + clause.orderSpecs = orderBy; + clauses.add(clause); + } + ) + | #( - "case" - ( - var:VARIABLE_BINDING - { - try { - qn = QName.parse(staticContext, var.getText()); - } catch (final IllegalQNameException iqe) { - throw new XPathException(var.getLine(), var.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + var.getText()); - } - } - )? - ( - sequenceType[type] - { - types.add(type); - type = new SequenceType(); - } - )+ - // Need return as root in following to disambiguate - // e.g. ( case a xs:integer ( * 3 3 ) ) - // which gives xs:integer* and no operator left for 3 3 ... 
- // Now ( case a xs:integer ( return ( + 3 3 ) ) ) /ljo - #( - "return" - step= expr [returnExpr] - { - SequenceType[] atype = new SequenceType[types.size()]; - atype = types.toArray(atype); - tswitch.addCase(atype, qn, returnExpr); + w:"where" + { + whereExpr= new PathExpr(context); + whereExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [whereExpr] + { + ForLetClause clause = new ForLetClause(); + clause.ast = w; + clause.type = FLWORClause.ClauseType.WHERE; + clause.inputSequence = whereExpr; + clauses.add(clause); + } + ) + | + #( + wh:"while" + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(wh, ErrorCodes.XPST0003, + "The 'while' clause requires xquery version \"4.0\""); } - ) + PathExpr whileExpr = new PathExpr(context); + whileExpr.setASTNode(exprFlowControl_AST_in); + } + step=expr [whileExpr] + { + ForLetClause clause = new ForLetClause(); + clause.ast = wh; + clause.type = FLWORClause.ClauseType.WHILE; + clause.inputSequence = whileExpr; + clauses.add(clause); + } ) - - )+ - ( - "default" - { - PathExpr returnExpr = new PathExpr(context); - returnExpr.setASTNode(expr_AST_in); - QName qn = null; - } - ( - dvar:VARIABLE_BINDING + | + #( + co:"count" + countVarName:VARIABLE_BINDING { + ForLetClause clause = new ForLetClause(); + clause.ast = co; try { - qn = QName.parse(staticContext, dvar.getText()); + clause.varName = QName.parse(staticContext, countVarName.getText(), null); } catch (final IllegalQNameException iqe) { - throw new XPathException(dvar.getLine(), dvar.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + dvar.getText()); + throw new XPathException(countVarName.getLine(), countVarName.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + countVarName.getText()); } + clause.type = FLWORClause.ClauseType.COUNT; + clause.inputSequence = null; + clauses.add(clause); } - )? 
- step=expr [returnExpr] - { - tswitch.setDefault(qn, returnExpr); - } - ) - { step = tswitch; } - ) - | - // logical operator: or - #( - "or" - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - } - step=expr [left] - { - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [right] - ) - { - OpOr or= new OpOr(context); - or.addPath(left); - or.addPath(right); - path.addPath(or); - step = or; - } - | - // logical operator: and - #( - "and" - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - OpAnd and= new OpAnd(context); - and.addPath(left); - and.addPath(right); - path.addPath(and); - step = and; - } - | - // union expressions: | and union - #( - UNION - { - PathExpr left= new PathExpr(context); - left.setASTNode(expr_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(expr_AST_in); + ) + )+ + step=expr [(PathExpr) action] + { + for (int i= clauses.size() - 1; i >= 0; i--) { + ForLetClause clause= (ForLetClause) clauses.get(i); + FLWORClause expr; + switch (clause.type) { + case LET: + expr = new LetExpr(context); + expr.setASTNode(exprFlowControl_AST_in); + break; + case GROUPBY: + expr = new GroupByClause(context); + break; + case ORDERBY: + expr = new OrderByClause(context, clause.orderSpecs); + break; + case WHERE: + expr = new WhereClause(context, new DebuggableExpression(clause.inputSequence)); + break; + case WHILE: + expr = new WhileClause(context, new DebuggableExpression(clause.inputSequence)); + break; + case COUNT: + expr = new CountClause(context, clause.varName); + break; + case WINDOW: + expr = new WindowExpr(context, clause.windowType, clause.windowConditions.get(0), clause.windowConditions.size() > 1 ? 
clause.windowConditions.get(1) : null); + break; + case FOR_MEMBER: + expr = new ForMemberExpr(context); + break; + case FOR_KEY: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_KEY); + break; + case FOR_VALUE: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_VALUE); + break; + case FOR_KEY_VALUE: + expr = new ForKeyValueExpr(context, FLWORClause.ClauseType.FOR_KEY_VALUE); + break; + case LET_SEQ_DESTRUCTURE: + case LET_ARRAY_DESTRUCTURE: + case LET_MAP_DESTRUCTURE: + { + LetDestructureExpr.DestructureMode dmode; + if (clause.type == FLWORClause.ClauseType.LET_SEQ_DESTRUCTURE) { + dmode = LetDestructureExpr.DestructureMode.SEQUENCE; + } else if (clause.type == FLWORClause.ClauseType.LET_ARRAY_DESTRUCTURE) { + dmode = LetDestructureExpr.DestructureMode.ARRAY; + } else { + dmode = LetDestructureExpr.DestructureMode.MAP; + } + LetDestructureExpr dexpr = new LetDestructureExpr(context, dmode); + dexpr.setASTNode(clause.ast); + for (int j = 0; j < clause.destructureVarNames.size(); j++) { + dexpr.addVariable( + (QName) clause.destructureVarNames.get(j), + clause.destructureVarTypes.size() > j ? 
+ (SequenceType) clause.destructureVarTypes.get(j) : null); + } + dexpr.setInputSequence(clause.inputSequence); + if (clause.sequenceType != null) { + dexpr.setOverallType(clause.sequenceType); + } + expr = dexpr; + break; + } + default: + expr = new ForExpr(context, clause.allowEmpty); + break; + } + expr.setASTNode(clause.ast); + if (clause.type == FLWORClause.ClauseType.FOR || clause.type == FLWORClause.ClauseType.LET + || clause.type == FLWORClause.ClauseType.WINDOW + || clause.type == FLWORClause.ClauseType.FOR_MEMBER + || clause.type == FLWORClause.ClauseType.FOR_KEY + || clause.type == FLWORClause.ClauseType.FOR_VALUE + || clause.type == FLWORClause.ClauseType.FOR_KEY_VALUE) { + final BindingExpression bind = (BindingExpression)expr; + bind.setVariable(clause.varName); + bind.setSequenceType(clause.sequenceType); + bind.setInputSequence(clause.inputSequence); + if (clause.type == FLWORClause.ClauseType.FOR) { + ((ForExpr) bind).setPositionalVariable(clause.posVar); + if (clause.scoreVar != null) { + ((ForExpr) bind).setScoreVariable(clause.scoreVar); + } + } else if (clause.type == FLWORClause.ClauseType.FOR_MEMBER) { + ((ForMemberExpr) bind).setPositionalVariable(clause.posVar); + } else if (clause.type == FLWORClause.ClauseType.FOR_KEY + || clause.type == FLWORClause.ClauseType.FOR_VALUE + || clause.type == FLWORClause.ClauseType.FOR_KEY_VALUE) { + ((ForKeyValueExpr) bind).setPositionalVariable(clause.posVar); + if (clause.valueVarName != null) { + ((ForKeyValueExpr) bind).setValueVariable(clause.valueVarName); + if (clause.valueSequenceType != null) { + ((ForKeyValueExpr) bind).setValueSequenceType(clause.valueSequenceType); + } + } + } + if (clause.type == FLWORClause.ClauseType.LET && clause.scoreVar != null) { + ((LetExpr) bind).setScoreBinding(true); + } + } else if (clause.type == FLWORClause.ClauseType.GROUPBY) { + if (clause.groupSpecs != null) { + GroupSpec specs[] = new GroupSpec[clause.groupSpecs.size()]; + int k = 0; + for (GroupSpec groupSpec 
: clause.groupSpecs) { + specs[k++]= groupSpec; + } + ((GroupByClause)expr).setGroupSpecs(specs); + } + } + if (!(action instanceof FLWORClause)) + expr.setReturnExpression(new DebuggableExpression(action)); + else { + expr.setReturnExpression(action); + ((FLWORClause)action).setPreviousClause(expr); } - step=expr [left] - step=expr [right] - ) - { - Union union= new Union(context, left, right); - path.add(union); - step = union; - } - | - // intersections: - #( "intersect" - { - PathExpr left = new PathExpr(context); - left.setASTNode(expr_AST_in); - PathExpr right = new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - Intersect intersect = new Intersect(context, left, right); - path.add(intersect); - step = intersect; - } - | - #( "except" - { - PathExpr left = new PathExpr(context); - left.setASTNode(expr_AST_in); + action= expr; + } - PathExpr right = new PathExpr(context); - right.setASTNode(expr_AST_in); - } - step=expr [left] - step=expr [right] - ) - { - Except intersect = new Except(context, left, right); - path.add(intersect); - step = intersect; - } - | - // absolute path expression starting with a / - #( - ABSOLUTE_SLASH - { - RootNode root= new RootNode(context); - path.add(root); - } - ( step=expr [path] )? - ) - | - // absolute path expression starting with // - #( - ABSOLUTE_DSLASH - { - RootNode root= new RootNode(context); - path.add(root); + path.add(action); + step = action; } - ( - step=expr [path] - { - if (step instanceof LocationStep) { - LocationStep s= (LocationStep) step; - if (s.getAxis() == Constants.ATTRIBUTE_AXIS || - (s.getTest().getType() == Type.ATTRIBUTE && s.getAxis() == Constants.CHILD_AXIS)) - // combines descendant-or-self::node()/attribute:* - s.setAxis(Constants.DESCENDANT_ATTRIBUTE_AXIS); - else { - s.setAxis(Constants.DESCENDANT_SELF_AXIS); - s.setAbbreviated(true); - } - } else - step.setPrimaryAxis(Constants.DESCENDANT_SELF_AXIS); - } - )? 
) | - // range expression: to + // instance of: #( - "to" + "instance" { - PathExpr start= new PathExpr(context); - start.setASTNode(expr_AST_in); - - PathExpr end= new PathExpr(context); - end.setASTNode(expr_AST_in); - - List args= new ArrayList(2); - args.add(start); - args.add(end); + PathExpr expr = new PathExpr(context); + expr.setASTNode(exprFlowControl_AST_in); + SequenceType type= new SequenceType(); } - step=expr [start] - step=expr [end] + step=expr [expr] + sequenceType [type] { - RangeExpression range= new RangeExpression(context); - range.setASTNode(expr_AST_in); - range.setArguments(args); - path.addPath(range); - step = range; + step = new InstanceOfExpression(context, expr, type); + step.setASTNode(exprFlowControl_AST_in); + path.add(step); } ) - | - step=generalComp [path] - | - step=valueComp [path] - | - step=nodeComp [path] - | - step=primaryExpr [path] - | - step=pathExpr [path] - | - step=extensionExpr [path] - | - step=numericExpr [path] - | - step=updateExpr [path] ; /** @@ -2495,14 +3530,67 @@ throws PermissionDeniedException, EXistException, XPathException step=postfixExpr [step] { path.add(step); } | + ql:QNAME_LITERAL + { + final String qlText = ql.getText(); + final QName qlQName; + try { + qlQName = QName.parse(staticContext, qlText); + } catch (final IllegalQNameException iqe) { + throw new XPathException(ql.getLine(), ql.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + qlText); + } + step = new LiteralValue(context, new QNameValue(context, qlQName)); + step.setASTNode(ql); + } + step=postfixExpr [step] + { path.add(step); } + | step=inlineFunctionDecl [path] step=postfixExpr [step] { path.add(step); } | + step=focusFunctionDecl [path] + step=postfixExpr [step] + { path.add(step); } + | step = lookup [null] step=postfixExpr [step] { path.add(step); } | + #( + stAST:STRING_TEMPLATE + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(stAST, ErrorCodes.XPST0003, + "String templates require 
xquery version \"4.0\""); + } + StringConstructor st = new StringConstructor(context); + st.setASTNode(stAST); + } + ( + stContent:STRING_TEMPLATE_CONTENT + { + // Unescape {{ -> {, }} -> }, `` -> ` + String raw = stContent.getText(); + raw = raw.replace("{{", "{").replace("}}", "}").replace("``", "`"); + st.addContent(raw); + } + | + { + PathExpr stInterpolation = new PathExpr(context); + stInterpolation.setASTNode(primaryExpr_AST_in); + } + expr[stInterpolation] + { + st.addInterpolation(stInterpolation.simplify()); + } + )* + { + path.add(st); + step = st; + } + ) + | #( scAST:STRING_CONSTRUCTOR_START { @@ -2764,6 +3852,72 @@ throws PermissionDeniedException, EXistException, XPathException | #( "schema-element" EQNAME ) )? + // === XQuery 4.0 JNode Kind Tests in path steps (version-gated) === + | + jn1:JSON_NODE_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn1, ErrorCodes.XPST0003, "json-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_NODE); ast = jn1; + } + | + jn2:JSON_OBJECT_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn2, ErrorCodes.XPST0003, "object-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_OBJECT); ast = jn2; + } + | + jn3:JSON_ARRAY_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn3, ErrorCodes.XPST0003, "array-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_ARRAY); ast = jn3; + } + | + jn4:JSON_STRING_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn4, ErrorCodes.XPST0003, "string-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_STRING); ast = jn4; + } + | + jn5:JSON_NUMBER_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn5, ErrorCodes.XPST0003, "number-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_NUMBER); ast = jn5; 
+ } + | + jn6:JSON_BOOLEAN_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn6, ErrorCodes.XPST0003, "boolean-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_BOOLEAN); ast = jn6; + } + | + jn7:JSON_NULL_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn7, ErrorCodes.XPST0003, "null-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_NULL); ast = jn7; + } + | + jn8:JSON_MEMBER_TEST + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(jn8, ErrorCodes.XPST0003, "member-node() requires xquery version \"4.0\""); + } + test = new TypeTest(Type.JSON_MEMBER); ast = jn8; + } + // === End XQuery 4.0 JNode Kind Tests === ) { step= new LocationStep(context, axis, test); @@ -2948,6 +4102,9 @@ throws PermissionDeniedException, EXistException, XPathException | #( SLASH step=expr [path] + { + path.setHasSlash(); + } ( rightStep=expr [path] { @@ -2972,6 +4129,9 @@ throws PermissionDeniedException, EXistException, XPathException | #( DSLASH step=expr [path] + { + path.setHasSlash(); + } ( rightStep=expr [path] { @@ -2984,6 +4144,13 @@ throws PermissionDeniedException, EXistException, XPathException rs.setAxis(Constants.DESCENDANT_AXIS); } else if (rs.getAxis() == Constants.SELF_AXIS) { rs.setAxis(Constants.DESCENDANT_SELF_AXIS); + } else if (rs.getAxis() <= Constants.PRECEDING_SIBLING_AXIS) { + // Reverse axis: cannot merge with descendant-or-self, + // insert explicit descendant-or-self::node() step before the reverse axis step + LocationStep descStep = new LocationStep(context, Constants.DESCENDANT_SELF_AXIS, new TypeTest(Type.NODE)); + descStep.setAbbreviated(true); + path.replaceLastExpression(descStep); + path.add(rightStep); } else { rs.setAxis(Constants.DESCENDANT_SELF_AXIS); rs.setAbbreviated(true); @@ -3032,21 +4199,30 @@ throws XPathException | i:INTEGER_LITERAL { - step= new LiteralValue(context, new 
IntegerValue(i.getText())); + String itext = i.getText().replace("_", ""); + java.math.BigInteger intVal; + if (itext.startsWith("0x") || itext.startsWith("0X")) { + intVal = new java.math.BigInteger(itext.substring(2), 16); + } else if (itext.startsWith("0b") || itext.startsWith("0B")) { + intVal = new java.math.BigInteger(itext.substring(2), 2); + } else { + intVal = new java.math.BigInteger(itext); + } + step= new LiteralValue(context, new IntegerValue(intVal)); step.setASTNode(i); } | ( dec:DECIMAL_LITERAL { - step= new LiteralValue(context, new DecimalValue(dec.getText())); + step= new LiteralValue(context, new DecimalValue(dec.getText().replace("_", ""))); step.setASTNode(dec); } | dbl:DOUBLE_LITERAL { step= new LiteralValue(context, - new DoubleValue(Double.parseDouble(dbl.getText()))); + new DoubleValue(Double.parseDouble(dbl.getText().replace("_", "")))); step.setASTNode(dbl); } ) @@ -3145,6 +4321,21 @@ throws PermissionDeniedException, EXistException, XPathException ( step = lookup [step] | + step = filterExprAM [step] + | + #( + fam:FILTER_AM + { + PathExpr filterPred = new PathExpr(context); + filterPred.setASTNode(postfixExpr_AST_in); + } + expr [filterPred] + { + step = new FilterExprAM(context, step, filterPred.simplify()); + step.setASTNode(fam); + } + ) + | #( PREDICATE { @@ -3206,6 +4397,24 @@ throws PermissionDeniedException, EXistException, XPathException ) ; +// === XQuery 4.0: Array/Map Filter Expression (?[expr]) === +filterExprAM [Expression leftExpr] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +: + #( + filterAM:FILTER_AM + { + PathExpr predExpr = new PathExpr(context); + } + ( expr [predExpr] )+ + { + step = new FilterExprAM(context, leftExpr, predExpr); + step.setASTNode(filterAM); + } + ) + ; + lookup [Expression leftExpr] returns [Expression step] throws PermissionDeniedException, EXistException, XPathException @@ -3220,6 +4429,55 @@ throws PermissionDeniedException, EXistException, 
XPathException ( pos:INTEGER_VALUE { position = Integer.parseInt(pos.getText()); } | + // XQ4: string literal as key selector (?"first value") + strKey:STRING_LITERAL + { + lookupExpr.add(new LiteralValue(context, new StringValue(strKey.getText()))); + } + | + // XQ4: decimal literal as key selector (?1.2) + decKey:DECIMAL_LITERAL + { + lookupExpr.add(new LiteralValue(context, new DecimalValue(decKey.getText().replace("_", "")))); + } + | + // XQ4: double literal as key selector (?1.2e0) + dblKey:DOUBLE_LITERAL + { + lookupExpr.add(new LiteralValue(context, new DoubleValue(Double.parseDouble(dblKey.getText().replace("_", ""))))); + } + | + // XQ4: variable reference as key selector (?$var) + varKey:VARIABLE_REF + { + final QName varQn; + try { + varQn = QName.parse(staticContext, varKey.getText(), null); + } catch (final IllegalQNameException iqe) { + throw new XPathException(varKey.getLine(), varKey.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + varKey.getText()); + } + lookupExpr.add(new VariableReference(context, varQn)); + } + | + // XQ4: context item as key selector (?.) + ctxKey:SELF + { + lookupExpr.add(new ContextItemExpression(context)); + } + | + // XQ4: QName literal as key selector (?#name) + qnKey:QNAME_LITERAL + { + final String qnText = qnKey.getText(); + final QName qnQName; + try { + qnQName = QName.parse(staticContext, qnText); + } catch (final IllegalQNameException iqe) { + throw new XPathException(qnKey.getLine(), qnKey.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + qnText); + } + lookupExpr.add(new LiteralValue(context, new QNameValue(context, qnQName))); + } + | ( expr [lookupExpr] )+ )? 
{ @@ -3262,6 +4520,33 @@ throws PermissionDeniedException, EXistException, XPathException isPartial = true; } | + #( + kw:KEYWORD_ARG + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(kw, ErrorCodes.XPST0003, + "Keyword arguments require xquery version \"4.0\""); + } + } + ( + QUESTION { + // Keyword argument with placeholder value: name := ? + params.add(new KeywordArgumentExpression(context, kw.getText(), + new Function.Placeholder(context))); + isPartial = true; + } + | + { + PathExpr kwExpr = new PathExpr(context); + kwExpr.setASTNode(functionCall_AST_in); + } + expr [kwExpr] + { + params.add(new KeywordArgumentExpression(context, kw.getText(), kwExpr)); + } + ) + ) + | expr [pathExpr] { params.add(pathExpr); } ) )* @@ -3296,7 +4581,7 @@ throws PermissionDeniedException, EXistException, XPathException } catch (final IllegalQNameException iqe) { throw new XPathException(name.getLine(), name.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + name.getText()); } - NamedFunctionReference ref = new NamedFunctionReference(context, qname, Integer.parseInt(arity.getText())); + NamedFunctionReference ref = new NamedFunctionReference(context, qname, Integer.parseInt(arity.getText().replace("_", ""))); step = ref; } ) @@ -3318,17 +4603,33 @@ throws PermissionDeniedException, EXistException | "descendant-or-self" { axis= Constants.DESCENDANT_SELF_AXIS; } | + "following-sibling-or-self" { axis= Constants.FOLLOWING_SIBLING_OR_SELF_AXIS; } + | "following-sibling" { axis= Constants.FOLLOWING_SIBLING_AXIS; } | + "following-or-self" { axis= Constants.FOLLOWING_OR_SELF_AXIS; } + | "following" { axis= Constants.FOLLOWING_AXIS; } | + "preceding-sibling-or-self" { axis= Constants.PRECEDING_SIBLING_OR_SELF_AXIS; } + | "preceding-sibling" { axis= Constants.PRECEDING_SIBLING_AXIS; } | + "preceding-or-self" { axis= Constants.PRECEDING_OR_SELF_AXIS; } + | "preceding" { axis= Constants.PRECEDING_AXIS; } | "ancestor" { axis= 
Constants.ANCESTOR_AXIS; } | "ancestor-or-self" { axis= Constants.ANCESTOR_SELF_AXIS; } + | + "following-or-self" { axis= Constants.FOLLOWING_OR_SELF_AXIS; } + | + "preceding-or-self" { axis= Constants.PRECEDING_OR_SELF_AXIS; } + | + "following-sibling-or-self" { axis= Constants.FOLLOWING_SIBLING_OR_SELF_AXIS; } + | + "preceding-sibling-or-self" { axis= Constants.PRECEDING_SIBLING_OR_SELF_AXIS; } ; valueComp [PathExpr path] @@ -3394,130 +4695,737 @@ throws PermissionDeniedException, EXistException, XPathException ) | #( - ge:"ge" step=expr [left] - step=expr [right] + ge:"ge" step=expr [left] + step=expr [right] + { + step= new ValueComparison(context, left, right, Comparison.GTEQ); + step.setASTNode(ge); + path.add(step); + } + ) + ; + +generalComp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step= null; + + PathExpr left= new PathExpr(context); + left.setASTNode(generalComp_AST_in); + + PathExpr right= new PathExpr(context); + right.setASTNode(generalComp_AST_in); + +} +: + #( + eq:EQ step=expr [left] + step=expr [right] + { + step= new GeneralComparison(context, left, right, Comparison.EQ); + step.setASTNode(eq); + path.add(step); + } + ) + | + #( + neq:NEQ step=expr [left] + step=expr [right] + { + step= new GeneralComparison(context, left, right, Comparison.NEQ); + step.setASTNode(neq); + path.add(step); + } + ) + | + #( + lt:LT step=expr [left] + step=expr [right] + { + step= new GeneralComparison(context, left, right, Comparison.LT); + step.setASTNode(lt); + path.add(step); + } + ) + | + #( + lteq:LTEQ step=expr [left] + step=expr [right] + { + step= new GeneralComparison(context, left, right, Comparison.LTEQ); + step.setASTNode(lteq); + path.add(step); + } + ) + | + #( + gt:GT step=expr [left] + step=expr [right] + { + step= new GeneralComparison(context, left, right, Comparison.GT); + step.setASTNode(gt); + path.add(step); + } + ) + | + #( + gteq:GTEQ step=expr [left] + step=expr [right] 
+ { + step= new GeneralComparison(context, left, right, Comparison.GTEQ); + step.setASTNode(gteq); + path.add(step); + } + ) + ; + +nodeComp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step= null; + + PathExpr left= new PathExpr(context); + left.setASTNode(nodeComp_AST_in); + + PathExpr right= new PathExpr(context); + right.setASTNode(nodeComp_AST_in); + +} +: + #( + is:"is" step=expr [left] step=expr [right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.IS); + step.setASTNode(is); + path.add(step); + } + ) + | + #( + before:BEFORE step=expr[left] step=expr[right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.BEFORE); + step.setASTNode(before); + path.add(step); + } + ) + | + #( + after:AFTER step=expr[left] step=expr[right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.AFTER); + step.setASTNode(after); + path.add(step); + } + ) + | + // XQuery 4.0 node comparison operators + #( + isnot:"is-not" step=expr[left] step=expr[right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.IS_NOT); + step.setASTNode(isnot); + path.add(step); + } + ) + | + #( + foi:"follows-or-is" step=expr[left] step=expr[right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.FOLLOWS_OR_IS); + step.setASTNode(foi); + path.add(step); + } + ) + | + #( + poi:"precedes-or-is" step=expr[left] step=expr[right] + { + step = new NodeComparison(context, left, right, NodeComparisonOperator.PRECEDES_OR_IS); + step.setASTNode(poi); + path.add(step); + } + ) + ; + +// === Full Text (W3C XQuery and XPath Full Text 3.0) === + +ftContainsExpr [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + PathExpr source = new PathExpr(context); + source.setASTNode(ftContainsExpr_AST_in); + FTSelection ftSel = null; + Expression 
ignoreExpr = null; +} +: + #( + ft:FT_CONTAINS + step=expr [source] + ftSel=ftSelectionExpr + ( ignoreExpr=ftIgnoreExpr )? + { + FTContainsExpr ftContains = new FTContainsExpr(context); + ftContains.setASTNode(ft); + ftContains.setSearchSource(source); + ftContains.setFTSelection(ftSel); + ftContains.setIgnoreExpr(ignoreExpr); + path.add(ftContains); + step = ftContains; + } + ) + ; + +ftSelectionExpr +returns [FTSelection ftSel] +throws PermissionDeniedException, EXistException, XPathException +{ + ftSel = new FTSelection(context); + ftSel.setASTNode(ftSelectionExpr_AST_in); + Expression ftOr = null; + Expression posFilter = null; +} +: + #( + FT_SELECTION + ftOr=ftOrExpr + { ftSel.setFTOr(ftOr); } + ( posFilter=ftPosFilterExpr { ftSel.addPosFilter(posFilter); } )* + ) + ; + +ftOrExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + Expression operand = null; + FTOr ftOr = null; +} +: + #( + FT_OR + { + ftOr = new FTOr(context); + ftOr.setASTNode(ftOrExpr_AST_in); + } + ( operand=ftAndExpr { ftOr.addOperand(operand); } )+ + { step = ftOr; } + ) + | + step=ftAndExpr + ; + +ftAndExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + Expression operand = null; + FTAnd ftAnd = null; +} +: + #( + FT_AND + { + ftAnd = new FTAnd(context); + ftAnd.setASTNode(ftAndExpr_AST_in); + } + ( operand=ftMildNotExpr { ftAnd.addOperand(operand); } )+ + { step = ftAnd; } + ) + | + step=ftMildNotExpr + ; + +ftMildNotExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + Expression operand = null; + FTMildNot ftMildNot = null; +} +: + #( + FT_MILD_NOT + { + ftMildNot = new FTMildNot(context); + ftMildNot.setASTNode(ftMildNotExpr_AST_in); + } + ( operand=ftUnaryNotExpr { ftMildNot.addOperand(operand); } )+ + { step = ftMildNot; } + ) + | + step=ftUnaryNotExpr + ; + +ftUnaryNotExpr +returns 
[Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + Expression operand = null; +} +: + #( + FT_UNARY_NOT + operand=ftPrimaryWithOptionsExpr + { + FTUnaryNot ftNot = new FTUnaryNot(context); + ftNot.setASTNode(ftUnaryNotExpr_AST_in); + ftNot.setOperand(operand); + step = ftNot; + } + ) + | + step=ftPrimaryWithOptionsExpr + ; + +ftPrimaryWithOptionsExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + Expression primary = null; + FTMatchOptions matchOpts = null; + Expression weightExpr = null; +} +: + #( + FT_PRIMARY_WITH_OPTIONS + primary=ftPrimaryExpr + ( matchOpts=ftMatchOptionsExpr )? + ( weightExpr=ftWeightExpr )? { - step= new ValueComparison(context, left, right, Comparison.GTEQ); - step.setASTNode(ge); - path.add(step); + FTPrimaryWithOptions pwo = new FTPrimaryWithOptions(context); + pwo.setASTNode(ftPrimaryWithOptionsExpr_AST_in); + pwo.setPrimary(primary); + pwo.setMatchOptions(matchOpts); + pwo.setWeight(weightExpr); + step = pwo; } ) + | + step=ftPrimaryExpr ; -generalComp [PathExpr path] +ftPrimaryExpr returns [Expression step] throws PermissionDeniedException, EXistException, XPathException { - step= null; - - PathExpr left= new PathExpr(context); - left.setASTNode(generalComp_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(generalComp_AST_in); + step = null; +} +: + step=ftWordsExpr + | + step=ftSelectionExpr + | + step=ftExtensionSelectionExpr + ; +ftWordsExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + PathExpr wordsValue = new PathExpr(context); + FTWords.AnyallMode mode = FTWords.AnyallMode.ANY; + FTTimes ftTimes = null; } : #( - eq:EQ step=expr [left] - step=expr [right] + FT_WORDS + step=expr [wordsValue] + ( aa:FT_ANYALL_OPTION { mode = FTWords.AnyallMode.fromString(aa.getText()); } )? + ( ftTimes=ftTimesExpr )? 
{ - step= new GeneralComparison(context, left, right, Comparison.EQ); - step.setASTNode(eq); - path.add(step); + FTWords ftWords = new FTWords(context); + ftWords.setASTNode(ftWordsExpr_AST_in); + ftWords.setWordsValue(wordsValue); + ftWords.setMode(mode); + ftWords.setFTTimes(ftTimes); + step = ftWords; } ) - | + ; + +// XQFT 3.0 3.4.8: FTExtensionSelection -- pragmas wrapping an optional FTSelection. +// Pragmas are parsed but ignored (no FT-specific pragmas are recognized). +// If the body is empty, XQST0079 is raised. If the body is present, +// the pragmas are discarded and the inner FTSelection is returned. +// Namespace prefix validation is performed via context.getPragma(). +ftExtensionSelectionExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + FTSelection innerSel = null; +} +: #( - neq:NEQ step=expr [left] - step=expr [right] + FT_EXTENSION_SELECTION + // Validate pragma namespace prefixes (raises XPST0081 for undeclared prefixes). + // We don't recognize any FT-specific pragmas, so the result is always null. + ( + #( p:PRAGMA ( c:PRAGMA_END )? ) + { + // Validates namespace prefix; throws XPST0081 if prefix is undeclared + context.getPragma(p.getText(), c != null ? c.getText() : ""); + } + )* + ( innerSel=ftSelectionExpr )? 
{ - step= new GeneralComparison(context, left, right, Comparison.NEQ); - step.setASTNode(neq); - path.add(step); + if (innerSel == null) { + // XQST0079: all pragmas unrecognized and no fallback body + throw new XPathException(ftExtensionSelectionExpr_AST_in, + ErrorCodes.XQST0079, + "No recognized pragmas in FTExtensionSelection and no fallback expression"); + } + step = innerSel; } ) - | + ; + +ftTimesExpr +returns [FTTimes step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + FTRange range = null; +} +: #( - lt:LT step=expr [left] - step=expr [right] + FT_TIMES + range=ftRangeExpr { - step= new GeneralComparison(context, left, right, Comparison.LT); - step.setASTNode(lt); - path.add(step); + step = new FTTimes(context); + step.setASTNode(ftTimesExpr_AST_in); + step.setRange(range); } ) - | + ; + +ftRangeExpr +returns [FTRange step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = new FTRange(context); + PathExpr e1 = new PathExpr(context); + PathExpr e2 = new PathExpr(context); + Expression tmp = null; +} +: #( - lteq:LTEQ step=expr [left] - step=expr [right] + r:FT_RANGE { - step= new GeneralComparison(context, left, right, Comparison.LTEQ); - step.setASTNode(lteq); - path.add(step); + String rangeMode = r.getText(); + switch (rangeMode) { + case "exactly": step.setMode(FTRange.RangeMode.EXACTLY); break; + case "at least": step.setMode(FTRange.RangeMode.AT_LEAST); break; + case "at most": step.setMode(FTRange.RangeMode.AT_MOST); break; + case "from": step.setMode(FTRange.RangeMode.FROM_TO); break; + } } + tmp=expr [e1] { step.setExpr1(e1); } + ( tmp=expr [e2] { step.setExpr2(e2); } )? 
) + ; + +ftPosFilterExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +} +: + o:FT_ORDER + { + FTOrder order = new FTOrder(context); + order.setASTNode(o); + step = order; + } | - #( - gt:GT step=expr [left] - step=expr [right] - { - step= new GeneralComparison(context, left, right, Comparison.GT); - step.setASTNode(gt); - path.add(step); + step=ftWindowExpr + | + step=ftDistanceExpr + | + s:FT_SCOPE + { + FTScope scope = new FTScope(context); + scope.setASTNode(s); + String scopeText = s.getText(); + if (scopeText.startsWith("same")) { + scope.setScopeType(FTScope.ScopeType.SAME); + } else { + scope.setScopeType(FTScope.ScopeType.DIFFERENT); } - ) + if (scopeText.endsWith("sentence")) { + scope.setBigUnit(FTScope.BigUnit.SENTENCE); + } else { + scope.setBigUnit(FTScope.BigUnit.PARAGRAPH); + } + step = scope; + } | + c:FT_CONTENT + { + FTContent content = new FTContent(context); + content.setASTNode(c); + switch (c.getText()) { + case "at start": content.setContentType(FTContent.ContentType.AT_START); break; + case "at end": content.setContentType(FTContent.ContentType.AT_END); break; + case "entire content": content.setContentType(FTContent.ContentType.ENTIRE_CONTENT); break; + } + step = content; + } + ; + +ftWindowExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + PathExpr winExpr = new PathExpr(context); + Expression tmp = null; +} +: #( - gteq:GTEQ step=expr [left] - step=expr [right] + w:FT_WINDOW + tmp=expr [winExpr] + u1:. 
// ftUnit token (words|sentences|paragraphs) { - step= new GeneralComparison(context, left, right, Comparison.GTEQ); - step.setASTNode(gteq); - path.add(step); + FTWindow win = new FTWindow(context); + win.setASTNode(w); + win.setWindowExpr(winExpr); + win.setUnit(FTUnit.fromString(u1.getText())); + step = win; } ) ; -nodeComp [PathExpr path] +ftDistanceExpr returns [Expression step] throws PermissionDeniedException, EXistException, XPathException { - step= null; - - PathExpr left= new PathExpr(context); - left.setASTNode(nodeComp_AST_in); - - PathExpr right= new PathExpr(context); - right.setASTNode(nodeComp_AST_in); - + step = null; + FTRange range = null; } : #( - is:"is" step=expr [left] step=expr [right] + d:FT_DISTANCE + range=ftRangeExpr + u2:. // ftUnit token (words|sentences|paragraphs) { - step = new NodeComparison(context, left, right, NodeComparisonOperator.IS); - step.setASTNode(is); - path.add(step); + FTDistance dist = new FTDistance(context); + dist.setASTNode(d); + dist.setRange(range); + dist.setUnit(FTUnit.fromString(u2.getText())); + step = dist; } ) - | - #( - before:BEFORE step=expr[left] step=expr[right] + ; + +ftMatchOptionsExpr +returns [FTMatchOptions opts] +throws PermissionDeniedException, EXistException, XPathException +{ + opts = new FTMatchOptions(); +} +: + ( + co:FT_CASE_OPTION { - step = new NodeComparison(context, left, right, NodeComparisonOperator.BEFORE); - step.setASTNode(before); - path.add(step); + switch (co.getText()) { + case "sensitive": opts.setCaseMode(FTMatchOptions.CaseMode.SENSITIVE); break; + case "insensitive": opts.setCaseMode(FTMatchOptions.CaseMode.INSENSITIVE); break; + case "lowercase": opts.setCaseMode(FTMatchOptions.CaseMode.LOWERCASE); break; + case "uppercase": opts.setCaseMode(FTMatchOptions.CaseMode.UPPERCASE); break; + } } - ) - | - #( - after:AFTER step=expr[left] step=expr[right] + | + di:FT_DIACRITICS_OPTION { - step = new NodeComparison(context, left, right, NodeComparisonOperator.AFTER); - 
step.setASTNode(after); - path.add(step); + switch (di.getText()) { + case "sensitive": opts.setDiacriticsMode(FTMatchOptions.DiacriticsMode.SENSITIVE); break; + case "insensitive": opts.setDiacriticsMode(FTMatchOptions.DiacriticsMode.INSENSITIVE); break; + } } + | + st:FT_STEM_OPTION + { opts.setStemming("stemming".equals(st.getText())); } + | + #( FT_LANGUAGE_OPTION lang:STRING_LITERAL { opts.setLanguage(lang.getText()); } ) + | + wc:FT_WILDCARD_OPTION + { opts.setWildcards("wildcards".equals(wc.getText())); } + | + #( thesOpt:FT_THESAURUS_OPTION + { + final String thesText = thesOpt.getText(); + if ("no thesaurus".equals(thesText)) { + opts.setNoThesaurus(true); + } else { + opts.setNoThesaurus(false); + AST thesChild = thesOpt.getFirstChild(); + while (thesChild != null) { + if (thesChild.getType() == FT_THESAURUS_ID) { + final String idText = thesChild.getText(); + if ("default".equals(idText)) { + opts.getThesaurusIDs().add( + new FTMatchOptions.ThesaurusID(null, null, 0, Integer.MAX_VALUE)); + } else { + // "at" -- children: STRING_LITERAL (uri), optional STRING_LITERAL (rel), optional FT_RANGE + String uri = null; + String relationship = null; + int minLevels = 0; + int maxLevels = Integer.MAX_VALUE; + AST idChild = thesChild.getFirstChild(); + if (idChild != null && idChild.getType() == STRING_LITERAL) { + uri = idChild.getText(); + idChild = idChild.getNextSibling(); + } + if (idChild != null && idChild.getType() == STRING_LITERAL) { + relationship = idChild.getText(); + idChild = idChild.getNextSibling(); + } + if (idChild != null && idChild.getType() == FT_RANGE) { + final String rangeType = idChild.getText(); + AST rangeChild = idChild.getFirstChild(); + if (rangeChild != null) { + final int val1 = Integer.parseInt(rangeChild.getText()); + switch (rangeType) { + case "exactly": + minLevels = val1; + maxLevels = val1; + break; + case "at least": + minLevels = val1; + break; + case "at most": + maxLevels = val1; + break; + case "from": + minLevels = 
val1; + AST rangeChild2 = rangeChild.getNextSibling(); + if (rangeChild2 != null) { + maxLevels = Integer.parseInt(rangeChild2.getText()); + } + break; + } + } + } + if (uri != null) { + opts.getThesaurusIDs().add( + new FTMatchOptions.ThesaurusID(uri, relationship, minLevels, maxLevels)); + opts.getThesaurusURIs().add(uri); + } + } + } + thesChild = thesChild.getNextSibling(); + } + } + } + ) + | + #( sw:FT_STOP_WORD_OPTION + { + final String swText = sw.getText(); + if ("no stop words".equals(swText)) { + opts.setNoStopWords(true); + } else { + if ("stop words default".equals(swText)) { + opts.setUseDefaultStopWords(true); + } + // Walk children to extract stop words (union and except) + AST swChild = sw.getFirstChild(); + while (swChild != null) { + if (swChild.getType() == FT_STOP_WORDS_EXCEPT) { + // Except wrapper -- inner child is FT_STOP_WORDS + AST exceptInner = swChild.getFirstChild(); + while (exceptInner != null) { + if (exceptInner.getType() == FT_STOP_WORDS) { + final String swMode = exceptInner.getText(); + AST swWordNode = exceptInner.getFirstChild(); + while (swWordNode != null) { + if ("at".equals(swMode)) { + opts.getExceptStopWordURIs().add(swWordNode.getText()); + } else { + opts.getExceptInlineStopWords().add(swWordNode.getText()); + } + swWordNode = swWordNode.getNextSibling(); + } + } + exceptInner = exceptInner.getNextSibling(); + } + } else if (swChild.getType() == FT_STOP_WORDS) { + // Union stop words (primary or union-added) + final String swMode = swChild.getText(); + AST swWordNode = swChild.getFirstChild(); + while (swWordNode != null) { + if ("at".equals(swMode)) { + opts.getStopWordURIs().add(swWordNode.getText()); + } else { + opts.getInlineStopWords().add(swWordNode.getText()); + } + swWordNode = swWordNode.getNextSibling(); + } + } + swChild = swChild.getNextSibling(); + } + } + } + ( . )* + ) + | + #( eo:FT_EXTENSION_OPTION ( . )* + { + // XQFT 3.0 §4.10: validate namespace prefix for extension option. 
+ // Raises XPST0081 if the prefix is not declared. + final String extOptName = eo.getText(); + try { + QName.parse(staticContext, extOptName); + } catch (final QName.IllegalQNameException e) { + throw new XPathException(eo.getLine(), eo.getColumn(), + ErrorCodes.XPST0081, + "No namespace defined for prefix in extension option: " + extOptName); + } + } + ) + )+ + ; + +ftWeightExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + PathExpr weightPath = new PathExpr(context); +} +: + #( + FT_WEIGHT + step=expr [weightPath] + { step = weightPath; } + ) + ; + +ftIgnoreExpr +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; + PathExpr ignorePath = new PathExpr(context); +} +: + #( + FT_IGNORE_OPTION + step=expr [ignorePath] + { step = ignorePath; } ) ; @@ -3774,32 +5682,83 @@ throws PermissionDeniedException, EXistException, XPathException EnclosedExpr subexpr= new EnclosedExpr(context); subexpr.setASTNode(l); } - step=expr [subexpr] - { step= subexpr; } + step=expr [subexpr] + { step= subexpr; } + ) + ; + +arrowOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step= null; +}: + #( + arrowAST:ARROW_OP + { + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(arrowOp_AST_in); + } + expr [leftExpr] + { + ArrowOperator op = new ArrowOperator(context, leftExpr.simplify()); + op.setASTNode(arrowAST); + path.add(op); + step = op; + + PathExpr nameExpr = new PathExpr(context); + nameExpr.setASTNode(arrowOp_AST_in); + String name = null; + } + ( + eq:EQNAME + { name = eq.toString(); } + | + expr [nameExpr] + ) + { List params = new ArrayList(5); } + ( + { + PathExpr pathExpr = new PathExpr(context); + pathExpr.setASTNode(arrowOp_AST_in); + } + expr [pathExpr] { params.add(pathExpr.simplify()); } + )* + { + if (name == null) { + op.setArrowFunction(nameExpr, params); + } else { + 
op.setArrowFunction(name, params); + } + } ) ; -arrowOp [PathExpr path] +mappingArrowOp [PathExpr path] returns [Expression step] throws PermissionDeniedException, EXistException, XPathException { step= null; }: #( - arrowAST:ARROW_OP + mapArrowAST:MAPPING_ARROW_OP { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(mapArrowAST, ErrorCodes.XPST0003, + "The mapping arrow operator (=>!) requires xquery version \"4.0\""); + } PathExpr leftExpr = new PathExpr(context); - leftExpr.setASTNode(arrowOp_AST_in); + leftExpr.setASTNode(mappingArrowOp_AST_in); } expr [leftExpr] { - ArrowOperator op = new ArrowOperator(context, leftExpr.simplify()); - op.setASTNode(arrowAST); + MappingArrowOperator op = new MappingArrowOperator(context, leftExpr.simplify()); + op.setASTNode(mapArrowAST); path.add(op); step = op; PathExpr nameExpr = new PathExpr(context); - nameExpr.setASTNode(arrowOp_AST_in); + nameExpr.setASTNode(mappingArrowOp_AST_in); String name = null; } ( @@ -3812,7 +5771,7 @@ throws PermissionDeniedException, EXistException, XPathException ( { PathExpr pathExpr = new PathExpr(context); - pathExpr.setASTNode(arrowOp_AST_in); + pathExpr.setASTNode(mappingArrowOp_AST_in); } expr [pathExpr] { params.add(pathExpr.simplify()); } )* @@ -3826,6 +5785,105 @@ throws PermissionDeniedException, EXistException, XPathException ) ; +pipelineOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + pipeAST:PIPELINE_OP + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(pipeAST, ErrorCodes.XPST0003, + "The pipeline operator (->) requires xquery version \"4.0\""); + } + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(pipelineOp_AST_in); + } + expr [leftExpr] + { + PathExpr rightExpr = new PathExpr(context); + rightExpr.setASTNode(pipelineOp_AST_in); + } + expr [rightExpr] + { + step = new PipelineExpression(context, leftExpr.simplify(), 
rightExpr.simplify()); + step.setASTNode(pipeAST); + path.add(step); + } + ) + ; + +methodCallOp [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + mcAST:METHOD_CALL_OP + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(mcAST, ErrorCodes.XPST0003, + "The method call operator (=?>) requires xquery version \"4.0\""); + } + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(methodCallOp_AST_in); + } + expr [leftExpr] + mn:NCNAME + { + MethodCallOperator op = new MethodCallOperator(context, leftExpr.simplify()); + op.setASTNode(mcAST); + path.add(op); + step = op; + + List params = new ArrayList(5); + } + ( + { + PathExpr pathExpr = new PathExpr(context); + pathExpr.setASTNode(methodCallOp_AST_in); + } + expr [pathExpr] { params.add(pathExpr.simplify()); } + )* + { + op.setMethod(mn.getText(), params); + } + ) + ; + +otherwiseExpr [PathExpr path] +returns [Expression step] +throws PermissionDeniedException, EXistException, XPathException +{ + step = null; +}: + #( + owAST:LITERAL_otherwise + { + if (staticContext.getXQueryVersion() < 40) { + throw new XPathException(owAST, ErrorCodes.XPST0003, + "The 'otherwise' operator requires xquery version \"4.0\""); + } + PathExpr leftExpr = new PathExpr(context); + leftExpr.setASTNode(otherwiseExpr_AST_in); + } + expr [leftExpr] + { + PathExpr rightExpr = new PathExpr(context); + rightExpr.setASTNode(otherwiseExpr_AST_in); + } + expr [rightExpr] + { + step = new OtherwiseExpression(context, leftExpr.simplify(), rightExpr.simplify()); + step.setASTNode(owAST); + path.add(step); + } + ) + ; + typeCastExpr [PathExpr path] returns [Expression step] throws PermissionDeniedException, EXistException, XPathException @@ -3840,25 +5898,72 @@ throws PermissionDeniedException, EXistException, XPathException Cardinality cardinality= Cardinality.EXACTLY_ONE; } step=expr [expr] - t:ATOMIC_TYPE ( - QUESTION - { 
cardinality= Cardinality.ZERO_OR_ONE; } - )? - { - try { - QName qn= QName.parse(staticContext, t.getText()); - int code= Type.getType(qn); - CastExpression castExpr= new CastExpression(context, expr, code, cardinality); + #( + CHOICE_TYPE + { + List choiceTypes = new ArrayList(); + } + ( + ct:ATOMIC_TYPE + { + try { + QName qn = QName.parse(staticContext, ct.getText()); + choiceTypes.add(Type.getType(qn)); + } catch (final XPathException e) { + throw new XPathException(ct.getLine(), ct.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + ct.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(ct.getLine(), ct.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + ct.getText()); + } + } + )+ + ) + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + int[] types = new int[choiceTypes.size()]; + for (int ci = 0; ci < choiceTypes.size(); ci++) { types[ci] = choiceTypes.get(ci); } + ChoiceCastExpression castExpr = new ChoiceCastExpression(context, expr, types, cardinality); castExpr.setASTNode(castAST); path.add(castExpr); step = castExpr; - } catch (final XPathException e) { - throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t.getText()); - } catch (final IllegalQNameException e) { - throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t.getText()); } - } + | + t:ATOMIC_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? 
+ { + try { + QName qn= QName.parse(staticContext, t.getText()); + int code= Type.getType(qn); + CastExpression castExpr= new CastExpression(context, expr, code, cardinality); + castExpr.setASTNode(castAST); + path.add(castExpr); + step = castExpr; + } catch (final XPathException e) { + throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(t.getLine(), t.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t.getText()); + } + } + | + enumCast:ENUM_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + String[] enumVals = enumCast.getText().split(",", -1); + EnumCastExpression enumCastExpr = new EnumCastExpression(context, expr, enumVals, cardinality, false); + enumCastExpr.setASTNode(castAST); + path.add(enumCastExpr); + step = enumCastExpr; + } + ) ) | #( @@ -3869,25 +5974,72 @@ throws PermissionDeniedException, EXistException, XPathException Cardinality cardinality= Cardinality.EXACTLY_ONE; } step=expr [expr] - t2:ATOMIC_TYPE ( - QUESTION - { cardinality= Cardinality.ZERO_OR_ONE; } - )? - { - try { - QName qn= QName.parse(staticContext, t2.getText()); - int code= Type.getType(qn); - CastableExpression castExpr= new CastableExpression(context, expr, code, cardinality); - castExpr.setASTNode(castAST); + #( + CHOICE_TYPE + { + List choiceTypes2 = new ArrayList(); + } + ( + ct2:ATOMIC_TYPE + { + try { + QName qn = QName.parse(staticContext, ct2.getText()); + choiceTypes2.add(Type.getType(qn)); + } catch (final XPathException e) { + throw new XPathException(ct2.getLine(), ct2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + ct2.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(ct2.getLine(), ct2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + ct2.getText()); + } + } + )+ + ) + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? 
+ { + int[] types2 = new int[choiceTypes2.size()]; + for (int ci = 0; ci < choiceTypes2.size(); ci++) { types2[ci] = choiceTypes2.get(ci); } + ChoiceCastableExpression castExpr = new ChoiceCastableExpression(context, expr, types2, cardinality); + castExpr.setASTNode(castableAST); path.add(castExpr); step = castExpr; - } catch (final XPathException e) { - throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t2.getText()); - } catch (final IllegalQNameException e) { - throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t2.getText()); } - } + | + t2:ATOMIC_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? + { + try { + QName qn= QName.parse(staticContext, t2.getText()); + int code= Type.getType(qn); + CastableExpression castExpr= new CastableExpression(context, expr, code, cardinality); + castExpr.setASTNode(castableAST); + path.add(castExpr); + step = castExpr; + } catch (final XPathException e) { + throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0051, "Unknown simple type " + t2.getText()); + } catch (final IllegalQNameException e) { + throw new XPathException(t2.getLine(), t2.getColumn(), ErrorCodes.XPST0081, "No namespace defined for prefix " + t2.getText()); + } + } + | + enumCastable:ENUM_TYPE + ( + QUESTION + { cardinality= Cardinality.ZERO_OR_ONE; } + )? 
+ { + String[] enumVals2 = enumCastable.getText().split(",", -1); + EnumCastExpression enumCastExpr2 = new EnumCastExpression(context, expr, enumVals2, cardinality, true); + enumCastExpr2.setASTNode(castableAST); + path.add(enumCastExpr2); + step = enumCastExpr2; + } + ) ) ; @@ -3929,6 +6081,10 @@ throws XPathException, PermissionDeniedException, EXistException } ; +// === Legacy update (DEPRECATED - use W3C XQuery Update Facility 3.0 syntax instead) === +// To remove legacy update support, delete this rule and the updateExpr +// alternative in the expr dispatch above. + updateExpr [PathExpr path] returns [Expression step] throws XPathException, PermissionDeniedException, EXistException @@ -3936,6 +6092,8 @@ throws XPathException, PermissionDeniedException, EXistException }: #( updateAST:"update" { + context.markLegacyUpdate(updateAST); + PathExpr p1 = new PathExpr(context); p1.setASTNode(updateExpr_AST_in); @@ -3984,6 +6142,179 @@ throws XPathException, PermissionDeniedException, EXistException ) ; +// === W3C XQuery Update Facility 3.0 tree walker rules === + +xqufInsertExpr [PathExpr path] +returns [Expression step] +throws XPathException, PermissionDeniedException, EXistException +{ +}: + #( insertAST:"insert" + { + context.markXQUFUpdate(insertAST); + + PathExpr sourceExpr = new PathExpr(context); + sourceExpr.setASTNode(xqufInsertExpr_AST_in); + + PathExpr targetExpr = new PathExpr(context); + targetExpr.setASTNode(xqufInsertExpr_AST_in); + + int mode = XQUFInsertExpr.INSERT_INTO; + } + step=expr [sourceExpr] + ( + "first" { mode = XQUFInsertExpr.INSERT_INTO_AS_FIRST; } + | + "last" { mode = XQUFInsertExpr.INSERT_INTO_AS_LAST; } + | + "into" { mode = XQUFInsertExpr.INSERT_INTO; } + | + "before" { mode = XQUFInsertExpr.INSERT_BEFORE; } + | + "after" { mode = XQUFInsertExpr.INSERT_AFTER; } + ) + step=expr [targetExpr] + { + XQUFInsertExpr ins = new XQUFInsertExpr(context, sourceExpr, targetExpr, mode); + ins.setASTNode(insertAST); + path.add(ins); + step = ins; 
+ } + ) + ; + +xqufDeleteExpr [PathExpr path] +returns [Expression step] +throws XPathException, PermissionDeniedException, EXistException +{ +}: + #( deleteAST:"delete" + { + context.markXQUFUpdate(deleteAST); + + PathExpr targetExpr = new PathExpr(context); + targetExpr.setASTNode(xqufDeleteExpr_AST_in); + } + step=expr [targetExpr] + { + XQUFDeleteExpr del = new XQUFDeleteExpr(context, targetExpr); + del.setASTNode(deleteAST); + path.add(del); + step = del; + } + ) + ; + +xqufReplaceExpr [PathExpr path] +returns [Expression step] +throws XPathException, PermissionDeniedException, EXistException +{ +}: + #( replaceAST:"replace" + { + context.markXQUFUpdate(replaceAST); + + PathExpr targetExpr = new PathExpr(context); + targetExpr.setASTNode(xqufReplaceExpr_AST_in); + + PathExpr withExpr = new PathExpr(context); + withExpr.setASTNode(xqufReplaceExpr_AST_in); + + boolean isValueOf = false; + } + ( + "value" { isValueOf = true; } + )? + step=expr [targetExpr] + step=expr [withExpr] + { + Expression replExpr; + if (isValueOf) { + replExpr = new XQUFReplaceValueExpr(context, targetExpr, withExpr); + } else { + replExpr = new XQUFReplaceNodeExpr(context, targetExpr, withExpr); + } + replExpr.setASTNode(replaceAST); + path.add(replExpr); + step = replExpr; + } + ) + ; + +xqufRenameExpr [PathExpr path] +returns [Expression step] +throws XPathException, PermissionDeniedException, EXistException +{ +}: + #( renameAST:"rename" + { + context.markXQUFUpdate(renameAST); + + PathExpr targetExpr = new PathExpr(context); + targetExpr.setASTNode(xqufRenameExpr_AST_in); + + PathExpr nameExpr = new PathExpr(context); + nameExpr.setASTNode(xqufRenameExpr_AST_in); + } + step=expr [targetExpr] + step=expr [nameExpr] + { + XQUFRenameExpr ren = new XQUFRenameExpr(context, targetExpr, nameExpr); + ren.setASTNode(renameAST); + path.add(ren); + step = ren; + } + ) + ; + +xqufTransformExpr [PathExpr path] +returns [Expression step] +throws XPathException, PermissionDeniedException, 
EXistException +{ +}: + #( copyAST:"copy" + { + context.markXQUFUpdate(copyAST); + + java.util.List copyBindings = new java.util.ArrayList(); + + PathExpr modifyExpr = new PathExpr(context); + modifyExpr.setASTNode(xqufTransformExpr_AST_in); + + PathExpr returnExpr = new PathExpr(context); + returnExpr.setASTNode(xqufTransformExpr_AST_in); + } + ( + #( VARIABLE_BINDING + { + PathExpr bindingExpr = new PathExpr(context); + bindingExpr.setASTNode(xqufTransformExpr_AST_in); + String varName = #VARIABLE_BINDING.getText(); + } + step=expr [bindingExpr] + { + final org.exist.dom.QName copyVarQName; + try { + copyVarQName = org.exist.dom.QName.parse(context, varName, null); + } catch (final org.exist.dom.QName.IllegalQNameException e) { + throw new XPathException(xqufTransformExpr_AST_in, ErrorCodes.XPST0081, + "Invalid variable name in copy binding: " + varName); + } + copyBindings.add(new XQUFTransformExpr.CopyBinding(copyVarQName, bindingExpr)); + } + ) + )+ + step=expr [modifyExpr] + step=expr [returnExpr] + { + XQUFTransformExpr trans = new XQUFTransformExpr(context, copyBindings, modifyExpr, returnExpr); + trans.setASTNode(copyAST); + path.add(trans); + step = trans; + } + ) + ; + mapConstr [PathExpr path] returns [Expression step] throws XPathException, PermissionDeniedException, EXistException @@ -4010,6 +6341,18 @@ throws XPathException, PermissionDeniedException, EXistException expr[value] { expr.map(key, value); } ) + | + // === XQuery 4.0 Map Comprehensions (PR2094) === + // Merge entry: ExprSingle without ":" - must evaluate to a map at runtime + #( + MAP_MERGE + { + PathExpr mergeExpr = new PathExpr(context); + mergeExpr.setASTNode(mapConstr_AST_in); + } + expr[mergeExpr] + { expr.merge(mergeExpr); } + ) )* ) ; diff --git a/exist-core/src/main/java/org/exist/dom/memtree/DocumentImpl.java b/exist-core/src/main/java/org/exist/dom/memtree/DocumentImpl.java index ea7685a17c5..8da61c5ae0b 100644 --- 
a/exist-core/src/main/java/org/exist/dom/memtree/DocumentImpl.java +++ b/exist-core/src/main/java/org/exist/dom/memtree/DocumentImpl.java @@ -49,6 +49,8 @@ import javax.xml.XMLConstants; import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; @@ -144,6 +146,11 @@ public class DocumentImpl extends NodeImpl implements Document { // end reference nodes + // Override for first-child lookup after in-memory mutations. + // Maps parent node number -> first child node number when the first child + // is no longer at the positional (parent + 1) slot due to insertions. + private Map firstChildOverride = null; + protected XQueryContext context; protected final boolean explicitlyCreated; protected final long docId; @@ -603,43 +610,174 @@ public int getNamespacesCountFor(final int nodeNumber) { return count; } + /** + * Strip unused namespace declarations from an element and all its descendants. + * A namespace declaration is "unused" if its prefix is not used by the element's + * own name or any of its attribute names. + * + *

This implements the W3C copy-namespaces {@code no-preserve} semantics: + * only namespace bindings that are used by element/attribute names are preserved.

+ * + *

 Works by invalidating all of the element's existing namespace array entries (setting their parent to -2) + * and then re-adding only the ones that are used. The invalidated entries are dead space that + * is cleaned up on the next {@link #compact()} call.

+ * + * @param rootNodeNum the root element node number of the subtree to process + */ + public void stripUnusedNamespacesInSubtree(final int rootNodeNum) { + if (namespaceCode == null) { + return; + } + // Walk the subtree: process rootNodeNum and all descendants at deeper levels + final short rootLevel = treeLevel[rootNodeNum]; + for (int i = rootNodeNum; i < size; i++) { + if (i > rootNodeNum && treeLevel[i] <= rootLevel) { + break; // past the subtree + } + if (nodeKind[i] != Node.ELEMENT_NODE) { + continue; + } + stripUnusedNamespacesForElement(i); + } + } + + private void stripUnusedNamespacesForElement(final int nodeNum) { + int ns = alphaLen[nodeNum]; + if (ns < 0) { + return; // no namespace declarations + } + + // Collect used prefixes: element name + attribute names + final java.util.Set usedPrefixes = new java.util.HashSet<>(); + final QName elemName = nodeName[nodeNum]; + usedPrefixes.add(elemName.getPrefix() != null ? elemName.getPrefix() : ""); + int attr = alpha[nodeNum]; + if (attr >= 0) { + while (attr < nextAttr && attrParent[attr] == nodeNum) { + final QName aName = attrName[attr]; + if (aName.getPrefix() != null && !aName.getPrefix().isEmpty()) { + usedPrefixes.add(aName.getPrefix()); + } + attr++; + } + } + + // Collect used namespace declarations (to re-add later) + final java.util.List usedNs = new java.util.ArrayList<>(); + while (ns < nextNamespace && namespaceParent[ns] == nodeNum) { + final QName nsQName = namespaceCode[ns]; + if (usedPrefixes.contains(nsQName.getLocalPart())) { + usedNs.add(nsQName); + } + // Invalidate the old entry + namespaceParent[ns] = -2; + ns++; + } + + // Reset alphaLen so addNamespace can set it fresh + alphaLen[nodeNum] = -1; + + // Re-add only used namespace declarations + for (final QName nsQName : usedNs) { + addNamespace(nodeNum, nsQName); + } + } + public int getChildCountFor(final int nr) { int count = 0; + final short childLevel = (short) (treeLevel[nr] + 1); int nextNode = getFirstChildFor(nr); - 
while(nextNode > nr) { - ++count; - nextNode = next[nextNode]; + int steps = 0; + while (nextNode >= 0 && steps < size) { + if (nodeKind[nextNode] != -1 && treeLevel[nextNode] == childLevel) { + ++count; + } + final int following = getNextSiblingFor(nextNode); + if (following < 0) { + break; + } + nextNode = following; + steps++; } return count; } public int getFirstChildFor(final int nodeNumber) { + // Check for override from in-memory mutations (e.g. insert as first) + if (firstChildOverride != null) { + final Integer override = firstChildOverride.get(nodeNumber); + if (override != null) { + return override; + } + } + if (nodeNumber == 0) { // optimisation for document-node if (size > 1) { - return 1; + // skip soft-deleted nodes, but remember first deleted child + int n = 1; + int firstDeleted = -1; + while (n < size && nodeKind[n] == -1) { + if (firstDeleted < 0) { + firstDeleted = n; + } + n++; + } + return n < size ? n : firstDeleted; } else { return -1; } } final short level = treeLevel[nodeNumber]; - final int nextNode = nodeNumber + 1; - if((nextNode < size) && (treeLevel[nextNode] > level)) { - return nextNode; + int nextNode = nodeNumber + 1; + int firstDeletedChild = -1; + // Scan positional children (nodes immediately after parent in the array at a deeper level) + while (nextNode < size && treeLevel[nextNode] > level) { + if (nodeKind[nextNode] != -1) { + return nextNode; // found a non-deleted child + } + if (firstDeletedChild < 0) { + firstDeletedChild = nextNode; + } + nextNode++; } - return -1; + // No non-deleted positional child found. Return the first deleted child + // so callers can follow the next[] chain to find children that were + // appended beyond the positional range via insertChildren(). + return firstDeletedChild; } public int getNextSiblingFor(final int nodeNumber) { final int nextNr = next[nodeNumber]; - return nextNr < nodeNumber ? 
-1 : nextNr; + if (nextNr < 0) { + return -1; + } + if (nextNr < nodeNumber) { + // Backwards reference: after in-memory mutations, siblings may be at + // lower positions. Check tree level to distinguish sibling from parent. + if (treeLevel[nextNr] == treeLevel[nodeNumber]) { + return nextNr; + } + return -1; // lower level = parent pointer, no next sibling + } + return nextNr; } public int getParentNodeFor(final int nodeNumber) { + if (nodeNumber == 0) { + return -1; + } + final short level = treeLevel[nodeNumber]; int nextNode = next[nodeNumber]; - while(nextNode > nodeNumber) { + int steps = 0; + while (nextNode >= 0 && steps < size) { + if (treeLevel[nextNode] < level) { + return nextNode; // found a node at a lower level = parent + } + // same or higher level — keep walking the chain nextNode = next[nextNode]; + steps++; } return nextNode; } @@ -1635,4 +1773,1046 @@ public Node appendChild(final Node newChild) throws DOMException { throw unsupported(); } + + // === W3C XQuery Update Facility 3.0 - In-memory mutation methods === + + /** + * Rename a node in this document. + * + * @param nodeNum the node number to rename + * @param newName the new QName + */ + public void renameNode(final int nodeNum, final QName newName) { + final short kind = nodeKind[nodeNum]; + switch (kind) { + case Node.ELEMENT_NODE: + case Node.PROCESSING_INSTRUCTION_NODE: + nodeName[nodeNum] = namePool.getSharedName(newName); + break; + default: + throw new DOMException(DOMException.NOT_SUPPORTED_ERR, + "Cannot rename node of type " + kind); + } + } + + /** + * Rename an attribute node. The attrNum parameter is an index into the + * attribute arrays (attrName, attrValue, etc.), NOT the main node arrays. + * + * @param attrNum the attribute index + * @param newName the new QName + */ + public void renameAttribute(final int attrNum, final QName newName) { + attrName[attrNum] = namePool.getSharedName(newName); + } + + /** + * Replace the string value of a node. 
+ * + * @param nodeNum the node number + * @param value the new string value + */ + public void replaceValue(final int nodeNum, final String value) { + final short kind = nodeKind[nodeNum]; + switch (kind) { + case Node.TEXT_NODE: + case Node.COMMENT_NODE: + case Node.CDATA_SECTION_NODE: + case Node.PROCESSING_INSTRUCTION_NODE: { + // Replace the character content + final char[] chars = value.toCharArray(); + if (characters == null) { + characters = new char[chars.length > CHAR_BUF_SIZE ? chars.length : CHAR_BUF_SIZE]; + } else if ((nextChar + chars.length) >= characters.length) { + int newLen = (characters.length * 3) / 2; + if (newLen < (nextChar + chars.length)) { + newLen = nextChar + chars.length; + } + final char[] nc = new char[newLen]; + System.arraycopy(characters, 0, nc, 0, characters.length); + characters = nc; + } + alpha[nodeNum] = nextChar; + alphaLen[nodeNum] = chars.length; + System.arraycopy(chars, 0, characters, nextChar, chars.length); + nextChar += chars.length; + break; + } + case Node.ELEMENT_NODE: { + // W3C replaceElementContent: replace all children with a single text node. + // We must be careful to only modify THIS element's children, not nodes + // belonging to sibling elements that happen to be adjacent in the array. + final short childLevel = (short) (treeLevel[nodeNum] + 1); + + // Determine the boundary of this element's positional subtree. + // Only nodes at positions nodeNum+1..subtreeEnd (where subtreeEnd is the + // first position at the same or lower level) are this element's children. 
+ int subtreeEnd = nodeNum + 1; + while (subtreeEnd < size && treeLevel[subtreeEnd] > treeLevel[nodeNum]) { + subtreeEnd++; + } + + // Find and modify/create a text child within the positional range + int firstTextChild = -1; + for (int c = nodeNum + 1; c < subtreeEnd; c++) { + if (firstTextChild == -1 && treeLevel[c] == childLevel + && nodeKind[c] == Node.TEXT_NODE) { + firstTextChild = c; + } else if (c != firstTextChild) { + nodeKind[c] = -1; // delete other children + } + } + + // Also delete any chain-linked children (from previous insertions) + if (firstChildOverride != null && firstChildOverride.containsKey(nodeNum)) { + int chainChild = firstChildOverride.get(nodeNum); + while (chainChild >= 0 && chainChild != nodeNum) { + if (chainChild >= subtreeEnd && nodeKind[chainChild] != -1) { + nodeKind[chainChild] = -1; // delete appended children + } + final int nx = next[chainChild]; + if (nx < 0 || nx == nodeNum) break; + chainChild = nx; + } + firstChildOverride.remove(nodeNum); + } + + if (firstTextChild >= 0) { + // Modify existing text child in place + final char[] chars = value.toCharArray(); + if ((nextChar + chars.length) >= characters.length) { + int newLen = (characters.length * 3) / 2; + if (newLen < (nextChar + chars.length)) { + newLen = nextChar + chars.length; + } + final char[] nc = new char[newLen]; + System.arraycopy(characters, 0, nc, 0, characters.length); + characters = nc; + } + alpha[firstTextChild] = nextChar; + alphaLen[firstTextChild] = chars.length; + System.arraycopy(chars, 0, characters, nextChar, chars.length); + nextChar += chars.length; + } else if (nodeNum + 1 < subtreeEnd) { + // No text child but has positional children — convert first to text + final int firstChild = nodeNum + 1; + nodeKind[firstChild] = Node.TEXT_NODE; + nodeName[firstChild] = null; + final char[] chars = value.toCharArray(); + if ((nextChar + chars.length) >= characters.length) { + int newLen = (characters.length * 3) / 2; + if (newLen < (nextChar + 
chars.length)) { + newLen = nextChar + chars.length; + } + final char[] nc = new char[newLen]; + System.arraycopy(characters, 0, nc, 0, characters.length); + characters = nc; + } + alpha[firstChild] = nextChar; + alphaLen[firstChild] = chars.length; + System.arraycopy(chars, 0, characters, nextChar, chars.length); + nextChar += chars.length; + // Mark remaining positional children as deleted + for (int c = firstChild + 1; c < subtreeEnd; c++) { + nodeKind[c] = -1; + } + } else if (value != null && !value.isEmpty()) { + // Element has no positional children — insert via insertChildren + try { + final org.exist.xquery.value.StringValue textVal = + new org.exist.xquery.value.StringValue(value); + insertChildren(nodeNum, textVal, true); + } catch (final org.exist.xquery.XPathException e) { + throw new DOMException(DOMException.INVALID_STATE_ERR, + "Failed to insert text child: " + e.getMessage()); + } + } + break; + } + default: + throw new DOMException(DOMException.NOT_SUPPORTED_ERR, + "Cannot replace value of node of type " + kind); + } + } + + /** + * Replace the value of an attribute node. The attrNum parameter is an index + * into the attribute arrays (attrName, attrValue, etc.), NOT the main node arrays. + * + * @param attrNum the attribute index + * @param value the new value + */ + public void replaceAttributeValue(final int attrNum, final String value) { + attrValue[attrNum] = value; + } + + /** + * Remove an attribute from this document. + * Compacts the attribute arrays by shifting subsequent entries down. + * Also updates the alpha[] pointers for elements whose first attribute + * index is affected. + * + * @param attrNum the attribute index to remove + */ + /** + * Find an attribute index by QName on a given element. 
+ * + * @param elementNodeNum the element node number + * @param qname the attribute QName to find + * @return the attribute index, or -1 if not found + */ + public int findAttribute(final int elementNodeNum, final QName qname) { + int a = alpha[elementNodeNum]; + if (a < 0) { + return -1; + } + while (a < nextAttr && attrParent[a] == elementNodeNum) { + if (attrName[a].getLocalPart().equals(qname.getLocalPart()) + && attrName[a].getNamespaceURI().equals(qname.getNamespaceURI())) { + return a; + } + a++; + } + return -1; + } + + public void removeAttribute(final int attrNum) { + if (attrNum < 0 || attrNum >= nextAttr) { + return; + } + + // Shift all attribute arrays down by one + final int remaining = nextAttr - attrNum - 1; + if (remaining > 0) { + System.arraycopy(attrName, attrNum + 1, attrName, attrNum, remaining); + System.arraycopy(attrNodeId, attrNum + 1, attrNodeId, attrNum, remaining); + System.arraycopy(attrParent, attrNum + 1, attrParent, attrNum, remaining); + System.arraycopy(attrValue, attrNum + 1, attrValue, attrNum, remaining); + System.arraycopy(attrType, attrNum + 1, attrType, attrNum, remaining); + } + nextAttr--; + + // Update alpha[] pointers: alpha[nodeNum] stores the first attribute index + // for each element. If the removed attribute index is <= the element's + // first attribute, we need to adjust. + for (int i = 0; i < size; i++) { + if (nodeKind[i] == Node.ELEMENT_NODE && alpha[i] >= 0) { + if (alpha[i] > attrNum) { + alpha[i]--; + } else if (alpha[i] == attrNum) { + // Check if this element still has attributes + if (attrNum < nextAttr && attrParent[attrNum] == i) { + // Still has attributes at the same index (shifted down) + } else { + alpha[i] = -1; // No more attributes for this element + } + } + } + } + } + + /** + * Find any node whose next[] pointer targets the given node. + * After in-memory mutations, predecessors may be at any array position, + * so we must scan all nodes, not just those before targetNodeNum. 
+ * + * @param targetNodeNum the node to find a predecessor for + * @return the predecessor node number, or -1 if not found + */ + private int findPredecessor(final int targetNodeNum) { + final short targetLevel = treeLevel[targetNodeNum]; + // Search backward first (most common case for unmutated trees) + for (int i = targetNodeNum - 1; i >= 0; i--) { + if (next[i] == targetNodeNum && nodeKind[i] != -1 && treeLevel[i] == targetLevel) { + return i; + } + } + // Search forward (for nodes inserted after targetNodeNum in array order) + for (int i = targetNodeNum + 1; i < size; i++) { + if (next[i] == targetNodeNum && nodeKind[i] != -1 && treeLevel[i] == targetLevel) { + return i; + } + } + return -1; + } + + /** + * Remove a node from this document. + * This is a soft-delete: the node's kind is set to -1 to mark it as deleted. + * This is sufficient for the copy-modify pattern where the document is + * consumed once and not reused. + * + * @param nodeNum the node number to remove + */ + public void removeNode(final int nodeNum) { + if (nodeNum <= 0 || nodeNum >= size) { + return; + } + + // Find the parent and re-stitch the next[] chain to skip this node + final int origNext = next[nodeNum]; + final short level = treeLevel[nodeNum]; + + // Find the previous node that points to nodeNum + final int prev = findPredecessor(nodeNum); + + if (prev >= 0) { + // Find the next node after this node's subtree in the sibling chain. + // Walk the next[] chain from nodeNum to find the first node that's + // at the same or lower level (a sibling or the parent). + int chainNode = origNext; + int steps = 0; + while (chainNode >= 0 && steps < size) { + if (nodeKind[chainNode] == -1) { + // skip deleted nodes in chain + chainNode = next[chainNode]; + steps++; + continue; + } + if (treeLevel[chainNode] <= level) { + // Found a sibling or parent + break; + } + chainNode = next[chainNode]; + steps++; + } + next[prev] = chainNode >= 0 ? 
chainNode : origNext; + } + + // Mark the node and its subtree as deleted + final short nodeLevel = treeLevel[nodeNum]; + nodeKind[nodeNum] = -1; + for (int i = nodeNum + 1; i < size && treeLevel[i] > nodeLevel; i++) { + nodeKind[i] = -1; + } + } + + /** + * Merge adjacent text nodes throughout the document. + * Per the W3C XQuery Update Facility spec, after applying updates, + * adjacent text nodes among children of any element or document node + * must be merged. Empty text nodes are removed. + * + * This walks all non-deleted nodes and for each parent (document or element), + * finds runs of consecutive text node children and merges them. + */ + public void mergeAdjacentTextNodes() { + // Walk the document looking for parent nodes (document or element) + for (int parent = 0; parent < size; parent++) { + if (nodeKind[parent] == -1) { + continue; + } + if (nodeKind[parent] != Node.DOCUMENT_NODE && nodeKind[parent] != Node.ELEMENT_NODE) { + continue; + } + + // Iterate through children of this parent using the next[] chain + final short childLevel = (short) (treeLevel[parent] + 1); + int child = getFirstChildFor(parent); + if (child < 0) { + continue; + } + + int prevTextNode = -1; + while (child >= 0 && child < size && treeLevel[child] >= childLevel) { + if (nodeKind[child] == -1) { + // Skip deleted nodes — follow next[] chain + child = next[child]; + if (child <= parent) break; + continue; + } + if (treeLevel[child] > childLevel) { + // Descendant, not direct child — skip + child = next[child]; + if (child <= parent) break; + continue; + } + + // Direct child at childLevel + if (nodeKind[child] == Node.TEXT_NODE) { + if (prevTextNode >= 0) { + // Merge this text node into prevTextNode + final String prevText = new String(characters, alpha[prevTextNode], alphaLen[prevTextNode]); + final String thisText = new String(characters, alpha[child], alphaLen[child]); + final String merged = prevText + thisText; + + // Store merged text in prevTextNode + final char[] chars 
= merged.toCharArray(); + if ((nextChar + chars.length) >= characters.length) { + int newLen = (characters.length * 3) / 2; + if (newLen < (nextChar + chars.length)) { + newLen = nextChar + chars.length; + } + final char[] nc = new char[newLen]; + System.arraycopy(characters, 0, nc, 0, characters.length); + characters = nc; + } + alpha[prevTextNode] = nextChar; + alphaLen[prevTextNode] = chars.length; + System.arraycopy(chars, 0, characters, nextChar, chars.length); + nextChar += chars.length; + + // Soft-delete the merged text node and restitch + removeNode(child); + + // Continue from prevTextNode's next (don't advance prevTextNode) + child = next[prevTextNode]; + if (child <= parent) break; + } else { + // Check for empty text nodes + if (alphaLen[child] == 0) { + final int nextChild = next[child]; + removeNode(child); + child = nextChild; + if (child <= parent) break; + } else { + prevTextNode = child; + child = next[child]; + if (child <= parent) break; + } + } + } else { + prevTextNode = -1; + child = next[child]; + if (child <= parent) break; + } + } + } + + // Invalidate cached node IDs since the structure changed + if (nodeId != null) { + nodeId[0] = null; + } + } + + /** + * Insert children into an element node. + * Uses the serialization rebuild approach for correctness. 
+ * + * @param parentNodeNum the node number of the parent element + * @param content the content to insert + * @param asFirst if true, insert as first children; if false, as last + * @throws XPathException if the content cannot be processed + */ + public void insertChildren(final int parentNodeNum, final Sequence content, final boolean asFirst) + throws XPathException { + if (content == null || content.isEmpty()) { + return; + } + + final short childLevel = (short) (treeLevel[parentNodeNum] + 1); + + if (asFirst) { + // Insert as first children: find the current first child and link new nodes before it + final int firstChild = getFirstChildFor(parentNodeNum); + + int lastInserted = -1; + int firstInserted = -1; + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + final java.util.List inserted = copyItemIntoDocument(item, parentNodeNum, childLevel); + for (final int newNodeNum : inserted) { + if (firstInserted == -1) { + firstInserted = newNodeNum; + } + if (lastInserted >= 0) { + next[lastInserted] = newNodeNum; + } + lastInserted = newNodeNum; + } + } + // Link last inserted to the old first child (or parent if no children) + if (lastInserted >= 0) { + next[lastInserted] = firstChild >= 0 ? 
firstChild : parentNodeNum; + } + // Override the first-child lookup so navigation finds the new nodes first + if (firstInserted >= 0) { + if (firstChildOverride == null) { + firstChildOverride = new HashMap<>(); + } + firstChildOverride.put(parentNodeNum, firstInserted); + } + } else { + // Insert as last children: find the last child and link after it + // Walk the sibling chain from first child to find the last one + int lastChild = -1; + final int firstChild = getFirstChildFor(parentNodeNum); + if (firstChild >= 0) { + lastChild = firstChild; + int nextSib = getNextSiblingFor(lastChild); + while (nextSib >= 0) { + lastChild = nextSib; + nextSib = getNextSiblingFor(lastChild); + } + } + + int firstInsertedAsLast = -1; + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + final java.util.List inserted = copyItemIntoDocument(item, parentNodeNum, childLevel); + for (final int newNodeNum : inserted) { + if (firstInsertedAsLast == -1) { + firstInsertedAsLast = newNodeNum; + } + if (lastChild >= 0) { + next[lastChild] = newNodeNum; + } + lastChild = newNodeNum; + } + } + // If the parent had no visible children, the appended nodes are beyond + // the positional scan range. Set firstChildOverride so they can be found. + if (firstChild < 0 && firstInsertedAsLast >= 0) { + if (firstChildOverride == null) { + firstChildOverride = new HashMap<>(); + } + firstChildOverride.put(parentNodeNum, firstInsertedAsLast); + } + } + } + + /** + * Insert sibling nodes before or after a reference node. 
+ * + * @param refNodeNum the reference node number + * @param content the content to insert + * @param before if true, insert before; if false, insert after + * @throws XPathException if the content cannot be processed + */ + public void insertSiblings(final int refNodeNum, final Sequence content, final boolean before) + throws XPathException { + if (content == null || content.isEmpty()) { + return; + } + + final short level = treeLevel[refNodeNum]; + // Find the parent using level-aware parent finding + final int parentNum = getParentNodeFor(refNodeNum); + if (parentNum < 0) { + // Cannot insert siblings of the document node (no parent) + return; + } + + if (before) { + // Insert before: find the node whose next[] points to refNodeNum and re-link + final int prevNode = findPredecessor(refNodeNum); + + int lastInserted = -1; + int firstInserted = -1; + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + final java.util.List inserted = copyItemIntoDocument(item, parentNum, level); + for (final int newNodeNum : inserted) { + if (firstInserted == -1) { + firstInserted = newNodeNum; + } + if (prevNode >= 0 && lastInserted == -1) { + next[prevNode] = newNodeNum; + } + if (lastInserted >= 0) { + next[lastInserted] = newNodeNum; + } + lastInserted = newNodeNum; + } + } + // Link last inserted to refNode + if (lastInserted >= 0) { + next[lastInserted] = refNodeNum; + } + // If no predecessor found, refNode was the first child (found positionally). + // Set override so navigation finds the new nodes first. 
+ if (prevNode < 0 && firstInserted >= 0 && parentNum >= 0) { + if (firstChildOverride == null) { + firstChildOverride = new HashMap<>(); + } + firstChildOverride.put(parentNum, firstInserted); + } + } else { + // Insert after: link new nodes after refNode + final int origNext = next[refNodeNum]; + int lastInserted = refNodeNum; + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + final java.util.List inserted = copyItemIntoDocument(item, parentNum, level); + for (final int newNodeNum : inserted) { + next[lastInserted] = newNodeNum; + lastInserted = newNodeNum; + } + } + // Last inserted points to where refNode originally pointed + if (lastInserted != refNodeNum) { + next[lastInserted] = origNext; + } + } + } + + /** + * Insert attributes into an element. + * + * @param elementNodeNum the element node number + * @param content the attribute nodes to insert + * @throws XPathException if the content cannot be processed + */ + public void insertAttributes(final int elementNodeNum, final Sequence content) throws XPathException { + insertAttributes(elementNodeNum, content, true); + } + + /** + * Insert attributes into an element. 
+ * + * @param elementNodeNum the target element's node number + * @param content the attributes to insert + * @param replaceExisting if true, replace existing attributes with the same name; + * if false, always add as new attributes (for PUL application + * where a DELETE may separately remove the original) + */ + public void insertAttributes(final int elementNodeNum, final Sequence content, + final boolean replaceExisting) throws XPathException { + if (content == null || content.isEmpty()) { + return; + } + + // Collect new attributes to insert + final java.util.List newAttrs = new java.util.ArrayList<>(); + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + if (org.exist.xquery.value.Type.subTypeOf(item.getType(), org.exist.xquery.value.Type.NODE)) { + final Node node = ((org.exist.xquery.value.NodeValue) item).getNode(); + if (node.getNodeType() == Node.ATTRIBUTE_NODE) { + final Attr attr = (Attr) node; + final QName qname = new QName( + attr.getLocalName() != null ? attr.getLocalName() : attr.getName(), + attr.getNamespaceURI() != null ? attr.getNamespaceURI() : "", + attr.getPrefix() != null ? 
attr.getPrefix() : ""); + newAttrs.add(new Object[]{qname, attr.getValue()}); + } + } + } + + if (newAttrs.isEmpty()) { + return; + } + + // Check for duplicates and replace existing values (only when not in PUL mode) + if (replaceExisting) { + final java.util.Iterator it = newAttrs.iterator(); + while (it.hasNext()) { + final Object[] entry = it.next(); + final QName qname = (QName) entry[0]; + final String value = (String) entry[1]; + if (alpha[elementNodeNum] >= 0) { + int a = alpha[elementNodeNum]; + while (a < nextAttr && attrParent[a] == elementNodeNum) { + if (attrName[a].equals(qname)) { + // Replace existing attribute value + attrValue[a] = value; + it.remove(); + break; + } + a++; + } + } + } + } + + if (newAttrs.isEmpty()) { + return; + } + + final int count = newAttrs.size(); + + // Find insertion point: right after the last contiguous attribute of this element + int insertPos; + if (alpha[elementNodeNum] >= 0) { + insertPos = alpha[elementNodeNum]; + while (insertPos < nextAttr && attrParent[insertPos] == elementNodeNum) { + insertPos++; + } + } else { + // Element has no attrs yet — insert at nextAttr (already contiguous) + insertPos = nextAttr; + } + + // Ensure capacity + while (nextAttr + count > attrName.length) { + growAttributes(); + } + + // Shift everything from insertPos onwards to make room + if (insertPos < nextAttr) { + System.arraycopy(attrParent, insertPos, attrParent, insertPos + count, nextAttr - insertPos); + System.arraycopy(attrName, insertPos, attrName, insertPos + count, nextAttr - insertPos); + System.arraycopy(attrValue, insertPos, attrValue, insertPos + count, nextAttr - insertPos); + System.arraycopy(attrType, insertPos, attrType, insertPos + count, nextAttr - insertPos); + + // Update alpha pointers for elements whose attrs shifted + for (int n = 0; n < size; n++) { + if (nodeKind[n] == Node.ELEMENT_NODE && alpha[n] >= insertPos && n != elementNodeNum) { + alpha[n] += count; + } + } + } + + // Insert new attributes at the 
contiguous position + for (int j = 0; j < count; j++) { + final Object[] entry = newAttrs.get(j); + final QName qname = (QName) entry[0]; + final String value = (String) entry[1]; + final QName attrQname = new QName(qname.getLocalPart(), qname.getNamespaceURI(), qname.getPrefix(), ElementValue.ATTRIBUTE); + attrParent[insertPos + j] = elementNodeNum; + this.attrName[insertPos + j] = namePool.getSharedName(attrQname); + attrValue[insertPos + j] = value; + attrType[insertPos + j] = AttrImpl.ATTR_CDATA_TYPE; + } + + // Set alpha if element didn't have attrs before + if (alpha[elementNodeNum] < 0) { + alpha[elementNodeNum] = insertPos; + } + + nextAttr += count; + } + + /** + * Replace a node with new content. + * + * @param nodeNum the node number to replace + * @param content the replacement content + * @throws XPathException if the content cannot be processed + */ + public void replaceNode(final int nodeNum, final Sequence content) throws XPathException { + if (content == null || content.isEmpty()) { + removeNode(nodeNum); + return; + } + + final short level = treeLevel[nodeNum]; + final int parentNum = getParentNodeFor(nodeNum); + + // Find the predecessor that points to nodeNum + final int prev = findPredecessor(nodeNum); + + // Find the next node after nodeNum's subtree (the node nodeNum's chain leads to + // at the same or lower level) + int afterNode = next[nodeNum]; + int steps = 0; + while (afterNode >= 0 && steps < size) { + if (nodeKind[afterNode] != -1 && treeLevel[afterNode] <= level) { + break; + } + afterNode = next[afterNode]; + steps++; + } + + // Copy new content nodes and link them into the chain. + // Uses copyItemIntoDocument to handle document nodes and atomic values. 
+ int firstNew = -1; + int lastNew = -1; + try { + for (final org.exist.xquery.value.SequenceIterator i = content.iterate(); i.hasNext(); ) { + final org.exist.xquery.value.Item item = i.nextItem(); + final java.util.List newNodes = copyItemIntoDocument(item, parentNum, level); + for (final int newNodeNum : newNodes) { + if (firstNew == -1) { + firstNew = newNodeNum; + } + if (lastNew >= 0) { + next[lastNew] = newNodeNum; + } + lastNew = newNodeNum; + } + } + } catch (final org.exist.xquery.XPathException e) { + throw new DOMException(DOMException.INVALID_STATE_ERR, e.getMessage()); + } + + // Link new nodes into the chain + if (prev >= 0 && firstNew >= 0) { + next[prev] = firstNew; + } else if (prev < 0 && firstNew >= 0 && parentNum >= 0) { + // No same-level predecessor: the replaced node was the first child. + // Set firstChildOverride so getFirstChildFor() can find the new nodes + // (they're appended at the end of the array, beyond positional scan). + if (firstChildOverride == null) { + firstChildOverride = new HashMap<>(); + } + firstChildOverride.put(parentNum, firstNew); + } + if (lastNew >= 0) { + next[lastNew] = afterNode >= 0 ? afterNode : parentNum; + } + + // Soft-delete the original node and its subtree + final short nodeLevel = treeLevel[nodeNum]; + nodeKind[nodeNum] = -1; + for (int i = nodeNum + 1; i < size && treeLevel[i] > nodeLevel; i++) { + nodeKind[i] = -1; + } + } + + /** + * Copy a DOM node into this document's arrays. + * This is a simplified version for the copy-modify pattern. + * + * @return the node number of the top-level copied node + */ + /** + * Copy a content item into the document arrays, handling atomic values, + * document nodes, and regular nodes per the W3C XQuery Update Facility spec. 
+ * + * @param item the content item to copy + * @param parentNodeNum the parent node number + * @param level the tree level for the new node(s) + * @return list of top-level node numbers that were inserted + */ + private java.util.List copyItemIntoDocument(final org.exist.xquery.value.Item item, + final int parentNodeNum, final short level) + throws XPathException { + // When no-inherit is active, pass an empty scope map to materialize namespaces + // within inserted subtrees (so FunInScopePrefixes self-only mode still finds them) + final java.util.Map scopeNs = + (context != null && !context.inheritNamespaces()) + ? new java.util.LinkedHashMap<>() : null; + + final java.util.List result = new java.util.ArrayList<>(); + if (org.exist.xquery.value.Type.subTypeOf(item.getType(), org.exist.xquery.value.Type.NODE)) { + final Node node = ((org.exist.xquery.value.NodeValue) item).getNode(); + if (node.getNodeType() == Node.DOCUMENT_NODE) { + // For document nodes: insert the document's children, not the document itself + Node child = node.getFirstChild(); + while (child != null) { + result.add(copyNodeIntoDocument(child, parentNodeNum, level, scopeNs)); + child = child.getNextSibling(); + } + } else { + result.add(copyNodeIntoDocument(node, parentNodeNum, level, scopeNs)); + } + } else { + // Atomic value: convert to text node per W3C spec + final String text = item.getStringValue(); + if (!text.isEmpty()) { + final int nodeNum = addNode(Node.TEXT_NODE, level, null); + addChars(nodeNum, text.toCharArray(), 0, text.length()); + next[nodeNum] = parentNodeNum; + result.add(nodeNum); + } + } + return result; + } + + private int copyNodeIntoDocument(final Node node, final int parentNodeNum, final short level) { + return copyNodeIntoDocument(node, parentNodeNum, level, null); + } + + /** + * Copy a node into this document. 
+ * + * @param node the source node + * @param parentNodeNum the parent in this document + * @param level tree level for the new node + * @param scopeNamespaces when non-null, namespace bindings accumulated from ancestors + * within the current subtree (for no-inherit materialization). Each element gets + * explicit declarations for ancestor bindings not already declared on self. + * Pass null to skip materialization (normal copy behavior). + */ + private int copyNodeIntoDocument(final Node node, final int parentNodeNum, final short level, + final java.util.Map scopeNamespaces) { + switch (node.getNodeType()) { + case Node.ELEMENT_NODE: { + final String localName = node.getLocalName() != null ? node.getLocalName() : node.getNodeName(); + final String nsUri = node.getNamespaceURI() != null ? node.getNamespaceURI() : ""; + final String prefix = node.getPrefix() != null ? node.getPrefix() : ""; + final QName qname = new QName(localName, nsUri, prefix); + final int nodeNum = addNode(Node.ELEMENT_NODE, level, qname); + next[nodeNum] = parentNodeNum; + + // Collect attribute prefixes (needed for no-preserve filtering) + final NamedNodeMap attrs = node.getAttributes(); + final java.util.Set usedPrefixes = new java.util.HashSet<>(); + usedPrefixes.add(prefix); // element prefix is always "used" + + // Copy attributes (skip xmlns declarations — handled separately below) + if (attrs != null) { + for (int i = 0; i < attrs.getLength(); i++) { + final Attr attr = (Attr) attrs.item(i); + // Skip namespace declarations + if (javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.getNamespaceURI())) { + continue; + } + final String attrLocal = attr.getLocalName() != null ? attr.getLocalName() : attr.getName(); + final String attrNs = attr.getNamespaceURI() != null ? attr.getNamespaceURI() : ""; + final String attrPrefix = attr.getPrefix() != null ? 
attr.getPrefix() : ""; + usedPrefixes.add(attrPrefix); + addAttribute(nodeNum, new QName(attrLocal, attrNs, attrPrefix), + attr.getValue(), AttrImpl.ATTR_CDATA_TYPE); + } + } + + // Check if no-preserve mode should strip unused namespace declarations + final boolean noPreserve = context != null && !context.preserveNamespaces(); + + // Collect this element's own namespace declarations + final java.util.Map selfNsDecls = new java.util.LinkedHashMap<>(); + + // Copy namespace declarations (filtered by no-preserve if applicable) + if (node instanceof ElementImpl memElement) { + // Memtree element: copy from namespace arrays + final java.util.Map nsMap = memElement.getNamespaceMap(); + for (final java.util.Map.Entry e : nsMap.entrySet()) { + if (noPreserve && !usedPrefixes.contains(e.getKey())) { + continue; // strip unused namespace declaration + } + selfNsDecls.put(e.getKey(), e.getValue()); + final QName nsQName = new QName(e.getKey(), e.getValue(), + javax.xml.XMLConstants.XMLNS_ATTRIBUTE); + addNamespace(nodeNum, nsQName); + } + } else if (attrs != null) { + // DOM element: extract xmlns attributes + for (int i = 0; i < attrs.getLength(); i++) { + final Attr attr = (Attr) attrs.item(i); + if (javax.xml.XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.getNamespaceURI())) { + final String nsPrefix = attr.getLocalName() != null + && !javax.xml.XMLConstants.XMLNS_ATTRIBUTE.equals(attr.getLocalName()) + ? 
attr.getLocalName() : ""; + if (noPreserve && !usedPrefixes.contains(nsPrefix)) { + continue; // strip unused namespace declaration + } + selfNsDecls.put(nsPrefix, attr.getValue()); + final QName nsQName = new QName(nsPrefix, attr.getValue(), + javax.xml.XMLConstants.XMLNS_ATTRIBUTE); + addNamespace(nodeNum, nsQName); + } + } + } + + // No-inherit materialization: add ancestor namespace bindings from within + // the subtree that are not already declared on this element + if (scopeNamespaces != null) { + for (final java.util.Map.Entry e : scopeNamespaces.entrySet()) { + if (!selfNsDecls.containsKey(e.getKey())) { + if (!noPreserve || usedPrefixes.contains(e.getKey())) { + final QName nsQName = new QName(e.getKey(), e.getValue(), + javax.xml.XMLConstants.XMLNS_ATTRIBUTE); + addNamespace(nodeNum, nsQName); + selfNsDecls.put(e.getKey(), e.getValue()); + } + } + } + } + + // Build effective namespace scope for children + final java.util.Map childScope; + if (scopeNamespaces != null) { + childScope = new java.util.LinkedHashMap<>(scopeNamespaces); + childScope.putAll(selfNsDecls); + } else { + childScope = null; + } + + // Copy children recursively, linking siblings together + int prevChild = -1; + Node child = node.getFirstChild(); + while (child != null) { + final int childNum = copyNodeIntoDocument(child, nodeNum, (short) (level + 1), childScope); + if (prevChild >= 0) { + next[prevChild] = childNum; + } + prevChild = childNum; + child = child.getNextSibling(); + } + return nodeNum; + } + case Node.TEXT_NODE: { + final String text = node.getTextContent(); + final int nodeNum = addNode(Node.TEXT_NODE, level, null); + addChars(nodeNum, text.toCharArray(), 0, text.length()); + next[nodeNum] = parentNodeNum; + return nodeNum; + } + case Node.COMMENT_NODE: { + final String text = node.getTextContent(); + final int nodeNum = addNode(Node.COMMENT_NODE, level, null); + addChars(nodeNum, text.toCharArray(), 0, text.length()); + next[nodeNum] = parentNodeNum; + return nodeNum; 
+ } + case Node.PROCESSING_INSTRUCTION_NODE: { + final String target = node.getNodeName(); + final String data = node.getNodeValue() != null ? node.getNodeValue() : ""; + final QName qname = new QName(target, "", ""); + final int nodeNum = addNode(Node.PROCESSING_INSTRUCTION_NODE, level, qname); + addChars(nodeNum, data.toCharArray(), 0, data.length()); + next[nodeNum] = parentNodeNum; + return nodeNum; + } + case Node.CDATA_SECTION_NODE: { + final String text = node.getTextContent(); + final int nodeNum = addNode(Node.CDATA_SECTION_NODE, level, null); + addChars(nodeNum, text.toCharArray(), 0, text.length()); + next[nodeNum] = parentNodeNum; + return nodeNum; + } + default: + return -1; + } + } + + /** + * Compact the document by rebuilding all internal arrays from the logical + * tree structure. After in-memory mutations (insert, delete, replace), + * nodes may be appended at the end of the arrays, breaking the positional + * invariant that the XQuery engine relies on for document order. This method + * serializes the mutated tree into a fresh document and replaces the internal + * arrays, restoring correct positional ordering. + * + * Must be called after all mutations and text merging are complete. 
+ */ + public void compact() { + try { + final MemTreeBuilder builder = new MemTreeBuilder(context); + builder.startDocument(); + final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(builder, true); + receiver.setSuppressWhitespace(false); + + // Walk the document tree in logical order using chain-aware traversal + int child = getFirstChildFor(0); + while (child >= 0) { + if (nodeKind[child] != -1) { + final NodeImpl node = getNode(child); + copyTo(node, receiver, false); + } + child = getNextSiblingFor(child); + } + + builder.endDocument(); + final DocumentImpl newDoc = builder.getDocument(); + + // Replace internal arrays with the rebuilt document's arrays + this.nodeKind = newDoc.nodeKind; + this.treeLevel = newDoc.treeLevel; + this.next = newDoc.next; + this.nodeName = newDoc.nodeName; + this.nodeId = newDoc.nodeId; + this.alpha = newDoc.alpha; + this.alphaLen = newDoc.alphaLen; + this.characters = newDoc.characters; + this.nextChar = newDoc.nextChar; + this.attrName = newDoc.attrName; + this.attrType = newDoc.attrType; + this.attrNodeId = newDoc.attrNodeId; + this.attrParent = newDoc.attrParent; + this.attrValue = newDoc.attrValue; + this.nextAttr = newDoc.nextAttr; + this.namespaceParent = newDoc.namespaceParent; + this.namespaceCode = newDoc.namespaceCode; + this.nextNamespace = newDoc.nextNamespace; + this.size = newDoc.size; + this.references = newDoc.references; + this.nextReferenceIdx = newDoc.nextReferenceIdx; + this.firstChildOverride = null; + } catch (final SAXException e) { + throw new RuntimeException("Failed to compact document after mutations", e); + } + } } diff --git a/exist-core/src/main/java/org/exist/dom/memtree/ElementImpl.java b/exist-core/src/main/java/org/exist/dom/memtree/ElementImpl.java index 514d4d9e0b9..c8d6e21507f 100644 --- a/exist-core/src/main/java/org/exist/dom/memtree/ElementImpl.java +++ b/exist-core/src/main/java/org/exist/dom/memtree/ElementImpl.java @@ -64,15 +64,21 @@ public String getTagName() { @Override 
public boolean hasChildNodes() { - return (nodeNumber + 1) < document.size && document.treeLevel[nodeNumber + 1] > document.treeLevel[nodeNumber]; + return getFirstChild() != null; } @Override public Node getFirstChild() { - final short level = document.treeLevel[nodeNumber]; - final int nextNode = nodeNumber + 1; - if(nextNode < document.size && document.treeLevel[nextNode] > level) { - return document.getNode(nextNode); + int firstChild = document.getFirstChildFor(nodeNumber); + // Skip deleted nodes (nodeKind == -1) after in-memory mutations + while (firstChild >= 0 && document.nodeKind[firstChild] == -1) { + firstChild = document.next[firstChild]; + if (firstChild < 0 || firstChild <= nodeNumber) { + return null; + } + } + if (firstChild >= 0) { + return document.getNode(firstChild); } return null; } @@ -83,9 +89,11 @@ public NodeList getChildNodes() { final NodeListImpl nl = new NodeListImpl(1); // nil elements are rare, so we use 1 here int nextNode = document.getFirstChildFor(nodeNumber); while(nextNode > nodeNumber) { - final Node n = document.getNode(nextNode); - if(n.getNodeType() != Node.ATTRIBUTE_NODE) { - nl.add(n); + if (document.nodeKind[nextNode] != -1) { + final Node n = document.getNode(nextNode); + if(n.getNodeType() != Node.ATTRIBUTE_NODE) { + nl.add(n); + } } nextNode = document.next[nextNode]; } @@ -300,15 +308,22 @@ public void selectAttributes(final NodeTest test, final Sequence result) throws @Override public void selectDescendantAttributes(final NodeTest test, final Sequence result) throws XPathException { - final int treeLevel = document.treeLevel[nodeNumber]; - int nextNode = nodeNumber; - NodeImpl n = document.getNode(nextNode); - n.selectAttributes(test, result); - while(++nextNode < document.size && document.treeLevel[nextNode] > treeLevel) { - n = document.getNode(nextNode); - if(n.getNodeType() == Node.ELEMENT_NODE) { + // Use chain-based traversal to find descendant attributes, + // including nodes appended by in-memory mutations. 
+ selectAttributes(test, result); + selectDescendantAttributesWalk(nodeNumber, test, result); + } + + private void selectDescendantAttributesWalk(final int parentNum, final NodeTest test, final Sequence result) + throws XPathException { + int child = document.getFirstChildFor(parentNum); + while (child >= 0) { + if (document.nodeKind[child] != -1 && document.nodeKind[child] == Node.ELEMENT_NODE) { + final NodeImpl n = document.getNode(child); n.selectAttributes(test, result); + selectDescendantAttributesWalk(child, test, result); } + child = document.getNextSiblingFor(child); } } @@ -316,9 +331,11 @@ public void selectDescendantAttributes(final NodeTest test, final Sequence resul public void selectChildren(final NodeTest test, final Sequence result) throws XPathException { int nextNode = document.getFirstChildFor(nodeNumber); while(nextNode > nodeNumber) { - final NodeImpl n = document.getNode(nextNode); - if(test.matches(n)) { - result.add(n); + if (document.nodeKind[nextNode] != -1) { + final NodeImpl n = document.getNode(nextNode); + if(test.matches(n)) { + result.add(n); + } } nextNode = document.next[nextNode]; } @@ -333,21 +350,34 @@ public NodeImpl getFirstChild(final NodeTest test) throws XPathException { @Override public void selectDescendants(final boolean includeSelf, final NodeTest test, final Sequence result) throws XPathException { - final int treeLevel = document.treeLevel[nodeNumber]; - int nextNode = nodeNumber; - - if(includeSelf) { - final NodeImpl n = document.getNode(nextNode); - if(test.matches(n)) { + if (includeSelf) { + final NodeImpl n = document.getNode(nodeNumber); + if (test.matches(n)) { result.add(n); } } + // Use chain-based tree walking instead of flat array scanning. + // Flat scanning from nodeNumber+1 misses nodes appended by in-memory + // mutations (insert as first, insert before, etc.) since those are placed + // at positions beyond the original tree. 
+ selectDescendantsWalk(nodeNumber, test, result); + } - while(++nextNode < document.size && document.treeLevel[nextNode] > treeLevel) { - final NodeImpl n = document.getNode(nextNode); - if(test.matches(n)) { - result.add(n); + private void selectDescendantsWalk(final int parentNum, final NodeTest test, final Sequence result) + throws XPathException { + int child = document.getFirstChildFor(parentNum); + while (child >= 0) { + if (document.nodeKind[child] != -1) { + final NodeImpl n = document.getNode(child); + if (test.matches(n)) { + result.add(n); + } + // Recurse into element children + if (document.nodeKind[child] == Node.ELEMENT_NODE) { + selectDescendantsWalk(child, test, result); + } } + child = document.getNextSiblingFor(child); } } diff --git a/exist-core/src/main/java/org/exist/dom/memtree/NodeImpl.java b/exist-core/src/main/java/org/exist/dom/memtree/NodeImpl.java index 47f03d4096b..487f346e9e5 100644 --- a/exist-core/src/main/java/org/exist/dom/memtree/NodeImpl.java +++ b/exist-core/src/main/java/org/exist/dom/memtree/NodeImpl.java @@ -221,14 +221,14 @@ public short getNodeType() { @Override public Node getParentNode() { - int next = document.next[nodeNumber]; - while (next > nodeNumber) { - next = document.next[next]; + if (nodeNumber == 0) { + return null; } - if (next < 0) { + final int parentNum = document.getParentNodeFor(nodeNumber); + if (parentNum < 0) { return null; } - final NodeImpl parent = document.getNode(next); + final NodeImpl parent = document.getNode(parentNum); if (parent.getNodeType() == DOCUMENT_NODE && !((DocumentImpl) parent).isExplicitlyCreated()) { /* All nodes in the MemTree will return an Owner document due to how the MemTree is implemented, @@ -246,17 +246,14 @@ public Node selectParentNode() { if(nodeNumber == 0) { return null; } - int next = document.next[nodeNumber]; - while(next > nodeNumber) { - next = document.next[next]; - } - if(next < 0) { //Is this even possible ? 
+ final int parentNum = document.getParentNodeFor(nodeNumber); + if(parentNum < 0) { return null; } - if(next == 0) { + if(parentNum == 0) { return this.document.explicitlyCreated ? this.document : null; } - return document.getNode(next); + return document.getNode(parentNum); } @Override @@ -273,6 +270,11 @@ public boolean equals(final Object other) { getNodeType() == o.getNodeType(); } + @Override + public int hashCode() { + return System.identityHashCode(document) * 31 + nodeNumber; + } + @Override public boolean equals(final NodeValue other) throws XPathException { if(other.getImplementationType() != NodeValue.IN_MEMORY_NODE) { @@ -309,10 +311,16 @@ public int compareTo(final NodeImpl other) { } else { return Constants.SUPERIOR; } - } else if(document.docId < other.document.docId) { - return Constants.INFERIOR; } else { - return Constants.SUPERIOR; + final long thisDocId = document != null ? document.docId : 0; + final long otherDocId = other.document != null ? other.document.docId : 0; + if (thisDocId < otherDocId) { + return Constants.INFERIOR; + } else if (thisDocId > otherDocId) { + return Constants.SUPERIOR; + } else { + return Constants.EQUAL; + } } } @@ -355,8 +363,23 @@ public Node getPreviousSibling() { @Override public Node getNextSibling() { - final int nextNr = document.next[nodeNumber]; - return nextNr < nodeNumber ? null : document.getNode(nextNr); + int nextNr = document.next[nodeNumber]; + // Skip deleted nodes (nodeKind == -1) in the sibling chain + while (nextNr >= 0 && document.nodeKind[nextNr] == -1) { + nextNr = document.next[nextNr]; + } + if (nextNr < 0) { + return null; + } + if (nextNr < nodeNumber) { + // Backwards reference: check tree level to distinguish sibling from parent. + // After in-memory mutations, siblings may be at lower positions than this node. 
+ if (document.treeLevel[nextNr] == document.treeLevel[nodeNumber]) { + return document.getNode(nextNr); + } + return null; // lower level = parent, no next sibling + } + return document.getNode(nextNr); } @Override @@ -755,6 +778,10 @@ public void selectPreceding(final NodeTest test, final Sequence result, final in int count = 0; for(int i = nodeNumber - 1; i > 0; i--) { + // Skip deleted nodes (soft-deleted by removeNode, nodeKind set to -1) + if(document.nodeKind[i] == -1) { + continue; + } final NodeImpl n = document.getNode(i); if(!myNodeId.isDescendantOf(n.getNodeId()) && test.matches(n)) { if((position < 0) || (++count == position)) { @@ -784,17 +811,15 @@ public void selectFollowing(final NodeTest test, final Sequence result, final in throws XPathException { final int parent = document.getParentNodeFor(nodeNumber); if(parent == 0) { - // parent is the document node - if(getNodeType() == Node.ELEMENT_NODE) { - return; - } + // parent is the document node — walk document-level siblings after this node + final boolean isDocElement = (getNodeType() == Node.ELEMENT_NODE); NodeImpl next = (NodeImpl) getNextSibling(); while(next != null) { - if(test.matches(next)) { + if(!isDocElement && next.getNodeType() == Node.ELEMENT_NODE) { + // Context is before the doc element — include element and its descendants next.selectDescendants(true, test, result); - } - if(next.getNodeType() == Node.ELEMENT_NODE) { - break; + } else if(next.getNodeType() != Node.ELEMENT_NODE && test.matches(next)) { + result.add(next); } next = (NodeImpl) next.getNextSibling(); } @@ -803,6 +828,11 @@ public void selectFollowing(final NodeTest test, final Sequence result, final in int count = 0; int nextNode = nodeNumber + 1; while(nextNode < document.size) { + // Skip deleted nodes (soft-deleted by removeNode, nodeKind set to -1) + if(document.nodeKind[nextNode] == -1) { + nextNode++; + continue; + } final NodeImpl n = document.getNode(nextNode); if(!n.getNodeId().isDescendantOf(myNodeId) && 
test.matches(n)) { if((position < 0) || (++count == position)) { diff --git a/exist-core/src/main/java/org/exist/dom/persistent/ElementImpl.java b/exist-core/src/main/java/org/exist/dom/persistent/ElementImpl.java index c490a4dfaff..ec98af10fd1 100644 --- a/exist-core/src/main/java/org/exist/dom/persistent/ElementImpl.java +++ b/exist-core/src/main/java/org/exist/dom/persistent/ElementImpl.java @@ -822,6 +822,7 @@ public Attr getAttributeNodeNS(final String namespaceURI, final String localName @Override public NamedNodeMap getAttributes() { final org.exist.dom.NamedNodeMapImpl map = new NamedNodeMapImpl(ownerDocument, true); + if(hasAttributes()) { try(final DBBroker broker = ownerDocument.getBrokerPool().getBroker(); final INodeIterator iterator = broker.getNodeIterator(this)) { @@ -837,6 +838,14 @@ public NamedNodeMap getAttributes() { if(next.getNodeType() != Node.ATTRIBUTE_NODE) { break; } + // Skip namespace declarations for the XML namespace — the xml prefix + // is always implicitly bound and Saxon 12 rejects any explicit + // declaration involving http://www.w3.org/XML/1998/namespace + if (next.getNodeType() == Node.ATTRIBUTE_NODE + && Namespaces.XMLNS_NS.equals(next.getNamespaceURI()) + && XMLConstants.XML_NS_URI.equals(next.getNodeValue())) { + continue; + } map.setNamedItem(next); } } catch(final EXistException | IOException e) { @@ -847,6 +856,13 @@ public NamedNodeMap getAttributes() { for (final Map.Entry entry : namespaceMappings.entrySet()) { final String prefix = entry.getKey(); final String ns = entry.getValue(); + // Skip namespace declarations involving the XML namespace URI — + // Saxon 12 rejects any explicit declaration of the xml prefix + // or binding of the XML namespace to a non-xml prefix + if (XMLConstants.XML_NS_PREFIX.equals(prefix) + || XMLConstants.XML_NS_URI.equals(ns)) { + continue; + } final QName attrName = new QName(prefix, Namespaces.XMLNS_NS, XMLConstants.XMLNS_ATTRIBUTE); final AttrImpl attr = new AttrImpl(getExpression(), 
attrName, ns, null); attr.setOwnerDocument(ownerDocument); diff --git a/exist-core/src/main/java/org/exist/dom/persistent/NewArrayNodeSet.java b/exist-core/src/main/java/org/exist/dom/persistent/NewArrayNodeSet.java index ed5f630028d..a765adc29d8 100644 --- a/exist-core/src/main/java/org/exist/dom/persistent/NewArrayNodeSet.java +++ b/exist-core/src/main/java/org/exist/dom/persistent/NewArrayNodeSet.java @@ -792,6 +792,7 @@ public NodeSet selectFollowing(final NodeSet pl, final int position, final int c if(!reference.getNodeId().isDescendantOf(nodes[j].getNodeId())) { if(position < 0 || ++n == position) { if (contextId != Expression.IGNORE_CONTEXT + && contextId != Expression.NO_CONTEXT_ID && nodes[j].getContext() != null && reference.getContext() != null && nodes[j].getContext().getContextId() == reference.getContext().getContextId()) { @@ -846,6 +847,7 @@ public NodeSet selectPreceding(final NodeSet pl, final int position, if(!reference.getNodeId().isDescendantOf(nodes[j].getNodeId())) { if(position < 0 || ++n == position) { if (contextId != Expression.IGNORE_CONTEXT + && contextId != Expression.NO_CONTEXT_ID && nodes[j].getContext() != null && reference.getContext() != null && nodes[j].getContext().getContextId() == reference.getContext().getContextId()) { diff --git a/exist-core/src/main/java/org/exist/dom/persistent/SortedNodeSet.java b/exist-core/src/main/java/org/exist/dom/persistent/SortedNodeSet.java index 88ecfb38641..a0a87afe9dd 100644 --- a/exist-core/src/main/java/org/exist/dom/persistent/SortedNodeSet.java +++ b/exist-core/src/main/java/org/exist/dom/persistent/SortedNodeSet.java @@ -86,20 +86,28 @@ public void addAll(final NodeSet other) { try(final DBBroker broker = pool.get(Optional.ofNullable(user))) { final XQueryContext context = new XQueryContext(pool); - final XQueryLexer lexer = new XQueryLexer(context, new StringReader(sortExpr)); - final XQueryParser parser = new XQueryParser(lexer); - final XQueryTreeParser treeParser = new 
XQueryTreeParser(context); - parser.xpath(); - if(parser.foundErrors()) { - //TODO : error ? - LOG.debug(parser.getErrorMessage()); - } - final AST ast = parser.getAST(); - LOG.debug("generated AST: {}", ast.toStringTree()); - final PathExpr expr = new PathExpr(context); - treeParser.xpath(ast, expr); - if(treeParser.foundErrors()) { - LOG.debug(treeParser.getErrorMessage()); + final PathExpr expr; + if (org.exist.xquery.XQuery.useRdParser()) { + final org.exist.xquery.parser.next.XQueryParser rdParser = + new org.exist.xquery.parser.next.XQueryParser(context, sortExpr); + final Expression rootExpr = rdParser.parse(); + expr = rootExpr instanceof PathExpr ? (PathExpr) rootExpr : new PathExpr(context); + if (!(rootExpr instanceof PathExpr)) { expr.add(rootExpr); } + } else { + expr = new PathExpr(context); + final XQueryLexer lexer = new XQueryLexer(context, new StringReader(sortExpr)); + final XQueryParser parser = new XQueryParser(lexer); + final XQueryTreeParser treeParser = new XQueryTreeParser(context); + parser.xpath(); + if (parser.foundErrors()) { + LOG.debug(parser.getErrorMessage()); + } + final AST ast = parser.getAST(); + LOG.debug("generated AST: {}", ast.toStringTree()); + treeParser.xpath(ast, expr); + if (treeParser.foundErrors()) { + LOG.debug(treeParser.getErrorMessage()); + } } expr.analyze(new AnalyzeContextInfo()); for(final SequenceIterator i = other.iterate(); i.hasNext(); ) { diff --git a/exist-core/src/main/java/org/exist/http/restxq/AnnotationParser.java b/exist-core/src/main/java/org/exist/http/restxq/AnnotationParser.java new file mode 100644 index 00000000000..b9129ed8977 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/AnnotationParser.java @@ -0,0 +1,644 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.dom.QName; +import org.exist.xquery.*; +import org.exist.xquery.value.FunctionParameterSequenceType; +import org.exist.xquery.value.SequenceType; + +import java.util.*; + +/** + * Parses RESTXQ annotations ({@code %rest:*}, {@code %output:*}) from + * compiled XQuery function signatures and produces {@link Route} objects. + * + *

* <p>This replaces the EXQuery library's annotation processing with a + * native implementation that works directly with eXist's type system.</p>

+ */ +public class AnnotationParser { + + private static final Logger LOG = LogManager.getLogger(AnnotationParser.class); + + private static final Set HTTP_METHOD_ANNOTATIONS = Set.of( + "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS" + ); + + /** + * Result of parsing a module: both path routes and error handlers. + */ + public static class ParseResult { + public final List routes; + public final List errorRoutes; + + public ParseResult(List routes, List errorRoutes) { + this.routes = routes; + this.errorRoutes = errorRoutes; + } + } + + /** + * Inspects all local functions in the given compiled XQuery and returns + * Routes for any functions that have RESTXQ annotations. + * + * @param compiled the compiled XQuery + * @param moduleUri the database URI of the XQuery module + * @return list of routes found (may be empty) + */ + public static List parseModule(final CompiledXQuery compiled, final String moduleUri) + throws RestXqAnnotationException { + return parseModuleFull(compiled, moduleUri).routes; + } + + /** + * Inspects all local functions and returns both path routes and error handlers. 
+ */ + public static ParseResult parseModuleFull(final CompiledXQuery compiled, final String moduleUri) + throws RestXqAnnotationException { + final List routes = new ArrayList<>(); + final List errorRoutes = new ArrayList<>(); + final Iterator functions = compiled.getContext().localFunctions(); + + while (functions.hasNext()) { + final UserDefinedFunction function = functions.next(); + final Route route = parseFunction(function, moduleUri); + if (route != null) { + routes.add(route); + LOG.debug("Registered RESTXQ route: {}", route); + } + final ErrorRoute errorRoute = parseErrorFunction(function, moduleUri); + if (errorRoute != null) { + // Check for duplicate error handlers in the same module + for (final ErrorRoute existing : errorRoutes) { + for (final ErrorRoute.ErrorCode newCode : errorRoute.getErrorCodes()) { + for (final ErrorRoute.ErrorCode existingCode : existing.getErrorCodes()) { + if (newCode.toString().equals(existingCode.toString())) { + throw new RestXqAnnotationException( + "Duplicate error handler for " + newCode + + " in module " + moduleUri); + } + } + } + } + errorRoutes.add(errorRoute); + LOG.debug("Registered RESTXQ error handler: {}", errorRoute.getFunctionName()); + } + } + + return new ParseResult(routes, errorRoutes); + } + + private static final Set KNOWN_OUTPUT_PARAMS = Set.of( + "method", "media-type", "encoding", "indent", "omit-xml-declaration", + "standalone", "version", "cdata-section-elements", "doctype-public", + "doctype-system", "byte-order-mark", "escape-uri-attributes", + "include-content-type", "normalization-form", "suppress-indentation", + "undeclare-prefixes", "use-character-maps", "html-version", + "item-separator", "json-node-output-method" + ); + + private static final Set KNOWN_REST_ANNOTATIONS = Set.of( + "path", "GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS", + "method", "consumes", "produces", + "query-param", "form-param", "header-param", "cookie-param", + "error", "error-param", "single" + ); + + /** 
+     * Parses annotations from a single function. Returns null if the
+     * function has no RESTXQ annotations. Throws on invalid annotations.
+     *
+     * @param function  the user-defined XQuery function whose annotations are inspected
+     * @param moduleUri URI of the declaring module; stored on the resulting route
+     * @return the route described by the annotations, or {@code null} when the function
+     *         carries no RESTXQ annotation or declares no {@code %rest:path}
+     * @throws RestXqAnnotationException if an annotation is unknown, duplicated, malformed,
+     *         or inconsistent with the function's parameters
+     */
+    static Route parseFunction(final UserDefinedFunction function, final String moduleUri)
+            throws RestXqAnnotationException {
+        final Annotation[] annotations = function.getSignature().getAnnotations();
+        if (annotations == null || annotations.length == 0) {
+            return null;
+        }
+
+        // Cheap pre-scan: nothing to do unless some annotation is in a RESTXQ namespace
+        boolean hasRestxqAnnotation = false;
+        for (final Annotation a : annotations) {
+            final String ns = a.getName().getNamespaceURI();
+            if (RestXqNamespaces.REST_NS.equals(ns)
+                    || RestXqNamespaces.OUTPUT_NS.equals(ns)
+                    || RestXqNamespaces.INPUT_NS.equals(ns)) {
+                hasRestxqAnnotation = true;
+                break;
+            }
+        }
+        if (!hasRestxqAnnotation) {
+            return null;
+        }
+
+        String pathTemplate = null;
+        final Set<String> methods = new LinkedHashSet<>();
+        final Set<String> rawMethodAnnotations = new LinkedHashSet<>();
+        boolean hasMethodAnnotation = false;
+        final Properties outputProperties = new Properties();
+        final List<String> consumes = new ArrayList<>();
+        final List<String> produces = new ArrayList<>();
+        final Map<String, Route.ParamBinding> queryParams = new LinkedHashMap<>();
+        final Map<String, Route.ParamBinding> formParams = new LinkedHashMap<>();
+        final Map<String, Route.ParamBinding> headerParams = new LinkedHashMap<>();
+        final Map<String, Route.ParamBinding> cookieParams = new LinkedHashMap<>();
+        String bodyVariable = null;
+        final Properties inputOptions = new Properties();
+        final String funcName = function.getSignature().getName().getLocalPart();
+
+        for (final Annotation annotation : annotations) {
+            final QName name = annotation.getName();
+            final String ns = name.getNamespaceURI();
+            final String local = name.getLocalPart();
+            final LiteralValue[] values = annotation.getValue();
+
+            if (RestXqNamespaces.REST_NS.equals(ns)) {
+                // Validate known annotation names ("error" and "error-param" are part of
+                // KNOWN_REST_ANNOTATIONS; they are parsed separately by parseErrorFunction)
+                if (!KNOWN_REST_ANNOTATIONS.contains(local)) {
+                    throw new RestXqAnnotationException(
+                            "Unknown RESTXQ annotation %rest:" + local
+                                    + " on function " + funcName);
+                }
+
+                final String localUpper = local.toUpperCase(Locale.ROOT);
+
+                if ("path".equals(local)) {
+                    // pathTemplate is non-null exactly when a %rest:path was already accepted
+                    if (pathTemplate != null) {
+                        throw new RestXqAnnotationException(
+                                "Duplicate %rest:path annotation on function " + funcName);
+                    }
+                    if (values.length == 0) {
+                        throw new RestXqAnnotationException(
+                                "%rest:path requires a path argument on function " + funcName);
+                    }
+                    if (values.length > 1) {
+                        throw new RestXqAnnotationException(
+                                "%rest:path must have exactly one argument on function " + funcName);
+                    }
+                    pathTemplate = getLiteralString(values, 0);
+                    if (pathTemplate == null) {
+                        // literalToString could not read the literal; without this check the
+                        // function would be treated as if it had no %rest:path at all
+                        throw new RestXqAnnotationException(
+                                "%rest:path argument must be a string on function " + funcName);
+                    }
+                } else if (HTTP_METHOD_ANNOTATIONS.contains(localUpper)) {
+                    if (!rawMethodAnnotations.add(local)) {
+                        throw new RestXqAnnotationException(
+                                "Duplicate %rest:" + local + " annotation on function " + funcName);
+                    }
+                    hasMethodAnnotation = true;
+                    if (!methods.add(localUpper)) {
+                        throw new RestXqAnnotationException(
+                                "Duplicate method " + localUpper + " on function " + funcName);
+                    }
+                    // Optional first argument names the variable receiving the request body
+                    if (values.length > 0) {
+                        bodyVariable = extractVariableName(getLiteralString(values, 0));
+                    }
+                } else if ("method".equals(local)) {
+                    hasMethodAnnotation = true;
+                    if (values.length > 0) {
+                        final String rawMethodName = getLiteralString(values, 0);
+                        if (rawMethodName == null) {
+                            // previously a NullPointerException on toUpperCase
+                            throw new RestXqAnnotationException(
+                                    "%rest:method requires a string method name on function " + funcName);
+                        }
+                        final String methodName = rawMethodName.toUpperCase(Locale.ROOT);
+                        // Check for duplicate method names (across all annotation types)
+                        if (!methods.add(methodName)) {
+                            throw new RestXqAnnotationException(
+                                    "Duplicate method " + methodName + " on function " + funcName);
+                        }
+                    }
+                    if (values.length > 1) {
+                        bodyVariable = extractVariableName(getLiteralString(values, 1));
+                    }
+                } else if ("consumes".equals(local)) {
+                    for (final LiteralValue v : values) {
+                        consumes.add(literalToString(v));
+                    }
+                } else if ("produces".equals(local)) {
+                    for (final LiteralValue v : values) {
+                        produces.add(literalToString(v));
+                    }
+                } else if ("query-param".equals(local)) {
+                    parseParamBinding(values, queryParams);
+                } else if ("form-param".equals(local)) {
+                    parseParamBinding(values, formParams);
+                } else if ("header-param".equals(local)) {
+                    parseParamBinding(values, headerParams);
+                } else if ("cookie-param".equals(local)) {
+                    parseParamBinding(values, cookieParams);
+                }
+            } else if (RestXqNamespaces.OUTPUT_NS.equals(ns)) {
+                if (values.length == 0) {
+                    throw new RestXqAnnotationException(
+                            "%output:" + local + " requires a value on function " + funcName);
+                }
+                if (values.length > 1) {
+                    throw new RestXqAnnotationException(
+                            "%output:" + local + " must have exactly one value on function " + funcName);
+                }
+                // Validate known serialization parameter names
+                if (!KNOWN_OUTPUT_PARAMS.contains(local)) {
+                    throw new RestXqAnnotationException(
+                            "Unknown serialization parameter %output:" + local + " on function " + funcName);
+                }
+                final String outputValue = getLiteralString(values, 0);
+                if (outputValue == null) {
+                    // Properties.setProperty rejects null values with a NullPointerException
+                    throw new RestXqAnnotationException(
+                            "%output:" + local + " requires a string value on function " + funcName);
+                }
+                outputProperties.setProperty(local, outputValue);
+            } else if (RestXqNamespaces.INPUT_NS.equals(ns)) {
+                // %input:json('lax=no'), %input:csv('header=yes'), %input:html('nons=true')
+                // Parse key=value pairs from annotation values and store as input.type.key=value
+                for (final LiteralValue v : values) {
+                    final String optStr = literalToString(v);
+                    if (optStr != null) {
+                        parseInputOptions(local, optStr, inputOptions);
+                    }
+                }
+            }
+        }
+
+        if (pathTemplate == null) {
+            // A method annotation without %rest:path is an error; anything else
+            // (e.g. a pure error handler) simply is not a route.
+            if (hasMethodAnnotation) {
+                throw new RestXqAnnotationException(
+                        "Function " + funcName + " has HTTP method annotation but no %rest:path");
+            }
+            return null;
+        }
+
+        // If no explicit HTTP method annotation, default to GET
+        if (methods.isEmpty()) {
+            methods.add("GET");
+        }
+
+        // Validate: GET, HEAD, DELETE, OPTIONS should not have body variable
+        if (bodyVariable != null) {
+            final Set<String> noBodyMethods = Set.of("GET", "HEAD", "DELETE", "OPTIONS");
+            for (final String m : methods) {
+                if (noBodyMethods.contains(m)) {
+                    throw new RestXqAnnotationException(
+                            "HTTP method " + m + " must not have a body variable on function " + funcName);
+                }
+            }
+        }
+
+        // Parse and validate the path template
+        final PathMatcher pathMatcher = PathMatcher.parse(pathTemplate);
+
+        // Validate template variables against function parameters
+        final SequenceType[] argTypes = function.getSignature().getArgumentTypes();
+        final int arity = argTypes != null ? argTypes.length : 0;
+        final List<String> templateVars = pathMatcher.getVarNames();
+
+        // Collect all declared variable names from annotations
+        final Set<String> annotationVars = new LinkedHashSet<>(templateVars);
+        for (final Map<String, Route.ParamBinding> bindings
+                : List.of(queryParams, formParams, headerParams, cookieParams)) {
+            for (final Route.ParamBinding b : bindings.values()) {
+                annotationVars.add(b.getVariableName());
+            }
+        }
+        if (bodyVariable != null) {
+            annotationVars.add(bodyVariable);
+        }
+
+        if (argTypes != null) {
+            final Set<String> paramNames = new LinkedHashSet<>();
+            for (final SequenceType st : argTypes) {
+                if (st instanceof FunctionParameterSequenceType fpst) {
+                    paramNames.add(fpst.getAttributeName());
+                }
+            }
+
+            // Check that each template variable has a corresponding function parameter
+            for (final String tv : templateVars) {
+                if (!paramNames.contains(tv)) {
+                    throw new RestXqAnnotationException(
+                            "Path template variable {$" + tv + "} has no corresponding function parameter "
+                                    + "on function " + funcName);
+                }
+            }
+
+            // Check that every function parameter is bound by some annotation
+            for (final String pn : paramNames) {
+                if (!annotationVars.contains(pn)) {
+                    throw new RestXqAnnotationException(
+                            "Function parameter $" + pn + " is not bound by any annotation "
+                                    + "on function " + funcName);
+                }
+            }
+
+            // Check that every annotation variable has a corresponding function parameter
+            for (final String av : annotationVars) {
+                if (!paramNames.contains(av)) {
+                    throw new RestXqAnnotationException(
+                            "Annotation variable $" + av + " has no corresponding function parameter "
+                                    + "on function " + funcName);
+                }
+            }
+        } else if (!templateVars.isEmpty()) {
+            throw new RestXqAnnotationException(
+                    "Path template has variables but function " + funcName + " has no parameters");
+        }
+
+        return new Route(
+                moduleUri,
+                function.getSignature().getName(),
+                arity,
+                pathMatcher,
+                Collections.unmodifiableSet(methods),
+                outputProperties,
+                Collections.unmodifiableList(consumes),
+                Collections.unmodifiableList(produces),
+                Collections.unmodifiableMap(queryParams),
+                Collections.unmodifiableMap(formParams),
+                Collections.unmodifiableMap(headerParams),
+                Collections.unmodifiableMap(cookieParams),
+                bodyVariable,
+                inputOptions
+        );
+    }
+
+    /**
+     * Parses a %rest:*-param annotation: ("paramName", "{$varName}", default?)
+     *
+     * @param values annotation arguments: external name, variable template, then defaults
+     * @param target map receiving the binding, keyed by external parameter name
+     * @throws RestXqAnnotationException if the name is not a string, the variable template
+     *         is malformed, or the parameter name is already bound in {@code target}
+     */
+    private static void parseParamBinding(final LiteralValue[] values,
+                                          final Map<String, Route.ParamBinding> target)
+            throws RestXqAnnotationException {
+        // NOTE(review): annotations with fewer than two arguments are skipped without
+        // any diagnostic - confirm that silently ignoring them is intended.
+        if (values.length < 2) {
+            return;
+        }
+        // First arg must be a string (the external parameter name)
+        final String paramName = getLiteralString(values, 0);
+        if (paramName == null) {
+            throw new RestXqAnnotationException("Parameter name must be a string");
+        }
+        // Validate first arg is string type (not integer etc.)
+        try {
+            if (values[0].getValue().getType() != org.exist.xquery.value.Type.STRING) {
+                throw new RestXqAnnotationException(
+                        "Parameter name must be a string, got: " + values[0].getValue().getStringValue());
+            }
+        } catch (final XPathException e) {
+            // Best effort: a failing type check must not block registration, but leave a trace
+            LOG.debug("Could not verify the type of parameter name '{}'", paramName, e);
+        }
+
+        // Second arg must use {$var} template syntax
+        final String varTemplate = getLiteralString(values, 1);
+        if (varTemplate == null || !varTemplate.contains("{") || !varTemplate.contains("$")) {
+            throw new RestXqAnnotationException(
+                    "Parameter variable must use {$var} template syntax, got: " + varTemplate);
+        }
+        final String varName = extractVariableName(varTemplate);
+        if (varName == null) {
+            throw new RestXqAnnotationException(
+                    "Invalid variable template: " + varTemplate);
+        }
+
+        // Check for duplicate param bindings
+        if (target.containsKey(paramName)) {
+            throw new RestXqAnnotationException(
+                    "Duplicate parameter binding for '" + paramName + "'");
+        }
+
+        // Every argument after the first two is a default value; unreadable ones are dropped
+        final List<String> defaults = new ArrayList<>();
+        for (int i = 2; i < values.length; i++) {
+            final String dv = getLiteralString(values, i);
+            if (dv != null) {
+                defaults.add(dv);
+            }
+        }
+        target.put(paramName, new Route.ParamBinding(paramName, varName, defaults));
+    }
+
+    /**
+     * Extracts a variable name from "{$varName}" syntax.
+     * Returns the name without the $ prefix and curly braces. The braces and the
+     * {@code $} are each optional, so {@code "$x"} and {@code "x"} also yield {@code "x"}.
+     *
+     * @param spec the variable template, may be {@code null}
+     * @return the bare variable name, or {@code null} if {@code spec} is {@code null}
+     *         or nothing remains after stripping
+     */
+    static String extractVariableName(final String spec) {
+        if (spec == null) {
+            return null;
+        }
+        String s = spec.trim();
+        if (s.startsWith("{") && s.endsWith("}")) {
+            s = s.substring(1, s.length() - 1).trim();
+        }
+        if (s.startsWith("$")) {
+            s = s.substring(1);
+        }
+        return s.isEmpty() ? null : s;
+    }
+
+    /**
+     * Returns the string value of the literal at {@code index}, or {@code null} when the
+     * index is past the end of {@code values} or the literal cannot be read as a string.
+     */
+    private static String getLiteralString(final LiteralValue[] values, final int index) {
+        if (index >= values.length) {
+            return null;
+        }
+        return literalToString(values[index]);
+    }
+
+    /**
+     * Parses input option strings like "lax=no" or "header=yes" from
+     * %input:json, %input:csv, %input:html annotations.
+     * Stores as "input.{type}.{key}={value}" in the options Properties.
+     *
+     * @param type    local name of the {@code %input:*} annotation
+     * @param optStr  one option, or several separated by commas
+     * @param options receives one property per well-formed {@code key=value} pair;
+     *                parts without a key before {@code =} are ignored
+     */
+    private static void parseInputOptions(final String type, final String optStr,
+                                          final Properties options) {
+        // Option format: "key=value" or "key=value,key2=value2"
+        for (final String part : optStr.split(",")) {
+            final String trimmed = part.trim();
+            final int eqIdx = trimmed.indexOf('=');
+            if (eqIdx > 0) {
+                final String key = trimmed.substring(0, eqIdx).trim();
+                final String value = trimmed.substring(eqIdx + 1).trim();
+                options.setProperty("input." + type + "." + key, value);
+            }
+        }
+    }
+
+    /**
+     * Returns the string value of an annotation literal, or {@code null} (after logging
+     * a warning) when the value cannot be converted.
+     */
+    private static String literalToString(final LiteralValue value) {
+        try {
+            return value.getValue().getStringValue();
+        } catch (final XPathException e) {
+            LOG.warn("Failed to get string value from annotation literal", e);
+            return null;
+        }
+    }
+
+    /**
+     * Parses %rest:error annotations from a function.
+     * Returns null if the function has no %rest:error annotation.
+     *
+     * @param function  the user-defined XQuery function whose annotations are inspected
+     * @param moduleUri URI of the declaring module; stored on the resulting error route
+     * @return the error route, or {@code null} when no error code was declared
+     * @throws RestXqAnnotationException if an error code is malformed or listed twice,
+     *         or a {@code %rest:error-param} binding is invalid
+     */
+    static ErrorRoute parseErrorFunction(final UserDefinedFunction function, final String moduleUri)
+            throws RestXqAnnotationException {
+        final Annotation[] annotations = function.getSignature().getAnnotations();
+        if (annotations == null || annotations.length == 0) {
+            return null;
+        }
+
+        final List<ErrorRoute.ErrorCode> errorCodes = new ArrayList<>();
+        // String forms of the codes collected so far, for duplicate detection
+        final Set<String> seenCodes = new LinkedHashSet<>();
+        final Map<String, Route.ParamBinding> errorParams = new LinkedHashMap<>();
+
+        for (final Annotation annotation : annotations) {
+            final QName name = annotation.getName();
+            if (!RestXqNamespaces.REST_NS.equals(name.getNamespaceURI())) {
+                continue;
+            }
+            final String local = name.getLocalPart();
+            final LiteralValue[] values = annotation.getValue();
+
+            if ("error".equals(local)) {
+                for (final LiteralValue value : values) {
+                    final String codeStr = literalToString(value);
+                    if (codeStr == null) {
+                        continue;
+                    }
+                    // parseErrorCode either returns a code or throws; it never yields null
+                    final ErrorRoute.ErrorCode code = parseErrorCode(codeStr, function);
+                    // Check for duplicate error codes
+                    if (!seenCodes.add(code.toString())) {
+                        throw new RestXqAnnotationException(
+                                "Duplicate error code: " + codeStr);
+                    }
+                    errorCodes.add(code);
+                }
+            } else if ("error-param".equals(local)) {
+                parseParamBinding(values, errorParams);
+            }
+        }
+
+        if (errorCodes.isEmpty()) {
+            return null;
+        }
+
+        final SequenceType[] argTypes = function.getSignature().getArgumentTypes();
+        final int arity = argTypes != null ? argTypes.length : 0;
+
+        // Hand out read-only views, as parseFunction does for Route
+        return new ErrorRoute(moduleUri, function.getSignature().getName(), arity,
+                Collections.unmodifiableList(errorCodes),
+                Collections.unmodifiableMap(errorParams));
+    }
+
+    /**
+     * Parses an error code pattern string into an ErrorCode.
+     * Supports: "*", "prefix:*", "*:local", "prefix:local", "Q{uri}local", "Q{uri}*"
+     *
+     * <p>An empty braced URI ({@code Q{}local}) denotes a name in no namespace and is
+     * stored with the empty string as namespace, exactly like a bare name.</p>
+     *
+     * @param codeStr  the pattern as written in the annotation
+     * @param function the annotated function; its context resolves namespace prefixes
+     * @return the parsed pattern, never {@code null}
+     * @throws RestXqAnnotationException if the pattern is syntactically invalid
+     */
+    private static ErrorRoute.ErrorCode parseErrorCode(final String codeStr,
+                                                       final UserDefinedFunction function)
+            throws RestXqAnnotationException {
+        if ("*".equals(codeStr)) {
+            return new ErrorRoute.ErrorCode(ErrorRoute.MatchType.CATCH_ALL, null, null);
+        }
+
+        // Q{uri}local or Q{uri}*
+        if (codeStr.startsWith("Q{")) {
+            final int closeBrace = codeStr.indexOf('}');
+            // closeBrace == 2 is "Q{}", the valid spelling of "no namespace"
+            if (closeBrace >= 2) {
+                final String uri = codeStr.substring(2, closeBrace);
+                final String localPart = codeStr.substring(closeBrace + 1);
+                if (localPart.isEmpty()) {
+                    throw new RestXqAnnotationException(
+                            "Invalid EQName in %rest:error — missing local part: " + codeStr);
+                }
+                if ("*".equals(localPart)) {
+                    return new ErrorRoute.ErrorCode(ErrorRoute.MatchType.NAMESPACE_WILD, uri, null);
+                }
+                validateNCName(localPart, codeStr);
+                return new ErrorRoute.ErrorCode(ErrorRoute.MatchType.EXACT, uri, localPart);
+            }
+            throw new RestXqAnnotationException(
+                    "Invalid EQName syntax in %rest:error: " + codeStr);
+        }
+
+        // prefix:* or *:local or prefix:local
+        final int colonIdx = codeStr.indexOf(':');
+        if (colonIdx > 0) {
+            final String prefix = codeStr.substring(0, colonIdx);
+            final String localPart = codeStr.substring(colonIdx + 1);
+
+            if ("*".equals(prefix)) {
+                // *:local
+                validateNCName(localPart, codeStr);
+                return new ErrorRoute.ErrorCode(ErrorRoute.MatchType.LOCAL_WILD, null, localPart);
+            }
+
+            final boolean wildcardLocal = "*".equals(localPart);
+            if (!wildcardLocal) {
+                validateNCName(localPart, codeStr);
+            }
+            // Resolve prefix to namespace URI; an unresolved prefix is used verbatim as fallback
+            final String nsUri = resolvePrefix(prefix, function);
+            final String namespace = (nsUri != null && !nsUri.isEmpty()) ? nsUri : prefix;
+            return wildcardLocal
+                    ? new ErrorRoute.ErrorCode(ErrorRoute.MatchType.NAMESPACE_WILD, namespace, null)
+                    : new ErrorRoute.ErrorCode(ErrorRoute.MatchType.EXACT, namespace, localPart);
+        }
+
+        // Bare name — no namespace, validate as NCName
+        validateNCName(codeStr, codeStr);
+        return new ErrorRoute.ErrorCode(ErrorRoute.MatchType.EXACT, "", codeStr);
+    }
+
+    /**
+     * Performs a lightweight NCName sanity check: the name must be non-empty, contain no
+     * whitespace and no colon, and start with a letter or underscore. This is not a full
+     * implementation of the XML NCName production.
+     *
+     * @param name    the candidate local name
+     * @param context the complete error-code string, quoted in error messages
+     * @throws RestXqAnnotationException if the name fails any of the checks
+     */
+    private static void validateNCName(final String name, final String context)
+            throws RestXqAnnotationException {
+        if (name == null || name.isEmpty()) {
+            throw new RestXqAnnotationException(
+                    "Empty name in %rest:error: " + context);
+        }
+        if (name.chars().anyMatch(Character::isWhitespace)) {
+            throw new RestXqAnnotationException(
+                    "Invalid name (contains spaces) in %rest:error: " + context);
+        }
+        final char first = name.charAt(0);
+        if (!Character.isLetter(first) && first != '_') {
+            throw new RestXqAnnotationException(
+                    "Invalid name (must start with letter or _) in %rest:error: " + context);
+        }
+        // e.g. "err:a:b" would otherwise yield the local part "a:b"
+        if (name.indexOf(':') >= 0) {
+            throw new RestXqAnnotationException(
+                    "Invalid name (contains colon) in %rest:error: " + context);
+        }
+    }
+
+    /**
+     * Resolves a namespace prefix using the function's XQuery context.
+     */
+    private static String resolvePrefix(final String prefix, final UserDefinedFunction function) {
+        // Pure delegation to the XQuery context the function belongs to. What the context
+        // returns for an undeclared prefix is not visible here; parseErrorCode treats a
+        // null or empty result as "unresolved" and falls back to the prefix text.
+        return function.getContext().getURIForPrefix(prefix);
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/http/restxq/CachingHttpServletRequest.java b/exist-core/src/main/java/org/exist/http/restxq/CachingHttpServletRequest.java
new file mode 100644
index 00000000000..453aab359f6
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/http/restxq/CachingHttpServletRequest.java
@@ -0,0 +1,85 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.http.restxq;
+
+import jakarta.servlet.ReadListener;
+import jakarta.servlet.ServletInputStream;
+import jakarta.servlet.http.HttpServletRequest;
+import jakarta.servlet.http.HttpServletRequestWrapper;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+/**
+ * HttpServletRequest wrapper that caches the request body so it can be
+ * read multiple times. Used for RESTXQ server-side forwards where the
+ * POST/PUT body must be preserved across route dispatches.
+ */
+class CachingHttpServletRequest extends HttpServletRequestWrapper {
+
+    /** Body bytes captured on first access; {@code null} until the body has been read. */
+    private byte[] cachedBody;
+
+    CachingHttpServletRequest(final HttpServletRequest request) {
+        super(request);
+    }
+
+    /**
+     * Returns a new stream positioned at the start of the cached body. The wrapped
+     * request's stream is drained on the first call only.
+     */
+    @Override
+    public ServletInputStream getInputStream() throws IOException {
+        return new CachedServletInputStream(body());
+    }
+
+    /**
+     * Returns a new reader over the cached body. Without this override the wrapper would
+     * delegate to the wrapped request's own reader, which bypasses the cache and cannot
+     * be used once the wrapped request's input stream has been read.
+     *
+     * @throws IOException if the body cannot be read or the declared character encoding
+     *                     is not supported
+     */
+    @Override
+    public java.io.BufferedReader getReader() throws IOException {
+        final ByteArrayInputStream bytes = new ByteArrayInputStream(body());
+        final String encoding = getCharacterEncoding();
+        // No declared encoding: fall back to ISO-8859-1, the servlet default for request readers
+        final java.io.Reader reader = encoding != null
+                ? new java.io.InputStreamReader(bytes, encoding)
+                : new java.io.InputStreamReader(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
+        return new java.io.BufferedReader(reader);
+    }
+
+    /** Reads the wrapped request's body once and returns the cached bytes afterwards. */
+    private byte[] body() throws IOException {
+        if (cachedBody == null) {
+            cachedBody = super.getInputStream().readAllBytes();
+        }
+        return cachedBody;
+    }
+
+    /** In-memory {@link ServletInputStream} over a byte array; never blocks. */
+    private static final class CachedServletInputStream extends ServletInputStream {
+        private final ByteArrayInputStream delegate;
+
+        CachedServletInputStream(final byte[] data) {
+            this.delegate = new ByteArrayInputStream(data);
+        }
+
+        @Override
+        public int read() {
+            return delegate.read();
+        }
+
+        @Override
+        public int read(final byte[] b, final int off, final int len) {
+            return delegate.read(b, off, len);
+        }
+
+        @Override
+        public boolean isFinished() {
+            return delegate.available() == 0;
+        }
+
+        @Override
+        public boolean isReady() {
+            // all data is already in memory
+            return true;
+        }
+
+        @Override
+        public void setReadListener(final ReadListener readListener) {
+            // not supported for cached streams
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/http/restxq/ErrorRoute.java b/exist-core/src/main/java/org/exist/http/restxq/ErrorRoute.java
new file mode 100644
index 00000000000..4ce35dc5767
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/http/restxq/ErrorRoute.java
@@ -0,0 +1,125 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import org.exist.dom.QName; + +import java.util.*; + +/** + * Represents a RESTXQ error handler function annotated with {@code %rest:error}. + * + *

<p>Error handlers match XQuery errors by QName with four precedence levels:</p>
+ * <ol>
+ *   <li>Exact QName: {@code %rest:error('err:FORG0001')}</li>
+ *   <li>Namespace wildcard: {@code %rest:error('err:*')}</li>
+ *   <li>Local-name wildcard: {@code %rest:error('*:FORG0001')}</li>
+ *   <li>Catch-all: {@code %rest:error('*')}</li>
+ * </ol>
+ */
+public class ErrorRoute {
+
+    /** Precedence: exact > namespace > local > catch-all */
+    public enum MatchType {
+        EXACT(0),
+        NAMESPACE_WILD(1),
+        LOCAL_WILD(2),
+        CATCH_ALL(3);
+
+        /** Rank used by {@link ErrorRoute#bestMatch}; the lower value wins. */
+        final int priority;
+
+        MatchType(final int priority) {
+            this.priority = priority;
+        }
+    }
+
+    /** An error code pattern from a %rest:error annotation. */
+    public static class ErrorCode {
+        private final MatchType matchType;
+        private final String namespaceURI; // null for catch-all and local-wild
+        private final String localName;    // null for catch-all and namespace-wild
+
+        public ErrorCode(final MatchType matchType, final String namespaceURI, final String localName) {
+            this.matchType = matchType;
+            this.namespaceURI = namespaceURI;
+            this.localName = localName;
+        }
+
+        public MatchType getMatchType() { return matchType; }
+        public String getNamespaceURI() { return namespaceURI; }
+        public String getLocalName() { return localName; }
+
+        /**
+         * Tests whether this pattern covers the given error name.
+         *
+         * <p>{@code null} and the empty string are treated as the same "no namespace"
+         * value on both sides, and a missing local name never causes an exception.</p>
+         *
+         * @param errorQName QName of the raised error
+         * @return {@code true} if the error is covered by this pattern
+         */
+        public boolean matches(final QName errorQName) {
+            return switch (matchType) {
+                case CATCH_ALL -> true;
+                case EXACT -> sameLocalName(errorQName) && sameNamespace(errorQName);
+                case NAMESPACE_WILD -> sameNamespace(errorQName);
+                case LOCAL_WILD -> sameLocalName(errorQName);
+            };
+        }
+
+        private boolean sameLocalName(final QName errorQName) {
+            return Objects.equals(localName, errorQName.getLocalPart());
+        }
+
+        private boolean sameNamespace(final QName errorQName) {
+            return emptyIfNull(namespaceURI).equals(emptyIfNull(errorQName.getNamespaceURI()));
+        }
+
+        /** Maps {@code null} to the empty string so both spellings of "no namespace" compare equal. */
+        private static String emptyIfNull(final String uri) {
+            return uri == null ? "" : uri;
+        }
+
+        /** Two patterns are equal when match type, namespace and local name agree. */
+        @Override
+        public boolean equals(final Object other) {
+            if (this == other) {
+                return true;
+            }
+            if (!(other instanceof ErrorCode that)) {
+                return false;
+            }
+            return matchType == that.matchType
+                    && emptyIfNull(namespaceURI).equals(emptyIfNull(that.namespaceURI))
+                    && Objects.equals(localName, that.localName);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(matchType, emptyIfNull(namespaceURI), localName);
+        }
+
+        @Override
+        public String toString() {
+            return switch (matchType) {
+                case CATCH_ALL -> "*";
+                case EXACT -> (namespaceURI != null ? "Q{" + namespaceURI + "}" : "") + localName;
+                case NAMESPACE_WILD -> "Q{" + namespaceURI + "}*";
+                case LOCAL_WILD -> "*:" + localName;
+            };
+        }
+    }
+
+    private final String moduleUri;
+    private final QName functionName;
+    private final int arity;
+    private final List<ErrorCode> errorCodes;
+    private final Map<String, Route.ParamBinding> errorParams;
+
+    /**
+     * @param moduleUri    URI of the module declaring the handler function
+     * @param functionName name of the handler function
+     * @param arity        number of parameters of the handler function
+     * @param errorCodes   error code patterns handled by the function
+     * @param errorParams  {@code %rest:error-param} bindings, keyed by parameter name
+     */
+    public ErrorRoute(final String moduleUri, final QName functionName, final int arity,
+                      final List<ErrorCode> errorCodes,
+                      final Map<String, Route.ParamBinding> errorParams) {
+        this.moduleUri = moduleUri;
+        this.functionName = functionName;
+        this.arity = arity;
+        this.errorCodes = errorCodes;
+        this.errorParams = errorParams;
+    }
+
+    public String getModuleUri() { return moduleUri; }
+    public QName getFunctionName() { return functionName; }
+    public int getArity() { return arity; }
+    public List<ErrorCode> getErrorCodes() { return errorCodes; }
+    public Map<String, Route.ParamBinding> getErrorParams() { return errorParams; }
+
+    /**
+     * Returns the best matching error code for the given QName, or null.
+     * Among several matching patterns the one with the lowest
+     * {@link MatchType} priority value wins; ties keep the pattern declared first.
+     */
+    public ErrorCode bestMatch(final QName errorQName) {
+        ErrorCode best = null;
+        for (final ErrorCode candidate : errorCodes) {
+            if (!candidate.matches(errorQName)) {
+                continue;
+            }
+            if (best == null || candidate.matchType.priority < best.matchType.priority) {
+                best = candidate;
+            }
+        }
+        return best;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/http/restxq/NativeRestXqServlet.java b/exist-core/src/main/java/org/exist/http/restxq/NativeRestXqServlet.java
new file mode 100644
index 00000000000..67cf8cbb260
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/http/restxq/NativeRestXqServlet.java
@@ -0,0 +1,540 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import jakarta.servlet.ServletConfig; +import jakarta.servlet.ServletException; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.EXistException; +import org.exist.http.servlets.AbstractExistHttpServlet; +import org.exist.security.EffectiveSubject; +import org.exist.security.Permission; +import org.exist.security.PermissionDeniedException; +import org.exist.security.Subject; +import org.exist.source.DBSource; +import org.exist.storage.DBBroker; +import org.exist.storage.ProcessMonitor; +import org.exist.dom.persistent.BinaryDocument; +import org.exist.xmldb.XmldbURI; +import org.exist.http.restxq.xquery.WebFunctions; +import org.exist.xquery.*; +import org.exist.xquery.value.*; + +import java.io.IOException; +import java.util.*; + + + +/** + * Native RESTXQ servlet that dispatches HTTP requests to XQuery functions + * annotated with {@code %rest:*} annotations. + * + *

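<p>This servlet replaces the old {@code RestXqServlet} from the EXQuery
+ * extension, eliminating the 10-JAR EXQuery library dependency and the
+ * adapter layer between EXQuery and eXist types.</p>
+ *
+ * <h3>Servlet Configuration</h3>
+ * <p>Add to web.xml:</p>
+ * <pre>{@code
+ * <servlet>
+ *     <servlet-name>NativeRestXqServlet</servlet-name>
+ *     <servlet-class>org.exist.http.restxq.NativeRestXqServlet</servlet-class>
+ *     <init-param>
+ *         <param-name>scan-root</param-name>
+ *         <param-value>/db/apps</param-value>
+ *     </init-param>
+ * </servlet>
+ * }</pre>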
+ */ +public class NativeRestXqServlet extends AbstractExistHttpServlet { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LogManager.getLogger(NativeRestXqServlet.class); + + /** Default database path to scan for RESTXQ modules. */ + private static final String DEFAULT_SCAN_ROOT = "/db/apps"; + + /** Init parameter for the scan root collection path. */ + private static final String PARAM_SCAN_ROOT = "scan-root"; + + /** Init parameter to scan at startup (default true). */ + private static final String PARAM_SCAN_ON_STARTUP = "scan-on-startup"; + + private RouteRegistry registry; + + @Override + public Logger getLog() { + return LOG; + } + + @Override + public void init(final ServletConfig config) throws ServletException { + super.init(config); + + final String scanRoot = Optional.ofNullable(config.getInitParameter(PARAM_SCAN_ROOT)) + .orElse(DEFAULT_SCAN_ROOT); + + registry = new RouteRegistry(getPool(), scanRoot); + + final boolean scanOnStartup = !"false".equalsIgnoreCase( + config.getInitParameter(PARAM_SCAN_ON_STARTUP)); + + if (scanOnStartup) { + try (final DBBroker broker = getPool().get(Optional.empty())) { + LOG.info("NativeRestXqServlet: pre-scanning RESTXQ modules at startup"); + registry.fullScan(broker); + } catch (final EXistException e) { + LOG.warn("Failed to pre-scan RESTXQ modules at startup: {}", e.getMessage()); + } + } + + LOG.info("NativeRestXqServlet initialized; scan-root={}, scan-on-startup={}", + scanRoot, scanOnStartup); + } + + @Override + protected void service(final HttpServletRequest request, + final HttpServletResponse response) + throws ServletException, IOException { + + // Authenticate + final Subject user = authenticate(request, response); + if (user == null) { + return; // Authentication challenge sent + } + + // Wrap request to cache body for potential forward dispatch + final HttpServletRequest wrappedRequest = + hasBody(request) ? 
new CachingHttpServletRequest(request) : request; + + // Handle /.init — cache invalidation endpoint + final String pathInfo = getRestXqPath(wrappedRequest); + if ("/.init".equals(pathInfo)) { + registry.invalidate(); + response.setStatus(HttpServletResponse.SC_NO_CONTENT); + return; + } + + final long startTime = System.nanoTime(); + + try (final DBBroker broker = getPool().get(Optional.of(user))) { + + // Ensure the route registry is initialized + registry.ensureInitialized(broker); + + // Find matching route + final String method = wrappedRequest.getMethod().toUpperCase(Locale.ROOT); + Route route = registry.findRoute( + method, pathInfo, + wrappedRequest.getContentType(), + wrappedRequest.getHeader("Accept")); + + boolean headFromGet = false; + + // Auto-handle HEAD: if no explicit HEAD route, try GET + if (route == null && "HEAD".equals(method)) { + route = registry.findRoute("GET", pathInfo, + wrappedRequest.getContentType(), + wrappedRequest.getHeader("Accept")); + if (route != null) { + headFromGet = true; + } + } + + // Auto-handle OPTIONS: if no explicit OPTIONS route, return Allow header + if (route == null && "OPTIONS".equals(method)) { + final Set allowed = registry.allowedMethods(pathInfo); + if (!allowed.isEmpty()) { + allowed.add("OPTIONS"); + allowed.add("HEAD"); + response.setHeader("Allow", String.join(", ", allowed)); + response.setStatus(HttpServletResponse.SC_OK); + return; + } + } + + if (route == null) { + // If modules failed and there are no working routes at all, + // return 500 (the user likely just uploaded a broken module) + final Map failures = registry.getFailedModules(); + if (!failures.isEmpty() && registry.getRouteCount() == 0 + && registry.getModuleCount() == 0) { + final String firstError = failures.values().iterator().next(); + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + "RESTXQ module error: " + firstError); + return; + } + // Check if path matches with different method → 405 + final Set allowed = 
registry.allowedMethods(pathInfo, + wrappedRequest.getContentType(), wrappedRequest.getHeader("Accept")); + if (!allowed.isEmpty()) { + response.setHeader("Allow", String.join(", ", allowed)); + response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED); + } else { + response.sendError(HttpServletResponse.SC_NOT_FOUND); + } + return; + } + + if (headFromGet) { + executeRoute(broker, route, wrappedRequest, response, true); + } else { + executeRoute(broker, route, wrappedRequest, response, false); + } + + // Add Server-Timing header + final long durationMs = (System.nanoTime() - startTime) / 1_000_000; + response.addHeader("Server-Timing", "total;dur=" + durationMs); + + } catch (final EXistException e) { + LOG.error("Database error processing RESTXQ request", e); + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + "Database error: " + e.getMessage()); + } + } + + /** + * Executes the matched route's XQuery function and writes the result + * to the HTTP response. + */ + private void executeRoute(final DBBroker broker, final Route route, + final HttpServletRequest request, + final HttpServletResponse response, + final boolean headOnly) throws IOException { + + CompiledXQuery xquery = null; + ProcessMonitor processMonitor = null; + + try { + // Compile or retrieve the XQuery module + final XmldbURI moduleUri = XmldbURI.create(route.getModuleUri()); + final BinaryDocument binDoc = (BinaryDocument) broker.getResource(moduleUri, + Permission.READ | Permission.EXECUTE); + if (binDoc == null) { + response.sendError(HttpServletResponse.SC_NOT_FOUND, + "RESTXQ module not found: " + route.getModuleUri()); + return; + } + + final DBSource source = new DBSource(getPool(), binDoc, true); + final XQuery xqueryService = getPool().getXQueryService(); + final XQueryContext context = new XQueryContext(getPool()); + + context.setModuleLoadPath(XmldbURI.EMBEDDED_SERVER_URI_PREFIX + + moduleUri.removeLastSegment().toString()); + + xquery = 
xqueryService.compile(context, source); + + // Check eXist security annotations (%auth:*) before execution + final String authDenial = SecurityAnnotationHandler.checkAccess( + broker.getCurrentSubject(), route, xquery); + if (authDenial != null) { + response.sendError(HttpServletResponse.SC_FORBIDDEN, authDenial); + return; + } + + // Resolve the function + final UserDefinedFunction fn = context.resolveFunction( + route.getFunctionName(), route.getArity()); + if (fn == null) { + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + "RESTXQ function not found: " + route.getFunctionName() + + "#" + route.getArity()); + return; + } + + // Evaluate global variable declarations (workaround for context.reset()) + final Expression rootExpr = context.getRootExpression(); + for (int i = 0; i < rootExpr.getSubExpressionCount(); i++) { + final Expression subExpr = rootExpr.getSubExpression(i); + if (subExpr instanceof VariableDeclaration) { + subExpr.eval(null, null); + } + } + + // Set up process monitoring + processMonitor = broker.getBrokerPool().getProcessMonitor(); + context.getProfiler().traceQueryStart(); + processMonitor.queryStarted(context.getWatchDog()); + + // Bind parameters + final String restxqPath = getRestXqPath(request); + final SequenceType[] argTypes = fn.getSignature().getArgumentTypes(); + final Sequence[] args = ParameterBinder.bind( + context, route, request, restxqPath, argTypes); + + // Execute the function + try (final FunctionReference fnRef = new FunctionReference( + new FunctionCall(context, fn))) { + + fnRef.analyze(new AnalyzeContextInfo()); + + // Handle setUid/setGid + final Optional effectiveSubject = getEffectiveSubject(xquery); + try { + effectiveSubject.ifPresent(broker::pushSubject); + + final Sequence result = fnRef.evalFunction(null, null, args); + + // Check if this is an explicit HEAD route (not auto-from-GET) + final boolean isExplicitHead = route.getMethods().contains("HEAD") + && 
"HEAD".equals(request.getMethod().toUpperCase(Locale.ROOT)); + + if (isExplicitHead) { + // Explicit HEAD handler: must return rest:response element + if (result.isEmpty()) { + throw new XPathException((Expression) null, + "HEAD handler must return a rest:response element, got empty sequence"); + } + final Item firstItem = result.itemAt(0); + if (!Type.subTypeOf(firstItem.getType(), Type.ELEMENT)) { + throw new XPathException((Expression) null, + "HEAD handler must return a rest:response element"); + } + final org.w3c.dom.Node node = ((NodeValue) firstItem).getNode(); + if (!"response".equals(node.getLocalName()) + || !RestXqNamespaces.REST_NS.equals(node.getNamespaceURI())) { + throw new XPathException((Expression) null, + "HEAD handler must return a rest:response element, got: " + + node.getLocalName()); + } + // HEAD handler: 200 OK, no body + response.setStatus(HttpServletResponse.SC_OK); + } else if (!headOnly) { + // Normal route execution + if (!response.isCommitted()) { + response.setStatus(HttpServletResponse.SC_OK); + } + ResponseWriter.write(broker, route, result, response); + } else { + // Auto-HEAD from GET: set status and headers but skip body + response.setStatus(HttpServletResponse.SC_OK); + if (response.getContentType() == null) { + response.setContentType(route.getResponseContentType()); + } + } + + } finally { + effectiveSubject.ifPresent(es -> broker.popSubject()); + } + } + + } catch (final WebFunctions.WebErrorException e) { + // web:error() — return clean HTTP error, no stack trace + if (!response.isCommitted()) { + response.sendError(e.getHttpStatusCode(), e.getDetailMessage()); + } + } catch (final XPathException e) { + // Try to find a matching error handler + final org.exist.dom.QName errorQName = e.getErrorCode() != null + ? 
e.getErrorCode().getErrorQName() + : new org.exist.dom.QName("FOER0000", "http://www.w3.org/2005/xqt-errors", "err"); + final ErrorRoute errorHandler = registry.findErrorHandler(errorQName); + if (errorHandler != null && !response.isCommitted()) { + try { + executeErrorHandler(broker, errorHandler, e, response); + return; + } catch (final Exception ex) { + LOG.error("Error executing RESTXQ error handler", ex); + } + } + LOG.error("XQuery error executing RESTXQ function {}: {}", + route.getFunctionName(), e.getMessage()); + if (!response.isCommitted()) { + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + "XQuery error: " + e.getMessage()); + } + } catch (final RestXqForwardException e) { + // Server-side forward: dispatch to the target route + final String forwardPath = "/" + e.getForwardPath(); + final Route forwardRoute = registry.findRoute( + request.getMethod().toUpperCase(Locale.ROOT), forwardPath, + request.getContentType(), request.getHeader("Accept")); + if (forwardRoute != null && !response.isCommitted()) { + executeRoute(broker, forwardRoute, request, response, headOnly); + } else if (!response.isCommitted()) { + response.sendError(HttpServletResponse.SC_NOT_FOUND, + "Forward target not found: " + forwardPath); + } + } catch (final PermissionDeniedException e) { + if (!response.isCommitted()) { + response.sendError(HttpServletResponse.SC_FORBIDDEN, e.getMessage()); + } + } catch (final Exception e) { + LOG.error("Unexpected error executing RESTXQ function", e); + if (!response.isCommitted()) { + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + e.getMessage()); + } + } finally { + if (processMonitor != null && xquery != null) { + xquery.getContext().getProfiler().traceQueryEnd(xquery.getContext()); + processMonitor.queryCompleted(xquery.getContext().getWatchDog()); + } + } + } + + /** + * Executes a RESTXQ error handler function, binding error parameters. 
+     *
+     * The handler module is compiled in a fresh XQuery context. The details of
+     * {@code error} are made available under the names {@code code},
+     * {@code description}, {@code module}, {@code line-number},
+     * {@code column-number} and, when the error carries one, {@code value};
+     * they are then re-keyed by the variable names declared in the handler's
+     * error-parameter annotations and matched against the function's parameter
+     * names. The function result is serialized with status 200.
+     *
+     * @param broker       the broker used to load the module and write the response
+     * @param errorHandler the error route selected for the raised error
+     * @param error        the XQuery error being handled
+     * @param response     the HTTP response to write to
+     * @throws IOException if the handler module or the handler function cannot be found
+     * @throws Exception   if compiling or evaluating the handler fails
+     */
+    private void executeErrorHandler(final DBBroker broker, final ErrorRoute errorHandler,
+                                     final XPathException error,
+                                     final HttpServletResponse response) throws Exception {
+
+        final XmldbURI moduleUri = XmldbURI.create(errorHandler.getModuleUri());
+        final BinaryDocument binDoc = (BinaryDocument) broker.getResource(moduleUri,
+                Permission.READ | Permission.EXECUTE);
+        if (binDoc == null) {
+            throw new IOException("Error handler module not found: " + errorHandler.getModuleUri());
+        }
+
+        final DBSource source = new DBSource(getPool(), binDoc, true);
+        final XQuery xqueryService = getPool().getXQueryService();
+        final XQueryContext context = new XQueryContext(getPool());
+        context.setModuleLoadPath(XmldbURI.EMBEDDED_SERVER_URI_PREFIX
+                + moduleUri.removeLastSegment().toString());
+
+        // Compilation populates the context; the compiled query object itself is not needed here.
+        xqueryService.compile(context, source);
+
+        final UserDefinedFunction fn = context.resolveFunction(
+                errorHandler.getFunctionName(), errorHandler.getArity());
+        if (fn == null) {
+            // Mirrors the check in executeRoute: without it the code below fails with a bare NPE.
+            throw new IOException("RESTXQ error handler function not found: "
+                    + errorHandler.getFunctionName() + "#" + errorHandler.getArity());
+        }
+
+        // Evaluate global variables
+        final Expression rootExpr = context.getRootExpression();
+        for (int i = 0; i < rootExpr.getSubExpressionCount(); i++) {
+            final Expression subExpr = rootExpr.getSubExpression(i);
+            if (subExpr instanceof VariableDeclaration) {
+                subExpr.eval(null, null);
+            }
+        }
+
+        // Build error param bindings, keyed first by the standard error parameter names
+        final Map<String, Sequence> errorBindings = new LinkedHashMap<>();
+        final org.exist.dom.QName errorQName = error.getErrorCode() != null
+                ? error.getErrorCode().getErrorQName()
+                : new org.exist.dom.QName("FOER0000", "http://www.w3.org/2005/xqt-errors", "err");
+        final String prefix = errorQName.getPrefix();
+        final String prefixPart = (prefix != null && !prefix.isEmpty()) ? prefix + ":" : "";
+        errorBindings.put("code", new StringValue("#" + prefixPart + errorQName.getLocalPart()));
+        errorBindings.put("description", new StringValue(
+                error.getDetailMessage() != null ? error.getDetailMessage() : ""));
+        errorBindings.put("module", new StringValue(
+                error.getSource() != null ? error.getSource().path() : ""));
+        errorBindings.put("line-number", new IntegerValue(error.getLine()));
+        errorBindings.put("column-number", new IntegerValue(error.getColumn()));
+        if (error.getErrorVal() != null) {
+            errorBindings.put("value", error.getErrorVal());
+        }
+
+        // Re-key each available value under the variable name the annotation maps it to
+        for (final var binding : errorHandler.getErrorParams().values()) {
+            final Sequence val = errorBindings.get(binding.getParamName());
+            if (val != null) {
+                errorBindings.put(binding.getVariableName(), val);
+            }
+        }
+
+        // Map to function args; parameters without a binding receive the empty sequence
+        final SequenceType[] argTypes = fn.getSignature().getArgumentTypes();
+        final Sequence[] args = new Sequence[argTypes != null ? argTypes.length : 0];
+        for (int i = 0; i < args.length; i++) {
+            final FunctionParameterSequenceType paramType = (FunctionParameterSequenceType) argTypes[i];
+            final Sequence val = errorBindings.get(paramType.getAttributeName());
+            args[i] = val != null ? val : Sequence.EMPTY_SEQUENCE;
+        }
+
+        try (final FunctionReference fnRef = new FunctionReference(new FunctionCall(context, fn))) {
+            fnRef.analyze(new AnalyzeContextInfo());
+            final Sequence result = fnRef.evalFunction(null, null, args);
+
+            response.setStatus(HttpServletResponse.SC_OK);
+            // Use a minimal route for serialization
+            final Route dummyRoute = new Route(errorHandler.getModuleUri(),
+                    errorHandler.getFunctionName(), errorHandler.getArity(),
+                    PathMatcher.parse("/"), Set.of("GET"), new java.util.Properties(),
+                    List.of(), List.of(),
+                    Map.of(), Map.of(), Map.of(), Map.of(), null, new java.util.Properties());
+            ResponseWriter.write(broker, dummyRoute, result, response);
+        }
+    }
+
+    /**
+     * Extracts the RESTXQ-relevant path from the request.
+     * Strips the servlet context path and any prefix like "/apps".
+     *
+     * NOTE(review): the code below performs no prefix stripping of its own; it
+     * returns the servlet path info, falls back to the servlet path when the
+     * path info is null, and falls back to "/" when that is null or empty.
+     *
+     * @param request the incoming HTTP request
+     * @return the path to match against RESTXQ routes, never null or empty
+     */
+    private String getRestXqPath(final HttpServletRequest request) {
+        String path = request.getPathInfo();
+        if (path == null) {
+            path = request.getServletPath();
+        }
+        if (path == null || path.isEmpty()) {
+            path = "/";
+        }
+        return path;
+    }
+
+    /**
+     * If the compiled XQuery is setUid and/or setGid, returns the
+     * EffectiveSubject to use for execution.
+     *
+     * @param xquery the compiled query whose source permissions are inspected
+     * @return the subject to push for the duration of the call, or empty when the
+     *         source is not a {@code DBSource} or carries neither setUid nor setGid
+     */
+    private Optional<EffectiveSubject> getEffectiveSubject(final CompiledXQuery xquery) {
+        final org.exist.source.Source src = xquery.getContext().getSource();
+        if (!(src instanceof DBSource dbSrc)) {
+            return Optional.empty();
+        }
+
+        final Permission perm = dbSrc.getPermissions();
+        if (perm.isSetUid()) {
+            // setUid, optionally combined with setGid: run as the owner (and group) of the module
+            return Optional.of(perm.isSetGid()
+                    ? new EffectiveSubject(perm.getOwner(), perm.getGroup())
+                    : new EffectiveSubject(perm.getOwner()));
+        }
+        if (perm.isSetGid()) {
+            // setGid only: keep the current subject but switch to the module's group
+            return Optional.of(new EffectiveSubject(
+                    xquery.getContext().getBroker().getCurrentSubject(), perm.getGroup()));
+        }
+        return Optional.empty();
+    }
+
+    /**
+     * Returns the route registry, for use by XQuery modules like
+     * rest:resource-functions() and rest:init().
+     */
+    public RouteRegistry getRouteRegistry() {
+        return registry;
+    }
+
+    /**
+     * Returns true if the request method typically carries a body.
+ */ + private static boolean hasBody(final HttpServletRequest request) { + final String method = request.getMethod().toUpperCase(Locale.ROOT); + return "POST".equals(method) || "PUT".equals(method) || "PATCH".equals(method); + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/ParameterBinder.java b/exist-core/src/main/java/org/exist/http/restxq/ParameterBinder.java new file mode 100644 index 00000000000..a5839918cbf --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/ParameterBinder.java @@ -0,0 +1,499 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import jakarta.servlet.http.Cookie; +import jakarta.servlet.http.HttpServletRequest; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.dom.memtree.SAXAdapter; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.*; + +import org.xml.sax.InputSource; +import org.xml.sax.XMLReader; + +import javax.xml.parsers.SAXParserFactory; +import java.io.IOException; +import java.io.InputStream; +import java.util.*; + +/** + * Binds HTTP request data to XQuery function parameters based on + * RESTXQ annotations. Handles path variables, query parameters, + * form parameters, header parameters, cookie parameters, and request body. + * + *

Parameter values are automatically cast to the declared XQuery + * function parameter types where possible.

+ */
+public class ParameterBinder {
+
+    private static final Logger LOG = LogManager.getLogger(ParameterBinder.class);
+
+    /**
+     * Binds all available request data to function arguments according to
+     * the route's parameter annotations.
+     *
+     * Values are collected into one name-to-value map in a fixed order (path
+     * variables, query parameters, form parameters, headers, cookies, body);
+     * a later source overwrites an earlier one bound to the same variable name.
+     * Function parameters with no binding receive the empty sequence.
+     *
+     * @param context the XQuery context
+     * @param route the matched route
+     * @param request the HTTP request
+     * @param requestPath the matched request path (after prefix stripping)
+     * @param argTypes the function's declared parameter types
+     * @return array of Sequence values to pass as function arguments
+     * @throws XPathException if the request body cannot be read or a value
+     *                        cannot be cast to the declared parameter type
+     */
+    public static Sequence[] bind(final XQueryContext context,
+                                  final Route route,
+                                  final HttpServletRequest request,
+                                  final String requestPath,
+                                  final SequenceType[] argTypes) throws XPathException {
+
+        if (argTypes == null || argTypes.length == 0) {
+            return new Sequence[0];
+        }
+
+        // Build a map of variable name → value from all sources
+        final Map<String, Sequence> bindings = new LinkedHashMap<>();
+
+        // 1. Path template variables
+        final Map<String, String> pathVars = route.getPathMatcher().extractVariables(requestPath);
+        for (final Map.Entry<String, String> entry : pathVars.entrySet()) {
+            bindings.put(entry.getKey(), new StringValue(entry.getValue()));
+        }
+
+        // 2. Query parameters
+        bindParams(route.getQueryParams(), request.getParameterMap(), bindings);
+
+        // 3. Form parameters (only for POST with form content type)
+        if ("POST".equalsIgnoreCase(request.getMethod())
+                && request.getContentType() != null
+                && request.getContentType().startsWith("application/x-www-form-urlencoded")) {
+            bindParams(route.getFormParams(), request.getParameterMap(), bindings);
+        }
+
+        // 4. Header parameters
+        for (final var binding : route.getHeaderParams().values()) {
+            final String headerValue = request.getHeader(binding.getParamName());
+            if (headerValue != null) {
+                bindings.put(binding.getVariableName(), new StringValue(headerValue));
+            } else if (binding.getDefaultValue() != null) {
+                bindings.put(binding.getVariableName(), new StringValue(binding.getDefaultValue()));
+            }
+        }
+
+        // 5. Cookie parameters (the cookie array is fetched once, not once per parameter)
+        final Cookie[] cookies = request.getCookies();
+        for (final var binding : route.getCookieParams().values()) {
+            final String cookieValue = findCookieValue(cookies, binding.getParamName());
+            if (cookieValue != null) {
+                bindings.put(binding.getVariableName(), new StringValue(cookieValue));
+            } else if (binding.getDefaultValue() != null) {
+                bindings.put(binding.getVariableName(), new StringValue(binding.getDefaultValue()));
+            }
+        }
+
+        // 6. Request body (for POST/PUT/PATCH with body variable binding)
+        if (route.getBodyVariable() != null) {
+            try {
+                final Sequence bodyValue = readRequestBody(context, request, route);
+                if (bodyValue != null) {
+                    bindings.put(route.getBodyVariable(), bodyValue);
+                }
+            } catch (final IOException e) {
+                throw new XPathException((org.exist.xquery.Expression) null,
+                        "Failed to read request body: " + e.getMessage());
+            }
+        }
+
+        // Map bindings to function argument positions
+        final Sequence[] args = new Sequence[argTypes.length];
+        for (int i = 0; i < argTypes.length; i++) {
+            final FunctionParameterSequenceType paramType = (FunctionParameterSequenceType) argTypes[i];
+            final Sequence value = bindings.get(paramType.getAttributeName());
+            args[i] = value != null
+                    ? castValue(value, paramType.getPrimaryType())
+                    : Sequence.EMPTY_SEQUENCE;
+        }
+
+        return args;
+    }
+
+    /**
+     * Returns the value of the first cookie with the given name, or null when
+     * the request carried no cookies or none with that name.
+     */
+    private static String findCookieValue(final Cookie[] cookies, final String cookieName) {
+        if (cookies == null) {
+            return null;
+        }
+        for (final Cookie cookie : cookies) {
+            if (cookieName.equals(cookie.getName())) {
+                return cookie.getValue();
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Binds named parameters from the request parameter map using the
+     * %rest:*-param annotation bindings.
+     *
+     * A parameter present in the request is bound to its value(s); otherwise the
+     * annotation's default values are used when there are any. Single values are
+     * bound as one untyped atomic value, multiple values as a sequence of them.
+     *
+     * @param paramBindings annotation bindings (request parameter name → variable)
+     * @param requestParams the request's parameter map
+     * @param bindings      the variable-name → value map to add to
+     */
+    private static void bindParams(final Map<String, Route.ParamBinding> paramBindings,
+                                   final Map<String, String[]> requestParams,
+                                   final Map<String, Sequence> bindings) {
+        for (final Route.ParamBinding binding : paramBindings.values()) {
+            final String[] values = requestParams.get(binding.getParamName());
+            final java.util.List<String> effective = (values != null && values.length > 0)
+                    ? java.util.Arrays.asList(values)
+                    : binding.getDefaultValues();
+            if (!effective.isEmpty()) {
+                bindings.put(binding.getVariableName(), toUntypedSequence(effective));
+            }
+        }
+    }
+
+    /**
+     * Wraps one or more strings as untyped atomic values: a single string
+     * becomes a single value, several become a {@code ValueSequence}.
+     */
+    private static Sequence toUntypedSequence(final java.util.List<String> values) {
+        if (values.size() == 1) {
+            return new UntypedAtomicValue(values.get(0));
+        }
+        final ValueSequence seq = new ValueSequence();
+        for (final String v : values) {
+            seq.add(new UntypedAtomicValue(v));
+        }
+        return seq;
+    }
+
+    /**
+     * Reads the request body and returns an appropriate XQuery value
+     * based on the Content-Type.
+     *
+     * XML types are parsed to a document, JSON and CSV are converted to XML,
+     * other {@code text/*} types become a string, anything else a base64 binary.
+     *
+     * @return the body value, or null when the request has no Content-Type
+     * @throws IOException    if reading the body fails
+     * @throws XPathException if the body cannot be parsed
+     */
+    private static Sequence readRequestBody(final XQueryContext context,
+                                            final HttpServletRequest request,
+                                            final Route route)
+            throws IOException, XPathException {
+        final String contentType = request.getContentType();
+        if (contentType == null) {
+            return null;
+        }
+
+        // Media types are case-insensitive, so normalise before comparing.
+        final int semi = contentType.indexOf(';');
+        final String baseType = (semi >= 0 ? contentType.substring(0, semi) : contentType)
+                .trim().toLowerCase(java.util.Locale.ROOT);
+
+        // Extract content-type parameters (e.g., "lax=false" from "application/json;lax=false")
+        final java.util.Properties ctParams = parseContentTypeParams(contentType);
+
+        final String encoding = request.getCharacterEncoding() != null
+                ? request.getCharacterEncoding() : "UTF-8";
+
+        try (final InputStream is = request.getInputStream()) {
+            if ("application/xml".equals(baseType) || "text/xml".equals(baseType)
+                    || baseType.endsWith("+xml")) {
+                return parseXmlBody(context, is);
+            } else if ("application/json".equals(baseType) || baseType.endsWith("+json")) {
+                final String jsonStr = new String(is.readAllBytes(), encoding);
+                // Determine lax mode: %input:json annotation > content-type param > default (no)
+                final boolean lax = resolveJsonLax(route, ctParams);
+                return parseJsonToXml(context, jsonStr, lax);
+            } else if ("text/csv".equals(baseType)) {
+                final String csvStr = new String(is.readAllBytes(), encoding);
+                // Determine header mode: %input:csv annotation > content-type param > default (no)
+                final boolean header = resolveCsvHeader(route, ctParams);
+                return parseCsvToXml(context, csvStr, header);
+            } else if (baseType.startsWith("text/")) {
+                return new StringValue(new String(is.readAllBytes(), encoding));
+            } else {
+                // Binary body
+                return BinaryValueFromInputStream.getInstance(context,
+                        new Base64BinaryValueType(), is, null);
+            }
+        }
+    }
+
+    /**
+     * Parses an XML request body into an in-memory document.
+     *
+     * The body is untrusted input, so external entities and external DTD
+     * loading are switched off before parsing (XXE hardening).
+     *
+     * @throws XPathException if the parser cannot be configured or the body is not well-formed
+     */
+    private static Sequence parseXmlBody(final XQueryContext context,
+                                         final InputStream is) throws XPathException {
+        try {
+            final SAXParserFactory factory = SAXParserFactory.newInstance();
+            factory.setNamespaceAware(true);
+            factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
+            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+            final XMLReader reader = factory.newSAXParser().getXMLReader();
+            final SAXAdapter adapter = new SAXAdapter(context);
+            reader.setContentHandler(adapter);
+            reader.parse(new InputSource(is));
+            return adapter.getDocument();
+        } catch (final Exception e) {
+            throw new XPathException((org.exist.xquery.Expression) null,
+                    "Failed to parse XML request body: " + e.getMessage());
+        }
+    }
+
+    /**
+     * Casts a value to the target XQuery type if needed.
+     *
+     * Items that already have the target type (or a subtype) and non-atomic
+     * items are passed through unchanged. Sequences of several items are cast
+     * item by item, so repeated request parameters bound to a typed parameter
+     * are converted as well.
+     *
+     * @throws XPathException if an atomic value cannot be converted
+     */
+    private static Sequence castValue(final Sequence value, final int targetType) throws XPathException {
+        if (targetType == Type.ITEM || targetType == Type.STRING || targetType == Type.ANY_TYPE) {
+            return value;
+        }
+
+        if (value.isEmpty()) {
+            return value;
+        }
+
+        if (value.hasOne()) {
+            final Item item = value.itemAt(0);
+            // If it's already the right type, return as-is
+            if (item.getType() == targetType || Type.subTypeOf(item.getType(), targetType)) {
+                return value;
+            }
+            // Try automatic casting
+            if (item instanceof AtomicValue) {
+                return ((AtomicValue) item).convertTo(targetType);
+            }
+            return value;
+        }
+
+        final ValueSequence converted = new ValueSequence();
+        for (final SequenceIterator it = value.iterate(); it.hasNext(); ) {
+            final Item item = it.nextItem();
+            final boolean alreadyTyped = item.getType() == targetType
+                    || Type.subTypeOf(item.getType(), targetType);
+            if (!alreadyTyped && item instanceof AtomicValue) {
+                converted.add(((AtomicValue) item).convertTo(targetType));
+            } else {
+                converted.add(item);
+            }
+        }
+        return converted;
+    }
+
+    /**
+     * Parses content-type parameters (everything after the semicolon).
+     * E.g., "application/json;lax=false" → {"lax": "false"}
+     */
+    private static java.util.Properties parseContentTypeParams(final String contentType) {
+        final java.util.Properties result = new java.util.Properties();
+        final int firstSemicolon = contentType == null ? -1 : contentType.indexOf(';');
+        if (firstSemicolon < 0) {
+            return result;
+        }
+        for (final String rawParam : contentType.substring(firstSemicolon + 1).split(";")) {
+            final String param = rawParam.trim();
+            final int eq = param.indexOf('=');
+            if (eq <= 0) {
+                // no '=' at all, or an empty parameter name: ignore
+                continue;
+            }
+            final String key = param.substring(0, eq).trim().toLowerCase(java.util.Locale.ROOT);
+            final String val = param.substring(eq + 1).trim();
+            result.setProperty(key, val);
+        }
+        return result;
+    }
+
+    /**
+     * Tells whether an option value switches the option on, i.e. equals
+     * "yes" or "true" ignoring case. A null value counts as off.
+     */
+    private static boolean isYesOrTrue(final String optionValue) {
+        return "yes".equalsIgnoreCase(optionValue) || "true".equalsIgnoreCase(optionValue);
+    }
+
+    /**
+     * Resolves JSON lax mode. The %input:json('lax=...') annotation wins; if it is
+     * absent the content-type parameter (e.g., application/json;lax=yes) is used;
+     * if that is absent too the result is false (strict mode — underscores doubled).
+     */
+    private static boolean resolveJsonLax(final Route route, final java.util.Properties ctParams) {
+        String setting = route.getInputOptions().getProperty("input.json.lax");
+        if (setting == null) {
+            setting = ctParams.getProperty("lax");
+        }
+        return isYesOrTrue(setting);
+    }
+
+    /**
+     * Resolves CSV header mode. The %input:csv('header=...') annotation wins; if it is
+     * absent the content-type parameter (e.g., text/csv;header=yes) is used;
+     * if that is absent too the result is false (header=no).
+     */
+    private static boolean resolveCsvHeader(final Route route, final java.util.Properties ctParams) {
+        String setting = route.getInputOptions().getProperty("input.csv.header");
+        if (setting == null) {
+            setting = ctParams.getProperty("header");
+        }
+        return isYesOrTrue(setting);
+    }
+
+    /**
+     * Converts a JSON string into an XML document in the BaseX-compatible
+     * "direct" format: the root element is named "json" and every JSON key
+     * becomes an element name. Unless {@code lax} is set, keys are passed
+     * through {@code escapeJsonName} first.
+     */
+    private static Sequence parseJsonToXml(final XQueryContext context,
+                                           final String json, final boolean lax)
+            throws XPathException {
+        try {
+            context.pushDocumentContext();
+            final org.exist.dom.memtree.MemTreeBuilder doc = context.getDocumentBuilder();
+            doc.startDocument();
+
+            final com.fasterxml.jackson.core.JsonFactory jsonFactory =
+                    new com.fasterxml.jackson.core.JsonFactory();
+            try (final com.fasterxml.jackson.core.JsonParser jsonParser = jsonFactory.createParser(json)) {
+                // position the parser on the first token before descending
+                jsonParser.nextToken();
+                jsonTokenToXml(doc, "json", jsonParser, lax);
+            }
+
+            doc.endDocument();
+            return doc.getDocument();
+        } catch (final IOException e) {
+            throw new XPathException((org.exist.xquery.Expression) null,
+                    "Failed to parse JSON body: " + e.getMessage());
+        } finally {
+            context.popDocumentContext();
+        }
+    }
+
+    /**
+     * Emits one element named after {@code name} for the parser's current token
+     * and recurses into objects and arrays. Array members are emitted as "_"
+     * elements; null (and any other unhandled token) yields an empty element.
+     */
+    private static void jsonTokenToXml(
+            final org.exist.dom.memtree.MemTreeBuilder builder,
+            final String name,
+            final com.fasterxml.jackson.core.JsonParser parser,
+            final boolean lax) throws IOException, XPathException {
+        final org.exist.dom.QName elementName = qname(lax ? name : escapeJsonName(name));
+        final com.fasterxml.jackson.core.JsonToken current = parser.currentToken();
+
+        // Every token kind produces exactly one element; only the content differs.
+        builder.startElement(elementName, null);
+        if (current != null) {
+            switch (current) {
+                case START_OBJECT -> {
+                    while (parser.nextToken() != com.fasterxml.jackson.core.JsonToken.END_OBJECT) {
+                        final String key = parser.currentName();
+                        parser.nextToken();
+                        jsonTokenToXml(builder, key, parser, lax);
+                    }
+                }
+                case START_ARRAY -> {
+                    while (parser.nextToken() != com.fasterxml.jackson.core.JsonToken.END_ARRAY) {
+                        jsonTokenToXml(builder, "_", parser, lax);
+                    }
+                }
+                case VALUE_STRING -> {
+                    final String text = parser.getText();
+                    if (!text.isEmpty()) {
+                        builder.characters(text);
+                    }
+                }
+                case VALUE_NUMBER_INT, VALUE_NUMBER_FLOAT, VALUE_TRUE, VALUE_FALSE ->
+                        builder.characters(parser.getText());
+                default -> {
+                    // null
+                }
+            }
+        }
+        builder.endElement();
+    }
+
+    /**
+     * Escapes a JSON key for use as an XML element name (non-lax mode).
+     * Underscores are doubled, other invalid chars replaced with underscore+hex.
+     */
+    private static String escapeJsonName(final String name) {
+        final StringBuilder escaped = new StringBuilder(name.length());
+        for (int pos = 0; pos < name.length(); pos++) {
+            final char ch = name.charAt(pos);
+            if (ch == '_') {
+                escaped.append("__");
+                continue;
+            }
+            // The first character must be a letter; later ones may also be digits, '-' or '.'.
+            final boolean allowed = (pos == 0)
+                    ? Character.isLetter(ch)
+                    : Character.isLetterOrDigit(ch) || ch == '-' || ch == '.';
+            if (allowed) {
+                escaped.append(ch);
+            } else {
+                escaped.append('_').append(String.format("%04x", (int) ch));
+            }
+        }
+        return escaped.toString();
+    }
+
+    /**
+     * Converts CSV text into an XML document in the BaseX-compatible format:
+     * a "csv" root holding one "record" element per non-blank line.
+     * With header=true the first line is consumed and supplies the field element
+     * names; fields beyond the header's width, and all fields when header=false,
+     * are wrapped in generic "entry" elements.
+     */
+    private static Sequence parseCsvToXml(final XQueryContext context,
+                                          final String csv, final boolean header)
+            throws XPathException {
+        context.pushDocumentContext();
+        try {
+            final org.exist.dom.memtree.MemTreeBuilder doc = context.getDocumentBuilder();
+            doc.startDocument();
+            doc.startElement(qname("csv"), null);
+
+            final String[] rows = csv.split("\r?\n");
+            final boolean hasHeaderRow = header && rows.length > 0;
+            final String[] columnNames = hasHeaderRow ? parseCsvLine(rows[0]) : null;
+
+            for (int r = hasHeaderRow ? 1 : 0; r < rows.length; r++) {
+                final String row = rows[r];
+                if (row.trim().isEmpty()) {
+                    continue;
+                }
+                doc.startElement(qname("record"), null);
+                int column = 0;
+                for (final String field : parseCsvLine(row)) {
+                    final boolean named = columnNames != null && column < columnNames.length;
+                    doc.startElement(qname(named ? columnNames[column] : "entry"), null);
+                    doc.characters(field);
+                    doc.endElement();
+                    column++;
+                }
+                doc.endElement();
+            }
+
+            doc.endElement(); // csv
+            doc.endDocument();
+            return doc.getDocument();
+        } finally {
+            context.popDocumentContext();
+        }
+    }
+
+    /**
+     * Simple CSV line parser.
Handles basic comma-separated values. + */ + private static String[] parseCsvLine(final String line) { + return line.split(",", -1); + } + + /** + * Creates a QName, wrapping the checked exception. + */ + private static org.exist.dom.QName qname(final String localName) throws XPathException { + try { + return new org.exist.dom.QName(localName); + } catch (final org.exist.dom.QName.IllegalQNameException e) { + throw new XPathException((org.exist.xquery.Expression) null, + "Invalid element name: " + localName); + } + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/PathMatcher.java b/exist-core/src/main/java/org/exist/http/restxq/PathMatcher.java new file mode 100644 index 00000000000..b33b86173ff --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/PathMatcher.java @@ -0,0 +1,299 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import java.math.BigInteger; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Matches HTTP request paths against RESTXQ path templates and extracts + * template variable values. + * + *

/**
 * Compiles a RESTXQ path template into a regular expression and matches
 * request paths against it.
 *
 * <p>Supports three kinds of path segments:</p>
 * <ul>
 *   <li>Literal segments: {@code /users/list}</li>
 *   <li>Template variables: {@code /users/{$id}}</li>
 *   <li>Regex-constrained variables: {@code /users/{$id=[0-9]+}}</li>
 * </ul>
 *
 * <p>Implements {@link Comparable} for specificity-based route precedence:
 * more segments wins; at equal segment count, literal segments beat
 * template variables.</p>
 *
 * <p>The path matching approach follows the BaseX model for cross-engine
 * RESTXQ compatibility.</p>
 */
public class PathMatcher implements Comparable<PathMatcher> {

    /** Pattern of the root route: matches both the empty path and "/". */
    private static final Pattern ROOT_PATTERN = Pattern.compile("^/?$");

    /** Regex applied to a template variable that declares no constraint of its own. */
    private static final String DEFAULT_VAR_REGEX = "[^/]+?";

    private final String template;
    private final Pattern pattern;
    private final List<String> varNames;
    /** {@code groupIndexes[k]} is the capturing-group number that holds the value of {@code varNames.get(k)}. */
    private final int[] groupIndexes;
    private final int segmentCount;
    /** Bit {@code s} is set when the segment with zero-based index {@code s} contains a template variable. */
    private final BigInteger templatePositions;

    private PathMatcher(final String template, final Pattern pattern,
                        final List<String> varNames, final int[] groupIndexes,
                        final int segmentCount, final BigInteger templatePositions) {
        this.template = template;
        this.pattern = pattern;
        this.varNames = varNames;
        this.groupIndexes = groupIndexes;
        this.segmentCount = segmentCount;
        this.templatePositions = templatePositions;
    }

    /**
     * Parses a RESTXQ path template into a PathMatcher.
     *
     * @param pathTemplate the path template string, e.g. "/users/{$id=[0-9]+}/posts"
     * @return a compiled PathMatcher
     * @throws IllegalArgumentException if the template is malformed (this includes
     *         {@link java.util.regex.PatternSyntaxException} for an invalid regex constraint)
     */
    public static PathMatcher parse(final String pathTemplate) {
        if (pathTemplate == null) {
            throw new IllegalArgumentException("Path template must not be null");
        }

        // Empty path or "/" both match the root
        if (pathTemplate.isEmpty() || "/".equals(pathTemplate)) {
            return new PathMatcher(pathTemplate, ROOT_PATTERN,
                    Collections.emptyList(), new int[0], 0, BigInteger.ZERO);
        }

        final List<String> varNames = new ArrayList<>();
        final List<Integer> groupStarts = new ArrayList<>();
        final StringBuilder regex = new StringBuilder();
        int segmentCount = 0;
        int nextGroup = 1;
        BigInteger templatePositions = BigInteger.ZERO;

        // Ensure leading slash
        final String path = pathTemplate.startsWith("/") ? pathTemplate : "/" + pathTemplate;

        int i = 0;
        while (i < path.length()) {
            final char c = path.charAt(i);

            if (c == '{') {
                // Template variable
                final int close = findClosingBrace(path, i);
                if (close == -1) {
                    throw new IllegalArgumentException(
                            "Unclosed template variable at position " + i + " in: " + pathTemplate);
                }

                final String rawVarSpec = path.substring(i + 1, close);

                // Must start with $ (RESTXQ variable syntax)
                if (!rawVarSpec.startsWith("$")) {
                    throw new IllegalArgumentException(
                            "Template variable must start with $ in: " + pathTemplate);
                }

                final String varSpec = rawVarSpec.substring(1);

                // Validate no spaces in variable spec
                if (varSpec.contains(" ")) {
                    throw new IllegalArgumentException(
                            "Template variable must not contain spaces in: " + pathTemplate);
                }

                // "{$id=[0-9]+}" carries its own regex; "{$id}" matches one path segment
                final int eqIdx = varSpec.indexOf('=');
                final String varName = eqIdx >= 0 ? varSpec.substring(0, eqIdx) : varSpec;
                final String varRegex = eqIdx >= 0 ? varSpec.substring(eqIdx + 1) : DEFAULT_VAR_REGEX;

                if (varName.isEmpty()) {
                    throw new IllegalArgumentException(
                            "Empty variable name in template: " + pathTemplate);
                }
                // A single colon is allowed (namespace prefix such as m:x)
                if (varName.contains("::") || varName.contains(" ")) {
                    throw new IllegalArgumentException(
                            "Invalid variable name '" + varName + "' in template: " + pathTemplate);
                }

                // Check for duplicate variable names
                if (varNames.contains(varName)) {
                    throw new IllegalArgumentException(
                            "Duplicate variable {$" + varName + "} in template: " + pathTemplate);
                }

                varNames.add(varName);
                groupStarts.add(nextGroup);
                regex.append('(').append(varRegex).append(')');
                // Groups opened inside the constraint shift the number of every later
                // variable's group, so account for them here.
                nextGroup += 1 + countCapturingGroups(varRegex);

                // Mark this segment as a template
                templatePositions = templatePositions.setBit(segmentCount > 0 ? segmentCount - 1 : 0);

                i = close + 1;
            } else if (c == '/') {
                regex.append('/');
                segmentCount++;
                i++;
            } else {
                // Literal run: accumulate up to the next '/' or '{', decode, and quote for regex
                int j = i;
                while (j < path.length() && path.charAt(j) != '/' && path.charAt(j) != '{') {
                    j++;
                }
                regex.append(Pattern.quote(decodePath(path.substring(i, j))));
                i = j;
            }
        }

        final int[] groupIndexes = new int[groupStarts.size()];
        for (int k = 0; k < groupIndexes.length; k++) {
            groupIndexes[k] = groupStarts.get(k);
        }

        final Pattern compiled = Pattern.compile("^" + regex + "$");
        return new PathMatcher(pathTemplate, compiled, varNames, groupIndexes,
                segmentCount, templatePositions);
    }

    /**
     * Finds the '}' that closes the template variable opened at {@code open}.
     * Braces nested in the regex constraint (quantifiers such as {@code {3}}) are
     * balanced; braces inside a character class or after a backslash are ignored.
     * If no balanced brace exists, falls back to the first '}' after {@code open}.
     *
     * @return the index of the closing brace, or -1 if there is none
     */
    private static int findClosingBrace(final String path, final int open) {
        int depth = 0;
        boolean inCharClass = false;
        for (int j = open; j < path.length(); j++) {
            final char ch = path.charAt(j);
            if (ch == '\\') {
                j++; // skip the escaped character
            } else if (inCharClass) {
                inCharClass = ch != ']';
            } else if (ch == '[') {
                inCharClass = true;
            } else if (ch == '{') {
                depth++;
            } else if (ch == '}' && --depth == 0) {
                return j;
            }
        }
        return path.indexOf('}', open);
    }

    /**
     * Counts the capturing groups declared by a variable's regex constraint.
     * Returns 0 when the fragment does not compile on its own; the compile of the
     * complete template regex in {@link #parse(String)} then reports the error.
     */
    private static int countCapturingGroups(final String varRegex) {
        try {
            return Pattern.compile(varRegex).matcher("").groupCount();
        } catch (final java.util.regex.PatternSyntaxException ignored) {
            return 0;
        }
    }

    /**
     * Tests whether the given request path matches this template.
     *
     * @param requestPath the request path (must start with "/")
     * @return true if the path matches
     */
    public boolean matches(final String requestPath) {
        return pattern.matcher(requestPath).matches();
    }

    /**
     * Extracts template variable values from a matching request path.
     *
     * @param requestPath the request path
     * @return map of variable name to captured value (in template order),
     *         or empty map if no match
     */
    public Map<String, String> extractVariables(final String requestPath) {
        final Matcher m = pattern.matcher(requestPath);
        if (!m.matches()) {
            return Collections.emptyMap();
        }

        final Map<String, String> result = new LinkedHashMap<>();
        for (int k = 0; k < varNames.size(); k++) {
            result.put(varNames.get(k), m.group(groupIndexes[k]));
        }
        return result;
    }

    /**
     * Returns the original path template string.
     */
    public String getTemplate() {
        return template;
    }

    /**
     * Returns the variable names declared in the template, in order.
     */
    public List<String> getVarNames() {
        return Collections.unmodifiableList(varNames);
    }

    /**
     * Returns the number of path segments (separated by '/').
     */
    public int getSegmentCount() {
        return segmentCount;
    }

    /**
     * Specificity comparison for route precedence.
     *
     * <p>More segments = more specific. At equal segment count,
     * literal segments beat template variables (checked left-to-right).</p>
     *
     * @return a negative value if this matcher is more specific than {@code other},
     *         a positive value if it is less specific, 0 if neither wins
     */
    @Override
    public int compareTo(final PathMatcher other) {
        // More segments = more specific (sort first)
        final int bySegments = Integer.compare(other.segmentCount, this.segmentCount);
        if (bySegments != 0) {
            return bySegments;
        }

        // Same segment count: the first position where exactly one side is a
        // template decides; the literal side is more specific and sorts first.
        for (int s = 0; s < this.segmentCount; s++) {
            final boolean thisIsTemplate = this.templatePositions.testBit(s);
            final boolean otherIsTemplate = other.templatePositions.testBit(s);
            if (thisIsTemplate != otherIsTemplate) {
                return thisIsTemplate ? 1 : -1;
            }
        }

        return 0;
    }

    @Override
    public String toString() {
        return template;
    }

    /**
     * URL-decodes a path string: percent-encoded octets are decoded and
     * {@code +} becomes a space. Consecutive percent-encoded octets are decoded
     * together as UTF-8; a run that is not valid UTF-8 is decoded octet-by-octet
     * as ISO-8859-1.
     *
     * @param path the encoded path, may be null or empty (returned unchanged)
     * @return the decoded path
     * @throws IllegalArgumentException for a truncated or non-hex percent sequence
     */
    static String decodePath(final String path) {
        if (path == null || path.isEmpty()) {
            return path;
        }
        final StringBuilder result = new StringBuilder(path.length());
        final java.io.ByteArrayOutputStream octets = new java.io.ByteArrayOutputStream();
        int i = 0;
        while (i < path.length()) {
            final char c = path.charAt(i);
            if (c == '%') {
                if (i + 2 >= path.length()) {
                    throw new IllegalArgumentException("Invalid percent encoding in path: " + path);
                }
                if (!isHexDigit(path.charAt(i + 1)) || !isHexDigit(path.charAt(i + 2))) {
                    throw new IllegalArgumentException("Invalid percent encoding in path: " + path);
                }
                octets.write(Integer.parseInt(path.substring(i + 1, i + 3), 16));
                i += 3;
                continue;
            }
            if (octets.size() > 0) {
                result.append(decodeOctets(octets.toByteArray()));
                octets.reset();
            }
            // In RESTXQ path templates, + means space (following URL convention)
            result.append(c == '+' ? ' ' : c);
            i++;
        }
        if (octets.size() > 0) {
            result.append(decodeOctets(octets.toByteArray()));
        }
        return result.toString();
    }

    /** Decodes a run of percent-decoded octets as UTF-8, or as ISO-8859-1 if the run is malformed. */
    private static String decodeOctets(final byte[] octets) {
        try {
            return java.nio.charset.StandardCharsets.UTF_8.newDecoder()
                    .onMalformedInput(java.nio.charset.CodingErrorAction.REPORT)
                    .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPORT)
                    .decode(java.nio.ByteBuffer.wrap(octets))
                    .toString();
        } catch (final java.nio.charset.CharacterCodingException ignored) {
            // Not UTF-8: keep the one-octet-per-character interpretation
            return new String(octets, java.nio.charset.StandardCharsets.ISO_8859_1);
        }
    }

    private static boolean isHexDigit(final char c) {
        return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }
}
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import jakarta.servlet.http.HttpServletResponse; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.storage.DBBroker; +import org.exist.util.serializer.XQuerySerializer; +import org.exist.xquery.XPathException; +import org.exist.xquery.value.*; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.Properties; +import java.util.Set; + +/** + * Serializes XQuery function results to HTTP responses, handling the + * RESTXQ response protocol including {@code } elements, + * {@code }, and binary output. + */ +public class ResponseWriter { + + private static final Logger LOG = LogManager.getLogger(ResponseWriter.class); + + /** + * Writes the XQuery result sequence to the HTTP response, applying + * serialization properties from the route's %output:* annotations. + * + *

Handles these special cases:

+ *
    + *
  • {@code } — sets custom status code and headers
  • + *
  • {@code } — server-side forward (not yet implemented)
  • + *
  • Binary results — written directly to output stream
  • + *
  • Node/atomic results — serialized via XQuerySerializer
  • + *
+ */ + public static void write(final DBBroker broker, final Route route, + final Sequence result, + final HttpServletResponse response) throws IOException { + + final Properties outputProperties = new Properties(route.getOutputProperties()); + + // Check first item for rest:response or rest:forward + Sequence bodySequence = result; + boolean statusExplicitlySet = false; + + if (result.getItemCount() > 0) { + try { + final Item firstItem = result.itemAt(0); + if (isRestResponseElement(firstItem)) { + statusExplicitlySet = processRestResponse((NodeValue) firstItem, response, outputProperties); + // Body is everything after the rest:response element + if (result.getItemCount() > 1) { + final ValueSequence remaining = new ValueSequence(); + for (int i = 1; i < result.getItemCount(); i++) { + remaining.add(result.itemAt(i)); + } + bodySequence = remaining; + } else { + bodySequence = Sequence.EMPTY_SEQUENCE; + } + } else if (isRestForwardElement(firstItem)) { + // Return the forward path — the servlet handles internal dispatch + final String forwardPath = firstItem.getStringValue().trim(); + throw new RestXqForwardException(forwardPath); + } + } catch (final XPathException e) { + LOG.error("Error processing RESTXQ response", e); + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, e.getMessage()); + return; + } + } + + if (bodySequence.isEmpty()) { + if (!response.isCommitted() && !statusExplicitlySet) { + response.setStatus(HttpServletResponse.SC_NO_CONTENT); + } + return; + } + + // Set Content-Type if not already set by rest:response + if (response.getContentType() == null) { + response.setContentType(route.getResponseContentType()); + } + + // Serialize the body + try { + serializeSequence(broker, bodySequence, outputProperties, response); + } catch (final XPathException | SAXException e) { + LOG.error("Error serializing RESTXQ response", e); + if (!response.isCommitted()) { + response.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + 
"Serialization error: " + e.getMessage()); + } + } + } + + private static boolean isRestResponseElement(final Item item) throws XPathException { + if (Type.subTypeOf(item.getType(), Type.ELEMENT)) { + final NodeValue node = (NodeValue) item; + final Node n = node.getNode(); + return "response".equals(n.getLocalName()) + && RestXqNamespaces.REST_NS.equals(n.getNamespaceURI()); + } + return false; + } + + private static boolean isRestForwardElement(final Item item) throws XPathException { + if (Type.subTypeOf(item.getType(), Type.ELEMENT)) { + final NodeValue node = (NodeValue) item; + final Node n = node.getNode(); + return "forward".equals(n.getLocalName()) + && RestXqNamespaces.REST_NS.equals(n.getNamespaceURI()); + } + return false; + } + + /** + * Validates and processes a {@code } element, extracting + * HTTP status, headers, and serialization parameters. + * + * @return true if an HTTP status was explicitly set + * @throws IOException if the rest:response element has invalid structure + */ + private static boolean processRestResponse(final NodeValue responseNode, + final HttpServletResponse response, + final Properties outputProperties) + throws IOException { + final Node node = responseNode.getNode(); + + // Validate: no unknown attributes on rest:response + final org.w3c.dom.NamedNodeMap attrs = node.getAttributes(); + if (attrs != null) { + for (int a = 0; a < attrs.getLength(); a++) { + final Node attr = attrs.item(a); + final String attrNs = attr.getNamespaceURI(); + // Allow xmlns declarations, reject anything else + if (attrNs == null || !attrNs.equals("http://www.w3.org/2000/xmlns/")) { + final String attrName = attr.getLocalName() != null ? 
attr.getLocalName() : attr.getNodeName(); + if (!"xmlns".equals(attrName) && !attrName.startsWith("xmlns:")) { + throw new IOException( + "Invalid attribute '" + attrName + "' on rest:response element"); + } + } + } + } + + final NodeList children = node.getChildNodes(); + boolean statusSet = false; + + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + + // Validate: no text content in rest:response + if (child.getNodeType() == Node.TEXT_NODE) { + final String text = child.getTextContent(); + if (text != null && !text.trim().isEmpty()) { + throw new IOException( + "rest:response must not contain text content"); + } + continue; + } + + if (child.getNodeType() != Node.ELEMENT_NODE) { + continue; + } + + final String childNs = child.getNamespaceURI(); + final String childLocal = child.getLocalName(); + + if (RestXqNamespaces.HTTP_NS.equals(childNs) && "response".equals(childLocal)) { + statusSet = processHttpResponse((Element) child, response); + } else if (RestXqNamespaces.OUTPUT_NS.equals(childNs) + && "serialization-parameters".equals(childLocal)) { + processSerializationParams((Element) child, outputProperties); + } else { + // Validate: only http:response and output:serialization-parameters allowed + throw new IOException( + "Invalid child element in rest:response: " + + (childNs != null ? 
"{" + childNs + "}" : "") + childLocal); + } + } + return statusSet; + } + + /** + * @return true if status was explicitly set + */ + private static final Set VALID_HTTP_RESPONSE_ATTRS = Set.of( + "status", "reason", "message" + ); + + private static boolean processHttpResponse(final Element httpResponse, + final HttpServletResponse response) + throws IOException { + // Validate attributes: only status and reason/message allowed + final org.w3c.dom.NamedNodeMap attrs = httpResponse.getAttributes(); + if (attrs != null) { + for (int a = 0; a < attrs.getLength(); a++) { + final Node attr = attrs.item(a); + final String attrNs = attr.getNamespaceURI(); + final String attrName = attr.getLocalName() != null ? attr.getLocalName() : attr.getNodeName(); + if (attrNs != null && attrNs.equals("http://www.w3.org/2000/xmlns/")) { + continue; + } + if ("xmlns".equals(attrName) || attrName.startsWith("xmlns:")) { + continue; + } + if (!VALID_HTTP_RESPONSE_ATTRS.contains(attrName)) { + throw new IOException( + "Invalid attribute '" + attrName + "' on http:response element"); + } + } + } + + boolean statusSet = false; + final String status = httpResponse.getAttribute("status"); + if (status != null && !status.isEmpty()) { + try { + response.setStatus(Integer.parseInt(status)); + statusSet = true; + } catch (final NumberFormatException e) { + LOG.warn("Invalid HTTP status in rest:response: {}", status); + } + } + + // Process children: only http:header elements allowed, no text content + final NodeList children = httpResponse.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + + if (child.getNodeType() == Node.TEXT_NODE) { + final String text = child.getTextContent(); + if (text != null && !text.trim().isEmpty()) { + throw new IOException( + "http:response must not contain text content"); + } + continue; + } + + if (child.getNodeType() == Node.ELEMENT_NODE + && "header".equals(child.getLocalName()) + && 
RestXqNamespaces.HTTP_NS.equals(child.getNamespaceURI())) { + final Element headerElem = (Element) child; + final String name = headerElem.getAttribute("name"); + final String value = headerElem.getAttribute("value"); + if (name != null && !name.isEmpty()) { + response.addHeader(name, value); + if ("Content-Type".equalsIgnoreCase(name)) { + response.setContentType(value); + } + } + } + } + return statusSet; + } + + private static void processSerializationParams(final Element params, + final Properties outputProperties) { + final NodeList children = params.getChildNodes(); + for (int i = 0; i < children.getLength(); i++) { + final Node child = children.item(i); + if (child.getNodeType() == Node.ELEMENT_NODE + && RestXqNamespaces.OUTPUT_NS.equals(child.getNamespaceURI())) { + final Element param = (Element) child; + final String value = param.getAttribute("value"); + if (value != null && !value.isEmpty()) { + outputProperties.setProperty(param.getLocalName(), value); + } + } + } + } + + private static void handleForward(final NodeValue forwardNode, + final HttpServletResponse response) throws IOException { + final String path = forwardNode.getNode().getTextContent(); + if (path != null && !path.isEmpty()) { + response.setStatus(HttpServletResponse.SC_NO_CONTENT); + response.sendRedirect(path); + } + } + + private static void serializeSequence(final DBBroker broker, + final Sequence sequence, + final Properties outputProperties, + final HttpServletResponse response) + throws IOException, XPathException, SAXException { + + // Handle binary results + if (sequence.hasOne()) { + final Item item = sequence.itemAt(0); + if (item instanceof BinaryValue) { + writeBinaryResult((BinaryValue) item, response); + return; + } + } + + // Serialize using XQuerySerializer + final OutputStream os = response.getOutputStream(); + try (final Writer writer = new OutputStreamWriter(os, StandardCharsets.UTF_8)) { + final XQuerySerializer serializer = new XQuerySerializer(broker, 
outputProperties, writer); + serializer.serialize(sequence); + writer.flush(); + } + } + + private static void writeBinaryResult(final BinaryValue binary, + final HttpServletResponse response) throws IOException { + try (final OutputStream os = response.getOutputStream()) { + binary.streamBinaryTo(os); + os.flush(); + } + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/RestXqAnnotationException.java b/exist-core/src/main/java/org/exist/http/restxq/RestXqAnnotationException.java new file mode 100644 index 00000000000..f18cdc2b333 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/RestXqAnnotationException.java @@ -0,0 +1,31 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +/** + * Thrown when RESTXQ annotation validation fails during module scanning. 
+ */ +public class RestXqAnnotationException extends Exception { + public RestXqAnnotationException(final String message) { + super(message); + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/RestXqForwardException.java b/exist-core/src/main/java/org/exist/http/restxq/RestXqForwardException.java new file mode 100644 index 00000000000..07032e9c843 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/RestXqForwardException.java @@ -0,0 +1,39 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +/** + * Thrown when a RESTXQ function returns a {@code <rest:forward>} element, + * indicating that the request should be internally dispatched to another route. 
+ */ +public class RestXqForwardException extends java.io.IOException { + private final String forwardPath; + + public RestXqForwardException(final String forwardPath) { + super("Forward to: " + forwardPath); + this.forwardPath = forwardPath; + } + + public String getForwardPath() { + return forwardPath; + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/RestXqNamespaces.java b/exist-core/src/main/java/org/exist/http/restxq/RestXqNamespaces.java new file mode 100644 index 00000000000..9fe9f7de6d8 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/RestXqNamespaces.java @@ -0,0 +1,68 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +/** + * Namespace constants for RESTXQ annotations and related namespaces. + */ +public final class RestXqNamespaces { + + /** The RESTXQ annotation namespace. */ + public static final String REST_NS = "http://exquery.org/ns/restxq"; + + /** The RESTXQ annotation namespace prefix. */ + public static final String REST_PREFIX = "rest"; + + /** The W3C serialization namespace for %output:* annotations. 
*/ + public static final String OUTPUT_NS = "http://www.w3.org/2010/xslt-xquery-serialization"; + + /** The output namespace prefix. */ + public static final String OUTPUT_PREFIX = "output"; + + /** The EXPath HTTP Client namespace, used by the http:response and http:header elements nested in a rest:response. */ + public static final String HTTP_NS = "http://expath.org/ns/http-client"; + + /** The HTTP client namespace prefix. */ + public static final String HTTP_PREFIX = "http"; + + /** The input processing namespace for %input:* annotations. */ + public static final String INPUT_NS = "http://exquery.org/ns/restxq/input"; + + /** The input namespace prefix. */ + public static final String INPUT_PREFIX = "input"; + + /** The eXist-db auth annotation namespace. */ + public static final String AUTH_NS = "http://exist-db.org/ns/auth"; + + /** The auth namespace prefix. */ + public static final String AUTH_PREFIX = "auth"; + + /** The web module namespace (BaseX extension). */ + public static final String WEB_NS = "http://basex.org/modules/web"; + + /** The web module namespace prefix. */ + public static final String WEB_PREFIX = "web"; + + private RestXqNamespaces() { + // utility class + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/Route.java b/exist-core/src/main/java/org/exist/http/restxq/Route.java new file mode 100644 index 00000000000..3bddda10812 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/Route.java @@ -0,0 +1,315 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
/*
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
package org.exist.http.restxq;

import org.exist.dom.QName;

import javax.xml.transform.OutputKeys;
import java.util.*;

/**
 * Represents a single RESTXQ route: the combination of a path pattern,
 * HTTP method(s), a function reference, and serialization options parsed
 * from the function's annotations.
 *
 * <p>A Route is built by {@link AnnotationParser} from a compiled XQuery
 * function's annotations, and consumed by {@link NativeRestXqServlet} for
 * dispatch.</p>
 *
 * <p>NOTE(review): the generic type arguments were unreadable in the reviewed
 * copy of this file. {@code Set<String>}/{@code List<String>} follow from how the
 * values are used here; {@code Map<String, ParamBinding>} is inferred from the
 * nested {@link ParamBinding} class — confirm against callers.</p>
 */
public class Route implements Comparable<Route> {

    /** The XQuery source URI (database path) containing this function. */
    private final String moduleUri;

    /** The QName of the annotated XQuery function. */
    private final QName functionName;

    /** The arity of the function. */
    private final int arity;

    /** The compiled path matcher for %rest:path. */
    private final PathMatcher pathMatcher;

    /** The HTTP methods this route handles (GET, POST, etc.). */
    private final Set<String> methods;

    /** Serialization properties from %output:* annotations. */
    private final Properties outputProperties;

    /** %rest:consumes media types. */
    private final List<String> consumes;

    /** %rest:produces media types. */
    private final List<String> produces;

    /** %rest:query-param bindings. */
    private final Map<String, ParamBinding> queryParams;

    /** %rest:form-param bindings. */
    private final Map<String, ParamBinding> formParams;

    /** %rest:header-param bindings. */
    private final Map<String, ParamBinding> headerParams;

    /** %rest:cookie-param bindings. */
    private final Map<String, ParamBinding> cookieParams;

    /** The variable name for POST/PUT body binding, or null. */
    private final String bodyVariable;

    /** Input processing options from %input:json, %input:csv, %input:html annotations. */
    private final Properties inputOptions;

    Route(final String moduleUri, final QName functionName, final int arity,
          final PathMatcher pathMatcher, final Set<String> methods,
          final Properties outputProperties,
          final List<String> consumes, final List<String> produces,
          final Map<String, ParamBinding> queryParams,
          final Map<String, ParamBinding> formParams,
          final Map<String, ParamBinding> headerParams,
          final Map<String, ParamBinding> cookieParams,
          final String bodyVariable,
          final Properties inputOptions) {
        this.moduleUri = moduleUri;
        this.functionName = functionName;
        this.arity = arity;
        this.pathMatcher = pathMatcher;
        this.methods = methods;
        this.outputProperties = outputProperties;
        this.consumes = consumes;
        this.produces = produces;
        this.queryParams = queryParams;
        this.formParams = formParams;
        this.headerParams = headerParams;
        this.cookieParams = cookieParams;
        this.bodyVariable = bodyVariable;
        this.inputOptions = inputOptions;
    }

    public String getModuleUri() {
        return moduleUri;
    }

    public QName getFunctionName() {
        return functionName;
    }

    public int getArity() {
        return arity;
    }

    public PathMatcher getPathMatcher() {
        return pathMatcher;
    }

    public Set<String> getMethods() {
        return methods;
    }

    public Properties getOutputProperties() {
        return outputProperties;
    }

    public List<String> getConsumes() {
        return consumes;
    }

    public List<String> getProduces() {
        return produces;
    }

    public Map<String, ParamBinding> getQueryParams() {
        return queryParams;
    }

    public Map<String, ParamBinding> getFormParams() {
        return formParams;
    }

    public Map<String, ParamBinding> getHeaderParams() {
        return headerParams;
    }

    public Map<String, ParamBinding> getCookieParams() {
        return cookieParams;
    }

    public String getBodyVariable() {
        return bodyVariable;
    }

    public Properties getInputOptions() {
        return inputOptions;
    }

    /**
     * Tests whether this route matches the given HTTP method and request path.
     * The method name is compared case-insensitively (upper-cased with the root locale).
     */
    public boolean matches(final String method, final String requestPath) {
        return methods.contains(method.toUpperCase(Locale.ROOT))
                && pathMatcher.matches(requestPath);
    }

    /**
     * Tests whether this route's consumes constraint is satisfied by the
     * given Content-Type (or if no constraint is declared).
     *
     * @param contentType the request Content-Type header, may be null or empty
     * @return true if no constraint is declared or a declared type matches; with a
     *         missing Content-Type, only a declared wildcard matches
     */
    public boolean matchesConsumes(final String contentType) {
        if (consumes.isEmpty()) {
            return true;
        }
        if (contentType == null || contentType.isEmpty()) {
            // No content type in request — only match if consumes includes wildcard
            for (final String consume : consumes) {
                if ("*/*".equals(stripParameters(consume))) {
                    return true;
                }
            }
            return false;
        }
        // Parameters (e.g., charset) are ignored on both sides of the comparison
        final String requestType = stripParameters(contentType);
        for (final String consume : consumes) {
            if (mediaTypeMatches(requestType, stripParameters(consume))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether this route's produces constraint is satisfied by the
     * given Accept header (or if no constraint is declared).
     *
     * @param acceptHeader the request Accept header (comma-separated ranges), may be null or empty
     * @return true if no constraint is declared, no Accept header was sent, or any
     *         accepted range matches any declared type; q-values are ignored
     */
    public boolean matchesProduces(final String acceptHeader) {
        if (produces.isEmpty()) {
            return true;
        }
        if (acceptHeader == null || acceptHeader.isEmpty()) {
            return true;
        }
        final String[] acceptedRanges = acceptHeader.split(",");
        for (final String produce : produces) {
            final String declared = stripParameters(produce);
            for (final String accepted : acceptedRanges) {
                if (mediaTypeMatches(stripParameters(accepted), declared)) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Returns the media type without parameters (everything from the first ';') and surrounding whitespace. */
    private static String stripParameters(final String mediaType) {
        final int semicolon = mediaType.indexOf(';');
        return (semicolon >= 0 ? mediaType.substring(0, semicolon) : mediaType).trim();
    }

    /**
     * Compares two parameter-free media types. A full wildcard on either side
     * matches; otherwise the types must be equal ignoring case, or share the main
     * type while either side has the subtype wildcard (so a request range such as
     * text/* matches a declared text/html, and vice versa).
     */
    private static boolean mediaTypeMatches(final String actual, final String pattern) {
        if ("*/*".equals(pattern) || "*/*".equals(actual)) {
            return true;
        }
        if (actual.equalsIgnoreCase(pattern)) {
            return true;
        }
        final int slashActual = actual.indexOf('/');
        final int slashPattern = pattern.indexOf('/');
        if (slashActual <= 0 || slashPattern <= 0) {
            return false;
        }
        final String typeActual = actual.substring(0, slashActual);
        final String typePattern = pattern.substring(0, slashPattern);
        if (!typeActual.equalsIgnoreCase(typePattern)) {
            return false;
        }
        return "*".equals(pattern.substring(slashPattern + 1))
                || "*".equals(actual.substring(slashActual + 1));
    }

    /**
     * Returns the Content-Type to set on the response, derived from
     * %output:media-type or %output:method annotations. Falls back to
     * application/xml for an unknown or absent method.
     */
    public String getResponseContentType() {
        final String mediaType = outputProperties.getProperty("media-type");
        if (mediaType != null) {
            return mediaType;
        }
        // Derive from method
        final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml");
        return switch (method) {
            case "json" -> "application/json";
            case "html" -> "text/html";
            case "text" -> "text/plain";
            case "adaptive" -> "text/plain";
            default -> "application/xml";
        };
    }

    /**
     * Sort by path specificity (most specific first).
     */
    @Override
    public int compareTo(final Route other) {
        return this.pathMatcher.compareTo(other.pathMatcher);
    }

    @Override
    public String toString() {
        return methods + " " + pathMatcher.getTemplate() + " → "
                + functionName.getLocalPart() + "#" + arity
                + " [" + moduleUri + "]";
    }

    /**
     * A parameter binding from a %rest:*-param annotation.
     */
    public static class ParamBinding {
        private final String paramName;
        private final String variableName;
        private final List<String> defaultValues;

        public ParamBinding(final String paramName, final String variableName, final List<String> defaultValues) {
            this.paramName = paramName;
            this.variableName = variableName;
            this.defaultValues = defaultValues;
        }

        public String getParamName() {
            return paramName;
        }

        public String getVariableName() {
            return variableName;
        }

        /** Returns the first default value, or null when none is declared. */
        public String getDefaultValue() {
            return defaultValues.isEmpty() ? null : defaultValues.get(0);
        }

        public List<String> getDefaultValues() {
            return defaultValues;
        }
    }
}
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import net.jcip.annotations.GuardedBy; +import net.jcip.annotations.ThreadSafe; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.collections.Collection; +import org.exist.dom.persistent.BinaryDocument; +import org.exist.dom.persistent.DocumentImpl; +import org.exist.security.PermissionDeniedException; +import org.exist.source.DBSource; +import org.exist.storage.BrokerPool; +import org.exist.storage.DBBroker; +import org.exist.storage.lock.Lock.LockMode; +import org.exist.xmldb.XmldbURI; +import org.exist.xquery.CompiledXQuery; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQuery; +import org.exist.xquery.XQueryContext; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * In-memory route table that discovers RESTXQ-annotated functions from + * XQuery modules stored in the database. Uses timestamp-based caching + * to re-parse only when modules change. + * + *

This replaces the trigger-based {@code ExistXqueryRegistry} and + * {@code RestXqServiceRegistryPersistence} from the old EXQuery-based + * implementation.

+ * + *

Concurrency model

+ *

All mutable state is protected by a {@link ReentrantReadWriteLock}. + * Write operations ({@link #fullScan}, {@link #invalidate}) acquire + * the write lock and rebuild immutable snapshots that are published via + * volatile references. Read operations ({@link #findRoute}, + * {@link #findErrorHandler}, {@link #allowedMethods}) read the volatile + * snapshots without locking for maximum throughput on the HTTP request + * path. Status accessors also read volatile snapshots.

+ */ +@ThreadSafe +public class RouteRegistry { + + private static final Logger LOG = LogManager.getLogger(RouteRegistry.class); + + private static final Set XQUERY_MIME_TYPES = Set.of( + "application/xquery" + ); + + private static final Set XQUERY_EXTENSIONS = Set.of( + ".xq", ".xqm", ".xql", ".xquery" + ); + + private final BrokerPool brokerPool; + private final XmldbURI scanRoot; + + /** Protects all mutable state below. */ + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + // --- Mutable state, guarded by lock.writeLock() --- + + /** Module URI to last-modified timestamp at time of last parse. */ + @GuardedBy("lock") + private final Map moduleTimestamps = new HashMap<>(); + + /** Per-module route list, for efficient partial updates during scan. */ + @GuardedBy("lock") + private final Map> routesByModule = new HashMap<>(); + + /** Per-module error route list. */ + @GuardedBy("lock") + private final Map> errorRoutesByModule = new HashMap<>(); + + /** Modules that failed compilation or annotation validation. */ + @GuardedBy("lock") + private final Map failedModules = new HashMap<>(); + + // --- Volatile snapshots, rebuilt under write lock, read without lock --- + + /** All currently registered routes, sorted by specificity. Immutable. */ + private volatile List routes = Collections.emptyList(); + + /** All currently registered error handlers. Immutable. */ + private volatile List errorRoutes = Collections.emptyList(); + + /** Immutable snapshot of failed modules for status reporting. */ + private volatile Map failedModulesSnapshot = Collections.emptyMap(); + + /** Count of modules with routes (snapshot for lock-free reads). */ + private volatile int moduleCount = 0; + + /** When we last completed a full scan (epoch millis). */ + private volatile long lastScanTime = 0; + + /** How long the scan took (ms). */ + private volatile long lastScanDurationMs = 0; + + /** Whether at least one scan has completed. 
*/ + private volatile boolean initialized = false; + + public RouteRegistry(final BrokerPool brokerPool, final String scanRoot) { + this.brokerPool = brokerPool; + this.scanRoot = XmldbURI.create(scanRoot); + } + + // --- Read path: lock-free, reads volatile immutable snapshots --- + + /** + * Finds the best matching route for the given HTTP method and path. + * + *

Routes are pre-sorted by specificity; the first match wins. + * Content negotiation (consumes/produces) is also checked.

+ * + * @return the matching Route, or null if no route matches + */ + public Route findRoute(final String method, final String path, + final String contentType, final String acceptHeader) { + final List snapshot = routes; + for (final Route route : snapshot) { + if (route.matches(method, path) + && route.matchesConsumes(contentType) + && route.matchesProduces(acceptHeader)) { + return route; + } + } + return null; + } + + /** + * Returns all routes that match the given path (regardless of method), + * useful for generating Allow headers on 405 responses. + */ + public Set allowedMethods(final String path) { + return allowedMethods(path, null, null); + } + + /** + * Returns all HTTP methods that have a matching route for the given path + * and content negotiation constraints. Used for 405 Allow headers. + */ + public Set allowedMethods(final String path, final String contentType, + final String acceptHeader) { + final Set methods = new LinkedHashSet<>(); + final List snapshot = routes; + for (final Route route : snapshot) { + if (route.getPathMatcher().matches(path) + && route.matchesConsumes(contentType) + && route.matchesProduces(acceptHeader)) { + methods.addAll(route.getMethods()); + } + } + return methods; + } + + /** + * Finds the best matching error handler for the given error QName. + * Returns null if no handler matches. 
+ */ + public ErrorRoute findErrorHandler(final org.exist.dom.QName errorQName) { + ErrorRoute bestHandler = null; + ErrorRoute.ErrorCode bestCode = null; + + final List snapshot = errorRoutes; + for (final ErrorRoute handler : snapshot) { + final ErrorRoute.ErrorCode match = handler.bestMatch(errorQName); + if (match != null) { + if (bestCode == null || match.getMatchType().priority < bestCode.getMatchType().priority) { + bestHandler = handler; + bestCode = match; + } + } + } + return bestHandler; + } + + // --- Write path: acquires write lock, rebuilds snapshots --- + + /** + * Invalidates the entire route cache, forcing a full rescan + * on the next call to {@link #ensureInitialized(DBBroker)}. + */ + public void invalidate() { + lock.writeLock().lock(); + try { + routesByModule.clear(); + errorRoutesByModule.clear(); + moduleTimestamps.clear(); + failedModules.clear(); + publishSnapshots(); + initialized = false; + LOG.info("RESTXQ route registry invalidated; will rescan on next request"); + } finally { + lock.writeLock().unlock(); + } + } + + /** + * Ensures the registry has been initialized (at least one scan completed). + * If not yet initialized, performs a full scan. + */ + public void ensureInitialized(final DBBroker broker) { + if (!initialized) { + fullScan(broker); + } + } + + /** + * Performs a full scan of all XQuery modules under the scan root. + * Modules whose timestamp hasn't changed since last scan are skipped. 
+ */ + public void fullScan(final DBBroker broker) { + lock.writeLock().lock(); + try { + final long start = System.currentTimeMillis(); + LOG.info("Starting RESTXQ module scan of {}", scanRoot); + + // Track which modules exist during this scan + final Set scannedModules = new HashSet<>(); + scanCollection(broker, scanRoot, scannedModules); + + // Remove routes/failures for modules that no longer exist + routesByModule.keySet().retainAll(scannedModules); + errorRoutesByModule.keySet().retainAll(scannedModules); + moduleTimestamps.keySet().retainAll(scannedModules); + failedModules.keySet().retainAll(scannedModules); + + publishSnapshots(); + + lastScanTime = System.currentTimeMillis(); + lastScanDurationMs = lastScanTime - start; + initialized = true; + + LOG.info("RESTXQ scan complete: {} routes from {} modules in {}ms", + routes.size(), routesByModule.size(), lastScanDurationMs); + } finally { + lock.writeLock().unlock(); + } + } + + /** + * Rebuilds all volatile immutable snapshots from the guarded mutable state. + * Must be called under write lock. 
+ */ + @GuardedBy("lock") + private void publishSnapshots() { + // Routes snapshot — sorted by specificity + final List allRoutes = new ArrayList<>(); + for (final List moduleRoutes : routesByModule.values()) { + allRoutes.addAll(moduleRoutes); + } + Collections.sort(allRoutes); + this.routes = Collections.unmodifiableList(allRoutes); + + // Error routes snapshot + final List allErrorRoutes = new ArrayList<>(); + for (final List moduleErrorRoutes : errorRoutesByModule.values()) { + allErrorRoutes.addAll(moduleErrorRoutes); + } + this.errorRoutes = Collections.unmodifiableList(allErrorRoutes); + + // Failed modules snapshot + this.failedModulesSnapshot = Map.copyOf(failedModules); + + // Module count snapshot + this.moduleCount = routesByModule.size(); + } + + // --- Scanning internals (called under write lock) --- + + @GuardedBy("lock") + private void scanCollection(final DBBroker broker, final XmldbURI collectionUri, + final Set scannedModules) { + try (final Collection collection = broker.openCollection(collectionUri, LockMode.READ_LOCK)) { + if (collection == null) { + return; + } + + // Scan documents in this collection + final Iterator docs = collection.iterator(broker); + while (docs.hasNext()) { + final DocumentImpl doc = docs.next(); + if (isXQueryDocument(doc)) { + scannedModules.add(doc.getURI().toString()); + scanDocument(broker, doc); + } + } + + // Recurse into child collections + final List childUris = new ArrayList<>(); + for (final Iterator it = collection.collectionIterator(broker); it.hasNext(); ) { + childUris.add(collectionUri.append(it.next())); + } + + // Release parent lock before recursing (we re-acquire per child) + collection.close(); + for (final XmldbURI childUri : childUris) { + scanCollection(broker, childUri, scannedModules); + } + + } catch (final PermissionDeniedException e) { + LOG.debug("Permission denied scanning collection {}: {}", collectionUri, e.getMessage()); + } catch (final Exception e) { + LOG.warn("Error scanning 
collection {}: {}", collectionUri, e.getMessage()); + } + } + + @GuardedBy("lock") + private void scanDocument(final DBBroker broker, final DocumentImpl doc) { + final String moduleUri = doc.getURI().toString(); + final long lastModified = doc.getLastModified(); + + // Check if we already have an up-to-date parse + final Long cached = moduleTimestamps.get(moduleUri); + if (cached != null && cached == lastModified) { + return; + } + + // Need to (re-)parse this module + try { + if (!(doc instanceof BinaryDocument binDoc)) { + return; + } + + final DBSource source = new DBSource(brokerPool, binDoc, true); + final XQuery xqueryService = brokerPool.getXQueryService(); + final XQueryContext context = new XQueryContext(brokerPool); + + // Set module load path so relative imports resolve correctly + context.setModuleLoadPath(XmldbURI.EMBEDDED_SERVER_URI_PREFIX + + doc.getURI().removeLastSegment().toString()); + + final CompiledXQuery compiled = xqueryService.compile(context, source); + final AnnotationParser.ParseResult result = + AnnotationParser.parseModuleFull(compiled, moduleUri); + + if (!result.routes.isEmpty()) { + routesByModule.put(moduleUri, result.routes); + LOG.debug("Found {} RESTXQ routes in {}", result.routes.size(), moduleUri); + } else { + routesByModule.remove(moduleUri); + } + + if (!result.errorRoutes.isEmpty()) { + errorRoutesByModule.put(moduleUri, result.errorRoutes); + LOG.debug("Found {} RESTXQ error handlers in {}", result.errorRoutes.size(), moduleUri); + } else { + errorRoutesByModule.remove(moduleUri); + } + + moduleTimestamps.put(moduleUri, lastModified); + failedModules.remove(moduleUri); + + } catch (final RestXqAnnotationException e) { + LOG.warn("RESTXQ annotation error in {}: {}", moduleUri, e.getMessage()); + failedModules.put(moduleUri, e.getMessage()); + routesByModule.remove(moduleUri); + errorRoutesByModule.remove(moduleUri); + moduleTimestamps.remove(moduleUri); + } catch (final XPathException e) { + LOG.warn("Failed to compile 
RESTXQ module {}: {}", moduleUri, e.getMessage()); + failedModules.put(moduleUri, e.getMessage()); + routesByModule.remove(moduleUri); + errorRoutesByModule.remove(moduleUri); + moduleTimestamps.remove(moduleUri); + } catch (final PermissionDeniedException | IOException e) { + LOG.warn("Error reading RESTXQ module {}: {}", moduleUri, e.getMessage()); + failedModules.put(moduleUri, e.getMessage()); + } catch (final Exception e) { + LOG.warn("Unexpected error reading RESTXQ module {}: {}", moduleUri, e.getMessage()); + failedModules.put(moduleUri, e.getClass().getName() + ": " + e.getMessage()); + } + } + + private static boolean isXQueryDocument(final DocumentImpl doc) { + if (doc instanceof BinaryDocument) { + final String mimeType = doc.getMimeType(); + if (mimeType != null && XQUERY_MIME_TYPES.contains(mimeType)) { + return true; + } + // Fallback: check file extension + final String name = doc.getFileURI().toString(); + for (final String ext : XQUERY_EXTENSIONS) { + if (name.endsWith(ext)) { + return true; + } + } + } + return false; + } + + // --- Status accessors: read volatile snapshots, no lock needed --- + + public int getRouteCount() { + return routes.size(); + } + + public int getModuleCount() { + return moduleCount; + } + + public long getLastScanTime() { + return lastScanTime; + } + + public long getLastScanDurationMs() { + return lastScanDurationMs; + } + + public Map getFailedModules() { + return failedModulesSnapshot; + } + + public List getAllRoutes() { + return routes; + } + + public boolean isInitialized() { + return initialized; + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/SecurityAnnotationHandler.java b/exist-core/src/main/java/org/exist/http/restxq/SecurityAnnotationHandler.java new file mode 100644 index 00000000000..9a146386e8f --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/SecurityAnnotationHandler.java @@ -0,0 +1,151 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The 
eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq; + +import org.exist.dom.QName; +import org.exist.security.SecurityManager; +import org.exist.security.Subject; +import org.exist.xquery.Annotation; +import org.exist.xquery.CompiledXQuery; +import org.exist.xquery.UserDefinedFunction; +import org.exist.xquery.XPathException; + +import java.util.*; + +/** + * Handles eXist-specific security annotations for access control on + * RESTXQ-dispatched functions. + * + *

Note: These annotations are an eXist-db extension, + * not part of the RESTXQ specification. They provide declarative access + * control by checking the already-authenticated servlet user against + * eXist's permission system. They do not introduce server-side session + * state — they inspect the {@link Subject} that was authenticated via + * standard HTTP mechanisms (Basic auth, etc.) by the servlet container.

+ * + *

Supported annotations

+ *
    + *
  • {@code %auth:allow-groups("dba", "editor")} — require membership + * in at least one listed group
  • + *
  • {@code %auth:allow-users("admin")} — require specific username
  • + *
  • {@code %auth:deny-groups("guest")} — deny access to listed groups
  • + *
  • {@code %auth:login-required} — require any authenticated user + * (not the guest account)
  • + *
+ * + *

All annotations use the namespace + * {@code http://exist-db.org/ns/auth} (prefix {@code auth}).

+ */ +public class SecurityAnnotationHandler { + + /** + * Accept both the canonical namespace and the test namespace for auth annotations. + * The test suite uses 'http://exist-db.org/xquery/restxq/auth'. + */ + private static final Set AUTH_NAMESPACES = Set.of( + RestXqNamespaces.AUTH_NS, + "http://exist-db.org/xquery/restxq/auth" + ); + + /** + * Checks whether the given subject is authorized to invoke the + * function at the given route. Returns null if authorized, or an + * error message string if denied. + * + * @param subject the authenticated user (from servlet-level auth) + * @param route the matched RESTXQ route + * @param compiled the compiled XQuery containing the function + * @return null if authorized, or a denial reason string + */ + public static String checkAccess(final Subject subject, final Route route, + final CompiledXQuery compiled) { + final UserDefinedFunction fn = + compiled.getContext().resolveFunction(route.getFunctionName(), route.getArity()); + if (fn == null) { + return null; + } + + final Annotation[] annotations = fn.getSignature().getAnnotations(); + if (annotations == null) { + return null; + } + + for (final Annotation annotation : annotations) { + final QName name = annotation.getName(); + if (!AUTH_NAMESPACES.contains(name.getNamespaceURI())) { + continue; + } + + final String local = name.getLocalPart(); + switch (local) { + case "login-required" -> { + if (SecurityManager.GUEST_USER.equals(subject.getName())) { + return "Authentication required"; + } + } + case "allow-groups" -> { + final Set allowed = literalValues(annotation); + if (!allowed.isEmpty() && !hasAnyGroup(subject, allowed)) { + return "User not in required group"; + } + } + case "allow-users" -> { + final Set allowed = literalValues(annotation); + if (!allowed.isEmpty() && !allowed.contains(subject.getName())) { + return "User not authorized"; + } + } + case "deny-groups" -> { + final Set denied = literalValues(annotation); + if (hasAnyGroup(subject, denied)) { + 
return "User in denied group"; + } + } + default -> { /* ignore unknown auth annotations */ } + } + } + + return null; // authorized + } + + private static boolean hasAnyGroup(final Subject subject, final Set groups) { + final String[] userGroups = subject.getGroups(); + for (final String ug : userGroups) { + if (groups.contains(ug)) { + return true; + } + } + return false; + } + + private static Set literalValues(final Annotation annotation) { + final Set values = new LinkedHashSet<>(); + for (final org.exist.xquery.LiteralValue lv : annotation.getValue()) { + try { + values.add(lv.getValue().getStringValue()); + } catch (final XPathException e) { + // skip + } + } + return values; + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/xquery/WebFunctions.java b/exist-core/src/main/java/org/exist/http/restxq/xquery/WebFunctions.java new file mode 100644 index 00000000000..c99a835f66c --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/xquery/WebFunctions.java @@ -0,0 +1,164 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq.xquery; + +import org.exist.dom.QName; +import org.exist.dom.memtree.MemTreeBuilder; +import org.exist.http.restxq.RestXqNamespaces; +import org.exist.xquery.*; +import org.exist.xquery.value.*; +import org.xml.sax.helpers.AttributesImpl; + +/** + * Implements web:redirect(), web:forward(), and web:error() for RESTXQ. + * + *
    + *
  • {@code web:redirect($url)} — returns a {@code } with HTTP 302
  • + *
  • {@code web:forward($url)} — returns a {@code } element
  • + *
  • {@code web:error($code, $message)} — throws an XPathException with HTTP status code
  • + *
+ */ +public class WebFunctions extends BasicFunction { + + private static final QName QN_REDIRECT = new QName("redirect", WebModule.NAMESPACE_URI, WebModule.PREFIX); + private static final QName QN_FORWARD = new QName("forward", WebModule.NAMESPACE_URI, WebModule.PREFIX); + private static final QName QN_ERROR = new QName("error", WebModule.NAMESPACE_URI, WebModule.PREFIX); + + public static final FunctionSignature FNS_REDIRECT = new FunctionSignature( + QN_REDIRECT, + "Returns a rest:response element that redirects the client to the given URL (HTTP 302).", + new SequenceType[]{ + new FunctionParameterSequenceType("url", Type.STRING, Cardinality.EXACTLY_ONE, + "The URL to redirect to") + }, + new FunctionReturnSequenceType(Type.NODE, Cardinality.EXACTLY_ONE, + "A rest:response element with HTTP 302 redirect") + ); + + public static final FunctionSignature FNS_FORWARD = new FunctionSignature( + QN_FORWARD, + "Returns a rest:forward element for server-side forwarding to the given path.", + new SequenceType[]{ + new FunctionParameterSequenceType("path", Type.STRING, Cardinality.EXACTLY_ONE, + "The path to forward to") + }, + new FunctionReturnSequenceType(Type.NODE, Cardinality.EXACTLY_ONE, + "A rest:forward element") + ); + + public static final FunctionSignature FNS_ERROR_2 = new FunctionSignature( + QN_ERROR, + "Aborts query evaluation and returns an HTTP error response with the given status code and message.", + new SequenceType[]{ + new FunctionParameterSequenceType("code", Type.INTEGER, Cardinality.EXACTLY_ONE, + "The HTTP status code"), + new FunctionParameterSequenceType("message", Type.STRING, Cardinality.EXACTLY_ONE, + "The error message body") + }, + new FunctionReturnSequenceType(Type.ITEM, Cardinality.EMPTY_SEQUENCE, + "Never returns — throws an exception") + ); + + public WebFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final 
Sequence contextSequence) throws XPathException { + final String funcName = getSignature().getName().getLocalPart(); + return switch (funcName) { + case "redirect" -> doRedirect(args[0].getStringValue()); + case "forward" -> doForward(args[0].getStringValue()); + case "error" -> doError( + ((IntegerValue) args[0].itemAt(0)).getInt(), + args[1].getStringValue()); + default -> throw new XPathException(this, "Unknown web function: " + funcName); + }; + } + + private Sequence doRedirect(final String url) { + context.pushDocumentContext(); + try { + final MemTreeBuilder builder = context.getDocumentBuilder(); + + // + builder.startElement( + new QName("response", RestXqNamespaces.REST_NS, RestXqNamespaces.REST_PREFIX), null); + + // + final AttributesImpl httpAttrs = new AttributesImpl(); + httpAttrs.addAttribute("", "status", "status", "CDATA", "302"); + builder.startElement( + new QName("response", RestXqNamespaces.HTTP_NS, RestXqNamespaces.HTTP_PREFIX), httpAttrs); + + // + final AttributesImpl headerAttrs = new AttributesImpl(); + headerAttrs.addAttribute("", "name", "name", "CDATA", "Location"); + headerAttrs.addAttribute("", "value", "value", "CDATA", url); + builder.startElement( + new QName("header", RestXqNamespaces.HTTP_NS, RestXqNamespaces.HTTP_PREFIX), headerAttrs); + builder.endElement(); + + builder.endElement(); // http:response + builder.endElement(); // rest:response + + return builder.getDocument().getNode(1); + } finally { + context.popDocumentContext(); + } + } + + private Sequence doForward(final String path) { + context.pushDocumentContext(); + try { + final MemTreeBuilder builder = context.getDocumentBuilder(); + builder.startElement( + new QName("forward", RestXqNamespaces.REST_NS, RestXqNamespaces.REST_PREFIX), null); + builder.characters(path); + builder.endElement(); + return builder.getDocument().getNode(1); + } finally { + context.popDocumentContext(); + } + } + + private Sequence doError(final int statusCode, final String message) throws 
XPathException { + // Throw a special exception that the servlet can catch and convert to an HTTP error response + throw new WebErrorException(this, statusCode, message); + } + + /** + * Special exception for web:error() that carries an HTTP status code. + */ + public static class WebErrorException extends XPathException { + private final int httpStatusCode; + + public WebErrorException(final Expression expr, final int statusCode, final String message) { + super(expr, ErrorCodes.ERROR, message); + this.httpStatusCode = statusCode; + } + + public int getHttpStatusCode() { + return httpStatusCode; + } + } +} diff --git a/exist-core/src/main/java/org/exist/http/restxq/xquery/WebModule.java b/exist-core/src/main/java/org/exist/http/restxq/xquery/WebModule.java new file mode 100644 index 00000000000..17bb0a6c182 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/restxq/xquery/WebModule.java @@ -0,0 +1,70 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.restxq.xquery; + +import org.exist.xquery.AbstractInternalModule; +import org.exist.xquery.FunctionDef; + +import java.util.List; +import java.util.Map; + +/** + * XQuery module providing web:redirect(), web:forward(), and web:error() + * functions for RESTXQ applications. Compatible with the BaseX web module. + */ +public class WebModule extends AbstractInternalModule { + + public static final String NAMESPACE_URI = "http://basex.org/modules/web"; + public static final String PREFIX = "web"; + public static final String DESCRIPTION = "Web utility functions for RESTXQ (redirect, forward, error)"; + public static final String RELEASE_VERSION = "7.0"; + + private static final FunctionDef[] functions = { + new FunctionDef(WebFunctions.FNS_REDIRECT, WebFunctions.class), + new FunctionDef(WebFunctions.FNS_FORWARD, WebFunctions.class), + new FunctionDef(WebFunctions.FNS_ERROR_2, WebFunctions.class), + }; + + public WebModule(final Map> parameters) { + super(functions, parameters); + } + + @Override + public String getNamespaceURI() { + return NAMESPACE_URI; + } + + @Override + public String getDefaultPrefix() { + return PREFIX; + } + + @Override + public String getDescription() { + return DESCRIPTION; + } + + @Override + public String getReleaseVersion() { + return RELEASE_VERSION; + } +} diff --git a/exist-core/src/main/java/org/exist/http/servlets/HttpServletRequestWrapper.java b/exist-core/src/main/java/org/exist/http/servlets/HttpServletRequestWrapper.java index 5d3e437d4a0..178b5de7ac7 100644 --- a/exist-core/src/main/java/org/exist/http/servlets/HttpServletRequestWrapper.java +++ b/exist-core/src/main/java/org/exist/http/servlets/HttpServletRequestWrapper.java @@ -102,7 +102,7 @@ private void initialiseWrapper() { 
parseURLParameters(this.request.getQueryString()); //If POST request, Parse out parameters from the Content Body - if ("POST".equals(request.getMethod().toUpperCase())) { + if ("POST".equalsIgnoreCase(request.getMethod())) { //If there is some Content final int contentLength = request.getContentLength(); if (contentLength > 0 || contentLength == -1) { @@ -281,7 +281,7 @@ public BufferedReader getReader() throws IOException { @Override public String toString() { // If POST request AND there is some content AND its not a file upload - if ("POST".equals(request.getMethod().toUpperCase()) + if ("POST".equalsIgnoreCase(request.getMethod()) && (request.getContentLength() > 0 || request.getContentLength() == -1) && !request.getContentType().toUpperCase().startsWith("MULTIPART/")) { @@ -441,12 +441,6 @@ public boolean isRequestedSessionIdFromURL() { return request.isRequestedSessionIdFromURL(); } - @Override - @Deprecated - public boolean isRequestedSessionIdFromUrl() { - return request.isRequestedSessionIdFromUrl(); - } - @Override public boolean authenticate(final HttpServletResponse httpServletResponse) throws IOException, ServletException { return request.authenticate(httpServletResponse); @@ -572,12 +566,6 @@ public RequestDispatcher getRequestDispatcher(final String name) { return request.getRequestDispatcher(name); } - @Override - @Deprecated - public String getRealPath(final String path) { - return request.getSession().getServletContext().getRealPath(path); - } - @Override public int getRemotePort() { return request.getRemotePort(); @@ -633,6 +621,21 @@ public DispatcherType getDispatcherType() { return request.getDispatcherType(); } + @Override + public String getRequestId() { + return request.getRequestId(); + } + + @Override + public String getProtocolRequestId() { + return request.getProtocolRequestId(); + } + + @Override + public ServletConnection getServletConnection() { + return request.getServletConnection(); + } + @Override public void close() throws 
IOException { this.is.close(); diff --git a/exist-core/src/main/java/org/exist/http/urlrewrite/RewriteConfig.java b/exist-core/src/main/java/org/exist/http/urlrewrite/RewriteConfig.java index 6303fd67940..ace13810fd3 100644 --- a/exist-core/src/main/java/org/exist/http/urlrewrite/RewriteConfig.java +++ b/exist-core/src/main/java/org/exist/http/urlrewrite/RewriteConfig.java @@ -31,9 +31,8 @@ import org.exist.security.PermissionDeniedException; import org.exist.storage.DBBroker; import org.exist.storage.lock.Lock.LockMode; -import org.exist.thirdparty.net.sf.saxon.functions.regex.JDK15RegexTranslator; -import org.exist.thirdparty.net.sf.saxon.functions.regex.RegexSyntaxException; -import org.exist.thirdparty.net.sf.saxon.functions.regex.RegularExpression; +import net.sf.saxon.regex.JavaRegularExpression; +import net.sf.saxon.str.StringView; import org.exist.util.XMLReaderPool; import org.exist.xmldb.XmldbURI; import org.exist.xquery.Constants; @@ -272,16 +271,13 @@ private static final class Mapping { private Mapping(String regex, final URLRewrite action) throws ServletException { try { - final int options = RegularExpression.XML11 | RegularExpression.XPATH30; - int flagbits = 0; - - final List warnings = new ArrayList<>(); - regex = JDK15RegexTranslator.translate(regex, options, flagbits, warnings); + final JavaRegularExpression javaRegex = new JavaRegularExpression(StringView.of(regex), ""); + regex = javaRegex.getJavaRegularExpression(); this.pattern = Pattern.compile(regex, 0); this.action = action; this.matcher = pattern.matcher(""); - } catch (final RegexSyntaxException e) { + } catch (final net.sf.saxon.trans.XPathException e) { throw new ServletException("Syntax error in regular expression specified for path. 
" + e.getMessage(), e); } diff --git a/exist-core/src/main/java/org/exist/http/urlrewrite/XQueryURLRewrite.java b/exist-core/src/main/java/org/exist/http/urlrewrite/XQueryURLRewrite.java index e0cef4b13d6..ccb49419ddc 100644 --- a/exist-core/src/main/java/org/exist/http/urlrewrite/XQueryURLRewrite.java +++ b/exist-core/src/main/java/org/exist/http/urlrewrite/XQueryURLRewrite.java @@ -382,7 +382,6 @@ Subject getDefaultUser() { } private void applyViews(final ModelAndView modelView, final List views, final HttpServletResponse response, final RequestWrapper modifiedRequest, final HttpServletResponse currentResponse) throws IOException, ServletException { - //int status; HttpServletResponse wrappedResponse = currentResponse; for (int i = 0; i < views.size(); i++) { final URLRewrite view = views.get(i); @@ -475,7 +474,7 @@ private ModelAndView getFromCache(final String url, final Subject user) throws E return null; } - try (final DBBroker broker = pool.get(Optional.ofNullable(user))) { + try (@SuppressWarnings("PMD.UnusedLocalVariable") final DBBroker broker = pool.get(Optional.ofNullable(user))) { if (model.getSourceInfo().source instanceof DBSource) { ((DBSource) model.getSourceInfo().source).validate(Permission.EXECUTE); @@ -506,25 +505,27 @@ void clearCaches() { * @param request the http request * @param response the http response */ - private void doRewrite(URLRewrite action, RequestWrapper request, final HttpServletResponse response) throws IOException, ServletException { - if (action.getTarget() != null && !(action instanceof Redirect)) { - final String uri = action.resolve(request); - final URLRewrite staticRewrite = rewriteConfig.lookup(uri, request.getServerName(), true, action); + private void doRewrite(final URLRewrite action, final RequestWrapper request, final HttpServletResponse response) throws IOException, ServletException { + URLRewrite effectiveAction = action; + RequestWrapper effectiveRequest = request; + if (effectiveAction.getTarget() != null && 
!(effectiveAction instanceof Redirect)) { + final String uri = effectiveAction.resolve(effectiveRequest); + final URLRewrite staticRewrite = rewriteConfig.lookup(uri, effectiveRequest.getServerName(), true, effectiveAction); if (staticRewrite != null) { - staticRewrite.copyFrom(action); - action = staticRewrite; - final RequestWrapper modifiedRequest = new RequestWrapper(request); - modifiedRequest.setPaths(uri, action.getPrefix()); + staticRewrite.copyFrom(effectiveAction); + effectiveAction = staticRewrite; + final RequestWrapper modifiedRequest = new RequestWrapper(effectiveRequest); + modifiedRequest.setPaths(uri, effectiveAction.getPrefix()); if (LOG.isTraceEnabled()) { - LOG.trace("Forwarding to : {} url: {}", action.toString(), action.getURI()); + LOG.trace("Forwarding to : {} url: {}", effectiveAction.toString(), effectiveAction.getURI()); } - request = modifiedRequest; + effectiveRequest = modifiedRequest; } } - action.prepareRequest(request); - action.doRewrite(request, response); + effectiveAction.prepareRequest(effectiveRequest); + effectiveAction.doRewrite(effectiveRequest, response); } protected ServletConfig getConfig() { @@ -543,6 +544,7 @@ private URLRewrite parseAction(final HttpServletRequest request, final Element a return rewrite; } + @SuppressWarnings("PMD.UnusedPrivateMethod") // called from switch expression in service() private void parseViews(final HttpServletRequest request, final Element view, final ModelAndView modelView) throws ServletException { Node node = view.getFirstChild(); while (node != null) { @@ -557,6 +559,7 @@ private void parseViews(final HttpServletRequest request, final Element view, fi } } + @SuppressWarnings("PMD.UnusedPrivateMethod") // called from switch expression in service() private void parseErrorHandlers(final HttpServletRequest request, final Element view, final ModelAndView modelView) throws ServletException { Node node = view.getFirstChild(); while (node != null) { @@ -685,28 +688,30 @@ private Sequence 
runQuery(final DBBroker broker, final RequestWrapper request, f } } - String adjustPathForSourceLookup(final String basePath, String path) { + String adjustPathForSourceLookup(final String basePath, final String path) { if (LOG.isTraceEnabled()) { LOG.trace("request path={}", path); } - if (basePath.startsWith(XmldbURI.EMBEDDED_SERVER_URI_PREFIX) && path.startsWith(basePath.replace(XmldbURI.EMBEDDED_SERVER_URI_PREFIX, ""))) { - path = path.replace(basePath.replace(XmldbURI.EMBEDDED_SERVER_URI_PREFIX, ""), ""); + String adjustedPath = path; + if (basePath.startsWith(XmldbURI.EMBEDDED_SERVER_URI_PREFIX) && adjustedPath.startsWith(basePath.replace(XmldbURI.EMBEDDED_SERVER_URI_PREFIX, ""))) { + adjustedPath = adjustedPath.replace(basePath.replace(XmldbURI.EMBEDDED_SERVER_URI_PREFIX, ""), ""); - } else if (path.startsWith("/db/")) { - path = path.substring(4); + } else if (adjustedPath.startsWith("/db/")) { + adjustedPath = adjustedPath.substring(4); } - if (path.startsWith("/")) { - path = path.substring(1); + if (adjustedPath.startsWith("/")) { + adjustedPath = adjustedPath.substring(1); } if (LOG.isTraceEnabled()) { - LOG.trace("adjusted request path={}", path); + LOG.trace("adjusted request path={}", adjustedPath); } - return path; + return adjustedPath; } + @SuppressWarnings("PMD.UnusedPrivateMethod") // called indirectly from getSourceInfo() private SourceInfo findSource(final HttpServletRequest request, final DBBroker broker, final String basePath) { if (LOG.isTraceEnabled()) { LOG.trace("basePath={}", basePath); @@ -977,9 +982,6 @@ private static class ModelAndView { private boolean useCache = false; private SourceInfo sourceInfo = null; - private ModelAndView() { - } - public void setSourceInfo(final SourceInfo sourceInfo) { this.sourceInfo = sourceInfo; } @@ -1179,12 +1181,10 @@ public String getPathTranslated() { return super.getSession().getServletContext().getRealPath(pathInfo); } - protected void setData(@Nullable byte[] data) { - if (data == null) { - 
data = new byte[0]; - } - contentLength = data.length; - sis = new CachingServletInputStream(data); + protected void setData(@Nullable final byte[] data) { + final byte[] effectiveData = data == null ? new byte[0] : data; + contentLength = effectiveData.length; + sis = new CachingServletInputStream(effectiveData); } public void addParameter(final String name, final String value) { @@ -1380,11 +1380,6 @@ public void setStatus(final int i) { super.setStatus(i); } - @Override - public void setStatus(final int i, final String msg) { - this.status = i; - super.setStatus(i, msg); - } @Override public void sendError(final int i, final String msg) throws IOException { @@ -1413,10 +1408,8 @@ public void flushBuffer() throws IOException { } public void flush() throws IOException { - if (cache) { - if (contentType != null) { - super.setContentType(contentType); - } + if (cache && contentType != null) { + super.setContentType(contentType); } if (sos != null) { final ServletOutputStream out = super.getOutputStream(); diff --git a/exist-core/src/main/java/org/exist/http/ws/EvalProtocol.java b/exist-core/src/main/java/org/exist/http/ws/EvalProtocol.java new file mode 100644 index 00000000000..7ca92bf5112 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/ws/EvalProtocol.java @@ -0,0 +1,336 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.ws; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.StringWriter; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +/** + * JSON protocol messages for the /ws/eval WebSocket endpoint. + * + * All messages are JSON objects. Client messages have an "action" field; + * server messages have a "type" field. + */ +public final class EvalProtocol { + + private static final JsonFactory JSON_FACTORY = new JsonFactory(); + + public static final int DEFAULT_CHUNK_SIZE = 1000; + public static final long DEFAULT_MAX_EXECUTION_TIME = 0; // no limit + + // Client actions + public static final String ACTION_EVAL = "eval"; + public static final String ACTION_CANCEL = "cancel"; + public static final String ACTION_COMPILE = "compile"; + public static final String ACTION_ADMIN_CANCEL = "admin-cancel"; + + // Server message types + public static final String TYPE_PROGRESS = "progress"; + public static final String TYPE_RESULT = "result"; + public static final String TYPE_COMPILE = "compile"; + public static final String TYPE_ERROR = "error"; + public static final String TYPE_CANCELLED = "cancelled"; + + // Phases + public static final String PHASE_PARSING = "parsing"; + public static final String PHASE_COMPILING = "compiling"; + public static final String PHASE_EVALUATING = "evaluating"; + public static final String PHASE_SERIALIZING = "serializing"; + public static final String PHASE_COMPLETE = "complete"; + + private EvalProtocol() { + // utility class + } + + /** + * Parsed 
client request message. + */ + public static final class ClientMessage { + public final String action; + public final String id; + @Nullable public final String query; + @Nullable public final Properties serialization; + @Nullable public final Map variables; + @Nullable public final String context; + @Nullable public final String moduleLoadPath; + public final long maxExecutionTime; + public final boolean streaming; + public final int chunkSize; + + ClientMessage(final String action, final String id, @Nullable final String query, + @Nullable final Properties serialization, + @Nullable final Map variables, + @Nullable final String context, + @Nullable final String moduleLoadPath, + final long maxExecutionTime, + final boolean streaming, + final int chunkSize) { + this.action = action; + this.id = id; + this.query = query; + this.serialization = serialization; + this.variables = variables; + this.context = context; + this.moduleLoadPath = moduleLoadPath; + this.maxExecutionTime = maxExecutionTime; + this.streaming = streaming; + this.chunkSize = chunkSize; + } + } + + /** + * Timing breakdown for query execution phases. + */ + public static final class Timing { + public long parse; + public long compile; + public long evaluate; + public long serialize; + public long total; + } + + /** + * Parse a JSON client message. 
+ */ + public static ClientMessage parseClientMessage(final String json) throws IOException { + String action = null; + String id = null; + String query = null; + Properties serialization = null; + Map variables = null; + String context = null; + String moduleLoadPath = null; + long maxExecutionTime = DEFAULT_MAX_EXECUTION_TIME; + boolean streaming = true; + int chunkSize = DEFAULT_CHUNK_SIZE; + + try (final JsonParser parser = JSON_FACTORY.createParser(json)) { + if (parser.nextToken() != JsonToken.START_OBJECT) { + throw new IOException("Expected JSON object"); + } + while (parser.nextToken() != JsonToken.END_OBJECT) { + final String field = parser.currentName(); + parser.nextToken(); + switch (field) { + case "action": + action = parser.getValueAsString(); + break; + case "id": + id = parser.getValueAsString(); + break; + case "query": + query = parser.getValueAsString(); + break; + case "context": + context = parser.getValueAsString(); + break; + case "module-load-path": + moduleLoadPath = parser.getValueAsString(); + break; + case "max-execution-time": + maxExecutionTime = parser.getValueAsLong(); + break; + case "streaming": + streaming = parser.getValueAsBoolean(); + break; + case "chunk-size": + chunkSize = parser.getValueAsInt(); + break; + case "serialization": + serialization = parseObject(parser); + break; + case "variables": + variables = parseStringMap(parser); + break; + default: + parser.skipChildren(); + break; + } + } + } + + if (action == null) { + throw new IOException("Missing required field: action"); + } + if (id == null) { + throw new IOException("Missing required field: id"); + } + + return new ClientMessage(action, id, query, serialization, variables, + context, moduleLoadPath, maxExecutionTime, streaming, chunkSize); + } + + private static Properties parseObject(final JsonParser parser) throws IOException { + final Properties props = new Properties(); + if (parser.currentToken() != JsonToken.START_OBJECT) { + return props; + } + while 
(parser.nextToken() != JsonToken.END_OBJECT) { + final String key = parser.currentName(); + parser.nextToken(); + props.setProperty(key, parser.getValueAsString()); + } + return props; + } + + private static Map parseStringMap(final JsonParser parser) throws IOException { + final Map map = new HashMap<>(); + if (parser.currentToken() != JsonToken.START_OBJECT) { + return map; + } + while (parser.nextToken() != JsonToken.END_OBJECT) { + final String key = parser.currentName(); + parser.nextToken(); + map.put(key, parser.getValueAsString()); + } + return map; + } + + // --- Server message builders --- + + public static String progressMessage(final String id, final String phase, + final long items, final long elapsed) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", TYPE_PROGRESS); + gen.writeStringField("id", id); + gen.writeStringField("phase", phase); + gen.writeNumberField("items", items); + gen.writeNumberField("elapsed", elapsed); + gen.writeEndObject(); + } + return sw.toString(); + } + + public static String resultMessage(final String id, final int chunk, + final String data, final boolean more, + @Nullable final Timing timing, + final long items) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", TYPE_RESULT); + gen.writeStringField("id", id); + gen.writeNumberField("chunk", chunk); + gen.writeStringField("data", data); + gen.writeBooleanField("more", more); + if (!more && timing != null) { + writeTiming(gen, timing); + } + if (!more) { + gen.writeNumberField("items", items); + gen.writeBooleanField("truncated", false); + } + gen.writeEndObject(); + } + return sw.toString(); + } + + public static String compileResultMessage(final String id, final boolean success, + @Nullable 
final String errorCode, + @Nullable final String errorMessage, + final int line, final int column) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", TYPE_COMPILE); + gen.writeStringField("id", id); + gen.writeBooleanField("success", success); + if (!success) { + gen.writeArrayFieldStart("diagnostics"); + gen.writeStartObject(); + gen.writeNumberField("line", line); + gen.writeNumberField("column", column); + gen.writeStringField("severity", "error"); + if (errorCode != null) { + gen.writeStringField("code", errorCode); + } + if (errorMessage != null) { + gen.writeStringField("message", errorMessage); + } + gen.writeEndObject(); + gen.writeEndArray(); + } + gen.writeEndObject(); + } + return sw.toString(); + } + + public static String errorMessage(final String id, @Nullable final String code, + final String message, final int line, + final int column, + @Nullable final Timing timing) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", TYPE_ERROR); + gen.writeStringField("id", id); + if (code != null) { + gen.writeStringField("code", code); + } + gen.writeStringField("message", message); + gen.writeNumberField("line", line); + gen.writeNumberField("column", column); + if (timing != null) { + writeTiming(gen, timing); + } + gen.writeEndObject(); + } + return sw.toString(); + } + + public static String cancelledMessage(final String id, final long items, + @Nullable final Timing timing) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", TYPE_CANCELLED); + gen.writeStringField("id", id); + gen.writeNumberField("items", items); + if (timing != null) { + 
writeTiming(gen, timing); + } + gen.writeEndObject(); + } + return sw.toString(); + } + + private static void writeTiming(final JsonGenerator gen, final Timing timing) throws IOException { + gen.writeObjectFieldStart("timing"); + gen.writeNumberField("parse", timing.parse); + gen.writeNumberField("compile", timing.compile); + gen.writeNumberField("evaluate", timing.evaluate); + gen.writeNumberField("serialize", timing.serialize); + gen.writeNumberField("total", timing.total); + gen.writeEndObject(); + } +} diff --git a/exist-core/src/main/java/org/exist/http/ws/EvalSession.java b/exist-core/src/main/java/org/exist/http/ws/EvalSession.java new file mode 100644 index 00000000000..f59f9dc5fc6 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/ws/EvalSession.java @@ -0,0 +1,86 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.ws; + +import org.exist.security.Subject; +import org.exist.xquery.XQueryWatchDog; + +import javax.annotation.Nullable; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Per-connection state for the /ws/eval WebSocket endpoint. 
+ * + * Tracks the authenticated user and all running queries for cancellation. + */ +public final class EvalSession { + + private final Subject subject; + private final Map runningQueries = new ConcurrentHashMap<>(); + + public EvalSession(final Subject subject) { + this.subject = subject; + } + + public Subject getSubject() { + return subject; + } + + /** + * Register a running query's watchdog for cancellation. + */ + public void registerQuery(final String queryId, final XQueryWatchDog watchDog) { + runningQueries.put(queryId, watchDog); + } + + /** + * Unregister a completed or cancelled query. + */ + public void unregisterQuery(final String queryId) { + runningQueries.remove(queryId); + } + + /** + * Cancel a running query by its ID. + * + * @return true if the query was found and cancelled + */ + public boolean cancelQuery(final String queryId) { + final XQueryWatchDog watchDog = runningQueries.get(queryId); + if (watchDog != null) { + watchDog.kill(0); + return true; + } + return false; + } + + /** + * Cancel all running queries (called on session close). + */ + public void cancelAll() { + for (final XQueryWatchDog watchDog : runningQueries.values()) { + watchDog.kill(0); + } + runningQueries.clear(); + } +} diff --git a/exist-core/src/main/java/org/exist/http/ws/EvalWebSocketEndpoint.java b/exist-core/src/main/java/org/exist/http/ws/EvalWebSocketEndpoint.java new file mode 100644 index 00000000000..ed7b619303e --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/ws/EvalWebSocketEndpoint.java @@ -0,0 +1,322 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.ws; + +import org.apache.commons.codec.binary.Base64; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.EXistException; +import org.exist.security.AuthenticationException; +import org.exist.security.SecurityManager; +import org.exist.security.Subject; +import org.exist.storage.BrokerPool; +import org.exist.storage.ProcessMonitor; + +import jakarta.websocket.*; +import jakarta.websocket.server.HandshakeRequest; +import jakarta.websocket.server.ServerEndpoint; +import jakarta.websocket.server.ServerEndpointConfig; +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * WebSocket endpoint for real-time, streaming XQuery evaluation. + * + * Supports: + *
    + *
  • Query evaluation with streaming results
  • + *
  • Query cancellation
  • + *
  • Compile-only checks (no execution)
  • + *
  • Progress reporting
  • + *
  • Timing breakdown (parse/compile/evaluate/serialize)
  • + *
+ * + * Authentication is performed during the WebSocket handshake using + * Basic authentication from the HTTP upgrade request headers. + */ +@ServerEndpoint( + value = "/ws/eval", + configurator = EvalWebSocketEndpoint.AuthConfigurator.class +) +public class EvalWebSocketEndpoint { + + private static final Logger LOG = LogManager.getLogger(EvalWebSocketEndpoint.class); + private static final String USER_PROPERTY = "exist.eval.subject"; + + private static final Map sessions = new ConcurrentHashMap<>(); + private static final ExecutorService queryExecutorService = Executors.newCachedThreadPool(r -> { + final Thread t = new Thread(r, "ws-eval-worker"); + t.setDaemon(true); + return t; + }); + + /** + * Configurator that extracts Basic auth credentials during the WebSocket + * handshake and authenticates the user against eXist's SecurityManager. + */ + public static class AuthConfigurator extends ServerEndpointConfig.Configurator { + @Override + public void modifyHandshake(final ServerEndpointConfig sec, + final HandshakeRequest request, + final HandshakeResponse response) { + final Map> headers = request.getHeaders(); + final List authHeaders = headers.get("authorization"); + + Subject subject = null; + if (authHeaders != null && !authHeaders.isEmpty()) { + final String credentials = authHeaders.get(0); + if (credentials.toLowerCase().startsWith("basic ")) { + try { + final byte[] decoded = Base64.decodeBase64( + credentials.substring("basic ".length())); + final String s = new String(decoded, UTF_8); + final int p = s.indexOf(':'); + final String username = p < 0 ? s : s.substring(0, p); + final String password = p < 0 ? 
null : s.substring(p + 1); + + final BrokerPool pool = BrokerPool.getInstance(); + final SecurityManager secman = pool.getSecurityManager(); + subject = secman.authenticate(username, password); + } catch (final AuthenticationException e) { + LOG.warn("WebSocket eval authentication failed: {}", e.getMessage()); + } catch (final EXistException e) { + LOG.error("Failed to get BrokerPool for WebSocket auth: {}", e.getMessage()); + } + } + } + + // No guest fallback — authentication is required for WebSocket eval. + // Unauthenticated connections will be rejected in onOpen(). + if (subject != null) { + sec.getUserProperties().put(USER_PROPERTY, subject); + } else { + LOG.debug("WebSocket eval connection without valid credentials — will be rejected in onOpen"); + } + } + } + + private static final int MAX_TEXT_MESSAGE_SIZE = 10 * 1024 * 1024; // 10MB + + @OnOpen + public void onOpen(final Session session, final EndpointConfig config) { + session.setMaxIdleTimeout(0); // no idle timeout for eval sessions + session.setMaxTextMessageBufferSize(MAX_TEXT_MESSAGE_SIZE); + + final Subject subject = (Subject) config.getUserProperties().get(USER_PROPERTY); + if (subject == null) { + try { + session.close(new CloseReason( + CloseReason.CloseCodes.VIOLATED_POLICY, "Authentication failed")); + } catch (final IOException e) { + LOG.debug("Error closing unauthenticated session: {}", e.getMessage()); + } + return; + } + + final EvalSession evalSession = new EvalSession(subject); + sessions.put(session, evalSession); + LOG.debug("Eval WebSocket opened for user: {}", subject.getName()); + } + + @OnMessage + public void onMessage(final String message, final Session session) { + final EvalSession evalSession = sessions.get(session); + if (evalSession == null) { + LOG.warn("Received message on unregistered session"); + return; + } + + final EvalProtocol.ClientMessage msg; + try { + msg = EvalProtocol.parseClientMessage(message); + } catch (final IOException e) { + try { + 
session.getBasicRemote().sendText( + EvalProtocol.errorMessage("unknown", null, + "Invalid message: " + e.getMessage(), 0, 0, null)); + } catch (final IOException ex) { + LOG.debug("Failed to send parse error: {}", ex.getMessage()); + } + return; + } + + switch (msg.action) { + case EvalProtocol.ACTION_EVAL: + handleEval(session, evalSession, msg); + break; + case EvalProtocol.ACTION_CANCEL: + handleCancel(evalSession, msg); + break; + case EvalProtocol.ACTION_COMPILE: + handleCompile(session, evalSession, msg); + break; + case EvalProtocol.ACTION_ADMIN_CANCEL: + handleAdminCancel(session, evalSession, msg); + break; + default: + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Unknown action: " + msg.action, 0, 0, null)); + } catch (final IOException e) { + LOG.debug("Failed to send unknown action error: {}", e.getMessage()); + } + break; + } + } + + @OnClose + public void onClose(final Session session, final CloseReason reason) { + final EvalSession evalSession = sessions.remove(session); + if (evalSession != null) { + evalSession.cancelAll(); + LOG.debug("Eval WebSocket closed: {}", reason.getReasonPhrase()); + } + } + + @OnError + public void onError(final Session session, final Throwable error) { + if (error.getMessage() != null && error.getMessage().contains("Text message size")) { + LOG.warn("WebSocket message exceeds {}MB buffer limit: {}", MAX_TEXT_MESSAGE_SIZE / (1024 * 1024), error.getMessage()); + } else { + LOG.warn("WebSocket eval error: {}", error.getMessage(), error); + } + final EvalSession evalSession = sessions.remove(session); + if (evalSession != null) { + evalSession.cancelAll(); + } + } + + private void handleEval(final Session session, final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + if (msg.query == null || msg.query.isEmpty()) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Missing required field: query", 0, 0, null)); + } catch 
(final IOException e) { + LOG.debug("Failed to send missing query error: {}", e.getMessage()); + } + return; + } + + // Execute on a worker thread to avoid blocking the WebSocket message thread + queryExecutorService.submit(() -> { + try { + final BrokerPool pool = BrokerPool.getInstance(); + final QueryExecutor executor = new QueryExecutor(pool); + executor.execute(session, evalSession, msg); + } catch (final EXistException e) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Database unavailable: " + e.getMessage(), 0, 0, null)); + } catch (final IOException ex) { + LOG.debug("Failed to send database error: {}", ex.getMessage()); + } + } + }); + } + + private void handleCancel(final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + final boolean cancelled = evalSession.cancelQuery(msg.id); + if (!cancelled) { + LOG.debug("Cancel requested for unknown query: {}", msg.id); + } + } + + /** + * Admin cancel: kill any running query by its context identity hash code. + * Requires DBA role. Uses the ProcessMonitor (same as system:kill-running-xquery). 
+ */ + private void handleAdminCancel(final Session session, final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + if (!evalSession.getSubject().hasDbaRole()) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Permission denied: admin-cancel requires DBA role", 0, 0, null)); + } catch (final IOException e) { + LOG.debug("Failed to send permission error: {}", e.getMessage()); + } + return; + } + + try { + final BrokerPool pool = BrokerPool.getInstance(); + final ProcessMonitor monitor = pool.getProcessMonitor(); + final int targetId = Integer.parseInt(msg.id); + + boolean found = false; + for (final org.exist.xquery.XQueryWatchDog wd : monitor.getRunningXQueries()) { + if (System.identityHashCode(wd.getContext()) == targetId) { + wd.kill(0); + found = true; + break; + } + } + + if (!found) { + LOG.debug("Admin cancel: query {} not found", msg.id); + } + } catch (final Exception e) { + LOG.warn("Admin cancel failed: {}", e.getMessage()); + } + } + + private void handleCompile(final Session session, final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + if (msg.query == null || msg.query.isEmpty()) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Missing required field: query", 0, 0, null)); + } catch (final IOException e) { + LOG.debug("Failed to send missing query error: {}", e.getMessage()); + } + return; + } + + queryExecutorService.submit(() -> { + try { + final BrokerPool pool = BrokerPool.getInstance(); + final QueryExecutor executor = new QueryExecutor(pool); + executor.compile(session, evalSession, msg); + } catch (final EXistException e) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(msg.id, null, + "Database unavailable: " + e.getMessage(), 0, 0, null)); + } catch (final IOException ex) { + LOG.debug("Failed to send database error: {}", ex.getMessage()); + } + } + }); + } +} diff --git 
a/exist-core/src/main/java/org/exist/http/ws/QueryExecutor.java b/exist-core/src/main/java/org/exist/http/ws/QueryExecutor.java new file mode 100644 index 00000000000..4ec8b64f787 --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/ws/QueryExecutor.java @@ -0,0 +1,389 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.ws; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.EXistException; +import org.exist.security.PermissionDeniedException; +import org.exist.security.Subject; +import org.exist.source.StringSource; +import org.exist.storage.BrokerPool; +import org.exist.storage.DBBroker; +import org.exist.util.serializer.XQuerySerializer; +import org.exist.xquery.*; +import org.exist.xquery.value.AnyURIValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.ValueSequence; +import org.exist.xmldb.XmldbURI; +import org.xml.sax.SAXException; + +import jakarta.websocket.Session; +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.StringWriter; 
+import java.util.Map; +import java.util.Optional; +import java.util.Properties; + +/** + * Executes XQuery expressions for the /ws/eval endpoint with support for + * streaming results, progress reporting, cancellation, and timing. + */ +public final class QueryExecutor { + + private static final Logger LOG = LogManager.getLogger(QueryExecutor.class); + + private static final long PROGRESS_INTERVAL_MS = 500; + + private final BrokerPool pool; + + public QueryExecutor(final BrokerPool pool) { + this.pool = pool; + } + + /** + * Execute a query and stream results back over the WebSocket session. + */ + public void execute(final Session wsSession, final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + final EvalProtocol.Timing timing = new EvalProtocol.Timing(); + final long startTime = System.currentTimeMillis(); + final String user = evalSession.getSubject().getName(); + + try (final DBBroker broker = pool.get(Optional.of(evalSession.getSubject()))) { + // Parse phase + sendProgress(wsSession, msg.id, EvalProtocol.PHASE_PARSING, 0, 0); + QueryMonitorBroadcaster.broadcastEvent("started", msg.id, user, msg.query, + EvalProtocol.PHASE_PARSING, 0, 0); + final long parseStart = System.currentTimeMillis(); + + final XQueryContext context = new XQueryContext(pool); + configureContext(context, msg); + + final XQuery xquery = pool.getXQueryService(); + final CompiledXQuery compiled; + + // Compile phase + try { + compiled = xquery.compile(context, new StringSource(msg.query)); + } catch (final XPathException e) { + timing.parse = System.currentTimeMillis() - parseStart; + timing.total = System.currentTimeMillis() - startTime; + sendError(wsSession, msg.id, e, timing); + QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + return; + } catch (final IOException e) { + timing.total = System.currentTimeMillis() - startTime; + sendError(wsSession, msg.id, null, e.getMessage(), 0, 0, timing); + 
QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + return; + } + + timing.parse = System.currentTimeMillis() - parseStart; + final long compileEnd = System.currentTimeMillis(); + timing.compile = compileEnd - parseStart - timing.parse; + + sendProgress(wsSession, msg.id, EvalProtocol.PHASE_COMPILING, 0, + System.currentTimeMillis() - startTime); + + // Set timeout via watchdog + final XQueryWatchDog watchDog = context.getWatchDog(); + if (msg.maxExecutionTime > 0) { + watchDog.setTimeout(msg.maxExecutionTime); + } + evalSession.registerQuery(msg.id, watchDog); + + try { + // Evaluate phase + sendProgress(wsSession, msg.id, EvalProtocol.PHASE_EVALUATING, 0, + System.currentTimeMillis() - startTime); + QueryMonitorBroadcaster.broadcastEvent("progress", msg.id, user, msg.query, + EvalProtocol.PHASE_EVALUATING, 0, System.currentTimeMillis() - startTime); + final long evalStart = System.currentTimeMillis(); + + final Sequence result; + try { + result = xquery.execute(broker, compiled, null, new Properties(), true); + } catch (final TerminatedException e) { + timing.evaluate = System.currentTimeMillis() - evalStart; + timing.total = System.currentTimeMillis() - startTime; + if (watchDog.isTerminating()) { + sendCancelled(wsSession, msg.id, 0, timing); + QueryMonitorBroadcaster.broadcastEvent("cancelled", msg.id, user, msg.query, + null, 0, timing.total); + } else { + sendError(wsSession, msg.id, null, e.getMessage(), + e.getLine(), e.getColumn(), timing); + QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + } + return; + } catch (final XPathException e) { + timing.evaluate = System.currentTimeMillis() - evalStart; + timing.total = System.currentTimeMillis() - startTime; + if (watchDog.isTerminating()) { + sendCancelled(wsSession, msg.id, 0, timing); + QueryMonitorBroadcaster.broadcastEvent("cancelled", msg.id, user, msg.query, + null, 0, timing.total); + } else { + 
sendError(wsSession, msg.id, e, timing); + QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + } + return; + } + + timing.evaluate = System.currentTimeMillis() - evalStart; + + // Serialize phase + sendProgress(wsSession, msg.id, EvalProtocol.PHASE_SERIALIZING, 0, + System.currentTimeMillis() - startTime); + final long serStart = System.currentTimeMillis(); + + final long itemCount = result.getItemCount(); + final Properties outputProperties = msg.serialization != null + ? msg.serialization : new Properties(); + + try { + if (msg.streaming && itemCount > msg.chunkSize) { + streamResults(wsSession, msg.id, broker, result, outputProperties, + msg.chunkSize, timing, startTime, watchDog); + } else { + final String serialized = serializeAll(broker, result, outputProperties); + timing.serialize = System.currentTimeMillis() - serStart; + timing.total = System.currentTimeMillis() - startTime; + + sendResult(wsSession, msg.id, 1, serialized, false, timing, itemCount); + } + QueryMonitorBroadcaster.broadcastEvent("completed", msg.id, user, msg.query, + null, itemCount, System.currentTimeMillis() - startTime); + } catch (final SAXException | XPathException e) { + timing.serialize = System.currentTimeMillis() - serStart; + timing.total = System.currentTimeMillis() - startTime; + sendError(wsSession, msg.id, null, e.getMessage(), 0, 0, timing); + QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + } + } finally { + evalSession.unregisterQuery(msg.id); + context.runCleanupTasks(); + } + + } catch (final EXistException | PermissionDeniedException e) { + timing.total = System.currentTimeMillis() - startTime; + sendError(wsSession, msg.id, null, e.getMessage(), 0, 0, timing); + QueryMonitorBroadcaster.broadcastEvent("error", msg.id, user, msg.query, + null, 0, timing.total); + } + } + + /** + * Compile-check a query without executing it. 
+ */ + public void compile(final Session wsSession, final EvalSession evalSession, + final EvalProtocol.ClientMessage msg) { + try (final DBBroker broker = pool.get(Optional.of(evalSession.getSubject()))) { + final XQueryContext context = new XQueryContext(pool); + configureContext(context, msg); + + final XQuery xquery = pool.getXQueryService(); + try { + xquery.compile(context, new StringSource(msg.query)); + sendCompileResult(wsSession, msg.id, true, null, null, 0, 0); + } catch (final XPathException e) { + sendCompileResult(wsSession, msg.id, false, + e.getErrorCode() != null ? e.getErrorCode().toString() : null, + e.getDetailMessage(), e.getLine(), e.getColumn()); + } catch (final IOException e) { + sendCompileResult(wsSession, msg.id, false, null, e.getMessage(), 0, 0); + } finally { + context.runCleanupTasks(); + } + } catch (final EXistException | PermissionDeniedException e) { + sendError(wsSession, msg.id, null, e.getMessage(), 0, 0, null); + } + } + + private void configureContext(final XQueryContext context, + final EvalProtocol.ClientMessage msg) { + if (msg.moduleLoadPath != null) { + context.setModuleLoadPath(msg.moduleLoadPath); + } + if (msg.context != null) { + try { + final XmldbURI baseUri = XmldbURI.create(msg.context); + context.setStaticallyKnownDocuments(new XmldbURI[]{baseUri}); + context.setBaseURI(new AnyURIValue(msg.context)); + } catch (final XPathException e) { + LOG.warn("Invalid context URI: {}", msg.context, e); + } + } + if (msg.variables != null) { + for (final Map.Entry entry : msg.variables.entrySet()) { + try { + context.declareVariable(entry.getKey(), entry.getValue()); + } catch (final XPathException e) { + LOG.warn("Failed to declare variable ${}: {}", entry.getKey(), e.getMessage()); + } + } + } + } + + private void streamResults(final Session wsSession, final String queryId, + final DBBroker broker, final Sequence result, + final Properties outputProperties, final int chunkSize, + final EvalProtocol.Timing timing, final 
long startTime, + final XQueryWatchDog watchDog) { + final long serStart = System.currentTimeMillis(); + final long totalItems = result.getItemCount(); + int chunkNum = 0; + long itemsSent = 0; + + try { + final SequenceIterator iter = result.iterate(); + while (iter.hasNext()) { + if (watchDog.isTerminating()) { + timing.serialize = System.currentTimeMillis() - serStart; + timing.total = System.currentTimeMillis() - startTime; + sendCancelled(wsSession, queryId, itemsSent, timing); + return; + } + + // collect a chunk + final ValueSequence chunk = new ValueSequence(); + for (int i = 0; i < chunkSize && iter.hasNext(); i++) { + chunk.add(iter.nextItem()); + } + + chunkNum++; + itemsSent += chunk.getItemCount(); + final boolean more = iter.hasNext(); + + final String data = serializeAll(broker, chunk, outputProperties); + + if (!more) { + timing.serialize = System.currentTimeMillis() - serStart; + timing.total = System.currentTimeMillis() - startTime; + } + + sendResult(wsSession, queryId, chunkNum, data, more, + more ? 
null : timing, totalItems); + + // send progress during streaming + if (more && (System.currentTimeMillis() - startTime) % PROGRESS_INTERVAL_MS < 50) { + sendProgress(wsSession, queryId, EvalProtocol.PHASE_SERIALIZING, + itemsSent, System.currentTimeMillis() - startTime); + } + } + } catch (final XPathException | SAXException e) { + timing.serialize = System.currentTimeMillis() - serStart; + timing.total = System.currentTimeMillis() - startTime; + sendError(wsSession, queryId, null, e.getMessage(), 0, 0, timing); + } + } + + private String serializeAll(final DBBroker broker, final Sequence sequence, + final Properties outputProperties) + throws SAXException, XPathException { + final StringWriter writer = new StringWriter(); + final XQuerySerializer serializer = new XQuerySerializer(broker, outputProperties, writer); + serializer.serialize(sequence); + return writer.toString(); + } + + // --- WebSocket message senders --- + + private void sendProgress(final Session session, final String id, + final String phase, final long items, final long elapsed) { + try { + session.getBasicRemote().sendText( + EvalProtocol.progressMessage(id, phase, items, elapsed)); + } catch (final IOException e) { + LOG.debug("Failed to send progress: {}", e.getMessage()); + } + // Also broadcast to monitor channel (user/query not available here, + // but the snapshot fills in full details) + QueryMonitorBroadcaster.broadcastEvent("progress", id, "", null, phase, items, elapsed); + } + + private void sendResult(final Session session, final String id, final int chunk, + final String data, final boolean more, + @Nullable final EvalProtocol.Timing timing, final long items) { + try { + session.getBasicRemote().sendText( + EvalProtocol.resultMessage(id, chunk, data, more, timing, items)); + } catch (final IOException e) { + LOG.debug("Failed to send result: {}", e.getMessage()); + } + } + + private void sendError(final Session session, final String id, + @Nullable final XPathException xpe, + 
@Nullable final EvalProtocol.Timing timing) { + final String code = xpe != null && xpe.getErrorCode() != null + ? xpe.getErrorCode().toString() : null; + final String message = xpe != null ? xpe.getDetailMessage() : "Unknown error"; + final int line = xpe != null ? xpe.getLine() : 0; + final int column = xpe != null ? xpe.getColumn() : 0; + sendError(session, id, code, message, line, column, timing); + } + + private void sendError(final Session session, final String id, + @Nullable final String code, final String message, + final int line, final int column, + @Nullable final EvalProtocol.Timing timing) { + try { + session.getBasicRemote().sendText( + EvalProtocol.errorMessage(id, code, message, line, column, timing)); + } catch (final IOException e) { + LOG.debug("Failed to send error: {}", e.getMessage()); + } + } + + private void sendCancelled(final Session session, final String id, + final long items, + @Nullable final EvalProtocol.Timing timing) { + try { + session.getBasicRemote().sendText( + EvalProtocol.cancelledMessage(id, items, timing)); + } catch (final IOException e) { + LOG.debug("Failed to send cancelled: {}", e.getMessage()); + } + } + + private void sendCompileResult(final Session session, final String id, + final boolean success, + @Nullable final String errorCode, + @Nullable final String errorMessage, + final int line, final int column) { + try { + session.getBasicRemote().sendText( + EvalProtocol.compileResultMessage(id, success, errorCode, errorMessage, line, column)); + } catch (final IOException e) { + LOG.debug("Failed to send compile result: {}", e.getMessage()); + } + } +} diff --git a/exist-core/src/main/java/org/exist/http/ws/QueryMonitorBroadcaster.java b/exist-core/src/main/java/org/exist/http/ws/QueryMonitorBroadcaster.java new file mode 100644 index 00000000000..92b5b4a922c --- /dev/null +++ b/exist-core/src/main/java/org/exist/http/ws/QueryMonitorBroadcaster.java @@ -0,0 +1,147 @@ +/* + * eXist-db Open Source Native XML Database + * 
Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.http.ws; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonGenerator; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.exist.storage.BrokerPool; +import org.exist.storage.ProcessMonitor; +import org.exist.xquery.XQueryWatchDog; +import org.exist.xquery.functions.websocket.WebSocketEndpoint; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.io.StringWriter; + +/** + * Bridges the /ws/eval query lifecycle to the /ws pub/sub channel system, + * broadcasting query events to admin monitoring clients subscribed to the + * {@code _monitor} channel. + * + *

Also provides periodic snapshots of ALL running queries (including + * REST/XQueryServlet queries that don't go through /ws/eval).

+ */ +public final class QueryMonitorBroadcaster { + + private static final Logger LOG = LogManager.getLogger(QueryMonitorBroadcaster.class); + private static final JsonFactory JSON_FACTORY = new JsonFactory(); + private static final String CHANNEL = "_monitor"; + private static final int MAX_SOURCE_LENGTH = 100; + + private QueryMonitorBroadcaster() { + // utility class + } + + /** + * Broadcast a query lifecycle event to the _monitor channel. + * No-op if nobody is subscribed. + */ + public static void broadcastEvent(final String event, final String queryId, + final String user, @Nullable final String query, + @Nullable final String phase, final long items, + final long elapsed) { + if (WebSocketEndpoint.getChannelCount(CHANNEL) == 0) { + return; + } + try { + final String json = buildEventMessage(event, queryId, user, query, phase, items, elapsed); + WebSocketEndpoint.sendAll(CHANNEL, json); + } catch (final IOException e) { + LOG.debug("Failed to broadcast monitor event: {}", e.getMessage()); + } + } + + /** + * Broadcast a snapshot of all running queries from the ProcessMonitor. + * Called periodically (e.g., every 1 second) to catch queries from all + * execution paths (REST, XQueryServlet, URL rewrite, etc.). 
+ */ + public static void broadcastSnapshot(final BrokerPool pool) { + if (WebSocketEndpoint.getChannelCount(CHANNEL) == 0) { + return; + } + try { + final ProcessMonitor monitor = pool.getProcessMonitor(); + final XQueryWatchDog[] watchDogs = monitor.getRunningXQueries(); + + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", "monitor"); + gen.writeStringField("event", "snapshot"); + gen.writeArrayFieldStart("queries"); + + for (final XQueryWatchDog wd : watchDogs) { + final var ctx = wd.getContext(); + gen.writeStartObject(); + gen.writeStringField("queryId", String.valueOf(System.identityHashCode(ctx))); + gen.writeStringField("user", ctx.getEffectiveUser().getName()); + + final String sourceKey = ctx.getSource() != null ? ctx.getSource().pathOrShortIdentifier() : ""; + gen.writeStringField("source", sourceKey); + + gen.writeStringField("phase", "evaluating"); + gen.writeNumberField("elapsed", + System.currentTimeMillis() - wd.getStartTime()); + gen.writeBooleanField("terminating", wd.isTerminating()); + gen.writeEndObject(); + } + + gen.writeEndArray(); + gen.writeEndObject(); + } + + WebSocketEndpoint.sendAll(CHANNEL, sw.toString()); + } catch (final Exception e) { + LOG.debug("Failed to broadcast monitor snapshot: {}", e.getMessage()); + } + } + + private static String buildEventMessage(final String event, final String queryId, + final String user, @Nullable final String query, + @Nullable final String phase, final long items, + final long elapsed) throws IOException { + final StringWriter sw = new StringWriter(); + try (final JsonGenerator gen = JSON_FACTORY.createGenerator(sw)) { + gen.writeStartObject(); + gen.writeStringField("type", "monitor"); + gen.writeStringField("event", event); + gen.writeStringField("queryId", queryId); + gen.writeStringField("user", user); + if (query != null) { + gen.writeStringField("source", + query.length() > 
MAX_SOURCE_LENGTH + ? query.substring(0, MAX_SOURCE_LENGTH) + "..." + : query); + } + if (phase != null) { + gen.writeStringField("phase", phase); + } + gen.writeNumberField("items", items); + gen.writeNumberField("elapsed", elapsed); + gen.writeEndObject(); + } + return sw.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/jetty/JettyStart.java b/exist-core/src/main/java/org/exist/jetty/JettyStart.java index 3225fbab227..d58da3abaff 100644 --- a/exist-core/src/main/java/org/exist/jetty/JettyStart.java +++ b/exist-core/src/main/java/org/exist/jetty/JettyStart.java @@ -26,14 +26,12 @@ import org.apache.logging.log4j.Logger; import org.eclipse.jetty.server.*; import org.eclipse.jetty.server.handler.ContextHandler; -import org.eclipse.jetty.server.handler.HandlerWrapper; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; +import org.eclipse.jetty.ee10.servlet.ServletContextHandler; +import org.eclipse.jetty.ee10.servlet.ServletHolder; import org.eclipse.jetty.util.Jetty; -import org.eclipse.jetty.util.MultiException; import org.eclipse.jetty.util.component.LifeCycle; -import org.eclipse.jetty.util.resource.PathResource; import org.eclipse.jetty.util.resource.Resource; +import org.eclipse.jetty.util.resource.ResourceFactory; import org.eclipse.jetty.xml.XmlConfiguration; import org.exist.SystemProperties; import org.exist.http.servlets.ExistExtensionServlet; @@ -68,7 +66,7 @@ /** * This class provides a main method to start Jetty with eXist. It registers shutdown * handlers to cleanly shut down the database and the webserver. 
- * + * * @author wolf */ public class JettyStart extends Observable implements LifeCycle.Listener { @@ -149,8 +147,6 @@ public synchronized void run(final boolean standalone) { return jettyPath; }); - System.setProperty("org.eclipse.jetty.util.log.class", "org.eclipse.jetty.util.log.Slf4jLog"); - final Path jettyConfig; if (standalone) { jettyConfig = Paths.get(jettyProperty).normalize().resolve("etc").resolve(Main.STANDALONE_ENABLED_JETTY_CONFIGS); @@ -159,7 +155,7 @@ public synchronized void run(final boolean standalone) { } run(new String[] { jettyConfig.toAbsolutePath().toString() }, null); } - + public synchronized void run(final String[] args, final Observer observer) { if (args.length == 0) { logger.error("No configuration file specified!"); @@ -261,7 +257,7 @@ public synchronized void run(final String[] args, final Observer observer) { XmlConfiguration last = null; for(final Path confFile : configFiles) { logger.info("[loading jetty configuration : {}]", confFile.toString()); - final Resource resource = new PathResource(confFile); + final Resource resource = ResourceFactory.root().newResource(confFile); final XmlConfiguration configuration = new XmlConfiguration(resource); if (last != null) { configuration.getIdMap().putAll(last.getIdMap()); @@ -271,9 +267,12 @@ public synchronized void run(final String[] args, final Observer observer) { last = configuration; } + // configure WebSocket on any ServletContextHandler + configureWebSocket(configuredObjects); + // start Jetty final Optional maybeServer = startJetty(configuredObjects); - if(!maybeServer.isPresent()) { + if(maybeServer.isEmpty()) { logger.error("Unable to find a server to start in jetty configurations"); throw new IllegalStateException(); } @@ -303,7 +302,7 @@ public synchronized void run(final String[] args, final Observer observer) { allPorts.append(networkConnector.getLocalPort()); } } - + //************************************************************* final List serverUris = 
getSeverURIs(server); if(!serverUris.isEmpty()) { @@ -317,9 +316,9 @@ public synchronized void run(final String[] args, final Observer observer) { } logger.info("Configured contexts:"); - final LinkedHashSet handlers = getAllHandlers(server.getHandler()); + final List handlers = getAllHandlers(server.getHandler()); for (final Handler handler: handlers) { - + if (handler instanceof ContextHandler contextHandler) { logger.info("{} ({})", contextHandler.getContextPath(), contextHandler.getDisplayName()); } @@ -348,29 +347,7 @@ public synchronized void run(final String[] args, final Observer observer) { setChanged(); notifyObservers(SIGNAL_STARTED); - - } catch (final MultiException e) { - - // Mute the BindExceptions - - boolean hasBindException = false; - for (final Throwable t : e.getThrowables()) { - if (t instanceof java.net.BindException) { - hasBindException = true; - logger.error("----------------------------------------------------------"); - logger.error("ERROR: Could not bind to port because {}", t.getMessage()); - logger.error(t.toString()); - logger.error("----------------------------------------------------------"); - } - } - // If it is another error, print stacktrace - if (!hasBindException) { - e.printStackTrace(); - } - setChanged(); - notifyObservers(SIGNAL_ERROR); - } catch (final SocketException e) { logger.error("----------------------------------------------------------"); logger.error("ERROR: Could not bind to port because {}", e.getMessage()); @@ -378,44 +355,65 @@ public synchronized void run(final String[] args, final Observer observer) { logger.error("----------------------------------------------------------"); setChanged(); notifyObservers(SIGNAL_ERROR); - + } catch (final Exception e) { - e.printStackTrace(); + logger.fatal("An unexpected error occurred, web server can not be started: {}", e.getMessage(), e); setChanged(); notifyObservers(SIGNAL_ERROR); } } - private LinkedHashSet getAllHandlers(final Handler handler) { - if(handler 
instanceof HandlerWrapper handlerWrapper) { - final LinkedHashSet handlers = new LinkedHashSet<>(); - handlers.add(handlerWrapper); - if(handlerWrapper.getHandler() != null) { - handlers.addAll(getAllHandlers(handlerWrapper.getHandler())); + private void configureWebSocket(final List configuredObjects) { + for (final Object obj : configuredObjects) { + if (obj instanceof Server server) { + final List handlers = getAllHandlers(server.getHandler()); + for (final Handler handler : handlers) { + if (handler instanceof ServletContextHandler sch) { + try { + org.eclipse.jetty.ee10.websocket.jakarta.server.config.JakartaWebSocketServletContainerInitializer + .configure(sch, (servletContext, serverContainer) -> { + serverContainer.addEndpoint( + org.exist.xquery.functions.websocket.WebSocketEndpoint.class); + logger.info("[WebSocket endpoint registered: /ws]"); + serverContainer.addEndpoint( + org.exist.http.ws.EvalWebSocketEndpoint.class); + logger.info("[WebSocket endpoint registered: /ws/eval]"); + }); + org.exist.xquery.functions.websocket.WebSocketEndpoint.initialize(); + return; // only need to configure once + } catch (final Exception e) { + logger.warn("Failed to configure WebSocket endpoint: {}", e.getMessage(), e); + } + } + } } - return handlers; + } + } - } else if(handler instanceof HandlerContainer handlerContainer) { - final LinkedHashSet handlers = new LinkedHashSet<>(); - handlers.add(handler); - for(final Handler childHandler : handlerContainer.getChildHandlers()) { + private List getAllHandlers(final Handler handler) { + final List handlers = new ArrayList<>(); + handlers.add(handler); + + if (handler instanceof Handler.Wrapper wrapper) { + if (wrapper.getHandler() != null) { + handlers.addAll(getAllHandlers(wrapper.getHandler())); + } + } else if (handler instanceof Handler.Container container) { + for (final Handler childHandler : container.getHandlers()) { handlers.addAll(getAllHandlers(childHandler)); } - return handlers; - - } else { - //assuming 
just Handler - final LinkedHashSet handlers = new LinkedHashSet<>(); - handlers.add(handler); - return handlers; } + + return handlers; } /** * See {@link Server#getURI()} */ private List getSeverURIs(final Server server) { - final ContextHandler context = server.getChildHandlerByClass(ContextHandler.class); + final ContextHandler context = server.getHandler() instanceof Handler.Container container + ? container.getDescendant(ContextHandler.class) + : null; return Arrays.stream(server.getConnectors()) .filter(connector -> connector instanceof NetworkConnector) .map(connector -> (NetworkConnector)connector) @@ -438,9 +436,13 @@ private URI getURI(final NetworkConnector networkConnector, final ContextHandler } String host = null; - if (context != null && context.getVirtualHosts() != null && context.getVirtualHosts().length > 0) { - host = context.getVirtualHosts()[0]; - } else { + if (context != null) { + final List virtualHosts = context.getVirtualHosts(); + if (virtualHosts != null && !virtualHosts.isEmpty()) { + host = virtualHosts.getFirst(); + } + } + if (host == null) { host = networkConnector.getHost(); } @@ -492,11 +494,9 @@ private Optional startJetty(final List configuredObjects) throws server = Optional.of(_server); } - if (configuredObject instanceof LifeCycle lc) { - if (!lc.isRunning()) { - logger.info("[Starting jetty component : {}]", lc.getClass().getName()); - lc.start(); - } + if (configuredObject instanceof LifeCycle lc && !lc.isRunning()) { + logger.info("[Starting jetty component : {}]", lc.getClass().getName()); + lc.start(); } } @@ -562,14 +562,14 @@ public synchronized void shutdown() { logger.warn("Unable to remove BrokerPoolsAndJetty.ShutdownHook hook: {}", e.getMessage()); } }); - + BrokerPool.stopAll(false); - + while (status != STATUS_STOPPED) { try { wait(); } catch (final InterruptedException e) { - // ignore + Thread.currentThread().interrupt(); } } } @@ -603,7 +603,7 @@ public void run() { // make sure to stop the timer thread! 
timer.cancel(); } catch (final Exception e) { - e.printStackTrace(); + logger.error("An error occurred in the shutdown scheduler: {}", e.getMessage(), e); } } }, 1000); // timer.schedule @@ -644,6 +644,7 @@ public synchronized boolean isStarted() { try { wait(); } catch (final InterruptedException e) { + Thread.currentThread().interrupt(); } } return false; @@ -669,6 +670,7 @@ public synchronized void lifeCycleStarted(final LifeCycle lifeCycle) { @Override public void lifeCycleFailure(final LifeCycle lifeCycle, final Throwable throwable) { + // no-op } @Override diff --git a/exist-core/src/main/java/org/exist/jetty/WebAppContext.java b/exist-core/src/main/java/org/exist/jetty/WebAppContext.java index d07a431fa50..660bd0cb545 100644 --- a/exist-core/src/main/java/org/exist/jetty/WebAppContext.java +++ b/exist-core/src/main/java/org/exist/jetty/WebAppContext.java @@ -27,17 +27,17 @@ * @author Dmitriy Shabanov * */ -public class WebAppContext extends org.eclipse.jetty.webapp.WebAppContext { - +public class WebAppContext extends org.eclipse.jetty.ee10.webapp.WebAppContext { + @Override public String toString() { return "eXist-db Open Source Native XML Database"; } - + @Override protected void doStop() throws Exception { super.doStop(); - + BrokerPool.stopAll(true); } diff --git a/exist-core/src/main/java/org/exist/repo/ExistPkgInfo.java b/exist-core/src/main/java/org/exist/repo/ExistPkgInfo.java index 84806e3a4e6..cd9e00dc45c 100644 --- a/exist-core/src/main/java/org/exist/repo/ExistPkgInfo.java +++ b/exist-core/src/main/java/org/exist/repo/ExistPkgInfo.java @@ -69,6 +69,10 @@ public Set getJavaModules() { return myJava.keySet(); } + public Set getXQueryModules() { + return myXquery.keySet(); + } + public void addJar(String jar) { myJars.add(jar); } diff --git a/exist-core/src/main/java/org/exist/repo/ExistRepository.java b/exist-core/src/main/java/org/exist/repo/ExistRepository.java index 6c6ec6fee9b..354a2278634 100644 --- 
a/exist-core/src/main/java/org/exist/repo/ExistRepository.java +++ b/exist-core/src/main/java/org/exist/repo/ExistRepository.java @@ -334,6 +334,48 @@ public List getJavaModules() { return modules; } + public List getXQueryModules() { + final List modules = new ArrayList<>(); + for (final Packages pp : myParent.listPackages()) { + final Package pkg = pp.latest(); + // 1. XQuery modules declared in exist.xml + final ExistPkgInfo info = (ExistPkgInfo) pkg.getInfo("exist"); + if (info != null) { + modules.addAll(info.getXQueryModules()); + } + // 2. XQuery modules declared in expath-pkg.xml (standard EXPath components) + final FileSystemResolver resolver = (FileSystemResolver) pkg.getResolver(); + final Path pkgDescriptor = resolver.resolveResourceAsFile("expath-pkg.xml"); + if (pkgDescriptor != null && Files.exists(pkgDescriptor)) { + try { + final javax.xml.parsers.DocumentBuilderFactory dbf = javax.xml.parsers.DocumentBuilderFactory.newInstance(); + dbf.setNamespaceAware(true); + final Document doc = dbf.newDocumentBuilder().parse(pkgDescriptor.toFile()); + final org.w3c.dom.NodeList xqueryElements = doc.getElementsByTagNameNS( + "http://expath.org/ns/pkg", "xquery"); + for (int i = 0; i < xqueryElements.getLength(); i++) { + final org.w3c.dom.Element xquery = (org.w3c.dom.Element) xqueryElements.item(i); + final org.w3c.dom.NodeList nsElements = xquery.getElementsByTagNameNS( + "http://expath.org/ns/pkg", "namespace"); + for (int j = 0; j < nsElements.getLength(); j++) { + final String ns = nsElements.item(j).getTextContent().trim(); + if (!ns.isEmpty()) { + try { + modules.add(new URI(ns)); + } catch (final URISyntaxException e) { + LOG.debug("Invalid namespace URI in expath-pkg.xml: {}", ns); + } + } + } + } + } catch (final Exception e) { + LOG.debug("Error parsing expath-pkg.xml for package {}: {}", pkg.getName(), e.getMessage()); + } + } + } + return modules; + } + public static Path getRepositoryDir(final Configuration config) throws IOException { final Path 
dataDir = Optional.ofNullable((Path) config.getProperty(BrokerPool.PROPERTY_DATA_DIR)) .orElse(Paths.get(NativeBroker.DEFAULT_DATA_DIR)); diff --git a/exist-core/src/main/java/org/exist/source/AbstractSource.java b/exist-core/src/main/java/org/exist/source/AbstractSource.java index 24bbdf9ebbb..aa90293a5aa 100644 --- a/exist-core/src/main/java/org/exist/source/AbstractSource.java +++ b/exist-core/src/main/java/org/exist/source/AbstractSource.java @@ -89,6 +89,9 @@ public QName isModule() throws IOException { * @param is the input stream * @return The guessed encoding. */ + // TODO(rd-parser): DeclScanner is a lightweight ANTLR 2 pre-scanner that extracts + // version/encoding declarations without full parsing. The rd parser may need an + // equivalent lightweight method (e.g., XQueryParser.scanVersionDecl). protected static String guessXQueryEncoding(final InputStream is) { final XQueryLexer lexer = new XQueryLexer(null, new InputStreamReader(is)); final DeclScanner scanner = new DeclScanner(lexer); diff --git a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java index ca85a06f5fe..f2dbb185acb 100644 --- a/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java +++ b/exist-core/src/main/java/org/exist/storage/serializers/EXistOutputKeys.java @@ -28,6 +28,18 @@ public class EXistOutputKeys { */ public static final String ITEM_SEPARATOR = "item-separator"; + // --- QT4 Serialization 4.0 parameters --- + public static final String CANONICAL = "canonical"; + public static final String ESCAPE_SOLIDUS = "escape-solidus"; + public static final String JSON_LINES = "json-lines"; + + // --- CSV serialization parameters --- + public static final String CSV_FIELD_DELIMITER = "csv.field-delimiter"; + public static final String CSV_ROW_DELIMITER = "csv.row-delimiter"; + public static final String CSV_QUOTE_CHARACTER = "csv.quote-character"; + public static 
final String CSV_HEADER = "csv.header"; + public static final String CSV_QUOTES = "csv.quotes"; + public static final String OMIT_ORIGINAL_XML_DECLARATION = "omit-original-xml-declaration"; public static final String OUTPUT_DOCTYPE = "output-doctype"; diff --git a/exist-core/src/main/java/org/exist/storage/serializers/Serializer.java b/exist-core/src/main/java/org/exist/storage/serializers/Serializer.java index e2f2166443b..27b5aaec44e 100644 --- a/exist-core/src/main/java/org/exist/storage/serializers/Serializer.java +++ b/exist-core/src/main/java/org/exist/storage/serializers/Serializer.java @@ -825,7 +825,8 @@ public void setStylesheet(final Document doc, final @Nullable String stylesheet) // restore handlers receiver = oldReceiver; - factory.get().setURIResolver(null); + // Saxon 12 rejects null URIResolver; reset to default identity resolver + factory.get().setURIResolver((href, base) -> null); } LOG.debug("compiling stylesheet took {}", System.currentTimeMillis() - start); if (templates != null) { diff --git a/exist-core/src/main/java/org/exist/storage/serializers/XIncludeFilter.java b/exist-core/src/main/java/org/exist/storage/serializers/XIncludeFilter.java index 36ec6696c29..3de7672ad8a 100644 --- a/exist-core/src/main/java/org/exist/storage/serializers/XIncludeFilter.java +++ b/exist-core/src/main/java/org/exist/storage/serializers/XIncludeFilter.java @@ -88,6 +88,8 @@ public class XIncludeFilter implements Receiver { private static final QName HREF_ATTRIB = new QName("href", XMLConstants.NULL_NS_URI); private static final QName XPOINTER_ATTRIB = new QName("xpointer", XMLConstants.NULL_NS_URI); + private static final QName PARSE_ATTRIB = new QName("parse", XMLConstants.NULL_NS_URI); + private static final QName ENCODING_ATTRIB = new QName("encoding", XMLConstants.NULL_NS_URI); private static final String XI_INCLUDE = "include"; private static final String XI_FALLBACK = "fallback"; @@ -112,6 +114,9 @@ private ResourceError(final String message) { private 
@Nullable String moduleLoadPath = null; private @Nullable Map namespaces = null; private boolean inFallback = false; + private int inIncludeDepth = 0; // depth of non-XInclude elements inside xi:include (for suppressing non-fallback children) + private boolean suppressIncludeChildren = false; // true while processing xi:include's own children (not expanded content) + private int fallbackCount = 0; // count of xi:fallback children in current xi:include private @Nullable ResourceError error = null; public XIncludeFilter(final Serializer serializer, @Nullable final Receiver receiver) { @@ -132,6 +137,9 @@ public void reset() { this.moduleLoadPath = null; this.namespaces = null; this.inFallback = false; + this.inIncludeDepth = 0; + this.suppressIncludeChildren = false; + this.fallbackCount = 0; this.error = null; } @@ -155,6 +163,9 @@ public void setModuleLoadPath(final String path) { @Override public void characters(final CharSequence seq) throws SAXException { + if (suppressIncludeChildren && !inFallback) { + return; // suppress non-fallback content inside xi:include + } if (!inFallback || error != null) { receiver.characters(seq); } @@ -162,6 +173,9 @@ public void characters(final CharSequence seq) throws SAXException { @Override public void comment(final char[] ch, final int start, final int length) throws SAXException { + if (suppressIncludeChildren && !inFallback) { + return; // suppress non-fallback content inside xi:include + } if (!inFallback || error != null) { receiver.comment(ch, start, length); } @@ -179,13 +193,19 @@ public void endElement(final QName qname) throws SAXException { inFallback = false; // clear error error = null; - } else if (XI_INCLUDE.equals(qname.getLocalPart()) && error != null) { - // found an error, but there was no fallback element. 
- // throw the exception now - final SAXException e = error.cause.map(cause -> new SAXException(error.message, cause)).orElse(new SAXException(error.message)); - error = null; - throw e; + } else if (XI_INCLUDE.equals(qname.getLocalPart())) { + inIncludeDepth--; + suppressIncludeChildren = (inIncludeDepth > 0); // restore suppression if nested + if (error != null) { + // found an error, but there was no fallback element. + // throw the exception now + final SAXException e = error.cause.map(cause -> new SAXException(error.message, cause)).orElse(new SAXException(error.message)); + error = null; + throw e; + } } + } else if (suppressIncludeChildren && !inFallback) { + // Inside xi:include but not in fallback — suppress non-fallback children per spec } else if (!inFallback || error != null) { receiver.endElement(qname); } @@ -237,8 +257,28 @@ public void startElement(final QName qname, final AttrList attribs) throws SAXEx if (LOG.isDebugEnabled()) { LOG.debug("processing include ..."); } + inIncludeDepth++; + + // Validate parse attribute (per spec 4.1: must be "xml" or "text") + final String parseMode = attribs.getValue(PARSE_ATTRIB); + if (parseMode != null && !"xml".equals(parseMode) && !"text".equals(parseMode)) { + throw new SAXException("Invalid value for parse attribute: '" + parseMode + + "'. 
Must be 'xml' or 'text'."); + } + + // processXInclude will serialize included content through this filter; + // suppress only the xi:include's own children (not the expanded content) + final boolean prevSuppress = suppressIncludeChildren; + suppressIncludeChildren = false; // allow expanded content through - final Optional maybeResourceError = processXInclude(attribs.getValue(HREF_ATTRIB), attribs.getValue(XPOINTER_ATTRIB)); + final String encoding = attribs.getValue(ENCODING_ATTRIB); + final Optional maybeResourceError = processXInclude( + attribs.getValue(HREF_ATTRIB), attribs.getValue(XPOINTER_ATTRIB), + parseMode, encoding); + + // After processXInclude returns, any remaining SAX events until + // are the xi:include's own children — suppress them (except fallback) + suppressIncludeChildren = true; if (maybeResourceError.isPresent()) { final ResourceError resourceError = maybeResourceError.get(); @@ -250,6 +290,8 @@ public void startElement(final QName qname, final AttrList attribs) throws SAXEx } else if (qname.getLocalPart().equals(XI_FALLBACK)) { inFallback = true; } + } else if (suppressIncludeChildren && !inFallback) { + // Inside xi:include but not in fallback — suppress non-fallback children per spec } else if (!inFallback || error != null) { //LOG.debug("start: " + qName); receiver.startElement(qname, attribs); @@ -270,12 +312,24 @@ public void highlightText(final CharSequence seq) { /** * @param href The resource to be xincluded * @param xpointer The xpointer + * @param parseMode The parse mode ("xml" or "text"), null defaults to "xml" + * @param encoding The encoding for text inclusion, null defaults to UTF-8 * @return Optionally a ResourceError if it was not possible to retrieve the resource * to be xincluded * @throws SAXException If a SAX processing error occurs */ - protected Optional processXInclude(final String href, String xpointer) throws SAXException { - if (href == null) { + protected Optional processXInclude(final String href, String 
xpointer, + @Nullable final String parseMode, + @Nullable final String encoding) throws SAXException { + if (href == null && xpointer == null) { + throw new SAXException("No href or xpointer attribute found in XInclude include element"); + } + + // Intra-document reference: xpointer without href (or href="") + if (href == null || href.isEmpty()) { + if (xpointer != null) { + return processIntraDocumentXPointer(xpointer); + } throw new SAXException("No href attribute found in XInclude include element"); } // save some settings @@ -400,6 +454,20 @@ protected Optional processXInclude(final String href, String xpoi return Optional.of(new ResourceError("document " + docUri + " not found")); } + // Handle parse="text" — include resource as text, not XML + if ("text".equals(parseMode)) { + final String textContent = readResourceAsText(doc, memtreeDoc, docUri, href, encoding); + if (textContent != null) { + characters(textContent); + } else { + return Optional.of(new ResourceError("Unable to read text content from " + (docUri != null ? 
docUri : href))); + } + // restore settings and return + document = prevDoc; + serializer.createContainerElements = createContainerElements; + return Optional.empty(); + } + if (xpointer == null && !xqueryDoc) { // no xpointer found - just serialize the doc if (memtreeDoc == null) { @@ -412,11 +480,28 @@ protected Optional processXInclude(final String href, String xpoi Source source = null; final XQueryPool pool = serializer.broker.getBrokerPool().getXQueryPool(); CompiledXQuery compiled = null; + boolean wasElementScheme = false; try { if (xpointer == null) { source = new DBSource(serializer.broker.getBrokerPool(), (BinaryDocument) doc, true); } else { + wasElementScheme = xpointer.trim().startsWith("element("); + xpointer = convertXPointerToXPath(xpointer); xpointer = checkNamespaces(xpointer); + // element() scheme produces XPath — needs doc() context + // and must be compiled as regular XQuery, not xpointer mode + if (wasElementScheme) { + final XmldbURI contextDocUri = doc != null ? 
doc.getURI() : docUri; + if (contextDocUri != null) { + if (xpointer.startsWith("/")) { + // Child sequence: /1/2 -> doc('...')/*[1]/*[2] + xpointer = "doc('" + contextDocUri + "')" + xpointer; + } else if (xpointer.startsWith("id(")) { + // ID-based: id('x') -> doc('...')/id('x') + xpointer = "doc('" + contextDocUri + "')/" + xpointer; + } + } + } source = new StringSource(xpointer); } final XQuery xquery = serializer.broker.getBrokerPool().getXQueryService(); @@ -461,7 +546,10 @@ protected Optional processXInclude(final String href, String xpoi if (compiled == null) { try { - compiled = xquery.compile(context, source, xpointer != null); + // element() scheme expressions are converted to regular XQuery + // (doc('...')/*[1]) and must not use xpointer compilation mode + final boolean useXPointerMode = xpointer != null && !wasElementScheme; + compiled = xquery.compile(context, source, useXPointerMode); } catch (final IOException e) { throw new SAXException("I/O error while reading query for xinclude: " + e.getMessage(), e); } @@ -500,8 +588,12 @@ protected Optional processXInclude(final String href, String xpoi } } catch (final XPathException | PermissionDeniedException e) { + // XPointer evaluation failures are resource errors per XInclude spec 4.2, + // not fatal errors. Return as ResourceError to allow fallback processing. LOG.warn("xpointer error", e); - throw new SAXException("Error while processing XInclude expression: " + e.getMessage(), e); + document = prevDoc; + serializer.createContainerElements = createContainerElements; + return Optional.of(new ResourceError("Error while processing XInclude expression: " + e.getMessage(), e)); } finally { if (compiled != null) { pool.returnCompiledXQuery(source, compiled); @@ -515,6 +607,251 @@ protected Optional processXInclude(final String href, String xpoi return Optional.empty(); } + /** + * Handle intra-document XPointer references (xpointer without href). 
+ * Per XInclude spec, when href is absent or empty, the xpointer is evaluated + * against the current document. + */ + private Optional processIntraDocumentXPointer(String xpointer) throws SAXException { + if (document == null) { + return Optional.of(new ResourceError("No current document for intra-document XPointer reference")); + } + + // Convert element() scheme to XPath if needed + xpointer = convertXPointerToXPath(xpointer); + + // For absolute XPath expressions (from element() scheme), wrap with doc() + // to ensure the document context is properly set + final String docUri = document.getURI().toString(); + if (xpointer.startsWith("/")) { + xpointer = "doc('" + docUri + "')" + xpointer; + } else if (xpointer.startsWith("id(")) { + // id() needs the document context — wrap: doc('...')/id('...') + xpointer = "doc('" + docUri + "')/" + xpointer; + } + + final XQueryPool pool = serializer.broker.getBrokerPool().getXQueryPool(); + CompiledXQuery compiled = null; + Source source = null; + try { + xpointer = checkNamespaces(xpointer); + source = new StringSource(xpointer); + final XQuery xquery = serializer.broker.getBrokerPool().getXQueryService(); + XQueryContext context; + compiled = pool.borrowCompiledXQuery(serializer.broker, source); + if (compiled == null) { + context = new XQueryContext(serializer.broker.getBrokerPool()); + } else { + context = compiled.getContext(); + context.prepareForReuse(); + } + if (namespaces != null) { + context.declareNamespaces(namespaces); + } + context.declareNamespace("xinclude", Namespaces.XINCLUDE_NS); + // Set the current document as the statically known document + context.setStaticallyKnownDocuments(new XmldbURI[]{document.getURI()}); + + if (compiled == null) { + compiled = xquery.compile(context, source, true); + } else { + compiled.getContext().updateContext(context); + context.getWatchDog().reset(); + } + + try { + final Sequence seq = xquery.execute(serializer.broker, compiled, null); + if 
(Type.subTypeOf(seq.getItemType(), Type.NODE)) { + NodeValue node; + for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { + node = (NodeValue) i.nextItem(); + serializer.serializeToReceiver(node, false); + } + } else { + for (int i = 0; i < seq.getItemCount(); i++) { + characters(seq.itemAt(i).getStringValue()); + } + } + } finally { + context.runCleanupTasks(); + } + return Optional.empty(); + } catch (final XPathException | PermissionDeniedException e) { + LOG.warn("intra-document xpointer error", e); + throw new SAXException("Error while processing intra-document XPointer: " + e.getMessage(), e); + } catch (final IOException e) { + throw new SAXException("I/O error while reading intra-document XPointer query: " + e.getMessage(), e); + } finally { + if (compiled != null) { + pool.returnCompiledXQuery(source, compiled); + } + } + } + + /** + * Convert XPointer element() scheme to XPath expressions. + * The xpointer() scheme is handled natively by the ANTLR parser's xpointer() rule, + * so we leave it as-is and only convert element() scheme pointers. + * + * Handles: + * element(/1) -> /node()[1] + * element(/1/2/3) -> /node()[1]/node()[2]/node()[3] + * element(myid) -> id('myid') + * element(myid/2/3) -> id('myid')/node()[2]/node()[3] + * xpointer(expr) -> xpointer(expr) (left for ANTLR parser) + * xmlns(...)element() -> strips xmlns(), converts element() + */ + private static String convertXPointerToXPath(String xpointer) { + xpointer = xpointer.trim(); + + // xpointer() scheme — leave as-is; the ANTLR parser's xpointer() rule handles it + if (xpointer.startsWith("xpointer(")) { + return xpointer; + } + + // Handle element() scheme + if (xpointer.startsWith("element(") && xpointer.endsWith(")")) { + final String content = xpointer.substring(8, xpointer.length() - 1).trim(); + return convertElementSchemeToXPath(content); + } + + // Handle multiple schemes: xmlns(...)element(...) 
+ // Strip xmlns() schemes first (handled by checkNamespaces), then look for element() + if (xpointer.contains("element(")) { + int idx = 0; + while (idx < xpointer.length()) { + if (xpointer.startsWith("xmlns(", idx)) { + final int close = xpointer.indexOf(')', idx); + if (close > 0) { + idx = close + 1; + continue; + } + } + break; + } + if (idx > 0 && idx < xpointer.length()) { + return convertXPointerToXPath(xpointer.substring(idx)); + } + } + + return xpointer; + } + + /** + * Convert element() scheme content to XPath. + * Per XPointer element() scheme spec, child sequences use 1-based + * element positions (not node positions), so we use *[N] not node()[N]. + */ + private static String convertElementSchemeToXPath(final String content) { + if (content.startsWith("/")) { + // Child sequence: /1/2/3 -> /*[1]/*[2]/*[3] + final String[] parts = content.substring(1).split("/"); + final StringBuilder xpath = new StringBuilder(); + for (final String part : parts) { + xpath.append("/*[").append(part.trim()).append("]"); + } + return xpath.toString(); + } else if (content.contains("/")) { + // ID + child sequence: myid/2/3 -> id('myid')/*[2]/*[3] + final String[] parts = content.split("/"); + final StringBuilder xpath = new StringBuilder("id('").append(parts[0].trim()).append("')"); + for (int i = 1; i < parts.length; i++) { + xpath.append("/*[").append(parts[i].trim()).append("]"); + } + return xpath.toString(); + } else { + // Just an ID: myid -> id('myid') + return "id('" + content.trim() + "')"; + } + } + + /** + * Read a resource as text for parse="text" inclusion. + * + *

Per the XInclude spec, when parse="text", the resource is read as plain text + * and included as character data. XML special characters in the included text are + * preserved as-is (they will be escaped during serialization).

+ * + *

Architectural note: BaseX delegates XInclude entirely to Java's built-in + * SAXParserFactory.setXIncludeAware(true), which handles parse="text" at document + * import time. eXist's approach (serialization-time XIncludeFilter) is more powerful + * (works on stored documents) but requires implementing each XInclude feature + * explicitly. A complementary parse-time XInclude option (like BaseX) could be + * added as a future enhancement.

+ */ + private @Nullable String readResourceAsText(@Nullable final DocumentImpl doc, + @Nullable final org.exist.dom.memtree.DocumentImpl memtreeDoc, + @Nullable final XmldbURI docUri, + final String href, + @Nullable final String encoding) { + final java.nio.charset.Charset charset; + try { + charset = encoding != null ? java.nio.charset.Charset.forName(encoding) : UTF_8; + } catch (final java.nio.charset.UnsupportedCharsetException e) { + LOG.warn("Unsupported encoding '{}' for text inclusion, falling back to UTF-8", encoding); + return readResourceAsText(doc, memtreeDoc, docUri, href, null); + } + + // Case 1: Binary document in database — read raw bytes + if (doc != null && doc.getResourceType() == DocumentImpl.BINARY_FILE) { + try (final InputStream is = serializer.broker.getBinaryResource((BinaryDocument) doc)) { + return new String(is.readAllBytes(), charset); + } catch (final IOException e) { + LOG.warn("Error reading binary resource as text: {}", docUri, e); + return null; + } + } + + // Case 2: XML document in database — serialize to string (text representation) + if (doc != null) { + // For XML documents with parse="text", we serialize the document to its + // XML text representation and include that as character data. + // Per XInclude spec, the XML declaration is NOT part of the text inclusion. 
+ try { + final Serializer tempSerializer = serializer.broker.borrowSerializer(); + try { + tempSerializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes"); + tempSerializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no"); + return tempSerializer.serialize(doc); + } finally { + serializer.broker.returnSerializer(tempSerializer); + } + } catch (final Exception e) { + LOG.warn("Error serializing XML document as text: {}", docUri, e); + return null; + } + } + + // Case 3: In-memory document + if (memtreeDoc != null) { + try { + final Serializer tempSerializer = serializer.broker.borrowSerializer(); + try { + tempSerializer.setProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes"); + tempSerializer.setProperty(javax.xml.transform.OutputKeys.INDENT, "no"); + return tempSerializer.serialize(memtreeDoc); + } finally { + serializer.broker.returnSerializer(tempSerializer); + } + } catch (final Exception e) { + LOG.warn("Error serializing in-memory document as text: {}", href, e); + return null; + } + } + + // Case 4: External URI — read from URL + try { + final URI externalUri = new URI(href); + final URLConnection con = externalUri.toURL().openConnection(); + try (final InputStream is = con.getInputStream()) { + return new String(is.readAllBytes(), charset); + } + } catch (final Exception e) { + LOG.warn("Error reading external resource as text: {}", href, e); + return null; + } + } + private Either parseExternal(final URI externalUri) throws ParserConfigurationException, SAXException { try { final URLConnection con = externalUri.toURL().openConnection(); diff --git a/exist-core/src/main/java/org/exist/util/Collations.java b/exist-core/src/main/java/org/exist/util/Collations.java index 2d03138a291..64183619e30 100644 --- a/exist-core/src/main/java/org/exist/util/Collations.java +++ b/exist-core/src/main/java/org/exist/util/Collations.java @@ -75,6 +75,11 @@ public class Collations { */ public final static String 
HTML_ASCII_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/html-ascii-case-insensitive"; + /** + * The Unicode Case-Insensitive Collation as defined by XPath F&O 4.0. + */ + public final static String UNICODE_CASE_INSENSITIVE_COLLATION_URI = "http://www.w3.org/2005/xpath-functions/collation/unicode-case-insensitive"; + /** * The XQTS ASCII Case-blind Collation as defined by the XQTS 3.1. */ @@ -90,6 +95,11 @@ public class Collations { */ private final static AtomicReference htmlAsciiCaseInsensitiveCollator = new AtomicReference<>(); + /** + * Lazy-initialized singleton Unicode Case Insensitive Collator + */ + private final static AtomicReference unicodeCaseInsensitiveCollator = new AtomicReference<>(); + /** * Lazy-initialized singleton XQTS Case Blind Collator */ @@ -276,6 +286,12 @@ public class Collations { } catch (final Exception e) { throw new XPathException(expression, "Unable to instantiate HTML ASCII Case Insensitive Collator: " + e.getMessage(), e); } + } else if(UNICODE_CASE_INSENSITIVE_COLLATION_URI.equals(uri)) { + try { + collator = getUnicodeCaseInsensitiveCollator(); + } catch (final Exception e) { + throw new XPathException(expression, "Unable to instantiate Unicode Case Insensitive Collator: " + e.getMessage(), e); + } } else if(XQTS_ASCII_CASE_BLIND_COLLATION_URI.equals(uri)) { try { collator = getXqtsAsciiCaseBlindCollator(); @@ -344,14 +360,43 @@ public static boolean equals(@Nullable final Collator collator, final String s1, * * @throws UnsupportedOperationException if ICU4J does not support collation */ - public static int compare(@Nullable final Collator collator, final String s1,final String s2) { + public static int compare(@Nullable final Collator collator, final String s1, final String s2) { if (collator == null) { - return s1 == null ? (s2 == null ? 0 : -1) : s1.compareTo(s2); + if (s1 == null) { + return s2 == null ? 
0 : -1; + } + return compareByCodepoint(s1, s2); } else { return collator.compare(s1, s2); } } + /** + * Compares two strings by Unicode codepoints rather than UTF-16 code units. + * {@link String#compareTo(String)} compares {@code char} (UTF-16) values, which gives + * incorrect ordering for supplementary characters (U+10000 and above) that are encoded + * as surrogate pairs. + * + * @param a the first string to compare. + * @param b the second string to compare. + * @return a negative integer, zero, or a positive integer if {@code a} is less than, + * equal to, or greater than {@code b} by codepoint order. + */ + private static int compareByCodepoint(final String a, final String b) { + int i1 = 0, i2 = 0; + while (i1 < a.length() && i2 < b.length()) { + final int cp1 = a.codePointAt(i1); + final int cp2 = b.codePointAt(i2); + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + // Shorter string is less; equal length means equal + return (a.length() - i1) - (b.length() - i2); + } + /** * Determines if one string starts with another with regards to a Collation. 
* @@ -371,10 +416,16 @@ public static boolean startsWith(@Nullable final Collator collator, final String return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first() == 0; + } else { + // Fallback for non-RuleBasedCollator (e.g., HtmlAsciiCaseInsensitiveCollator) + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(0, s2.length()), s2) == 0; + } + return false; } } } @@ -398,9 +449,9 @@ public static boolean endsWith(@Nullable final Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); int lastPos = SearchIterator.DONE; int lastLen = 0; for (int pos = searchIterator.first(); pos != SearchIterator.DONE; @@ -410,6 +461,12 @@ public static boolean endsWith(@Nullable final Collator collator, final String s } return lastPos > SearchIterator.DONE && lastPos + lastLen == s1.length(); + } else { + // Fallback for non-RuleBasedCollator + if (s1.length() >= s2.length()) { + return collator.compare(s1.substring(s1.length() - s2.length()), s2) == 0; + } + return false; } } } @@ -433,10 +490,18 @@ public static boolean contains(@Nullable final Collator collator, final String s return true; } else if (s1.isEmpty()) { return false; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return 
searchIterator.first() >= 0; + } else { + // Fallback for non-RuleBasedCollator + for (int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return true; + } + } + return false; } } } @@ -459,10 +524,18 @@ public static int indexOf(@Nullable final Collator collator, final String s1, fi return 0; } else if (s1.isEmpty()) { return -1; - } else { + } else if (collator instanceof RuleBasedCollator rbc) { final SearchIterator searchIterator = - new StringSearch(s2, new StringCharacterIterator(s1), (RuleBasedCollator) collator); + new StringSearch(s2, new StringCharacterIterator(s1), rbc); return searchIterator.first(); + } else { + // Fallback for non-RuleBasedCollator + for (int i = 0; i <= s1.length() - s2.length(); i++) { + if (collator.compare(s1.substring(i, i + s2.length()), s2) == 0) { + return i; + } + } + return -1; } } } @@ -809,21 +882,119 @@ private static Collator getSamiskCollator() throws Exception { return collator; } - private static Collator getHtmlAsciiCaseInsensitiveCollator() throws Exception { + private static Collator getHtmlAsciiCaseInsensitiveCollator() { Collator collator = htmlAsciiCaseInsensitiveCollator.get(); if (collator == null) { - collator = new RuleBasedCollator("&a=A &b=B &c=C &d=D &e=E &f=F &g=G &h=H " - + "&i=I &j=J &k=K &l=L &m=M &n=N &o=O &p=P &q=Q &r=R &s=S &t=T " - + "&u=U &v=V &w=W &x=X &y=Y &z=Z"); - collator.setStrength(Collator.PRIMARY); + // XQ4 html-ascii-case-insensitive: ASCII letters A-Z fold to a-z, + // all other characters compare by Unicode codepoint order. + // Cannot use RuleBasedCollator with PRIMARY strength because that + // makes ALL case/accent differences irrelevant, not just ASCII. 
htmlAsciiCaseInsensitiveCollator.compareAndSet(null, - collator.freeze()); + new HtmlAsciiCaseInsensitiveCollator()); collator = htmlAsciiCaseInsensitiveCollator.get(); } return collator; } + private static Collator getUnicodeCaseInsensitiveCollator() { + Collator collator = unicodeCaseInsensitiveCollator.get(); + if (collator == null) { + // Unicode case-insensitive: UCA with SECONDARY strength + // ignores case differences but respects accents and other distinctions + final Collator uca = Collator.getInstance(); + uca.setStrength(Collator.SECONDARY); + unicodeCaseInsensitiveCollator.compareAndSet(null, uca); + collator = unicodeCaseInsensitiveCollator.get(); + } + + return collator; + } + + /** + * Custom Collator for HTML ASCII case-insensitive comparison. + * Folds only ASCII letters A-Z to a-z, then compares by Unicode codepoint. + * Non-ASCII characters are compared by their codepoint value without folding. + */ + private static final class HtmlAsciiCaseInsensitiveCollator extends Collator { + + @Override + public int compare(final String source, final String target) { + int i1 = 0, i2 = 0; + while (i1 < source.length() && i2 < target.length()) { + int cp1 = source.codePointAt(i1); + int cp2 = target.codePointAt(i2); + // Fold ASCII uppercase to lowercase only + if (cp1 >= 'A' && cp1 <= 'Z') { + cp1 += 32; + } + if (cp2 >= 'A' && cp2 <= 'Z') { + cp2 += 32; + } + if (cp1 != cp2) { + return cp1 - cp2; + } + i1 += Character.charCount(cp1); + i2 += Character.charCount(cp2); + } + return (source.length() - i1) - (target.length() - i2); + } + + @Override + public CollationKey getCollationKey(final String source) { + throw new UnsupportedOperationException("CollationKey not supported for HTML ASCII case-insensitive collation"); + } + + @Override + public RawCollationKey getRawCollationKey(final String source, final RawCollationKey key) { + throw new UnsupportedOperationException("RawCollationKey not supported for HTML ASCII case-insensitive collation"); + } + + 
@Override + public int setVariableTop(final String varTop) { + return 0; + } + + @Override + public int getVariableTop() { + return 0; + } + + @Override + public void setVariableTop(final int varTop) { + } + + @Override + public VersionInfo getVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public VersionInfo getUCAVersion() { + return VersionInfo.getInstance(1); + } + + @Override + public int hashCode() { + return HtmlAsciiCaseInsensitiveCollator.class.hashCode(); + } + + @Override + public Collator freeze() { + return this; + } + + @Override + public boolean isFrozen() { + return true; + } + + @Override + public Collator cloneAsThawed() { + return new HtmlAsciiCaseInsensitiveCollator(); + } + } + private static Collator getXqtsAsciiCaseBlindCollator() throws Exception { Collator collator = xqtsAsciiCaseBlindCollator.get(); if (collator == null) { diff --git a/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java b/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java new file mode 100644 index 00000000000..47e364d09cb --- /dev/null +++ b/exist-core/src/main/java/org/exist/util/XMLBackwardsCompatHandler.java @@ -0,0 +1,106 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.util; + +import org.xml.sax.Attributes; +import org.xml.sax.ContentHandler; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; + +/** + * A SAX ContentHandler wrapper that suppresses duplicate startDocument/endDocument calls. + * Saxon 12's LinkedTreeBuilder does not tolerate receiving startDocument more than once, + * which can happen when eXist's Serializer sends document events that overlap with + * explicitly-called startDocument/endDocument in the XSLT compilation pipeline. + */ +public class XMLBackwardsCompatHandler implements ContentHandler { + + private final ContentHandler delegate; + private boolean documentStarted = false; + + public XMLBackwardsCompatHandler(final ContentHandler delegate) { + this.delegate = delegate; + } + + @Override + public void startDocument() throws SAXException { + if (!documentStarted) { + documentStarted = true; + delegate.startDocument(); + } + } + + @Override + public void endDocument() throws SAXException { + // Suppress — the caller will call endDocument on the delegate directly + } + + @Override + public void setDocumentLocator(final Locator locator) { + delegate.setDocumentLocator(locator); + } + + @Override + public void startPrefixMapping(final String prefix, final String uri) throws SAXException { + // Saxon 12 rejects any namespace declaration involving the XML namespace URI + // (http://www.w3.org/XML/1998/namespace) — the xml prefix is always implicitly bound + if ("xml".equals(prefix) || javax.xml.XMLConstants.XML_NS_URI.equals(uri)) { + return; + } + delegate.startPrefixMapping(prefix, uri); + } + + @Override + public void endPrefixMapping(final String prefix) throws SAXException { + delegate.endPrefixMapping(prefix); + } + + @Override + public void 
startElement(final String uri, final String localName, final String qName, final Attributes atts) throws SAXException { + delegate.startElement(uri, localName, qName, atts); + } + + @Override + public void endElement(final String uri, final String localName, final String qName) throws SAXException { + delegate.endElement(uri, localName, qName); + } + + @Override + public void characters(final char[] ch, final int start, final int length) throws SAXException { + delegate.characters(ch, start, length); + } + + @Override + public void ignorableWhitespace(final char[] ch, final int start, final int length) throws SAXException { + delegate.ignorableWhitespace(ch, start, length); + } + + @Override + public void processingInstruction(final String target, final String data) throws SAXException { + delegate.processingInstruction(target, data); + } + + @Override + public void skippedEntity(final String name) throws SAXException { + delegate.skippedEntity(name); + } +} diff --git a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java index 758ccee130a..a1b7c9890b3 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AbstractSerializer.java @@ -81,13 +81,27 @@ protected SerializerWriter getDefaultWriter() { public void setOutput(Writer writer, Properties properties) { outputProperties = Objects.requireNonNullElseGet(properties, () -> new Properties(defaultProperties)); final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); - final String htmlVersionProp = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION, "1.0"); - + // For html/xhtml methods, determine HTML version: + // 1. Use html-version if explicitly set + // 2. Otherwise use version (W3C spec: version controls HTML version for html method) + // 3. 
Default to 5.0 double htmlVersion; - try { - htmlVersion = Double.parseDouble(htmlVersionProp); - } catch (NumberFormatException e) { - htmlVersion = 1.0; + final String explicitHtmlVersion = outputProperties.getProperty(EXistOutputKeys.HTML_VERSION); + if (explicitHtmlVersion != null) { + try { + htmlVersion = Double.parseDouble(explicitHtmlVersion); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else if (("html".equalsIgnoreCase(method) || "xhtml".equalsIgnoreCase(method)) + && outputProperties.getProperty(OutputKeys.VERSION) != null) { + try { + htmlVersion = Double.parseDouble(outputProperties.getProperty(OutputKeys.VERSION)); + } catch (NumberFormatException e) { + htmlVersion = 5.0; + } + } else { + htmlVersion = 5.0; } final SerializerWriter baseSerializerWriter = getBaseSerializerWriter(method, htmlVersion); diff --git a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java index 22ab6dfca23..59fc8af3dfb 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/AdaptiveWriter.java @@ -152,6 +152,17 @@ public void write(final Sequence sequence, final String itemSep, final boolean e case Type.FUNCTION: writeFunctionItem((FunctionReference) item); break; + // XQuery 4.0 JNode types — serialize as their underlying JSON structure + case Type.JSON_NODE: + case Type.JSON_OBJECT: + case Type.JSON_ARRAY: + case Type.JSON_STRING: + case Type.JSON_NUMBER: + case Type.JSON_BOOLEAN: + case Type.JSON_NULL: + case Type.JSON_MEMBER: + writeJNode((org.exist.xquery.value.jnode.JNode) item); + break; default: writeAtomic(item.atomize()); break; @@ -190,10 +201,15 @@ private void writeAtomic(AtomicValue value) throws IOException, SAXException, XP } private void writeDouble(final DoubleValue item) throws SAXException { - final DecimalFormatSymbols symbols = 
DecimalFormatSymbols.getInstance(Locale.US); - symbols.setExponentSeparator("e"); - final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols); - writeText(df.format(item.getDouble())); + final double d = item.getDouble(); + if (Double.isInfinite(d) || Double.isNaN(d)) { + writeText(item.getStringValue()); + } else { + final DecimalFormatSymbols symbols = DecimalFormatSymbols.getInstance(Locale.US); + symbols.setExponentSeparator("e"); + final DecimalFormat df = new DecimalFormat("0.0##########################E0", symbols); + writeText(df.format(d)); + } } private void writeArray(final ArrayType array) throws XPathException, SAXException, TransformerException { @@ -215,9 +231,7 @@ private void writeArray(final ArrayType array) throws XPathException, SAXExcepti private void writeMap(final AbstractMapType map) throws SAXException, XPathException, TransformerException { try { - writer.write("map"); - addSpaceIfIndent(); - writer.write('{'); + writer.write("map{"); addIndent(); indent(); for (final Iterator> i = map.iterator(); i.hasNext(); ) { @@ -297,4 +311,23 @@ private void writeXML(final Item item) throws SAXException { broker.returnSerializer(serializer); } } + + /** + * Serialize a JNode in adaptive mode. + * Maps/arrays are serialized as their adaptive representation, + * leaf values as their string representation. 
+ */ + private void writeJNode(final org.exist.xquery.value.jnode.JNode jnode) throws SAXException, XPathException, TransformerException { + final Sequence value = jnode.getValue(); + if (value instanceof AbstractMapType) { + writeMap((AbstractMapType) value); + } else if (value instanceof ArrayType) { + writeArray((ArrayType) value); + } else if (value == Sequence.EMPTY_SEQUENCE || value.isEmpty()) { + writeText("null"); + } else { + // Delegate to the normal write loop for the underlying value + write(value, ", ", false); + } + } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java new file mode 100644 index 00000000000..98c599fc582 --- /dev/null +++ b/exist-core/src/main/java/org/exist/util/serializer/CSVSerializer.java @@ -0,0 +1,295 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.util.serializer; + +import io.lacuna.bifurcan.IEntry; +import org.exist.storage.serializers.EXistOutputKeys; +import org.exist.xquery.XPathException; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; +import org.xml.sax.SAXException; + +import java.io.IOException; +import java.io.Writer; +import java.util.*; + +/** + * Serializes XDM sequences as RFC 4180 CSV output. + * + * Accepts three input formats: + *
+ * <ul>
+ *   <li>Array of arrays: each inner array is a row</li>
+ *   <li>Sequence of maps: keys become header, values become rows</li>
+ *   <li>XML table: {@code <csv><record><field>...</field></record></csv>}</li>
+ * </ul>
+ */ +public class CSVSerializer { + + private final String fieldDelimiter; + private final String rowDelimiter; + private final char quoteChar; + private final boolean alwaysQuote; + private final boolean includeHeader; + + public CSVSerializer(final Properties outputProperties) { + this.fieldDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_FIELD_DELIMITER, ","); + this.rowDelimiter = outputProperties.getProperty(EXistOutputKeys.CSV_ROW_DELIMITER, "\n"); + final String qc = outputProperties.getProperty(EXistOutputKeys.CSV_QUOTE_CHARACTER, "\""); + this.quoteChar = qc.isEmpty() ? '"' : qc.charAt(0); + this.alwaysQuote = !"no".equals(outputProperties.getProperty(EXistOutputKeys.CSV_QUOTES, "yes")); + this.includeHeader = "yes".equals(outputProperties.getProperty(EXistOutputKeys.CSV_HEADER, "no")); + } + + public void serialize(final Sequence sequence, final Writer writer) throws SAXException { + try { + if (sequence.isEmpty()) { + return; + } + + final Item first = sequence.itemAt(0); + + if (first.getType() == Type.ARRAY_ITEM) { + if (sequence.hasOne()) { + // Single array: treat as array-of-arrays + serializeArrayOfArrays((ArrayType) first, writer); + } else { + // Sequence of arrays: each array is a row + serializeSequenceOfArrays(sequence, writer); + } + } else if (first.getType() == Type.MAP_ITEM) { + serializeSequenceOfMaps(sequence, writer); + } else if (Type.subTypeOf(first.getType(), Type.NODE)) { + serializeXmlTable(sequence, writer); + } else { + // Single atomic or sequence of atomics — one row + serializeAtomicSequence(sequence, writer); + } + } catch (final IOException | XPathException e) { + throw new SAXException(e.getMessage(), e); + } + } + + private void serializeArrayOfArrays(final ArrayType outerArray, final Writer writer) throws IOException, XPathException { + for (int i = 0; i < outerArray.getSize(); i++) { + final Sequence member = outerArray.get(i); + if (member.getItemCount() == 1 && member.itemAt(0).getType() == 
Type.ARRAY_ITEM) { + writeRow((ArrayType) member.itemAt(0), writer); + } else { + writeSequenceRow(member, writer); + } + writer.write(rowDelimiter); + } + } + + private void serializeSequenceOfArrays(final Sequence sequence, final Writer writer) throws IOException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.ARRAY_ITEM) { + writeRow((ArrayType) item, writer); + } else { + writer.write(quoteField(item.getStringValue())); + } + writer.write(rowDelimiter); + } + } + + private void serializeSequenceOfMaps(final Sequence sequence, final Writer writer) throws IOException, XPathException { + // Collect all keys from first map for header + final AbstractMapType firstMap = (AbstractMapType) sequence.itemAt(0); + final List keys = new ArrayList<>(); + for (final IEntry entry : firstMap) { + keys.add(entry.key().getStringValue()); + } + Collections.sort(keys); + + // Write header + if (includeHeader) { + writeFields(keys, writer); + writer.write(rowDelimiter); + } + + // Write rows + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.MAP_ITEM) { + final AbstractMapType map = (AbstractMapType) item; + boolean first = true; + for (final String key : keys) { + if (!first) { + writer.write(fieldDelimiter); + } + final Sequence value = map.get(new StringValue(key)); + writer.write(quoteField(value.isEmpty() ? "" : value.getStringValue())); + first = false; + } + } + writer.write(rowDelimiter); + } + } + + private void serializeXmlTable(final Sequence sequence, final Writer writer) throws IOException, XPathException { + // Walk XML table: value + // or
+ //     <table><tr><td>value</td></tr></table>
+ for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (Type.subTypeOf(item.getType(), Type.ELEMENT)) { + final org.w3c.dom.Element elem = (org.w3c.dom.Element) ((NodeValue) item).getNode(); + serializeXmlElement(elem, writer); + } + } + } + + private void serializeXmlElement(final org.w3c.dom.Element element, final Writer writer) throws IOException { + final org.w3c.dom.NodeList children = element.getChildNodes(); + boolean hasChildElements = false; + for (int i = 0; i < children.getLength(); i++) { + if (children.item(i).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + hasChildElements = true; + break; + } + } + + if (!hasChildElements) { + // Leaf element — output as a field value + writer.write(quoteField(element.getTextContent())); + return; + } + + // Check if children are "record" elements (containing field elements) + // or direct field elements + boolean firstRecord = true; + for (int i = 0; i < children.getLength(); i++) { + if (children.item(i).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + final org.w3c.dom.Element child = (org.w3c.dom.Element) children.item(i); + final org.w3c.dom.NodeList grandchildren = child.getChildNodes(); + boolean hasGrandchildElements = false; + for (int j = 0; j < grandchildren.getLength(); j++) { + if (grandchildren.item(j).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + hasGrandchildElements = true; + break; + } + } + + if (hasGrandchildElements) { + // This is a record element — its children are fields + if (!firstRecord) { + // row delimiter already written + } + boolean firstField = true; + for (int j = 0; j < grandchildren.getLength(); j++) { + if (grandchildren.item(j).getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + if (!firstField) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(grandchildren.item(j).getTextContent())); + firstField = false; + } + } + writer.write(rowDelimiter); + firstRecord = false; + } else { + // Direct field 
element — accumulate as part of a single row + if (!firstRecord) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(child.getTextContent())); + firstRecord = false; + } + } + } + } + + private void serializeAtomicSequence(final Sequence sequence, final Writer writer) throws IOException, XPathException { + boolean first = true; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(i.nextItem().getStringValue())); + first = false; + } + writer.write(rowDelimiter); + } + + private void writeRow(final ArrayType array, final Writer writer) throws IOException, XPathException { + for (int i = 0; i < array.getSize(); i++) { + if (i > 0) { + writer.write(fieldDelimiter); + } + final Sequence member = array.get(i); + writer.write(quoteField(member.isEmpty() ? "" : member.getStringValue())); + } + } + + private void writeSequenceRow(final Sequence sequence, final Writer writer) throws IOException, XPathException { + boolean first = true; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(i.nextItem().getStringValue())); + first = false; + } + } + + private void writeFields(final List fields, final Writer writer) throws IOException { + boolean first = true; + for (final String field : fields) { + if (!first) { + writer.write(fieldDelimiter); + } + writer.write(quoteField(field)); + first = false; + } + } + + /** + * Quote a field value per RFC 4180. + * If alwaysQuote is true, all fields are quoted. + * If false, only fields containing the delimiter, quote char, or newline are quoted. + * Quote characters within the value are escaped by doubling. 
+ */ + private String quoteField(final String value) { + final boolean needsQuoting = alwaysQuote + || value.contains(fieldDelimiter) + || value.indexOf(quoteChar) >= 0 + || value.contains("\n") + || value.contains("\r"); + + if (!needsQuoting) { + return value; + } + + final StringBuilder sb = new StringBuilder(value.length() + 2); + sb.append(quoteChar); + for (int i = 0; i < value.length(); i++) { + final char c = value.charAt(i); + if (c == quoteChar) { + sb.append(quoteChar); // escape by doubling + } + sb.append(c); + } + sb.append(quoteChar); + return sb.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java index 1dffc3029b7..bc69c4304c6 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/HTML5Writer.java @@ -246,6 +246,23 @@ protected void closeStartTag(boolean isEmpty) throws TransformerException { } } + @Override + public void processingInstruction(String target, String data) throws TransformerException { + try { + closeStartTag(false); + final Writer writer = getWriter(); + writer.write("'); + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } + @Override protected boolean needsEscape(char ch) { if (RAW_TEXT_ELEMENTS.contains(currentTag)) { @@ -253,4 +270,20 @@ protected boolean needsEscape(char ch) { } return super.needsEscape(ch); } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // In raw text elements (script, style), suppress escaping for TEXT content only. + // Attribute values must always be escaped, even on raw text elements. 
+ if (!inAttribute && RAW_TEXT_ELEMENTS.contains(currentTag)) { + return false; + } + // For attributes, always return true (bypass the 1-arg override + // which returns false for all script/style content) + if (inAttribute) { + return true; + } + return super.needsEscape(ch, inAttribute); + } + } diff --git a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java index c336d8b2943..99df54c3e19 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/IndentingXMLWriter.java @@ -25,7 +25,9 @@ import java.io.Writer; import java.util.ArrayDeque; import java.util.Deque; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; @@ -48,6 +50,8 @@ public class IndentingXMLWriter extends XMLWriter { private boolean sameline = false; private boolean whitespacePreserve = false; private final Deque whitespacePreserveStack = new ArrayDeque<>(); + private Set suppressIndentation = null; + private int suppressIndentDepth = 0; public IndentingXMLWriter() { super(); @@ -75,6 +79,9 @@ public void startElement(final String namespaceURI, final String localName, fina indent(); } super.startElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName)) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -86,6 +93,9 @@ public void startElement(final QName qname) throws TransformerException { indent(); } super.startElement(qname); + if (isSuppressIndentation(qname.getLocalPart())) { + suppressIndentDepth++; + } addIndent(); afterTag = true; sameline = true; @@ -95,6 +105,9 @@ public void startElement(final QName qname) throws TransformerException { public void endElement(final String namespaceURI, final String localName, final String qname) throws 
TransformerException { endIndent(namespaceURI, localName); super.endElement(namespaceURI, localName, qname); + if (isSuppressIndentation(localName) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(namespaceURI, localName); afterTag = true; @@ -104,6 +117,9 @@ public void endElement(final String namespaceURI, final String localName, final public void endElement(final QName qname) throws TransformerException { endIndent(qname.getNamespaceURI(), qname.getLocalPart()); super.endElement(qname); + if (isSuppressIndentation(qname.getLocalPart()) && suppressIndentDepth > 0) { + suppressIndentDepth--; + } popWhitespacePreserve(); // apply ancestor's xml:space value _after_ end element sameline = isInlineTag(qname.getNamespaceURI(), qname.getLocalPart()); afterTag = true; @@ -164,7 +180,29 @@ public void setOutputProperties(final Properties properties) { } catch (final NumberFormatException e) { LOG.warn("Invalid indentation value: '{}'", option); } - indent = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no")); + final String indentValue = outputProperties.getProperty(OutputKeys.INDENT, "no").trim(); + indent = "yes".equals(indentValue) || "true".equals(indentValue) || "1".equals(indentValue); + final String suppressProp = outputProperties.getProperty("suppress-indentation"); + if (suppressProp != null && !suppressProp.isEmpty()) { + suppressIndentation = new HashSet<>(); + for (final String name : suppressProp.split("\\s+")) { + if (!name.isEmpty()) { + // Handle URI-qualified names: Q{ns}local or {ns}local → extract local part + if (name.startsWith("Q{") || name.startsWith("{")) { + final int closeBrace = name.indexOf('}'); + if (closeBrace > 0 && closeBrace < name.length() - 1) { + suppressIndentation.add(name.substring(closeBrace + 1)); + } else { + suppressIndentation.add(name); + } + } else { + suppressIndentation.add(name); + } + } + } + } 
else { + suppressIndentation = null; + } } @Override @@ -220,8 +258,12 @@ protected void addSpaceIfIndent() throws IOException { writer.write(' '); } + private boolean isSuppressIndentation(final String localName) { + return suppressIndentation != null && suppressIndentation.contains(localName); + } + protected void indent() throws TransformerException { - if (!indent || whitespacePreserve) { + if (!indent || whitespacePreserve || suppressIndentDepth > 0) { return; } final int spaces = indentAmount * level; diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java index e89e7119d19..4894c0162af 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTML5Writer.java @@ -24,6 +24,7 @@ import java.io.Writer; import javax.xml.transform.TransformerException; +import org.exist.storage.serializers.EXistOutputKeys; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; import it.unimi.dsi.fastutil.objects.ObjectSet; @@ -128,7 +129,45 @@ protected void writeDoctype(String rootElement) throws TransformerException { return; } - documentType("html", null, null); + // Canonical serialization: never output DOCTYPE + final String canonicalProp = outputProperties != null + ? outputProperties.getProperty(EXistOutputKeys.CANONICAL) : null; + if ("yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp)) { + doctypeWritten = true; + return; + } + + // Only output DOCTYPE when the root element is (case-insensitive) + // Per W3C Serialization: DOCTYPE is for the html element only, not fragments + final String localName = rootElement.contains(":") ? 
rootElement.substring(rootElement.indexOf(':') + 1) : rootElement; + if (!"html".equalsIgnoreCase(localName)) { + doctypeWritten = true; // suppress future attempts + return; + } + + final String publicId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC) : null; + final String systemId = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM) : null; + final String method = outputProperties != null + ? outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD, "xhtml") : "xhtml"; + + if ("xhtml".equalsIgnoreCase(method)) { + // XHTML: per W3C spec section 5.2, only output doctype-public when + // doctype-system is also present + if (systemId != null) { + documentType("html", publicId, systemId); + } else if (publicId == null) { + // Neither set — simple DOCTYPE + documentType("html", null, null); + } else { + // doctype-public without doctype-system — suppress DOCTYPE for XHTML + doctypeWritten = true; + } + } else { + // HTML method: pass through doctype-public and doctype-system as set + documentType("html", publicId, systemId); + } doctypeWritten = true; } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java index b0006f7f51c..9238cd1e848 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XHTMLWriter.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.Writer; +import javax.xml.transform.OutputKeys; import javax.xml.transform.TransformerException; import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; @@ -36,12 +37,35 @@ */ public class XHTMLWriter extends IndentingXMLWriter { + /** + * HTML boolean attributes per HTML 4.01 and HTML5 spec. 
+ * When method="html" and the attribute value equals the attribute name + * (case-insensitive), the attribute is minimized to just the name. + */ + protected static final ObjectSet BOOLEAN_ATTRIBUTES = new ObjectOpenHashSet<>(31); + static { + BOOLEAN_ATTRIBUTES.add("checked"); + BOOLEAN_ATTRIBUTES.add("compact"); + BOOLEAN_ATTRIBUTES.add("declare"); + BOOLEAN_ATTRIBUTES.add("defer"); + BOOLEAN_ATTRIBUTES.add("disabled"); + BOOLEAN_ATTRIBUTES.add("ismap"); + BOOLEAN_ATTRIBUTES.add("multiple"); + BOOLEAN_ATTRIBUTES.add("nohref"); + BOOLEAN_ATTRIBUTES.add("noresize"); + BOOLEAN_ATTRIBUTES.add("noshade"); + BOOLEAN_ATTRIBUTES.add("nowrap"); + BOOLEAN_ATTRIBUTES.add("readonly"); + BOOLEAN_ATTRIBUTES.add("selected"); + } + protected static final ObjectSet EMPTY_TAGS = new ObjectOpenHashSet<>(31); static { EMPTY_TAGS.add("area"); EMPTY_TAGS.add("base"); EMPTY_TAGS.add("br"); EMPTY_TAGS.add("col"); + EMPTY_TAGS.add("embed"); EMPTY_TAGS.add("hr"); EMPTY_TAGS.add("img"); EMPTY_TAGS.add("input"); @@ -88,6 +112,8 @@ public class XHTMLWriter extends IndentingXMLWriter { } protected String currentTag; + protected boolean inHead = false; + protected boolean contentTypeMetaWritten = false; protected final ObjectSet emptyTags; protected final ObjectSet inlineTags; @@ -120,78 +146,121 @@ public XHTMLWriter(final Writer writer, ObjectSet emptyTags, ObjectSet 0 && namespaceURI != null && namespaceURI.equals(Namespaces.XHTML_NS)) { - haveCollapsedXhtmlPrefix = true; - return qname.substring(pos+1); - + if (pos > 0 && namespaceURI != null) { + if (namespaceURI.equals(Namespaces.XHTML_NS)) { + haveCollapsedXhtmlPrefix = true; + return qname.substring(pos + 1); + } + // XHTML5: normalize SVG and MathML prefixes + if (isHtml5Version() && (namespaceURI.equals(SVG_NS) || namespaceURI.equals(MATHML_NS))) { + collapsedForeignNs = namespaceURI; + return qname.substring(pos + 1); + } } - return qname; } @Override public void namespace(final String prefix, final String nsURI) throws 
TransformerException { - if(haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) { - return; //dont output the xmlns:prefix for the collapsed nodes prefix + if (haveCollapsedXhtmlPrefix && prefix != null && !prefix.isEmpty() && nsURI.equals(Namespaces.XHTML_NS)) { + return; // don't output the xmlns:prefix for the collapsed node's prefix + } + // When a foreign namespace prefix was collapsed, replace the prefixed + // declaration with a default namespace declaration + if (collapsedForeignNs != null && prefix != null && !prefix.isEmpty() + && nsURI.equals(collapsedForeignNs)) { + super.namespace("", nsURI); // emit xmlns="..." instead of xmlns:prefix="..." + return; } - super.namespace(prefix, nsURI); } @@ -200,9 +269,25 @@ public void namespace(final String prefix, final String nsURI) throws Transforme protected void closeStartTag(final boolean isEmpty) throws TransformerException { try { if (tagIsOpen) { + // Flush canonical buffers (sorted namespaces + attributes) if active + if (isCanonical()) { + flushCanonicalBuffersXhtml(); + } if (isEmpty) { - if (isEmptyTag(currentTag)) { - getWriter().write(" />"); + if (isCanonical()) { + // Canonical: always expand empty elements + getWriter().write('>'); + getWriter().write("'); + } else if (isEmptyTag(currentTag)) { + // For method="html", use HTML-style void tags (
<br>) + // For method="xhtml", use XHTML-style (<br />
) + if (isHtmlMethod()) { + getWriter().write(">"); + } else { + getWriter().write(" />"); + } } else { getWriter().write('>'); getWriter().write(") while XHTML uses self-closing (
). + */ + private boolean isHtmlMethod() { + if (outputProperties != null) { + final String method = outputProperties.getProperty(javax.xml.transform.OutputKeys.METHOD); + return "html".equalsIgnoreCase(method); + } + return false; + } + + /** + * Returns true if the HTML version is 5.0 or higher. + */ + private boolean isHtml5Version() { + if (outputProperties == null) { + return true; // default to HTML5 + } + final String version = outputProperties.getProperty(OutputKeys.VERSION); + if (version != null) { + try { + return Double.parseDouble(version) >= 5.0; + } catch (final NumberFormatException e) { + // ignore + } + } + return true; // default to HTML5 + } + @Override + public void attribute(final QName qname, final CharSequence value) throws TransformerException { + // For method="html", minimize boolean attributes when value matches name + if (isHtmlMethod() && isBooleanAttribute(qname.getLocalPart(), value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname.getLocalPart()); + // Don't write ="value" — minimized form + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + @Override + public void attribute(final String qname, final CharSequence value) throws TransformerException { + if (isHtmlMethod() && isBooleanAttribute(qname, value)) { + try { + if (!tagIsOpen) { + characters(value); + return; + } + final Writer w = getWriter(); + w.write(' '); + w.write(qname); + } catch (final IOException ioe) { + throw new TransformerException(ioe.getMessage(), ioe); + } + return; + } + super.attribute(qname, value); + } + + private boolean isBooleanAttribute(final String attrName, final CharSequence value) { + return BOOLEAN_ATTRIBUTES.contains(attrName.toLowerCase(java.util.Locale.ROOT)) + && attrName.equalsIgnoreCase(value.toString()); + } + + private static final ObjectSet 
RAW_TEXT_ELEMENTS_HTML = new ObjectOpenHashSet<>(4); + static { + RAW_TEXT_ELEMENTS_HTML.add("script"); + RAW_TEXT_ELEMENTS_HTML.add("style"); + } + + @Override + protected boolean needsEscape(final char ch, final boolean inAttribute) { + // For HTML method, script and style content should not be escaped + if (!inAttribute && isHtmlMethod() + && currentTag != null && RAW_TEXT_ELEMENTS_HTML.contains(currentTag.toLowerCase(java.util.Locale.ROOT))) { + return false; + } + return super.needsEscape(ch, inAttribute); + } + + /** + * For HTML serialization, cdata-section-elements is ignored per the + * W3C serialization spec — CDATA sections are not valid in HTML. + */ + @Override + protected boolean shouldUseCdataSections() { + if (isHtmlMethod()) { + return false; + } + return super.shouldUseCdataSections(); + } + + @Override + protected boolean escapeAmpersandBeforeBrace() { + // HTML spec: & before { in attribute values should not be escaped + return false; + } + @Override protected boolean isInlineTag(final String namespaceURI, final String localName) { return (namespaceURI == null || namespaceURI.isEmpty() || Namespaces.XHTML_NS.equals(namespaceURI)) && inlineTags.contains(localName); } + + /** + * Write a meta content-type tag as the first child of head when + * include-content-type is enabled (the default per W3C Serialization 3.1). 
+ */ + protected void writeContentTypeMeta() throws TransformerException { + if (contentTypeMetaWritten || outputProperties == null) { + return; + } + final String includeContentType = outputProperties.getProperty("include-content-type", "yes"); + if (!"yes".equals(includeContentType)) { + return; + } + contentTypeMetaWritten = true; + try { + final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8"); + closeStartTag(false); + final Writer writer = getWriter(); + + // HTML5 method uses + // XHTML and HTML4 use + // XHTML mode requires self-closing tags (/>) for valid XML output — + // the URL rewrite pipeline re-parses this as XML in the view step. + final boolean selfClose = !isHtmlMethod(); + if (isHtmlMethod() && isHtml5Version()) { + writer.write("" : "\">"); + } else { + final String mediaType = outputProperties.getProperty(OutputKeys.MEDIA_TYPE, "text/html"); + writer.write("" : "\">"); + } + } catch (IOException e) { + throw new TransformerException(e.getMessage(), e); + } + } } diff --git a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java index 763aaf52ef6..48887f88e13 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XMLWriter.java @@ -78,6 +78,11 @@ public class XMLWriter implements SerializerWriter { private String defaultNamespace = ""; + // Namespace stack (BaseX-style): flat list of (prefix, uri) pairs for all in-scope bindings. + // nstack records the list size at each startElement so endElement can roll back declarations. + private final List nspaces = new ArrayList<>(); + private final Deque nstack = new ArrayDeque<>(); + /** * When serializing an XDM this should be true, * otherwise false. @@ -86,8 +91,33 @@ public class XMLWriter implements SerializerWriter { * compared to retrieving resources from the database. 
*/ private boolean xdmSerialization = false; + private boolean xml11 = false; + private boolean canonical = false; + @Nullable private java.text.Normalizer.Form normalizationForm = null; + + // Canonical XML: buffer namespaces and attributes for sorting + private final List canonicalNamespaces = new ArrayList<>(); // [prefix, uri] + private final List canonicalAttributes = new ArrayList<>(); // [nsUri, localName, qname, value] private final Deque elementName = new ArrayDeque<>(); + + /** + * Returns true if cdata-section-elements should be applied. + * Subclasses (e.g., XHTMLWriter for HTML method) can override + * to suppress CDATA sections. + */ + protected boolean shouldUseCdataSections() { + return xdmSerialization; + } + + /** + * Returns the namespace URI of the current (innermost) element, + * or null if no element is on the stack. + */ + protected String currentElementNamespaceURI() { + final QName top = elementName.peek(); + return top != null ? top.getNamespaceURI() : null; + } private LazyVal> cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames); private boolean cdataSetionElement = false; @@ -96,8 +126,9 @@ public class XMLWriter implements SerializerWriter { Arrays.fill(textSpecialChars, false); textSpecialChars['<'] = true; textSpecialChars['>'] = true; - // textSpecialChars['\r'] = true; + textSpecialChars['\r'] = true; textSpecialChars['&'] = true; + textSpecialChars[0x7F] = true; // DEL must be escaped as  attrSpecialChars = new boolean[128]; Arrays.fill(attrSpecialChars, false); @@ -108,6 +139,7 @@ public class XMLWriter implements SerializerWriter { attrSpecialChars['\t'] = true; attrSpecialChars['&'] = true; attrSpecialChars['"'] = true; + attrSpecialChars[0x7F] = true; // DEL must be escaped as  } @Nullable private XMLDeclaration originalXmlDecl; @@ -139,6 +171,10 @@ public void setOutputProperties(final Properties properties) { } this.xdmSerialization = 
"yes".equals(outputProperties.getProperty(EXistOutputKeys.XDM_SERIALIZATION, "no")); + this.xml11 = "1.1".equals(outputProperties.getProperty(OutputKeys.VERSION)); + this.normalizationForm = parseNormalizationForm(outputProperties.getProperty("normalization-form", "none")); + final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL); + this.canonical = "yes".equals(canonicalProp) || "true".equals(canonicalProp) || "1".equals(canonicalProp); } private Set parseCdataSectionElementNames() { @@ -166,6 +202,8 @@ protected void resetObjectState() { originalXmlDecl = null; doctypeWritten = false; defaultNamespace = ""; + nspaces.clear(); + nstack.clear(); cdataSectionElements = new LazyVal<>(this::parseCdataSectionElementNames); } @@ -184,12 +222,35 @@ public Writer getWriter() { } public String getDefaultNamespace() { - return defaultNamespace.isEmpty() ? null : defaultNamespace; + final String fromStack = nsLookup(""); + return (fromStack == null || fromStack.isEmpty()) ? null : fromStack; } public void setDefaultNamespace(final String namespace) { + // Keep the baseline field in sync; nsLookup() falls back to it when the + // namespace stack has no in-scope binding for the default prefix. defaultNamespace = namespace == null ? "" : namespace; } + + /** + * Looks up the currently in-scope URI for {@code prefix} by scanning the flat + * namespace list from innermost to outermost scope. + * For the default-namespace prefix ({@code ""}), falls back to the + * {@link #defaultNamespace} baseline field when the stack has no binding. + * + * @return the in-scope URI, or {@code null} if {@code prefix} is unbound + */ + private String nsLookup(final String prefix) { + for (int i = nspaces.size() - 2; i >= 0; i -= 2) { + if (nspaces.get(i).equals(prefix)) { + return nspaces.get(i + 1); + } + } + if (prefix.isEmpty()) { + return defaultNamespace.isEmpty() ? 
null : defaultNamespace; + } + return null; + } public void startDocument() throws TransformerException { resetObjectState(); @@ -207,15 +268,16 @@ public void startElement(final String namespaceUri, final String localName, fina if(!declarationWritten) { writeDeclaration(); } - + if(!doctypeWritten) { writeDoctype(qname); } - + try { if(tagIsOpen) { closeStartTag(false); } + nstack.push(nspaces.size()); writer.write('<'); writer.write(qname); tagIsOpen = true; @@ -233,21 +295,22 @@ public void startElement(final QName qname) throws TransformerException { if(!declarationWritten) { writeDeclaration(); } - + if(!doctypeWritten) { writeDoctype(qname.getStringValue()); } - + try { if(tagIsOpen) { closeStartTag(false); } + nstack.push(nspaces.size()); writer.write('<'); if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { writer.write(qname.getPrefix()); writer.write(':'); } - + writer.write(qname.getLocalPart()); tagIsOpen = true; elementName.push(qname); @@ -266,6 +329,9 @@ public void endElement(final String namespaceURI, final String localName, final writer.write('>'); } elementName.pop(); + if (!nstack.isEmpty()) { + nspaces.subList(nstack.pop(), nspaces.size()).clear(); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } @@ -285,40 +351,74 @@ public void endElement(final QName qname) throws TransformerException { writer.write('>'); } elementName.pop(); + if (!nstack.isEmpty()) { + nspaces.subList(nstack.pop(), nspaces.size()).clear(); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } } public void namespace(final String prefix, final String nsURI) throws TransformerException { - if((nsURI == null) && (prefix == null || prefix.isEmpty())) { + final String normPrefix = prefix != null ? prefix : ""; + final String normUri = nsURI != null ? 
nsURI : ""; + + // The xml namespace is implicitly declared and never needs explicit serialization + if ("xml".equals(normPrefix)) { return; } - try { - if(!tagIsOpen) { + try { + if (!tagIsOpen) { + // An xmlns="" outside a start tag is harmless — just skip it + if (normUri.isEmpty() && normPrefix.isEmpty()) { + return; + } throw new TransformerException("Found a namespace declaration outside an element"); } - if(prefix != null && !prefix.isEmpty()) { - writer.write(' '); - writer.write("xmlns"); - writer.write(':'); - writer.write(prefix); - writer.write("=\""); - writeChars(nsURI, true); - writer.write('"'); - } else { - if(defaultNamespace.equals(nsURI)) { - return; + if (canonical) { + // Buffer for sorting — emitted in closeStartTag + // Validate: reject relative namespace URIs (SERE0024) + if (!normUri.isEmpty() && isRelativeUri(normUri)) { + throw new TransformerException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + normUri); } - writer.write(' '); - writer.write("xmlns"); + if (normPrefix.isEmpty() && normUri.isEmpty()) { + return; // Skip xmlns="" in canonical (not meaningful for no-namespace elements) + } + // Deduplicate: replace existing binding for same prefix + canonicalNamespaces.removeIf(ns -> ns[0].equals(normPrefix)); + canonicalNamespaces.add(new String[]{normPrefix, normUri}); + // Track in namespace stack so getDefaultNamespace() stays accurate + nspaces.add(normPrefix); + nspaces.add(normUri); + return; + } + + // Look up what is currently in scope for this prefix. + // nsLookup scans nspaces from innermost to outermost and falls back to the + // defaultNamespace baseline field for the default-namespace prefix. + final String inScope = nsLookup(normPrefix); + final String effective = inScope != null ? 
inScope : ""; + if (normUri.equals(effective)) { + return; // Binding unchanged — no declaration needed + } + + // Record the new binding so descendants can see it via nsLookup + nspaces.add(normPrefix); + nspaces.add(normUri); + + // Write the namespace declaration + writer.write(' '); + if (normPrefix.isEmpty()) { + writer.write("xmlns=\""); + } else { + writer.write("xmlns:"); + writer.write(normPrefix); writer.write("=\""); - writeChars(nsURI, true); - writer.write('"'); - defaultNamespace= nsURI; } + writeChars(normUri, true); + writer.write('"'); } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } @@ -329,8 +429,13 @@ public void attribute(String qname, CharSequence value) throws TransformerExcept if(!tagIsOpen) { characters(value); return; - // throw new TransformerException("Found an attribute outside an - // element"); + } + if (canonical) { + // Buffer for sorting — extract namespace URI from qname if prefixed + final int colon = qname.indexOf(':'); + final String nsUri = colon > 0 ? "" : ""; // string qname doesn't carry namespace + canonicalAttributes.add(new String[]{nsUri, colon > 0 ? qname.substring(colon + 1) : qname, qname, value.toString()}); + return; } writer.write(' '); writer.write(qname); @@ -347,8 +452,18 @@ public void attribute(final QName qname, final CharSequence value) throws Transf if(!tagIsOpen) { characters(value); return; - // throw new TransformerException("Found an attribute outside an - // element"); + } + if (canonical) { + final String nsUri = qname.getNamespaceURI() != null ? 
qname.getNamespaceURI() : ""; + final String localName = qname.getLocalPart(); + final String fullName; + if (qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { + fullName = qname.getPrefix() + ":" + localName; + } else { + fullName = localName; + } + canonicalAttributes.add(new String[]{nsUri, localName, fullName, value.toString()}); + return; } writer.write(' '); if(qname.getPrefix() != null && !qname.getPrefix().isEmpty()) { @@ -373,12 +488,68 @@ public void characters(final CharSequence chars) throws TransformerException { if(tagIsOpen) { closeStartTag(false); } - writeChars(chars, false); + // When xdmSerialization is active and current element is in cdata-section-elements, + // wrap text content in CDATA instead of escaping it (per W3C Serialization 3.1) + if (shouldUseCdataSections() && !elementName.isEmpty() + && cdataSectionElements.get().contains(elementName.peek())) { + writeCdataContent(chars); + } else { + writeChars(chars, false); + } } catch(final IOException ioe) { throw new TransformerException(ioe.getMessage(), ioe); } } + private void writeCdataContent(final CharSequence chars) throws IOException { + // CDATA sections must be split when: + // 1. The content contains "]]>" (which would end the CDATA prematurely) + // 2. 
A character cannot be represented in the output encoding (must be escaped as &#xNN;) + final String s = normalize(chars).toString(); + boolean inCdata = false; + for (int i = 0; i < s.length(); ) { + final int cp = s.codePointAt(i); + final int cpLen = Character.charCount(cp); + + // Check for "]]>" sequence + if (cp == ']' && i + 2 < s.length() && s.charAt(i + 1) == ']' && s.charAt(i + 2) == '>') { + if (!inCdata) { + writer.write(""); + inCdata = false; + i += 2; // skip "]]", the ">" will be picked up next + continue; + } + + // Check if character is encodable in the output charset + if (!charSet.inCharacterSet((char) cp)) { + // Close any open CDATA section + if (inCdata) { + writer.write("]]>"); + inCdata = false; + } + // Write as character reference + writer.write("&#x"); + writer.write(Integer.toHexString(cp)); + writer.write(';'); + } else { + // Encodable character — write inside CDATA + if (!inCdata) { + writer.write(""); + } + } + public void characters(final char[] ch, final int start, final int len) throws TransformerException { if(!declarationWritten) { writeDeclaration(); @@ -510,8 +681,23 @@ public void documentType(final String name, final String publicId, final String protected void closeStartTag(final boolean isEmpty) throws TransformerException { try { if(tagIsOpen) { - if(isEmpty) { + if (canonical) { + flushCanonicalBuffers(); + } + if(isEmpty && !canonical) { + // Canonical XML: empty elements expanded to writer.write("/>"); + } else if (isEmpty) { + // Canonical: write > for empty elements + writer.write('>'); + final QName currentElem = elementName.peek(); + writer.write("'); } else { writer.write('>'); } @@ -522,6 +708,52 @@ protected void closeStartTag(final boolean isEmpty) throws TransformerException } } + protected boolean isCanonical() { + return canonical; + } + + protected void flushCanonicalBuffersXhtml() throws TransformerException { + try { + flushCanonicalBuffers(); + } catch (final IOException ioe) { + throw new 
TransformerException(ioe.getMessage(), ioe); + } + } + + private void flushCanonicalBuffers() throws IOException { + // Sort namespaces by prefix (default namespace first, then alphabetical) + canonicalNamespaces.sort((a, b) -> a[0].compareTo(b[0])); + // Write sorted namespaces + for (final String[] ns : canonicalNamespaces) { + writer.write(' '); + if (ns[0].isEmpty()) { + writer.write("xmlns=\""); + } else { + writer.write("xmlns:"); + writer.write(ns[0]); + writer.write("=\""); + } + writeChars(ns[1], true); + writer.write('"'); + } + canonicalNamespaces.clear(); + + // Sort attributes by namespace URI (primary), then local name (secondary) + canonicalAttributes.sort((a, b) -> { + final int cmp = a[0].compareTo(b[0]); + return cmp != 0 ? cmp : a[1].compareTo(b[1]); + }); + // Write sorted attributes + for (final String[] attr : canonicalAttributes) { + writer.write(' '); + writer.write(attr[2]); // qualified name + writer.write("=\""); + writeChars(attr[3], true); + writer.write('"'); + } + canonicalAttributes.clear(); + } + protected void writeDeclaration() throws TransformerException { if(declarationWritten) { return; @@ -537,7 +769,9 @@ protected void writeDeclaration() throws TransformerException { // get the fields of the persisted xml declaration, but overridden with any properties from the serialization properties final String version = outputProperties.getProperty(OutputKeys.VERSION, (originalXmlDecl.version != null ? originalXmlDecl.version : DEFAULT_XML_VERSION)); final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, (originalXmlDecl.encoding != null ? 
originalXmlDecl.encoding : DEFAULT_XML_ENCODING)); - @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone); + @Nullable final String standaloneOrig = outputProperties.getProperty(OutputKeys.STANDALONE, originalXmlDecl.standalone); + // "omit" means standalone should be absent from the declaration + @Nullable final String standalone = (standaloneOrig != null && "omit".equalsIgnoreCase(standaloneOrig.trim())) ? null : standaloneOrig; writeDeclaration(version, encoding, standalone); @@ -545,11 +779,15 @@ protected void writeDeclaration() throws TransformerException { } final String omitXmlDecl = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); - if ("no".equals(omitXmlDecl)) { + @Nullable final String standaloneRaw = outputProperties.getProperty(OutputKeys.STANDALONE); + // "omit" means standalone should be absent from the declaration + @Nullable final String standalone = (standaloneRaw != null && "omit".equalsIgnoreCase(standaloneRaw.trim())) ? 
null : standaloneRaw; + // Per W3C Serialization 3.1: output declaration if omit-xml-declaration is false/no/0, + // or if standalone is explicitly set (the declaration is required to carry standalone) + if (isBooleanFalse(omitXmlDecl) || standalone != null) { // get the fields of the declaration from the serialization properties final String version = outputProperties.getProperty(OutputKeys.VERSION, DEFAULT_XML_VERSION); final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, DEFAULT_XML_ENCODING); - @Nullable final String standalone = outputProperties.getProperty(OutputKeys.STANDALONE); writeDeclaration(version, encoding, standalone); } @@ -564,7 +802,15 @@ private void writeDeclaration(final String version, final String encoding, @Null writer.write('"'); if(standalone != null) { writer.write(" standalone=\""); - writer.write(standalone); + // Normalize boolean values to yes/no for XML declaration + final String standaloneVal = standalone.trim(); + if ("true".equals(standaloneVal) || "1".equals(standaloneVal)) { + writer.write("yes"); + } else if ("false".equals(standaloneVal) || "0".equals(standaloneVal)) { + writer.write("no"); + } else { + writer.write(standaloneVal); + } writer.write('"'); } writer.write("?>\n"); @@ -589,36 +835,79 @@ protected void writeDoctype(final String rootElement) throws TransformerExceptio protected boolean needsEscape(final char ch) { return true; } + + /** + * Whether & before { should be escaped. HTML output returns false + * per W3C HTML serialization spec. XML output returns true (always escape &). + */ + protected boolean escapeAmpersandBeforeBrace() { + return true; + } + + /** + * Check if a serialization boolean parameter value is false. + * W3C Serialization 3.1 accepts "no", "false", "0" (with optional whitespace) as false. 
+ */ + protected static boolean isBooleanFalse(final String value) { + if (value == null) { + return false; + } + final String trimmed = value.trim(); + return "no".equals(trimmed) || "false".equals(trimmed) || "0".equals(trimmed); + } + + /** + * Whether the given character needs escaping. Subclasses can override + * to suppress escaping for specific contexts (e.g., HTML raw text elements). + * + * @param ch the character to check + * @param inAttribute true if we're writing an attribute value + */ + protected boolean needsEscape(final char ch, final boolean inAttribute) { + return needsEscape(ch); + } protected void writeChars(final CharSequence s, final boolean inAttribute) throws IOException { + // Apply Unicode normalization if configured + final CharSequence text = normalize(s); final boolean[] specialChars = inAttribute ? attrSpecialChars : textSpecialChars; char ch = 0; - final int len = s.length(); + final int len = text.length(); int pos = 0, i; while(pos < len) { i = pos; while(i < len) { - ch = s.charAt(i); + ch = text.charAt(i); if(ch < 128) { if(specialChars[ch]) { break; + } else if(xml11 && ch >= 0x01 && ch <= 0x1F + && ch != 0x09 && ch != 0x0A && ch != 0x0D) { + // XML 1.1: C0 control chars (except TAB, LF, CR) must be escaped + break; } else { i++; } } else if(!charSet.inCharacterSet(ch)) { break; + } else if(ch >= 0x7F && ch <= 0x9F) { + // Control chars 0x7F-0x9F must be serialized as character references + break; + } else if(ch == 0x2028) { + // LINE SEPARATOR must be serialized as character reference + break; } else { i++; } } - writeCharSeq(s, pos, i); + writeCharSeq(text, pos, i); // writer.write(s.subSequence(pos, i).toString()); if (i >= len) { return; } - if(needsEscape(ch)) { + if(needsEscape(ch, inAttribute)) { switch(ch) { case '<': writer.write("<"); @@ -627,7 +916,12 @@ protected void writeChars(final CharSequence s, final boolean inAttribute) throw writer.write(">"); break; case '&': - writer.write("&"); + // HTML spec: & before { 
in attribute values should not be escaped + if (inAttribute && i + 1 < len && text.charAt(i + 1) == '{' && !escapeAmpersandBeforeBrace()) { + writer.write('&'); + } else { + writer.write("&"); + } break; case '\r': writer.write(" "); @@ -672,6 +966,38 @@ protected void writeCharacterReference(final char charval) throws IOException { writer.write(charref, 0, o); } + @Nullable + private static java.text.Normalizer.Form parseNormalizationForm(final String value) { + if (value == null) return null; + return switch (value.trim().toUpperCase(java.util.Locale.ROOT)) { + case "NFC" -> java.text.Normalizer.Form.NFC; + case "NFD" -> java.text.Normalizer.Form.NFD; + case "NFKC" -> java.text.Normalizer.Form.NFKC; + case "NFKD" -> java.text.Normalizer.Form.NFKD; + case "NONE", "" -> null; + default -> null; // "fully-normalized" or unknown — treated as none + }; + } + + /** + * Apply Unicode normalization if a normalization-form is set. + */ + protected CharSequence normalize(final CharSequence text) { + if (normalizationForm == null) return text; + final String s = text.toString(); + if (java.text.Normalizer.isNormalized(s, normalizationForm)) return text; + return java.text.Normalizer.normalize(s, normalizationForm); + } + + private static boolean isRelativeUri(final String uri) { + for (int i = 0; i < uri.length(); i++) { + final char c = uri.charAt(i); + if (c == ':') return false; + if (c == '/' || c == '?' 
|| c == '#') return true; + } + return true; + } + private static class XMLDeclaration { @Nullable final String version; @Nullable final String encoding; diff --git a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java index 366e3866cbc..55526b7ff04 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/XQuerySerializer.java @@ -32,6 +32,7 @@ import org.xml.sax.SAXNotSupportedException; import javax.xml.transform.OutputKeys; +import java.io.IOException; import java.io.Writer; import java.util.Properties; @@ -70,19 +71,172 @@ public void serialize(final Sequence sequence, final int start, final int howman case "json": serializeJSON(sequence, compilationTime, executionTime); break; + case "csv": + serializeCSV(sequence); + break; case "xml": default: - serializeXML(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + // For XML/text methods, flatten any arrays in the sequence before serialization + // (arrays can't be serialized as SAX events directly) + // Maps and function items cannot be serialized with XML/text methods (SENR0001) + validateXmlSerializable(sequence); + if (isCanonical()) { + validateCanonical(sequence); + } + final Sequence flattened = flattenArrays(sequence); + if (flattened != sequence) { + // Flattening changed the sequence — reset start/howmany to cover all items. + // For text method, default item-separator is space if not explicitly set. 
+ if ("text".equals(method) && outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR) == null) { + outputProperties.setProperty(EXistOutputKeys.ITEM_SEPARATOR, " "); + } + serializeXML(flattened, 1, flattened.getItemCount(), wrap, typed, compilationTime, executionTime); + } else { + serializeXML(flattened, start, howmany, wrap, typed, compilationTime, executionTime); + } + break; + } + } + + /** + * Validate that a sequence can be serialized with the XML/text method. + * Maps and function items are not serializable as XML (SENR0001). + */ + private static void validateXmlSerializable(final Sequence sequence) throws SAXException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + final int type = item.getType(); + if (type == Type.MAP_ITEM || type == Type.FUNCTION) { + throw new SAXException("err:SENR0001 Cannot serialize a " + + Type.getTypeName(type) + " with the XML or text output method"); + } + } + } + + private boolean isCanonical() { + final String v = outputProperties.getProperty(EXistOutputKeys.CANONICAL); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + + /** + * Validate canonical XML constraints (SERE0024). + * Checks for relative namespace URIs and multi-root documents. 
+ */ + private void validateCanonical(final Sequence sequence) throws SAXException, XPathException { + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (Type.subTypeOf(item.getType(), Type.NODE)) { + validateCanonicalNode((NodeValue) item); + } + } + } + + private void validateCanonicalNode(final NodeValue node) throws SAXException, XPathException { + if (node.getType() == Type.DOCUMENT) { + // Check for multi-root: document must have exactly one element child + int elementCount = 0; + final org.w3c.dom.Node domNode = node.getNode(); + for (org.w3c.dom.Node child = domNode.getFirstChild(); child != null; child = child.getNextSibling()) { + if (child.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + elementCount++; + } + } + if (elementCount != 1) { + throw new SAXException("err:SERE0024 Canonical serialization requires a well-formed document with exactly one root element, found " + elementCount); + } + // Check namespace URIs on the document's elements + validateCanonicalNamespaces(domNode); + } else if (node.getType() == Type.ELEMENT) { + validateCanonicalNamespaces(node.getNode()); + } + } + + private void validateCanonicalNamespaces(final org.w3c.dom.Node node) throws SAXException { + if (node.getNodeType() == org.w3c.dom.Node.ELEMENT_NODE) { + final String nsUri = node.getNamespaceURI(); + if (nsUri != null && !nsUri.isEmpty() && isRelativeUri(nsUri)) { + throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + nsUri); + } + // Also check namespace URIs in attributes (including xmlns declarations) + final org.w3c.dom.NamedNodeMap attrs = node.getAttributes(); + if (attrs != null) { + for (int i = 0; i < attrs.getLength(); i++) { + final org.w3c.dom.Attr attr = (org.w3c.dom.Attr) attrs.item(i); + final String attrName = attr.getName(); + // Check xmlns and xmlns:prefix declarations + if ("xmlns".equals(attrName) || attrName.startsWith("xmlns:")) { + final 
String declUri = attr.getValue(); + if (declUri != null && !declUri.isEmpty() && isRelativeUri(declUri)) { + throw new SAXException("err:SERE0024 Canonical serialization does not allow relative namespace URIs: " + declUri); + } + } + } + } + // Check child elements recursively + for (org.w3c.dom.Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) { + validateCanonicalNamespaces(child); + } + } + } + + private static boolean isRelativeUri(final String uri) { + // Absolute URIs contain a scheme (e.g., "http://", "urn:", "file:") + // A URI without ":" before the first "/" or "?" is relative + for (int i = 0; i < uri.length(); i++) { + final char c = uri.charAt(i); + if (c == ':') return false; // Found scheme separator — absolute + if (c == '/' || c == '?' || c == '#') return true; // Path/query before scheme — relative + } + return true; // No scheme found — relative (e.g., "local.ns") + } + + /** + * Flatten arrays in a sequence — each array member becomes a top-level item. + * This is needed because the SAX-based XML/text serializer can't handle ArrayType items. 
+ */ + private static Sequence flattenArrays(final Sequence sequence) throws XPathException { + boolean hasArrays = false; + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (i.nextItem().getType() == Type.ARRAY_ITEM) { + hasArrays = true; break; + } + } + if (!hasArrays) { + return sequence; } + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + final Item item = i.nextItem(); + if (item.getType() == Type.ARRAY_ITEM) { + final Sequence flat = org.exist.xquery.functions.array.ArrayType.flatten(item); + for (final SequenceIterator fi = flat.iterate(); fi.hasNext(); ) { + result.add(fi.nextItem()); + } + } else { + result.add(item); + } + } + return result; } public boolean normalize() { final String method = outputProperties.getProperty(OutputKeys.METHOD, "xml"); - return !("json".equals(method) || "adaptive".equals(method)); + return !("json".equals(method) || "adaptive".equals(method) || "csv".equals(method)); } private void serializeXML(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, XPathException { + final String itemSeparator = outputProperties.getProperty(EXistOutputKeys.ITEM_SEPARATOR); + // If item-separator is set and sequence has multiple items, serialize items individually + // with separator between them (the internal Serializer doesn't handle item-separator) + if (itemSeparator != null && sequence.getItemCount() > 1 && !wrap) { + serializeXMLWithItemSeparator(sequence, start, howmany, typed, itemSeparator); + } else { + serializeXMLDirect(sequence, start, howmany, wrap, typed, compilationTime, executionTime); + } + } + + private void serializeXMLDirect(final Sequence sequence, final int start, final int howmany, final boolean wrap, final boolean typed, final long compilationTime, final long executionTime) throws SAXException, 
XPathException { final Serializer serializer = broker.borrowSerializer(); SAXSerializer sax = null; try { @@ -102,17 +256,89 @@ private void serializeXML(final Sequence sequence, final int start, final int ho } } + private void serializeXMLWithItemSeparator(final Sequence sequence, final int start, final int howmany, final boolean typed, final String itemSeparator) throws SAXException, XPathException { + // Write XML declaration if not omitted (per W3C Serialization 3.1) + if (!isBooleanTrue(outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION, "no"))) { + try { + final String version = outputProperties.getProperty(OutputKeys.VERSION, "1.0"); + final String encoding = outputProperties.getProperty(OutputKeys.ENCODING, "UTF-8"); + writer.write(""); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + + final int actualStart = start - 1; // convert 1-based to 0-based + final int end = Math.min(actualStart + howmany, sequence.getItemCount()); + for (int i = actualStart; i < end; i++) { + if (i > actualStart) { + try { + writer.write(itemSeparator); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + final Item item = sequence.itemAt(i); + if (item == null) { + continue; + } + if (Type.subTypeOf(item.getType(), Type.NODE)) { + // For nodes serialized with item-separator, omit the XML declaration + // on each individual node (only one declaration for the whole output) + final Properties nodeProps = new Properties(outputProperties); + nodeProps.setProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); + final Serializer serializer = broker.borrowSerializer(); + SAXSerializer sax = null; + try { + sax = (SAXSerializer) SerializerPool.getInstance().borrowObject(SAXSerializer.class); + sax.setOutput(writer, nodeProps); + serializer.setProperties(nodeProps); + serializer.setSAXHandlers(sax, sax); + final ValueSequence singleItem = new ValueSequence(1); + singleItem.add(item); + serializer.toSAX(singleItem, 1, 1, 
false, typed, 0, 0); + } catch (SAXNotSupportedException | SAXNotRecognizedException e) { + throw new SAXException(e.getMessage(), e); + } finally { + if (sax != null) { + SerializerPool.getInstance().returnObject(sax); + } + broker.returnSerializer(serializer); + } + } else { + try { + writer.write(item.getStringValue()); + } catch (IOException e) { + throw new SAXException(e.getMessage(), e); + } + } + } + } + + private static boolean isBooleanTrue(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + private void serializeJSON(final Sequence sequence, final long compilationTime, final long executionTime) throws SAXException, XPathException { - // backwards compatibility: if the sequence contains a single element, we assume - // it should be transformed to JSON following the rules of the old JSON writer + // Backwards compatibility: if the sequence contains a single element or document, + // use the legacy XML-to-JSON writer (which converts XML structure to JSON properties). + // This is needed for RESTXQ and REST API which return XML documents with method=json. + // Maps, arrays, atomics, and multi-item sequences go through the W3C-compliant JSONSerializer. 
if (sequence.hasOne() && (Type.subTypeOf(sequence.getItemType(), Type.DOCUMENT) || Type.subTypeOf(sequence.getItemType(), Type.ELEMENT))) { - serializeXML(sequence, 1, 1, false, false, compilationTime, executionTime); + serializeXMLDirect(sequence, 1, 1, false, false, compilationTime, executionTime); } else { JSONSerializer serializer = new JSONSerializer(broker, outputProperties); serializer.serialize(sequence, writer); } } + private void serializeCSV(final Sequence sequence) throws SAXException { + final CSVSerializer serializer = new CSVSerializer(outputProperties); + serializer.serialize(sequence, writer); + } + private void serializeAdaptive(final Sequence sequence) throws SAXException, XPathException { final AdaptiveSerializer serializer = new AdaptiveSerializer(broker); serializer.setOutput(writer, outputProperties); diff --git a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java index bd1f01a9454..da22cd51163 100644 --- a/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java +++ b/exist-core/src/main/java/org/exist/util/serializer/json/JSONSerializer.java @@ -23,53 +23,94 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.json.JsonWriteFeature; import io.lacuna.bifurcan.IEntry; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import org.exist.storage.DBBroker; import org.exist.storage.serializers.EXistOutputKeys; import org.exist.storage.serializers.Serializer; +import org.exist.xquery.ErrorCodes; import org.exist.xquery.XPathException; import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.util.SerializerUtils; import org.exist.xquery.value.*; import org.xml.sax.SAXException; +import javax.annotation.Nullable; import 
javax.xml.transform.OutputKeys; import java.io.IOException; import java.io.Writer; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import java.util.Properties; +import java.util.Set; /** * Called by {@link org.exist.util.serializer.XQuerySerializer} to serialize an XQuery sequence * to JSON. The JSON serializer differs from other serialization methods because it maps XQuery * data items to JSON. * + * Per W3C XSLT and XQuery Serialization 3.1 Section 10 (JSON Output Method). + * * @author Wolf */ public class JSONSerializer { private final DBBroker broker; private final Properties outputProperties; + private final boolean allowDuplicateNames; + private final boolean canonical; + @Nullable private final Int2ObjectMap characterMap; public JSONSerializer(DBBroker broker, Properties outputProperties) { super(); this.broker = broker; this.outputProperties = outputProperties; + final String canonicalProp = outputProperties.getProperty(EXistOutputKeys.CANONICAL); + this.canonical = isBooleanTrue(canonicalProp); + // Canonical mode: always reject duplicate keys + this.allowDuplicateNames = !canonical && "yes".equals( + outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes")); + this.characterMap = SerializerUtils.getCharacterMap(outputProperties); } public void serialize(Sequence sequence, Writer writer) throws SAXException { - JsonFactory factory = new JsonFactory(); + // QT4: escape-solidus controls whether / is escaped as \/ + // Default is "no" for XQ 3.1 compatibility (parameter doesn't exist in 3.1 spec) + // Canonical JSON (RFC 8785): solidus is NOT escaped + final boolean escapeSolidus = !canonical && isBooleanTrue( + outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "no")); + final JsonFactory factory = JsonFactory.builder() + .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus) + .build(); try { JsonGenerator generator = factory.createGenerator(writer); 
generator.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); - if ("yes".equals(outputProperties.getProperty(OutputKeys.INDENT, "no"))) { - generator.useDefaultPrettyPrinter(); + if (isBooleanTrue(outputProperties.getProperty(OutputKeys.INDENT, "no"))) { + final int indentSpaces = Integer.parseInt( + outputProperties.getProperty(EXistOutputKeys.INDENT_SPACES, "4")); + final com.fasterxml.jackson.core.util.DefaultPrettyPrinter pp = + new com.fasterxml.jackson.core.util.DefaultPrettyPrinter(); + pp.indentArraysWith( + com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent( + " ".repeat(indentSpaces))); + pp.indentObjectsWith( + com.fasterxml.jackson.core.util.DefaultIndenter.SYSTEM_LINEFEED_INSTANCE.withIndent( + " ".repeat(indentSpaces))); + generator.setPrettyPrinter(pp); } - if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.ALLOW_DUPLICATE_NAMES, "yes"))) { - generator.enable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + // Duplicate detection is handled manually in serializeMap for proper SERE0022 errors + generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + final boolean jsonLines = isBooleanTrue( + outputProperties.getProperty(EXistOutputKeys.JSON_LINES, "no")); + if (jsonLines) { + serializeJsonLines(sequence, generator); } else { - generator.disable(JsonGenerator.Feature.STRICT_DUPLICATE_DETECTION); + serializeSequence(sequence, generator); } - serializeSequence(sequence, generator); if ("yes".equals(outputProperties.getProperty(EXistOutputKeys.INSERT_FINAL_NEWLINE, "no"))) { generator.writeRaw('\n'); } @@ -79,12 +120,55 @@ public void serialize(Sequence sequence, Writer writer) throws SAXException { } } + /** + * JSON Lines format (NDJSON): one JSON value per line, no array wrapper. + * Per QT4 Serialization 4.0, when json-lines=true. 
+ */ + private void serializeJsonLines(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (sequence.isEmpty()) { + return; + } + // Each line must be a separate root-level value. Jackson adds separator + // whitespace between root values, so we serialize each item to a string + // and concatenate with newlines. + final boolean escapeSolidus = !isBooleanFalse( + outputProperties.getProperty(EXistOutputKeys.ESCAPE_SOLIDUS, "yes")); + boolean first = true; + for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { + if (!first) { + generator.writeRaw('\n'); + } + // Serialize this item to a standalone string + final java.io.StringWriter lineWriter = new java.io.StringWriter(); + final JsonFactory lineFactory = JsonFactory.builder() + .configure(JsonWriteFeature.ESCAPE_FORWARD_SLASHES, escapeSolidus) + .build(); + final JsonGenerator lineGen = lineFactory.createGenerator(lineWriter); + lineGen.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); + serializeItem(i.nextItem(), lineGen); + lineGen.close(); + // Write the line's JSON as raw content to avoid Jackson's root separator + generator.writeRaw(lineWriter.toString()); + first = false; + } + } + private void serializeSequence(Sequence sequence, JsonGenerator generator) throws IOException, XPathException, SAXException { + serializeSequence(sequence, generator, false); + } + + private void serializeSequence(Sequence sequence, JsonGenerator generator, boolean allowMultiItem) throws IOException, XPathException, SAXException { if (sequence.isEmpty()) { generator.writeNull(); } else if (sequence.hasOne() && "no".equals(outputProperties.getProperty(EXistOutputKeys.JSON_ARRAY_OUTPUT, "no"))) { serializeItem(sequence.itemAt(0), generator); + } else if (!allowMultiItem) { + // SERE0023: JSON output method cannot serialize a sequence of more than one item + // at the top level or as a map entry value + throw new SAXException("err:SERE0023 Sequence of " + 
sequence.getItemCount() + + " items cannot be serialized using the JSON output method"); } else { + // Inside arrays, multi-item sequences become JSON arrays generator.writeStartArray(); for (SequenceIterator i = sequence.iterate(); i.hasNext(); ) { serializeItem(i.nextItem(), generator); @@ -94,28 +178,150 @@ private void serializeSequence(Sequence sequence, JsonGenerator generator) throw } private void serializeItem(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException { - if (item.getType() == Type.ARRAY_ITEM) { + // XQuery 4.0 JNode: unwrap to underlying value and serialize that + if (item instanceof org.exist.xquery.value.jnode.JNode) { + final org.exist.xquery.value.jnode.JNode jnode = (org.exist.xquery.value.jnode.JNode) item; + serializeJNode(jnode, generator); + } else if (item.getType() == Type.ARRAY_ITEM) { serializeArray((ArrayType) item, generator); } else if (item.getType() == Type.MAP_ITEM) { serializeMap((MapType) item, generator); } else if (Type.subTypeOf(item.getType(), Type.ANY_ATOMIC_TYPE)) { - if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { - generator.writeNumber(item.getStringValue()); + serializeAtomicValue(item, generator); + } else if (Type.subTypeOf(item.getType(), Type.NODE)) { + serializeNode(item, generator); + } else if (Type.subTypeOf(item.getType(), Type.FUNCTION)) { + throw new SAXException("err:SERE0021 Sequence contains a function item, which cannot be serialized as JSON"); + } + } + + /** + * Serialize a JNode as JSON. The JNode's underlying value determines the output: + * maps → objects, arrays → arrays, atomics → JSON literals, null → null. 
+ */ + private void serializeJNode(org.exist.xquery.value.jnode.JNode jnode, JsonGenerator generator) throws IOException, XPathException, SAXException { + final Sequence value = jnode.getValue(); + if (value instanceof AbstractMapType) { + serializeMap((MapType) value, generator); + } else if (value instanceof ArrayType) { + serializeArray((ArrayType) value, generator); + } else if (value == Sequence.EMPTY_SEQUENCE || value.isEmpty()) { + generator.writeNull(); + } else if (value instanceof Item) { + final Item inner = (Item) value; + if (Type.subTypeOf(inner.getType(), Type.ANY_ATOMIC_TYPE)) { + serializeAtomicValue(inner, generator); } else { - switch (item.getType()) { - case Type.BOOLEAN: - generator.writeBoolean(((AtomicValue)item).effectiveBooleanValue()); - break; - default: - generator.writeString(item.getStringValue()); - break; + // Fallback: serialize as string + writeStringWithCharMap(generator, inner.getStringValue()); + } + } else { + // Sequence with multiple items — serialize as array + generator.writeStartArray(); + for (final SequenceIterator it = value.iterate(); it.hasNext(); ) { + serializeItem(it.nextItem(), generator); + } + generator.writeEndArray(); + } + } + + private void serializeAtomicValue(Item item, JsonGenerator generator) throws IOException, XPathException, SAXException { + if (Type.subTypeOfUnion(item.getType(), Type.NUMERIC)) { + if (canonical) { + // RFC 8785: cast to double, use shortest representation + final double d = ((NumericValue) item).getDouble(); + if (!Double.isFinite(d)) { + throw new SAXException("err:SERE0020 Numeric value " + item.getStringValue() + + " cannot be serialized in canonical JSON"); } + generator.writeRawValue(canonicalDoubleString(d)); + return; } - } else if (Type.subTypeOf(item.getType(), Type.NODE)) { - serializeNode(item, generator); + final String stringValue = item.getStringValue(); + // W3C Serialization 3.1: INF, -INF, and NaN MUST raise SERE0020 + if ("NaN".equals(stringValue) || 
"INF".equals(stringValue) || "-INF".equals(stringValue)) { + throw new SAXException("err:SERE0020 Numeric value " + stringValue + + " cannot be serialized as JSON"); + } else if ("-0".equals(stringValue)) { + // Negative zero: write as 0 (QT4 allows either 0 or -0) + generator.writeNumber(stringValue); + } else { + generator.writeNumber(stringValue); + } + } else if (item.getType() == Type.BOOLEAN) { + generator.writeBoolean(((AtomicValue) item).effectiveBooleanValue()); + } else { + writeStringWithCharMap(generator, item.getStringValue()); } } + /** + * RFC 8785 canonical double formatting. + * Uses ECMAScript shortest representation: minimum digits to uniquely + * identify the double value. Plain notation for [1e-6, 1e21), exponential + * notation otherwise with lowercase 'e'. + */ + private static String canonicalDoubleString(final double value) { + if (value == 0) return "0"; + if (value == Double.MIN_VALUE) return "5e-324"; + if (value == -Double.MIN_VALUE) return "-5e-324"; + + final java.math.BigDecimal bd = java.math.BigDecimal.valueOf(value).stripTrailingZeros(); + final double abs = Math.abs(value); + if (abs >= 1e-6 && abs < 1e21) { + return bd.toPlainString(); + } else { + return bd.toString().replace('E', 'e'); + } + } + + /** + * Apply use-character-maps substitutions to a string value. + * Character map replacements are written raw (not escaped by JSON). + */ + private String applyCharacterMap(final String value) { + if (characterMap == null || characterMap.isEmpty()) { + return value; + } + final StringBuilder sb = new StringBuilder(value.length()); + for (int i = 0; i < value.length(); ) { + final int cp = value.codePointAt(i); + i += Character.charCount(cp); + final String replacement = characterMap.get(cp); + if (replacement != null) { + sb.append(replacement); + } else { + sb.appendCodePoint(cp); + } + } + return sb.toString(); + } + + /** + * Write a string value to the JSON generator, applying character map + * substitutions. 
The mapped string is passed through writeString so + * Jackson handles JSON structural separators and escaping correctly. + */ + private void writeStringWithCharMap(final JsonGenerator generator, final String value) throws IOException { + if (characterMap == null || characterMap.isEmpty()) { + generator.writeString(value); + } else { + generator.writeString(applyCharacterMap(value)); + } + } + + private static boolean isBooleanTrue(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "yes".equals(v) || "true".equals(v) || "1".equals(v); + } + + private static boolean isBooleanFalse(final String value) { + if (value == null) return false; + final String v = value.trim(); + return "no".equals(v) || "false".equals(v) || "0".equals(v); + } + private void serializeNode(Item item, JsonGenerator generator) throws SAXException { final Serializer serializer = broker.borrowSerializer(); final Properties xmlOutput = new Properties(); @@ -124,7 +330,7 @@ private void serializeNode(Item item, JsonGenerator generator) throws SAXExcepti xmlOutput.setProperty(OutputKeys.INDENT, outputProperties.getProperty(OutputKeys.INDENT, "no")); try { serializer.setProperties(xmlOutput); - generator.writeString(serializer.serialize((NodeValue)item)); + writeStringWithCharMap(generator, serializer.serialize((NodeValue)item)); } catch (IOException e) { throw new SAXException(e.getMessage(), e); } finally { @@ -136,16 +342,50 @@ private void serializeArray(ArrayType array, JsonGenerator generator) throws IOE generator.writeStartArray(); for (int i = 0; i < array.getSize(); i++) { final Sequence member = array.get(i); - serializeSequence(member, generator); + // W3C Serialization 3.1: multi-item sequences within arrays raise SERE0023 + if (member.getItemCount() > 1) { + throw new SAXException("err:SERE0023 Array member at position " + (i + 1) + + " is a sequence of " + member.getItemCount() + " items"); + } + serializeSequence(member, generator, 
false); } generator.writeEndArray(); } private void serializeMap(MapType map, JsonGenerator generator) throws IOException, XPathException, SAXException { generator.writeStartObject(); - for (final IEntry entry: map) { - generator.writeFieldName(entry.key().getStringValue()); - serializeSequence(entry.value(), generator); + final Set seenKeys = allowDuplicateNames ? null : new HashSet<>(); + + // Canonical JSON (RFC 8785): sort keys by UTF-16 code unit order + final Iterable> entries; + if (canonical) { + final List> sorted = new ArrayList<>(); + for (final IEntry entry : map) { + sorted.add(entry); + } + sorted.sort((a, b) -> { + try { + return a.key().getStringValue().compareTo(b.key().getStringValue()); + } catch (XPathException e) { + return 0; + } + }); + entries = sorted; + } else { + final List> list = new ArrayList<>(); + for (final IEntry entry : map) { + list.add(entry); + } + entries = list; + } + + for (final IEntry entry : entries) { + final String key = entry.key().getStringValue(); + if (seenKeys != null && !seenKeys.add(key)) { + throw new SAXException("err:SERE0022 Duplicate key '" + key + "' in map and allow-duplicate-names is 'no'"); + } + generator.writeFieldName(key); + serializeSequence(entry.value(), generator, false); } generator.writeEndObject(); } diff --git a/exist-core/src/main/java/org/exist/validation/XmlLibraryChecker.java b/exist-core/src/main/java/org/exist/validation/XmlLibraryChecker.java index 5b9a570f3f3..ad240345548 100644 --- a/exist-core/src/main/java/org/exist/validation/XmlLibraryChecker.java +++ b/exist-core/src/main/java/org/exist/validation/XmlLibraryChecker.java @@ -54,7 +54,7 @@ public class XmlLibraryChecker { * Possible XML Transformers, at least one must be valid */ private final static ClassVersion[] validTransformers = { - new ClassVersion("Saxon", "8.9.0", "net.sf.saxon.Version.getProductVersion()"), + new ClassVersion("Saxon", "12.0", "net.sf.saxon.Version.getProductVersion()"), new ClassVersion("Xalan", "Xalan 
Java 2.7.1", "org.apache.xalan.Version.getVersion()"), }; diff --git a/exist-core/src/main/java/org/exist/xquery/AbstractFLWORClause.java b/exist-core/src/main/java/org/exist/xquery/AbstractFLWORClause.java index c088561e08b..f2d266f395d 100644 --- a/exist-core/src/main/java/org/exist/xquery/AbstractFLWORClause.java +++ b/exist-core/src/main/java/org/exist/xquery/AbstractFLWORClause.java @@ -46,6 +46,35 @@ public LocalVariable createVariable(final QName name) throws XPathException { return variable; } + /** + * Default chain-position behaviour: register this clause's tuple-stream + * variables on the active FLWOR scope so subsequent clauses' optimize() + * passes see them as in-scope, then recurse into the rest of the chain. + * + * Chain HEADS (where {@link #getPreviousClause()} is null) are limited to + * {@link ForExpr} and {@link LetExpr} per parser rules (see + * {@code parseFLWORInitialClause}), so push/pop of the scope happens in + * those subclasses' overrides — they bypass this base method to control + * the precise input-vs-return ordering loop-invariant hoisting requires. + * + * Bound-variable tracking matters because hoisting only fires when the + * candidate input does not reference an outer-scope variable. Without + * registering e.g. group-by keys here, an inner {@code for} whose input + * referenced a key would be falsely classified loop-invariant. 
+ */ + @Override + public Expression optimize(CompileContext cc) throws XPathException { + if (cc.inFlworChain()) { + for (final QName name : getTupleStreamVariables()) { + cc.addVisibleFlworVar(name); + } + } + if (returnExpr != null) { + returnExpr = returnExpr.optimize(cc); + } + return this; + } + @Override public Sequence preEval(Sequence seq) throws XPathException { if (returnExpr instanceof FLWORClause) { @@ -103,6 +132,11 @@ public void resetState(boolean postOptimization) { firstVariable = null; } + @Override + public boolean isUpdating() { + return returnExpr != null && returnExpr.isUpdating(); + } + @Override public int getDependencies() { return returnExpr.getDependencies(); diff --git a/exist-core/src/main/java/org/exist/xquery/Atomize.java b/exist-core/src/main/java/org/exist/xquery/Atomize.java index 19b4c9670ca..7ff5ef58a37 100644 --- a/exist-core/src/main/java/org/exist/xquery/Atomize.java +++ b/exist-core/src/main/java/org/exist/xquery/Atomize.java @@ -23,6 +23,7 @@ import org.exist.dom.persistent.DocumentSet; import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; import org.exist.xquery.util.ExpressionDumper; import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; @@ -75,15 +76,25 @@ public static Sequence atomize(Sequence input) throws XPathException { if (input.isEmpty()) {return Sequence.EMPTY_SEQUENCE;} input = ArrayType.flatten(input); - if (input.hasOne()) {return - input.itemAt(0).atomize(); + if (input.hasOne()) { + final Item single = input.itemAt(0); + // XQ4: maps are atomizable — expand to their values before atomizing + if (single instanceof AbstractMapType && ((AbstractMapType) single).isXq4Atomizable()) { + return ((AbstractMapType) single).atomizeValues(); + } + return single.atomize(); } Item next; final ValueSequence result = new ValueSequence(); for(final SequenceIterator i = input.iterate(); i.hasNext(); ) { next = i.nextItem(); - 
result.add(next.atomize()); + // XQ4: maps are atomizable — expand to their values before atomizing + if (next instanceof AbstractMapType && ((AbstractMapType) next).isXq4Atomizable()) { + result.addAll(((AbstractMapType) next).atomizeValues()); + } else { + result.add(next.atomize()); + } } return result; } diff --git a/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java b/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java index bb1720e67d9..2dcbfe4652c 100644 --- a/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/AttributeConstructor.java @@ -56,7 +56,7 @@ public void addValue(String value) { public void addEnclosedExpr(Expression expr) throws XPathException { if(isNamespaceDecl) - {throw new XPathException(this, "enclosed expressions are not allowed in namespace " + + {throw new XPathException(this, ErrorCodes.XQST0022, "enclosed expressions are not allowed in namespace " + "declaration attributes");} contents.add(expr); } diff --git a/exist-core/src/main/java/org/exist/xquery/BasicFunction.java b/exist-core/src/main/java/org/exist/xquery/BasicFunction.java index e9e51481f6c..e0ac80458d2 100644 --- a/exist-core/src/main/java/org/exist/xquery/BasicFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/BasicFunction.java @@ -21,8 +21,11 @@ */ package org.exist.xquery; +import org.exist.xquery.value.FunctionReference; import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; import org.exist.xquery.util.ExpressionDumper; /** @@ -91,4 +94,39 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP * @return The result of the XPath function */ public abstract Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException; + + /** + * Validates that each argument is compatible with the declared parameter type + * of a 
function reference. Useful for any built-in function that accepts + * callback functions (higher-order functions). + * + * @param caller the calling expression (for error reporting) + * @param ref the function reference whose parameter types to check against + * @param args the actual arguments to validate + * @throws XPathException with XPTY0004 if a type mismatch is detected + */ + protected static void checkFunctionParameterTypes(final Expression caller, final FunctionReference ref, final Sequence[] args) throws XPathException { + final SequenceType[] paramTypes = ref.getSignature().getArgumentTypes(); + if (paramTypes == null) { + return; + } + for (int i = 0; i < args.length && i < paramTypes.length; i++) { + final SequenceType expected = paramTypes[i]; + final int expectedType = expected.getPrimaryType(); + // Skip check if the declared type is item() — accepts anything + if (expectedType == Type.ITEM) { + continue; + } + final Sequence arg = args[i]; + if (arg.isEmpty()) { + continue; + } + if (!expected.checkType(arg)) { + throw new XPathException(caller, ErrorCodes.XPTY0004, + "Invalid type for parameter " + (i + 1) + " of higher-order function call. 
" + + "Expected " + Type.getTypeName(expectedType) + + ", got " + Type.getTypeName(arg.getItemType())); + } + } + } } diff --git a/exist-core/src/main/java/org/exist/xquery/BinaryOp.java b/exist-core/src/main/java/org/exist/xquery/BinaryOp.java index 894f9f32ac9..e13254385b6 100644 --- a/exist-core/src/main/java/org/exist/xquery/BinaryOp.java +++ b/exist-core/src/main/java/org/exist/xquery/BinaryOp.java @@ -69,8 +69,12 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { inPredicate = (contextInfo.getFlags() & IN_PREDICATE) != 0; contextId = contextInfo.getContextId(); inWhereClause = (contextInfo.getFlags() & IN_WHERE_CLAUSE) != 0; - getLeft().analyze(new AnalyzeContextInfo(contextInfo)); - getRight().analyze(new AnalyzeContextInfo(contextInfo)); + final AnalyzeContextInfo leftInfo = new AnalyzeContextInfo(contextInfo); + leftInfo.addFlag(NON_UPDATING_CONTEXT); + getLeft().analyze(leftInfo); + final AnalyzeContextInfo rightInfo = new AnalyzeContextInfo(contextInfo); + rightInfo.addFlag(NON_UPDATING_CONTEXT); + getRight().analyze(rightInfo); } /* diff --git a/exist-core/src/main/java/org/exist/xquery/BindingExpression.java b/exist-core/src/main/java/org/exist/xquery/BindingExpression.java index 57ea725e6c3..4f0599295a5 100644 --- a/exist-core/src/main/java/org/exist/xquery/BindingExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/BindingExpression.java @@ -81,6 +81,14 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException unordered = (contextInfo.getFlags() & UNORDERED) > 0; } + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + if (inputSequence != null) { + inputSequence = inputSequence.optimize(cc); + } + return super.optimize(cc); + } + @Override public Sequence postEval(Sequence seq) throws XPathException { if (returnExpr instanceof FLWORClause flworClause) { diff --git a/exist-core/src/main/java/org/exist/xquery/CastExpression.java 
b/exist-core/src/main/java/org/exist/xquery/CastExpression.java index 8911c5c6144..fdcd58afb42 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastExpression.java @@ -84,12 +84,17 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr } } - // Should be handled by the parser - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { + // XPST0080: cannot cast to xs:NOTATION or xs:anyAtomicType (per XPath 3.1 §3.12.3) + if (requiredType == Type.ANY_ATOMIC_TYPE + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) { throw new XPathException(this, ErrorCodes.XPST0080, "cannot cast to " + Type.getTypeName(requiredType)); } - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == Type.UNTYPED) { + // XPST0051: cannot cast to non-atomic abstract types + if (requiredType == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED + || requiredType == Type.ANY_TYPE + || expression.returnsType() == Type.ANY_SIMPLE_TYPE + || expression.returnsType() == Type.UNTYPED) { throw new XPathException(this, ErrorCodes.XPST0051, "cannot cast to " + Type.getTypeName(requiredType)); } @@ -110,14 +115,31 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr if (requiredType == Type.QNAME) { if (item.getType() == Type.QNAME) { result = item.toSequence(); - } else if (item.getType() == Type.ANY_ATOMIC_TYPE || Type.subTypeOf(item.getType(), Type.STRING)) { + } else if (item.getType() == Type.ANY_ATOMIC_TYPE + || item.getType() == Type.UNTYPED_ATOMIC + || Type.subTypeOf(item.getType(), Type.STRING)) { result = new QNameValue(this, context, item.getStringValue()); - } else { throw new XPathException(this, ErrorCodes.XPTY0004, "Cannot cast " + 
Type.getTypeName(item.getType()) + " to xs:QName"); } } else { - result = item.convertTo(requiredType); + // Per XPath F&O 3.1, Section 19: if the source and target types + // have no valid casting relationship, raise XPTY0004 (not FORG0001). + if (!Type.isCastable(item.getType(), requiredType)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Cannot cast " + Type.getTypeName(item.getType()) + + " to " + Type.getTypeName(requiredType)); + } + // String-derived types (xs:language, xs:Name, xs:NMTOKEN, etc.): + // cast via xs:string intermediary per XPath F&O 3.1 casting table. + if (Type.subTypeOf(requiredType, Type.STRING) && requiredType != Type.STRING + && !Type.subTypeOf(item.getType(), Type.STRING) + && item.getType() != Type.UNTYPED_ATOMIC) { + final AtomicValue asString = item.convertTo(Type.STRING); + result = asString.convertTo(requiredType); + } else { + result = item.convertTo(requiredType); + } } } diff --git a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java index 9a0769f9653..93536befae2 100644 --- a/exist-core/src/main/java/org/exist/xquery/CastableExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CastableExpression.java @@ -93,10 +93,14 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc {context.getProfiler().message(this, Profiler.START_SEQUENCES, "CONTEXT ITEM", contextItem.toSequence());} } - if (requiredType == Type.ANY_ATOMIC_TYPE || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) + if (requiredType == Type.ANY_ATOMIC_TYPE + || (requiredType == Type.NOTATION && expression.returnsType() != Type.NOTATION)) {throw new XPathException(this, ErrorCodes.XPST0080, "cannot convert to " + Type.getTypeName(requiredType));} - if (requiredType == Type.ANY_SIMPLE_TYPE || expression.returnsType() == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED || expression.returnsType() == 
Type.UNTYPED) + if (requiredType == Type.ANY_SIMPLE_TYPE || requiredType == Type.UNTYPED + || requiredType == Type.ANY_TYPE + || expression.returnsType() == Type.ANY_SIMPLE_TYPE + || expression.returnsType() == Type.UNTYPED) {throw new XPathException(this, ErrorCodes.XPST0051, "cannot convert to " + Type.getTypeName(requiredType));} Sequence result; diff --git a/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java b/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java new file mode 100644 index 00000000000..1f58834103f --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ChoiceCastExpression.java @@ -0,0 +1,137 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements cast as (T1 | T2 | ...) from XQuery 4.0. + * Tries each target type in order and returns the first successful cast. 
+ */ +public class ChoiceCastExpression extends AbstractExpression { + + private final int[] targetTypes; + private final Cardinality cardinality; + private Expression expression; + + public ChoiceCastExpression(final XQueryContext context, final Expression expr, + final int[] targetTypes, final Cardinality cardinality) { + super(context); + this.targetTypes = targetTypes; + this.cardinality = cardinality; + this.expression = expr; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + if (seq.isEmpty()) { + if (cardinality.atLeastOne()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: empty sequence is not allowed here"); + } + return Sequence.EMPTY_SEQUENCE; + } + if (seq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "cardinality error: sequence with more than one item is not allowed here"); + } + + final Item item = seq.itemAt(0); + XPathException lastError = null; + + for (final int targetType : targetTypes) { + try { + return item.convertTo(targetType); + } catch (final XPathException e) { + lastError = e; + } + } + + throw new XPathException(this, ErrorCodes.FORG0001, + "Cannot cast " + Type.getTypeName(item.getType()) + + " to any of the choice types", lastError); + } + + @Override + public int returnsType() { + return Type.ANY_ATOMIC_TYPE; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_ONE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(" cast as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + dumper.display(" | "); + } + 
dumper.display(Type.getTypeName(targetTypes[i])); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(" cast as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + sb.append(" | "); + } + sb.append(Type.getTypeName(targetTypes[i])); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return expression.getDependencies() | Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } + +} diff --git a/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java b/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java new file mode 100644 index 00000000000..4d867b21e44 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ChoiceCastableExpression.java @@ -0,0 +1,128 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements castable as (T1 | T2 | ...) from XQuery 4.0. + * Returns true if the value can be cast to any of the target types. + */ +public class ChoiceCastableExpression extends AbstractExpression { + + private final int[] targetTypes; + private final Cardinality requiredCardinality; + private final Expression expression; + + public ChoiceCastableExpression(final XQueryContext context, final Expression expr, + final int[] targetTypes, final Cardinality requiredCardinality) { + super(context); + this.expression = expr; + this.targetTypes = targetTypes; + this.requiredCardinality = requiredCardinality; + } + + @Override + public int returnsType() { + return Type.BOOLEAN; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.EXACTLY_ONE; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + if (seq.isEmpty()) { + return BooleanValue.valueOf( + requiredCardinality.isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE)); + } + if (!requiredCardinality.isSuperCardinalityOrEqualOf(seq.getCardinality())) { + return BooleanValue.FALSE; + } + + final Item item = seq.itemAt(0); + for (final int targetType : targetTypes) { + try { + item.convertTo(targetType); + return BooleanValue.TRUE; + } catch (final XPathException e) { + // try next type 
+ } + } + return BooleanValue.FALSE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(" castable as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + dumper.display(" | "); + } + dumper.display(Type.getTypeName(targetTypes[i])); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(" castable as ("); + for (int i = 0; i < targetTypes.length; i++) { + if (i > 0) { + sb.append(" | "); + } + sb.append(Type.getTypeName(targetTypes[i])); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return Dependency.CONTEXT_SET + Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/CombiningExpression.java b/exist-core/src/main/java/org/exist/xquery/CombiningExpression.java index 2b65dda4344..7701b006dc6 100644 --- a/exist-core/src/main/java/org/exist/xquery/CombiningExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/CombiningExpression.java @@ -47,8 +47,13 @@ public CombiningExpression(final XQueryContext context, final PathExpr left, fin @Override public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { contextInfo.setParent(this); - left.analyze(contextInfo); - right.analyze(contextInfo); + // Operands of union/intersect/except are non-updating contexts + final AnalyzeContextInfo leftInfo = new AnalyzeContextInfo(contextInfo); + leftInfo.addFlag(NON_UPDATING_CONTEXT); + left.analyze(leftInfo); + final AnalyzeContextInfo rightInfo = new 
AnalyzeContextInfo(contextInfo); + rightInfo.addFlag(NON_UPDATING_CONTEXT); + right.analyze(rightInfo); } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/CompileContext.java b/exist-core/src/main/java/org/exist/xquery/CompileContext.java new file mode 100644 index 00000000000..034c0acadd4 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/CompileContext.java @@ -0,0 +1,390 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.Sequence; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Compilation context passed to {@link Expression#optimize(CompileContext)}. + * + * Carries the {@link XQueryContext}, captures rewrite decisions for visibility + * (consumed by {@code util:explain}-style diagnostics), and records whether any + * rewrite occurred so the driver knows to re-analyze. 
+ * + * The {@link #replaceWith(Expression, Expression, String)} helper is the + * preferred return path: each {@code optimize()} method computes its + * replacement and returns {@code cc.replaceWith(this, replacement, reason)}. + */ +public final class CompileContext { + + /** Internal namespace for synthesized hoist variables. */ + public static final String OPTIMIZER_NS = + "http://exist-db.org/xquery/internal/optimizer"; + + public final XQueryContext qc; + + private final List log = new ArrayList<>(); + private boolean changed; + + private final Deque flworScopes = new ArrayDeque<>(); + private int hoistedVarCounter; + + public CompileContext(final XQueryContext qc) { + this.qc = qc; + } + + /** + * Records an expression rewrite and returns the replacement. + * + * If {@code replacement == original}, this is a no-op and nothing is + * logged. Callers can therefore unconditionally return + * {@code cc.replaceWith(this, candidate, reason)} — when the candidate + * happens to be {@code this}, the log stays clean. + * + * @param original the expression being rewritten + * @param replacement the replacement (may be {@code original}) + * @param reason short human-readable cause, e.g. {@code "constant fold"} + * @return {@code replacement} + */ + public Expression replaceWith(final Expression original, final Expression replacement, + final String reason) { + if (replacement != original) { + changed = true; + log.add(String.format("REWRITE %s → %s (%s)", + abbreviate(original), abbreviate(replacement), reason)); + } + return replacement; + } + + /** Free-form info entry for diagnostics. */ + public void info(final String fmt, final Object... args) { + log.add(args.length == 0 ? fmt : String.format(fmt, args)); + } + + /** + * Pre-evaluates an expression with no dependencies and returns a literal + * wrapping the result. Caller is responsible for ensuring + * {@code expr.getDependencies() == Dependency.NO_DEPENDENCY}. 
+ * + * @param expr expression to pre-evaluate + * @return a {@link LiteralValue} wrapping the result, or {@code expr} + * itself if the result is not an {@link AtomicValue} + * @throws XPathException if evaluation fails + */ + public Expression preEval(final Expression expr) throws XPathException { + final Sequence value = expr.eval(null, null); + if (value.hasOne() && value.itemAt(0) instanceof AtomicValue atom) { + final LiteralValue literal = new LiteralValue(qc, atom); + literal.setLocation(expr.getLine(), expr.getColumn()); + return replaceWith(expr, literal, "constant fold"); + } + return expr; + } + + /** True if any {@link #replaceWith} actually swapped expressions. */ + public boolean hasOptimized() { + return changed; + } + + /** Read-only view of the log. */ + public List log() { + return Collections.unmodifiableList(log); + } + + private static String abbreviate(final Expression e) { + if (e == null) { + return ""; + } + final String s = e.toString(); + if (s.length() <= 60) { + return s; + } + return s.substring(0, 57) + "..."; + } + + /* ===== FLWOR chain scope tracking (loop-invariant hoisting) ===== */ + + /** + * Enter a new FLWOR chain scope. Called by a {@link FLWORClause} whose + * {@link FLWORClause#getPreviousClause()} is null (i.e., the head of a + * chain). Subsequent clauses in the same chain share the scope. + * + * Each pushed scope tracks (a) the QNames of variables visible to + * sub-expressions evaluated within it and (b) hoist actions to apply + * when the chain head's {@code optimize()} returns. + */ + public void enterFlworChain() { + flworScopes.push(new FlworChainScope()); + } + + /** True if at least one FLWOR chain scope is active. */ + public boolean inFlworChain() { + return !flworScopes.isEmpty(); + } + + /** Number of nested FLWOR chain scopes currently active. */ + public int flworChainDepth() { + return flworScopes.size(); + } + + /** + * Add a variable to the innermost (current) FLWOR chain scope. 
Vars are + * routed to one of two buckets based on whether the chain has yet seen a + * {@code for}-clause (recorded via {@link #recordForClause(FLWORClause)}): + *
<ul> + * <li>let-prefix vars (before the first FOR) — once-evaluated; safe + * for a hoisted expression to reference;</li> + * <li>loop-body vars (the FOR's variable, plus everything after) — + * per-iteration; a hoist that references one is unsafe and must be + * refused.</li> + * </ul>
+ */ + public void addVisibleFlworVar(final QName name) { + if (name != null && !flworScopes.isEmpty()) { + flworScopes.peek().recordVar(name); + } + } + + /** + * Mark this clause as the chain's first {@code for} (or no-op if a prior + * for was already seen). Determines two things on the current scope: + *
<ol> + * <li>subsequent {@link #addVisibleFlworVar} entries flow into the + * loop-body bucket;</li> + * <li>the FOR clause becomes the hoist insertion point — synthesised + * lets are spliced in just before it.</li> + * </ol>
+ */ + public void recordForClause(final FLWORClause forClause) { + if (forClause != null && !flworScopes.isEmpty()) { + flworScopes.peek().recordForClause(forClause); + } + } + + /** + * Returns the union of LOOP-BODY variables visible across all OUTER FLWOR + * scopes (excluding the current). These are the variables a hoist + * candidate must NOT reference to be safe — they vary per outer iteration. + * Let-prefix variables (bound before any outer FOR) are intentionally + * excluded: they are once-evaluated, so a hoisted expression referencing + * one remains correct after the rewrite. + */ + public Set getOuterLoopBodyVars() { + final Set result = new HashSet<>(); + if (flworScopes.size() <= 1) { + return result; + } + boolean first = true; + for (final FlworChainScope scope : flworScopes) { + if (first) { first = false; continue; } + result.addAll(scope.loopBodyVars); + } + return result; + } + + /** + * Generate a unique QName for a hoisted variable. The local part + * incorporates a per-context counter; the namespace is reserved for the + * optimizer so synthesized names cannot collide with user code. + */ + public QName generateHoistedVarName() { + ++hoistedVarCounter; + return new QName("__hoisted_" + hoistedVarCounter, OPTIMIZER_NS, "__opt"); + } + + /** + * Queue a hoist on the OUTERMOST chain scope that actually has a + * {@code for}-clause to insert before. Pure let-only chains (e.g. an + * outer {@code let $auction := …}) cannot host a hoist — there's no + * loop to lift it out of and no insertion point for + * {@code applyHoistsAndExitChain}. Skip them and let the next inner + * scope (which presumably contains the loop the hoist is meant to + * escape) take it. + */ + public void addPendingHoistToOutermost(final QName varName, final Expression expr) { + if (flworScopes.isEmpty()) { + return; + } + FlworChainScope target = null; + // Iterate head→tail (innermost→outermost). Last scope with a FOR wins. 
+ for (final FlworChainScope scope : flworScopes) { + if (scope.firstForClause != null) { + target = scope; + } + } + if (target == null) { + return; // no FOR anywhere: nothing to lift over. + } + target.pendingHoists.add(new HoistAction(varName, expr)); + changed = true; + } + + /** + * Pop the innermost FLWOR scope and, if it carries pending hoists, splice + * a chain of {@link LetExpr} clauses into the chain just before its + * outermost {@code for}-clause (the {@code firstForClause} recorded by + * {@link #recordForClause(FLWORClause)}). + * + * Two cases: + *
<ol> + * <li>The first FOR is the chain head itself — wrap {@code chainHead} + * with the new lets and return the new head; the caller's + * {@code returnExpr.optimize(cc)} captures the replacement.</li> + * <li>The first FOR is mid-chain (a {@code let}-prefix precedes it) — + * splice the new lets between the prefix's last clause and the FOR + * in place; the chain head stays the same and is returned + * unchanged.</li> + * </ol>
+ * + * If a scope has hoists but no FOR was recorded, the hoists are silently + * dropped (no iteration, no benefit). + */ + public Expression applyHoistsAndExitChain(final Expression chainHead) { + if (flworScopes.isEmpty()) { + return chainHead; + } + final FlworChainScope scope = flworScopes.pop(); + if (scope.pendingHoists.isEmpty() || scope.firstForClause == null) { + return chainHead; + } + + if (scope.firstForClause == chainHead) { + // Wrap the chain head; new head returned to caller. + Expression result = chainHead; + for (final HoistAction h : scope.pendingHoists) { + final LetExpr letExpr = buildHoistLet(h, result); + if (result instanceof FLWORClause flwor) { + flwor.setPreviousClause(letExpr); + } + logHoist(h, "head wrap"); + result = letExpr; + } + return result; + } + + // Mid-chain splice: find predecessor of firstForClause along the + // chain head's returnExpression links. + final FLWORClause predecessor = findPredecessor(chainHead, scope.firstForClause); + if (predecessor == null) { + // Defensive: the chain isn't shaped how we expect — skip rather + // than risk a malformed chain. 
+ return chainHead; + } + + Expression hoistChain = scope.firstForClause; + for (final HoistAction h : scope.pendingHoists) { + final LetExpr letExpr = buildHoistLet(h, hoistChain); + if (hoistChain instanceof FLWORClause flwor) { + flwor.setPreviousClause(letExpr); + } + logHoist(h, "mid-chain splice"); + hoistChain = letExpr; + } + predecessor.setReturnExpression(hoistChain); + if (hoistChain instanceof FLWORClause flwor) { + flwor.setPreviousClause(predecessor); + } + return chainHead; + } + + private LetExpr buildHoistLet(final HoistAction h, final Expression body) { + final LetExpr letExpr = new LetExpr(qc); + letExpr.setVariable(h.varName); + letExpr.setInputSequence(h.expr); + letExpr.setReturnExpression(body); + return letExpr; + } + + private void logHoist(final HoistAction h, final String mode) { + log.add(String.format("HOIST(%s): let $%s := %s", + mode, h.varName.getLocalPart(), abbreviate(h.expr))); + } + + private static FLWORClause findPredecessor(final Expression chainHead, + final FLWORClause target) { + if (!(chainHead instanceof FLWORClause cur)) { + return null; + } + while (cur != null) { + final Expression next = cur.getReturnExpression(); + if (next == target) { + return cur; + } + if (next instanceof FLWORClause nextClause) { + cur = nextClause; + } else { + return null; + } + } + return null; + } + + /** + * State for one active FLWOR chain scope. Tracks bound variables in two + * buckets ({@link #letPrefixVars} vs {@link #loopBodyVars}, separated by + * the first {@code for}-clause encountered) and queued hoists targeting + * this scope's outermost FOR position. 
+ */ + private static final class FlworChainScope { + final Set letPrefixVars = new HashSet<>(); + final Set loopBodyVars = new HashSet<>(); + FLWORClause firstForClause = null; + boolean firstForSeen = false; + final List pendingHoists = new ArrayList<>(); + + void recordVar(final QName name) { + if (firstForSeen) { + loopBodyVars.add(name); + } else { + letPrefixVars.add(name); + } + } + + void recordForClause(final FLWORClause forClause) { + if (!firstForSeen) { + firstForClause = forClause; + firstForSeen = true; + } + } + } + + /** A pending hoist: bind {@code varName} to {@code expr} via a new let. */ + private static final class HoistAction { + final QName varName; + final Expression expr; + + HoistAction(final QName varName, final Expression expr) { + this.varName = varName; + this.expr = expr; + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ConcatExpr.java b/exist-core/src/main/java/org/exist/xquery/ConcatExpr.java index fd19c50e2ef..6e509fa150d 100644 --- a/exist-core/src/main/java/org/exist/xquery/ConcatExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/ConcatExpr.java @@ -21,6 +21,7 @@ */ package org.exist.xquery; +import org.exist.xquery.functions.map.AbstractMapType; import org.exist.xquery.util.Error; import org.exist.xquery.value.*; @@ -65,8 +66,17 @@ public Sequence eval(Sequence contextSequence, Item contextItem) final Sequence seq = step.eval(contextSequence, contextItem); for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { final Item item = i.nextItem(); - if (Type.subTypeOf(item.getType(), Type.FUNCTION)) - {throw new XPathException(this, ErrorCodes.FOTY0013, "Got a function item as operand in string concatenation");} + if (Type.subTypeOf(item.getType(), Type.FUNCTION)) { + // XQ4: maps are no longer function items for atomization purposes + if (item instanceof AbstractMapType && ((AbstractMapType) item).isXq4Atomizable()) { + final Sequence atomized = ((AbstractMapType) item).atomizeValues(); + for (final 
SequenceIterator ai = atomized.iterate(); ai.hasNext(); ) { + concat.append(ai.nextItem().getStringValue()); + } + continue; + } + throw new XPathException(this, ErrorCodes.FOTY0013, "Got a function item as operand in string concatenation"); + } concat.append(item.getStringValue()); } } diff --git a/exist-core/src/main/java/org/exist/xquery/ConditionalExpression.java b/exist-core/src/main/java/org/exist/xquery/ConditionalExpression.java index 5f910a43603..455235dadf6 100644 --- a/exist-core/src/main/java/org/exist/xquery/ConditionalExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/ConditionalExpression.java @@ -70,6 +70,34 @@ public Cardinality getCardinality() { return Cardinality.superCardinalityOf(thenExpr.getCardinality(), elseExpr.getCardinality()); } + @Override + public boolean isUpdating() { + return thenExpr.isUpdating() || elseExpr.isUpdating(); + } + + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + testExpr = testExpr.optimize(cc); + thenExpr = thenExpr.optimize(cc); + elseExpr = elseExpr.optimize(cc); + + // Constant condition: replace with the chosen branch. + // Restricted to LiteralValue with NO_DEPENDENCY so we don't accidentally + // pre-evaluate something context-sensitive. + if (testExpr instanceof LiteralValue + && testExpr.getDependencies() == Dependency.NO_DEPENDENCY) { + try { + final boolean ebv = ((LiteralValue) testExpr).getValue().effectiveBooleanValue(); + final Expression chosen = ebv ? thenExpr : elseExpr; + return cc.replaceWith(this, chosen, + "constant " + (ebv ? "true" : "false") + " condition"); + } catch (final XPathException e) { + // Fall through and keep the if-then-else unchanged. 
+ } + } + return this; + } + /* (non-Javadoc) * @see org.exist.xquery.Expression#analyze(org.exist.xquery.Expression) */ @@ -77,12 +105,29 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { AnalyzeContextInfo myContextInfo = new AnalyzeContextInfo(contextInfo); myContextInfo.setFlags(myContextInfo.getFlags() & (~IN_PREDICATE)); myContextInfo.setParent(this); - testExpr.analyze(myContextInfo); + // Test expression is always a non-updating context + final AnalyzeContextInfo testInfo = new AnalyzeContextInfo(myContextInfo); + testInfo.addFlag(NON_UPDATING_CONTEXT); + testExpr.analyze(testInfo); // parent may have been modified by testExpr: set it again myContextInfo.setParent(this); thenExpr.analyze(myContextInfo); myContextInfo.setParent(this); elseExpr.analyze(myContextInfo); + + // XUST0001: if one branch is updating and the other is non-updating (and not vacuous) + final boolean thenUpdating = thenExpr.isUpdating(); + final boolean elseUpdating = elseExpr.isUpdating(); + if (thenUpdating != elseUpdating) { + if (thenUpdating && !elseExpr.isVacuous()) { + throw new XPathException(this, ErrorCodes.XUST0001, + "then branch is updating but else branch is not updating and not vacuous"); + } + if (elseUpdating && !thenExpr.isVacuous()) { + throw new XPathException(this, ErrorCodes.XUST0001, + "else branch is updating but then branch is not updating and not vacuous"); + } + } } /* (non-Javadoc) diff --git a/exist-core/src/main/java/org/exist/xquery/Constants.java b/exist-core/src/main/java/org/exist/xquery/Constants.java index 7a5069d7416..515d1990371 100644 --- a/exist-core/src/main/java/org/exist/xquery/Constants.java +++ b/exist-core/src/main/java/org/exist/xquery/Constants.java @@ -46,7 +46,11 @@ public interface Constants { "following-sibling", "namespace", "self", - "attribute-descendant" + "attribute-descendant", + "following-or-self", + "preceding-or-self", + "following-sibling-or-self", + "preceding-sibling-or-self" }; /** @@ -73,6 
+77,12 @@ public interface Constants { //combines /descendant-or-self::node()/attribute:* int DESCENDANT_ATTRIBUTE_AXIS = 13; + /** XQuery 4.0 axes */ + int FOLLOWING_OR_SELF_AXIS = 14; + int PRECEDING_OR_SELF_AXIS = 15; + int FOLLOWING_SIBLING_OR_SELF_AXIS = 16; + int PRECEDING_SIBLING_OR_SELF_AXIS = 17; + /** * Node types */ @@ -140,8 +150,11 @@ enum ArithmeticOperator { */ enum NodeComparisonOperator { IS("is"), + IS_NOT("is-not"), BEFORE("<<"), - AFTER(">>"); + AFTER(">>"), + FOLLOWS_OR_IS("follows-or-is"), + PRECEDES_OR_IS("precedes-or-is"); public final String symbol; diff --git a/exist-core/src/main/java/org/exist/xquery/DebuggableExpression.java b/exist-core/src/main/java/org/exist/xquery/DebuggableExpression.java index 96ca504b481..0f455e049d3 100644 --- a/exist-core/src/main/java/org/exist/xquery/DebuggableExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/DebuggableExpression.java @@ -52,6 +52,22 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { expression.analyze(contextInfo); } + /** + * Recurse the per-expression optimize() pass into the wrapped expression + * and capture any replacement. Without this, the inherited default no-op + * blocks the optimize pass at every debugger wrapper — including the one + * the FLWOR parser inserts around every {@code return} body. That would + * silently disable hoisting, constant folding, and every other rewrite + * inside a FLWOR's return. 
+ */ + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + if (expression != null) { + expression = expression.optimize(cc); + } + return this; + } + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { try { expression.getContext().expressionStart(expression); @@ -90,6 +106,11 @@ public boolean needsReset() { return true; } + @Override + public boolean isUpdating() { + return expression.isUpdating(); + } + public void accept(ExpressionVisitor visitor) { expression.accept(visitor); } diff --git a/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java b/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java index 46a54962ad5..2498539e0f0 100644 --- a/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java +++ b/exist-core/src/main/java/org/exist/xquery/DecimalFormat.java @@ -24,7 +24,7 @@ /** * Data class for a Decimal Format. * - * See https://www.w3.org/TR/xpath-31/#dt-static-decimal-formats + * See https://www.w3.org/TR/xquery-31/#id-decimal-format-decl * * NOTE: UTF-16 characters are stored as code-points! * @@ -32,6 +32,11 @@ */ public class DecimalFormat { + /** + * The default (unnamed) decimal format as defined by the XQuery 3.1 specification. 
+ * + * @see XQuery 3.1 §4.10: Decimal Format Declaration + */ public static final DecimalFormat UNNAMED = new DecimalFormat( '.', 'e', @@ -47,7 +52,7 @@ public class DecimalFormat { ); - // used both in the picture string, and in the formatted number + // Markers: used in the picture string to identify active elements public final int decimalSeparator; public final int exponentSeparator; public final int groupingSeparator; @@ -55,18 +60,38 @@ public class DecimalFormat { public final int perMille; public final int zeroDigit; - // used in the picture string + // used in the picture string only public final int digit; public final int patternSeparator; - //used in the result of formatting the number, but not in the picture string + // used in the result of formatting the number, but not in the picture string public final String infinity; public final String NaN; public final int minusSign; + // XQ4 renditions: output strings for properties that support char:rendition. + // When marker != rendition, the marker is used for picture parsing and the + // rendition string appears in the formatted output. 
+ public final String decimalSeparatorRendition; + public final String exponentSeparatorRendition; + public final String groupingSeparatorRendition; + public final String percentRendition; + public final String perMilleRendition; + public DecimalFormat(final int decimalSeparator, final int exponentSeparator, final int groupingSeparator, final int percent, final int perMille, final int zeroDigit, final int digit, final int patternSeparator, final String infinity, final String NaN, final int minusSign) { + this(decimalSeparator, exponentSeparator, groupingSeparator, percent, perMille, + zeroDigit, digit, patternSeparator, infinity, NaN, minusSign, + null, null, null, null, null); + } + + public DecimalFormat(final int decimalSeparator, final int exponentSeparator, final int groupingSeparator, + final int percent, final int perMille, final int zeroDigit, final int digit, + final int patternSeparator, final String infinity, final String NaN, final int minusSign, + final String decimalSeparatorRendition, final String exponentSeparatorRendition, + final String groupingSeparatorRendition, final String percentRendition, + final String perMilleRendition) { this.decimalSeparator = decimalSeparator; this.exponentSeparator = exponentSeparator; this.groupingSeparator = groupingSeparator; @@ -78,5 +103,11 @@ public DecimalFormat(final int decimalSeparator, final int exponentSeparator, fi this.infinity = infinity; this.NaN = NaN; this.minusSign = minusSign; + // Renditions default to the marker character as a string + this.decimalSeparatorRendition = decimalSeparatorRendition != null ? decimalSeparatorRendition : new String(Character.toChars(decimalSeparator)); + this.exponentSeparatorRendition = exponentSeparatorRendition != null ? exponentSeparatorRendition : new String(Character.toChars(exponentSeparator)); + this.groupingSeparatorRendition = groupingSeparatorRendition != null ? 
groupingSeparatorRendition : new String(Character.toChars(groupingSeparator)); + this.percentRendition = percentRendition != null ? percentRendition : new String(Character.toChars(percent)); + this.perMilleRendition = perMilleRendition != null ? perMilleRendition : new String(Character.toChars(perMille)); } } diff --git a/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java b/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java index 3495fed460f..a67eaee3544 100644 --- a/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/DocumentConstructor.java @@ -90,7 +90,7 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc while(next != null) { context.proceed(this, builder); if (next.getType() == Type.ATTRIBUTE || next.getType() == Type.NAMESPACE) - {throw new XPathException(this, "Found a node of type " + + {throw new XPathException(this, ErrorCodes.XPTY0004, "Found a node of type " + Type.getTypeName(next.getType()) + " inside a document constructor");} // if item is an atomic value, collect the string values of all // following atomic values and seperate them by a space. diff --git a/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java b/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java index 168c2da95a6..215a6e601ed 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicAttributeConstructor.java @@ -30,6 +30,8 @@ import org.exist.xquery.value.*; import org.w3c.dom.DOMException; +import javax.xml.XMLConstants; + /** * Represents a dynamic attribute constructor. 
The implementation differs from * AttributeConstructor as the evaluation is not controlled by the surrounding @@ -99,25 +101,53 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc final Sequence nameSeq = qnameExpr.eval(contextSequence, contextItem); if(!nameSeq.hasOne()) - {throw new XPathException(this, "The name expression should evaluate to a single value");} + {throw new XPathException(this, ErrorCodes.XPTY0004, "The name expression should evaluate to a single value");} final Item qnItem = nameSeq.itemAt(0); QName qn; if (qnItem.getType() == Type.QNAME) {qn = ((QNameValue) qnItem).getQName();} - else + else { + // Only xs:string and xs:untypedAtomic can be used as computed attribute names + final int itemType = qnItem.getType(); + if (!Type.subTypeOf(itemType, Type.STRING) && itemType != Type.UNTYPED_ATOMIC) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "The name expression must be of type xs:QName, xs:string, or xs:untypedAtomic, got " + Type.getTypeName(itemType)); + } try { qn = QName.parse(context, nameSeq.getStringValue(), null); } catch (final QName.IllegalQNameException e) { - throw new XPathException(this, ErrorCodes.XPTY0004, "'" + nameSeq.getStringValue() + "' is not a valid attribute name"); + throw new XPathException(this, ErrorCodes.XQDY0074, "'" + nameSeq.getStringValue() + "' is not a valid attribute name"); + } catch (final XPathException e) { + // QName.parse throws XPST0081 for undeclared prefixes, but in a computed + // constructor the name is evaluated dynamically, so this is XQDY0074 + if (e.getErrorCode() == ErrorCodes.XPST0081) { + throw new XPathException(this, ErrorCodes.XQDY0074, "'" + nameSeq.getStringValue() + "' is not a valid attribute name"); + } + throw e; } + } //Not in the specs but... 
makes sense if(!XMLNames.isName(qn.getLocalPart())) - {throw new XPathException(this, ErrorCodes.XPTY0004, "'" + qn.getLocalPart() + "' is not a valid attribute name");} + {throw new XPathException(this, ErrorCodes.XQDY0074, "'" + qn.getLocalPart() + "' is not a valid attribute name");} if ("xmlns".equals(qn.getLocalPart()) && qn.getNamespaceURI().isEmpty()) - {throw new XPathException(this, ErrorCodes.XQDY0044, "'" + qn.getLocalPart() + "' is not a valid attribute name");} + {throw new XPathException(this, ErrorCodes.XQDY0044, + "The node-name property of the node constructed by a computed attribute constructor " + + "is in no namespace and has local name xmlns.");} + + if (Namespaces.XMLNS_NS.equals(qn.getNamespaceURI())) + {throw new XPathException(this, ErrorCodes.XQDY0044, + "The node-name property of the node constructed by a computed attribute constructor " + + "is in the namespace " + Namespaces.XMLNS_NS + + " (corresponding to namespace prefix xmlns).");} + + // Auto-assign the 'xml' prefix for attributes in the XML namespace + if (Namespaces.XML_NS.equals(qn.getNamespaceURI()) + && (qn.getPrefix() == null || qn.getPrefix().isEmpty())) { + qn = new QName(qn.getLocalPart(), qn.getNamespaceURI(), XMLConstants.XML_NS_PREFIX); + } String value; final Sequence valueSeq = valueExpr.eval(contextSequence, contextItem); @@ -141,8 +171,13 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc final int nodeNr = builder.addAttribute(qn, value); node = builder.getDocument().getAttribute(nodeNr); } catch (final DOMException e) { + final String msg = e.getMessage(); + if (msg != null && msg.contains("XQTY0024")) { + throw new XPathException(this, ErrorCodes.XQTY0024, + "An attribute node cannot follow a node that is not an element or namespace node."); + } throw new XPathException(this, ErrorCodes.XQDY0025, "element has more than one attribute '" + qn + "'"); - } + } } finally { if (newDocumentContext) {context.popDocumentContext();} diff 
--git a/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java b/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java index 5accad4503e..436aeb7d57d 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicCardinalityCheck.java @@ -47,6 +47,10 @@ public DynamicCardinalityCheck(final XQueryContext context, final Cardinality re setLocation(expression.getLine(), expression.getColumn()); } + public Expression getExpression() { + return expression; + } + /* (non-Javadoc) * @see org.exist.xquery.Expression#analyze(org.exist.xquery.Expression) */ @@ -82,7 +86,14 @@ else if (seq.hasMany()) error.addArgs(ExpressionDumper.dump(expression), requiredCardinality.getHumanDescription(), seq.getItemCount()); - throw new XPathException(this, error.toString()); + final String errCode = error.getErrorCode(); + final ErrorCodes.ErrorCode xpathErrCode; + if ("XPDY0050".equals(errCode)) { + xpathErrCode = ErrorCodes.XPDY0050; + } else { + xpathErrCode = ErrorCodes.XPTY0004; + } + throw new XPathException(this, xpathErrCode, error.toString()); } if (context.getProfiler().isEnabled()) {context.getProfiler().end(this, "", seq);} diff --git a/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java b/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java index 1f32cbca2a8..5c3b8f04083 100644 --- a/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java +++ b/exist-core/src/main/java/org/exist/xquery/DynamicTypeCheck.java @@ -35,11 +35,17 @@ public class DynamicTypeCheck extends AbstractExpression { final private Expression expression; final private int requiredType; - + final private ErrorCodes.ErrorCode errorCode; + public DynamicTypeCheck(XQueryContext context, int requiredType, Expression expr) { + this(context, requiredType, expr, null); + } + + public DynamicTypeCheck(XQueryContext context, int requiredType, Expression expr, ErrorCodes.ErrorCode 
errorCode) { super(context); this.requiredType = requiredType; this.expression = expr; + this.errorCode = errorCode; } /* (non-Javadoc) @@ -73,6 +79,10 @@ else if (!seq.isEmpty()) { return result == null ? seq : result; } + private ErrorCodes.ErrorCode effectiveErrorCode() { + return errorCode != null ? errorCode : ErrorCodes.XPTY0004; + } + private void check(Sequence result, Item item) throws XPathException { int type = item.getType(); if (type == Type.NODE && @@ -82,14 +92,34 @@ private void check(Sequence result, Item item) throws XPathException { //Retrieve the actual node {type= ((NodeProxy) item).getNode().getNodeType();} } + // Record types: maps can satisfy record types structurally + if (requiredType == Type.RECORD && Type.subTypeOf(type, Type.MAP_ITEM)) { + // Let SequenceType.checkRecordType() handle structural validation + if (result != null) { result.add(item); } + return; + } + // XQuery 4.0 JNode types: JNode items satisfy json-node() and its subtypes + if (Type.subTypeOf(requiredType, Type.JSON_NODE) && item instanceof org.exist.xquery.value.jnode.JNode) { + final int jnodeType = item.getType(); + if (jnodeType == requiredType || Type.subTypeOf(jnodeType, requiredType)) { + if (result != null) { result.add(item); } + return; + } + } if(type != requiredType && !Type.subTypeOf(type, requiredType)) { //TODO : how to make this block more generic ? 
-pb if (type == Type.UNTYPED_ATOMIC) { + // XPTY0117: untypedAtomic cannot be coerced to namespace-sensitive types + if (requiredType == Type.QNAME || requiredType == Type.NOTATION) { + throw new XPathException(expression, ErrorCodes.XPTY0117, + "Cannot coerce xs:untypedAtomic to namespace-sensitive type " + + Type.getTypeName(requiredType)); + } try { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -103,32 +133,29 @@ private void check(Sequence result, Item item) throws XPathException { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } //Then, if duration, try to refine the type //No test on the type hierarchy ; this has to pass : //fn:months-from-duration(xs:duration("P1Y2M3DT10H30M")) - //TODO : find a way to enforce the test (by making a difference between casting and treating as ?) 
- } else if (Type.subTypeOf(requiredType, Type.DURATION) /*&& Type.subTypeOf(type, requiredType)*/) { + } else if (Type.subTypeOf(requiredType, Type.DURATION) && Type.subTypeOf(type, requiredType)) { try { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } //Then, if date, try to refine the type - //No test on the type hierarchy - //TODO : find a way to enforce the test (by making a difference between casting and treating as ?) - } else if (Type.subTypeOf(requiredType, Type.DATE) /*&& Type.subTypeOf(type, requiredType)*/) { + } else if (Type.subTypeOf(requiredType, Type.DATE) && Type.subTypeOf(type, requiredType)) { try { item = item.convertTo(requiredType); //No way } catch (final XPathException e) { - throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'"); } @@ -139,14 +166,17 @@ private void check(Sequence result, Item item) throws XPathException { } else if (type == Type.ANY_URI && requiredType == Type.STRING) { item = item.convertTo(Type.STRING); type = Type.STRING; + // XQuery 4.0 implicit casting (spec §3.4.1 item 4) + } else if (context.getXQueryVersion() >= 40) { + item = xq4ImplicitCast(item, type, requiredType); } else { if (!(Type.subTypeOf(type, requiredType))) { - throw new XPathException(expression, ErrorCodes.XPTY0004, + throw new XPathException(expression, effectiveErrorCode(), Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ") is not a sub-type of " + Type.getTypeName(requiredType)); } else - 
{throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + {throw new XPathException(expression, effectiveErrorCode(), "Required type is " + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + ")'");} } @@ -155,6 +185,104 @@ private void check(Sequence result, Item item) throws XPathException { {result.add(item);} } + /** + * XQuery 4.0 coercion rules per spec §3.4.1. + * Handles implicit casting (item 4) and relabeling (item 6). + */ + private Item xq4ImplicitCast(Item item, final int type, final int requiredType) throws XPathException { + // Item 4: Implicit Casting table + if (isXQ4ImplicitCast(type, requiredType)) { + try { + return item.convertTo(requiredType); + } catch (final XPathException e) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + "Cannot coerce " + Type.getTypeName(type) + "(" + + item.getStringValue() + ") to " + Type.getTypeName(requiredType)); + } + } + // Item 6: Relabeling — if R is derived from primitive P, and J is an instance of P, + // relabel J as R if J's datum is within the value space of R. 
+ if (isXQ4Relabeling(type, requiredType)) { + try { + return item.convertTo(requiredType); + } catch (final XPathException e) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + "Cannot relabel " + Type.getTypeName(type) + "(" + + item.getStringValue() + ") as " + Type.getTypeName(requiredType) + + ": value is not in the value space of the target type"); + } + } + // Fall through to the standard type error + if (!(Type.subTypeOf(type, requiredType))) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + Type.getTypeName(item.getType()) + "(" + item.getStringValue() + + ") is not a sub-type of " + Type.getTypeName(requiredType)); + } else { + throw new XPathException(expression, ErrorCodes.FOCH0002, "Required type is " + + Type.getTypeName(requiredType) + " but got '" + Type.getTypeName(item.getType()) + "(" + + item.getStringValue() + ")'"); + } + } + + /** + * Check if an implicit cast is allowed from a source type to a target type + * under XQuery 4.0 coercion rules (spec §3.4.1 item 4, Implicit Casting table). + * + * The "from" column matches if J is an instance of "from" (including subtypes). + * The "to" column must match R exactly (the required type must be the primitive type). 
+ */ + static boolean isXQ4ImplicitCast(final int sourceType, final int requiredType) { + // xs:string → xs:anyURI + if (Type.subTypeOf(sourceType, Type.STRING) && requiredType == Type.ANY_URI) { + return true; + } + // xs:hexBinary ↔ xs:base64Binary + if (Type.subTypeOf(sourceType, Type.HEX_BINARY) && requiredType == Type.BASE64_BINARY) { + return true; + } + if (Type.subTypeOf(sourceType, Type.BASE64_BINARY) && requiredType == Type.HEX_BINARY) { + return true; + } + // Bidirectional numeric: xs:double → xs:decimal, xs:float → xs:decimal + // (Note: decimal→float, decimal→double, float→double already handled by XQ 3.1 rules) + if (Type.subTypeOf(sourceType, Type.DOUBLE) && requiredType == Type.DECIMAL) { + return true; + } + if (Type.subTypeOf(sourceType, Type.FLOAT) && requiredType == Type.DECIMAL) { + return true; + } + // XQ4 also allows any numeric → any other numeric + // "any numeric type to be implicitly converted to any other" + if (Type.subTypeOfUnion(sourceType, Type.NUMERIC) && Type.subTypeOfUnion(requiredType, Type.NUMERIC)) { + return true; + } + return false; + } + + /** + * Check if relabeling is allowed under XQuery 4.0 coercion rules (spec §3.4.1 item 6). + * Relabeling applies when R is derived from a primitive type P, and J is an instance of P + * (but not already an instance of R). The actual value check (whether the datum is in + * the value space of R) is deferred to convertTo(). 
+ */ + static boolean isXQ4Relabeling(final int sourceType, final int requiredType) { + // Only applies to atomic types + if (!Type.subTypeOf(sourceType, Type.ANY_ATOMIC_TYPE) || !Type.subTypeOf(requiredType, Type.ANY_ATOMIC_TYPE)) { + return false; + } + try { + final int requiredPrimitive = Type.primitiveTypeOf(requiredType); + // Relabeling only applies when R is a derived type (not a primitive itself) + if (requiredPrimitive == requiredType) { + return false; + } + // J must be an instance of the same primitive type P + return Type.subTypeOf(sourceType, requiredPrimitive); + } catch (final IllegalArgumentException e) { + return false; + } + } + /* (non-Javadoc) * @see org.exist.xquery.Expression#dump(org.exist.xquery.util.ExpressionDumper) */ diff --git a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java index 20b94537797..03943cfebca 100644 --- a/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/ElementConstructor.java @@ -124,9 +124,9 @@ public void addNamespaceDecl(final String name, final String uri) throws XPathEx throw new XPathException(this, ErrorCodes.XQST0070, "'" + Namespaces.XMLNS_NS + "' can bind only to '" + XMLConstants.XMLNS_ATTRIBUTE + "' prefix"); } - if (name != null && (!name.isEmpty()) && uri.trim().isEmpty()) { - throw new XPathException(this, ErrorCodes.XQST0085, "cannot undeclare a prefix " + name + "."); - } + // XQST0085: namespace undeclaration (xmlns:prefix="") is allowed when the + // implementation supports XML Names 1.1. Since eXist supports XML 1.1 + // serialization (version="1.1"), this is no longer an error. 
addNamespaceDecl(qn); } @@ -168,6 +168,7 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); newContextInfo.setParent(this); newContextInfo.addFlag(IN_NODE_CONSTRUCTOR); + newContextInfo.addFlag(NON_UPDATING_CONTEXT); qnameExpr.analyze(newContextInfo); if(attributes != null) { for (AttributeConstructor attribute : attributes) { @@ -179,6 +180,27 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException } context.popInScopeNamespaces(); } + + /** + * Recurse the per-expression optimize() pass into our content (and the + * dynamic name expression and dynamic-attribute constructors). Without + * this, the default no-op inherited from {@link Expression} would skip + * any FLWOR or other rewriteable structure inside an element constructor + * — which is the entire body of a typical XQuery query that wraps + * results in a synthetic element (e.g. XMark queries). + */ + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + if (qnameExpr != null) { + qnameExpr = qnameExpr.optimize(cc); + } + // content is a PathExpr that mutates its own steps[] in place and + // returns this; safe to ignore the return. 
+ if (content != null) { + content.optimize(cc); + } + return this; + } @Override public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { @@ -226,7 +248,7 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr try { attrQName = QName.parse(context, constructor.getQName(), XMLConstants.NULL_NS_URI); } catch (final QName.IllegalQNameException e) { - throw new XPathException(this, ErrorCodes.XPTY0004, "'" + constructor.getQName() + "' is not a valid attribute name"); + throw new XPathException(this, ErrorCodes.XQDY0074, "'" + constructor.getQName() + "' is not a valid attribute name"); } final String namespaceURI = attrQName.getNamespaceURI(); @@ -275,30 +297,44 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr if (qnitem instanceof QNameValue) { qn = ((QNameValue) qnitem).getQName(); } else { - //Do we have the same result than Atomize there ? -pb - try { - qn = QName.parse(context, qnitem.getStringValue()); - } catch (final QName.IllegalQNameException e) { - throw new XPathException(this, ErrorCodes.XPTY0004, "'" + qnitem.getStringValue() + "' is not a valid element name"); - } catch (final XPathException e) { - e.setLocation(getLine(), getColumn(), getSource()); - throw e; + // Element constructors must resolve namespace prefixes using the full + // inherited namespace context, regardless of declare copy-namespaces no-inherit. + // The no-inherit option governs how namespaces propagate from copied source + // nodes, not how constructor names are resolved (XQuery 3.1 §3.9.3.4). + final boolean savedInherit = context.inheritNamespaces(); + if (!savedInherit) { + context.setInheritNamespaces(true); } + try { + //Do we have the same result than Atomize there ? 
-pb + try { + qn = QName.parse(context, qnitem.getStringValue()); + } catch (final QName.IllegalQNameException e) { + throw new XPathException(this, ErrorCodes.XQDY0074, "'" + qnitem.getStringValue() + "' is not a valid element name"); + } catch (final XPathException e) { + e.setLocation(getLine(), getColumn(), getSource()); + throw e; + } - //Use the default namespace if specified - /* - if (qn.getPrefix() == null && context.inScopeNamespaces.get("xmlns") != null) { - qn.setNamespaceURI((String)context.inScopeNamespaces.get("xmlns")); - } - */ - if (qn.getPrefix() == null && context.getInScopeNamespace(XMLConstants.DEFAULT_NS_PREFIX) != null) { - qn = new QName(qn.getLocalPart(), context.getInScopeNamespace(XMLConstants.DEFAULT_NS_PREFIX), qn.getPrefix()); + //Use the default namespace if specified + /* + if (qn.getPrefix() == null && context.inScopeNamespaces.get("xmlns") != null) { + qn.setNamespaceURI((String)context.inScopeNamespaces.get("xmlns")); + } + */ + if (qn.getPrefix() == null && context.getInScopeNamespace(XMLConstants.DEFAULT_NS_PREFIX) != null) { + qn = new QName(qn.getLocalPart(), context.getInScopeNamespace(XMLConstants.DEFAULT_NS_PREFIX), qn.getPrefix()); + } + } finally { + if (!savedInherit) { + context.setInheritNamespaces(false); + } } } //Not in the specs but... 
makes sense if (!XMLNames.isName(qn.getLocalPart())) { - throw new XPathException(this, ErrorCodes.XPTY0004, "'" + qnitem.getStringValue() + "' is not a valid element name"); + throw new XPathException(this, ErrorCodes.XQDY0074, "'" + qnitem.getStringValue() + "' is not a valid element name"); } // add namespace declaration nodes diff --git a/exist-core/src/main/java/org/exist/xquery/EnclosedExpr.java b/exist-core/src/main/java/org/exist/xquery/EnclosedExpr.java index b11a10e06f4..8aeff5011dc 100644 --- a/exist-core/src/main/java/org/exist/xquery/EnclosedExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/EnclosedExpr.java @@ -21,20 +21,30 @@ */ package org.exist.xquery; +import org.exist.dom.QName; import org.exist.dom.memtree.DocumentBuilderReceiver; +import org.exist.dom.memtree.DocumentImpl; import org.exist.dom.memtree.MemTreeBuilder; +import org.exist.dom.memtree.NodeImpl; import org.exist.dom.memtree.TextImpl; +import org.exist.util.serializer.AttrList; import org.exist.xquery.functions.array.ArrayType; import org.exist.xquery.util.ExpressionDumper; import org.exist.xquery.value.*; import org.w3c.dom.DOMException; import org.xml.sax.SAXException; +import javax.xml.XMLConstants; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + /** * Represents an enclosed expression {expr} inside element * content. Enclosed expressions within attribute values are processed by * {@link org.exist.xquery.AttributeConstructor}. 
- * + * * @author Wolfgang Meier */ public class EnclosedExpr extends PathExpr { @@ -42,7 +52,7 @@ public class EnclosedExpr extends PathExpr { public EnclosedExpr(XQueryContext context) { super(context); } - + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); newContextInfo.removeFlag(IN_NODE_CONSTRUCTOR); @@ -74,14 +84,41 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc Sequence result; context.enterEnclosedExpr(); try { + // Check copy-namespaces mode before evaluation so we can lazily capture + // innerBuilder only when no-inherit is active. In the default (inherit) case + // this avoids the peekDocumentBuilder() call entirely. + final boolean noInherit = !context.inheritNamespaces(); + context.pushDocumentContext(); + MemTreeBuilder innerBuilder = null; try { result = super.eval(contextSequence, null); + // Only capture the inner builder when no-inherit is active — it is used + // solely to distinguish pre-existing nodes (variable references / copies) + // from nodes constructed inside this enclosed expression. + if (noInherit) { + innerBuilder = context.getCurrentDocumentBuilder(); + } } finally { context.popDocumentContext(); } + // Compute ancestor namespace context for no-inherit copy handling. + // This is the union of inherited namespaces (from outer constructors) and + // in-scope namespaces (from the immediately enclosing constructor), i.e. all + // namespace bindings that an ancestor traversal from this element would find. 
+ Map ancestorNS = null; + if (noInherit) { + final Map inherited = context.getAllInheritedNamespaces(); + final Map inScope = context.getInScopeNamespaces(); + if ((inherited != null && !inherited.isEmpty()) || (inScope != null && !inScope.isEmpty())) { + ancestorNS = new HashMap<>(); + if (inherited != null) { ancestorNS.putAll(inherited); } + if (inScope != null) { ancestorNS.putAll(inScope); } + } + } + // create the output final MemTreeBuilder builder = context.getDocumentBuilder(); final DocumentBuilderReceiver receiver = new DocumentBuilderReceiver(this, builder); @@ -130,11 +167,36 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } try { receiver.setCheckNS(false); - next.copyTo(context.getBroker(), receiver); + // When copy-namespaces no-inherit is active, pre-existing element nodes + // (i.e. nodes from variable references, not direct constructors) must have + // namespace undeclarations injected so that ancestor-traversal in + // in-scope-prefixes() is neutralized for inherited namespace bindings. 
+ if (noInherit && ancestorNS != null + && next.getType() == Type.ELEMENT + && next instanceof NodeImpl) { + final NodeImpl nodeImpl = (NodeImpl) next; + final boolean isPreExisting = (innerBuilder == null) + || (nodeImpl.getOwnerDocument() != innerBuilder.getDocument()); + if (isPreExisting) { + next.copyTo(context.getBroker(), + new NoInheritCopyReceiver(this, builder, ancestorNS)); + } else { + next.copyTo(context.getBroker(), receiver); + } + } else { + next.copyTo(context.getBroker(), receiver); + } receiver.setCheckNS(true); } catch (DOMException e) { if (e.code == DOMException.NAMESPACE_ERR) { throw new XPathException(this, ErrorCodes.XQDY0102, e.getMessage()); + } else if (e.code == DOMException.INUSE_ATTRIBUTE_ERR) { + final String msg = e.getMessage(); + if (msg != null && msg.contains("XQTY0024")) { + throw new XPathException(this, ErrorCodes.XQTY0024, msg); + } else { + throw new XPathException(this, ErrorCodes.XQDY0025, msg); + } } else { throw new XPathException(this, e.getMessage(), e); } @@ -194,4 +256,104 @@ public Expression simplify() { public boolean evalNextExpressionOnEmptyContextSequence() { return true; } + + /** + * A {@link DocumentBuilderReceiver} that injects namespace undeclarations onto the + * root element of a copied pre-existing node when {@code declare copy-namespaces no-inherit} + * is active. The undeclarations neutralize ancestor namespace bindings so that + * {@code fn:in-scope-prefixes()} traversing the ancestor chain returns the correct result. + * + *

<p>Only prefixes present in {@code ancestorNS} but absent from the root element's own + * namespace declarations are undeclared (i.e. recorded as {@code xmlns:prefix=""}).</p>

+ */ + private static final class NoInheritCopyReceiver extends DocumentBuilderReceiver { + + private final Map ancestorNS; + /** True once the root element's startElement event has been seen. */ + private boolean rootSeen = false; + /** True once undeclarations have been flushed (happens before first non-namespace event). */ + private boolean undeclsFlushed = false; + /** Prefixes that the root element itself declares (element prefix + xmlns:* nodes). */ + private final Set rootOwnPrefixes = new HashSet<>(); + + NoInheritCopyReceiver(final Expression expr, final MemTreeBuilder builder, + final Map ancestorNS) { + super(expr, builder); + this.ancestorNS = ancestorNS; + } + + @Override + public void startElement(final QName qname, final AttrList attribs) { + if (!rootSeen) { + rootSeen = true; + // Track the element's own namespace prefix so we don't undeclare it. + final String prefix = qname.getPrefix(); + if (prefix != null && !prefix.isEmpty()) { + rootOwnPrefixes.add(prefix); + } + // Note: namespace declaration nodes for this element come via addNamespaceNode + // after startElement; they are collected in rootOwnPrefixes there. + } else { + maybeFlushUndeclarations(); + } + super.startElement(qname, attribs); + } + + @Override + public void addNamespaceNode(final QName qname) throws SAXException { + if (rootSeen && !undeclsFlushed) { + // Collect the prefix that this namespace node declares on the root element. 
+ rootOwnPrefixes.add(qname.getLocalPart()); + } + super.addNamespaceNode(qname); + } + + @Override + public void characters(final CharSequence seq) throws SAXException { + maybeFlushUndeclarations(); + super.characters(seq); + } + + @Override + public void characters(final char[] ch, final int start, final int len) throws SAXException { + maybeFlushUndeclarations(); + super.characters(ch, start, len); + } + + @Override + public void endElement(final String ns, final String local, final String qname) throws SAXException { + maybeFlushUndeclarations(); + super.endElement(ns, local, qname); + } + + @Override + public void endElement(final QName qname) throws SAXException { + maybeFlushUndeclarations(); + super.endElement(qname); + } + + /** + * Emit namespace undeclarations for every ancestor namespace binding whose prefix is + * not already declared by the root element itself. Called before the first non-namespace + * event on the root element (child, text, endElement) to ensure the nodes attach to the + * root element node number in the MemTree. + */ + private void maybeFlushUndeclarations() { + if (rootSeen && !undeclsFlushed) { + undeclsFlushed = true; + for (final Map.Entry entry : ancestorNS.entrySet()) { + final String prefix = entry.getKey(); + if (!rootOwnPrefixes.contains(prefix)) { + try { + super.addNamespaceNode( + new QName(prefix, XMLConstants.NULL_NS_URI, XMLConstants.XMLNS_ATTRIBUTE)); + } catch (final SAXException e) { + // Silently skip — undeclaration is best-effort; worst case + // in-scope-prefixes() returns a superset which is handled by cleanup. 
+ } + } + } + } + } + } } diff --git a/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java b/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java new file mode 100644 index 00000000000..bf0fc6ce7b2 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/EnumCastExpression.java @@ -0,0 +1,141 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Implements cast as enum("a","b","c") and castable as enum("a","b","c") from XQuery 4.0. 
+ */ +public class EnumCastExpression extends AbstractExpression { + + private final String[] enumValues; + private final Cardinality cardinality; + private final Expression expression; + private final boolean isCastable; + + public EnumCastExpression(final XQueryContext context, final Expression expr, + final String[] enumValues, final Cardinality cardinality, + final boolean isCastable) { + super(context); + this.expression = expr; + this.enumValues = enumValues; + this.cardinality = cardinality; + this.isCastable = isCastable; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = Atomize.atomize(expression.eval(contextSequence, contextItem)); + + if (seq.isEmpty()) { + if (isCastable) { + return BooleanValue.valueOf( + cardinality.isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE)); + } + if (cardinality.atLeastOne()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: empty sequence is not allowed here"); + } + return Sequence.EMPTY_SEQUENCE; + } + + final String value = seq.itemAt(0).getStringValue(); + + for (final String enumVal : enumValues) { + if (enumVal.equals(value)) { + if (isCastable) { + return BooleanValue.TRUE; + } + return new StringValue(this, value); + } + } + + if (isCastable) { + return BooleanValue.FALSE; + } + throw new XPathException(this, ErrorCodes.FORG0001, + "Cannot cast '" + value + "' to enum type"); + } + + @Override + public int returnsType() { + return isCastable ? Type.BOOLEAN : Type.STRING; + } + + @Override + public Cardinality getCardinality() { + return isCastable ? Cardinality.EXACTLY_ONE : Cardinality.ZERO_OR_ONE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + expression.dump(dumper); + dumper.display(isCastable ? 
" castable as enum(" : " cast as enum("); + for (int i = 0; i < enumValues.length; i++) { + if (i > 0) { + dumper.display(", "); + } + dumper.display("\"" + enumValues[i] + "\""); + } + dumper.display(")"); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + sb.append(expression.toString()).append(isCastable ? " castable as enum(" : " cast as enum("); + for (int i = 0; i < enumValues.length; i++) { + if (i > 0) { + sb.append(", "); + } + sb.append("\"").append(enumValues[i]).append("\""); + } + sb.append(")"); + return sb.toString(); + } + + @Override + public int getDependencies() { + return expression.getDependencies() | Dependency.CONTEXT_ITEM; + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java index 23226a155f2..9079277d89f 100644 --- a/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java +++ b/exist-core/src/main/java/org/exist/xquery/ErrorCodes.java @@ -43,6 +43,7 @@ public class ErrorCodes { public static final ErrorCode XPTY0018 = new W3CErrorCode("XPTY0018", "It is a type error if the result of the last step in a path expression contains both nodes and atomic values."); public static final ErrorCode XPTY0019 = new W3CErrorCode("XPTY0019", "It is a type error if the result of a step (other than the last step) in a path expression contains an atomic value."); public static final ErrorCode XPTY0020 = new W3CErrorCode("XPTY0020", "It is a type error if, in an axis step, the context item is not a node."); + public static final ErrorCode XPTY0117 = new W3CErrorCode("XPTY0117", "It is a type error to apply function 
coercion, or to cast to or from a union type, when a value of type xs:untypedAtomic is involved and the other type is namespace-sensitive."); public static final ErrorCode XPDY0021 = new W3CErrorCode("XPDY0021", "(Not currently used.)"); public static final ErrorCode XPDY0050 = new W3CErrorCode("XPDY0050", "It is a dynamic error if the dynamic type of the operand of a treat expression does not match the sequence type specified by the treat expression. This error might also be raised by a path expression beginning with \"/\" or \"//\" if the context node is not in a tree that is rooted at a document node. This is because a leading \"/\" or \"//\" in a path expression is an abbreviation for an initial step that includes the clause treat as document-node()."); public static final ErrorCode XPST0051 = new W3CErrorCode("XPST0051", "It is a static error if a QName that is used as an AtomicType in a SequenceType is not defined in the in-scope schema types as an atomic type."); @@ -88,6 +89,7 @@ public class ErrorCodes { public static final ErrorCode XQDY0052 = new W3CErrorCode("XQDY0052", "(Not currently used.)"); public static final ErrorCode XQST0053 = new W3CErrorCode("XQST0053", "(Not currently used.)"); public static final ErrorCode XQST0054 = new W3CErrorCode("XQST0054", "It is a static error if a variable depends on itself."); + public static final ErrorCode XQDY0054 = new W3CErrorCode("XQDY0054", "It is a dynamic error if a variable depends on itself."); public static final ErrorCode XQST0055 = new W3CErrorCode("XQST0055", "It is a static error if a Prolog contains more than one copy-namespaces declaration."); public static final ErrorCode XQST0056 = new W3CErrorCode("XQST0056", "(Not currently used.)"); public static final ErrorCode XQST0057 = new W3CErrorCode("XQST0057", "It is a static error if a schema import binds a namespace prefix but does not specify a target namespace other than a zero-length string."); @@ -127,6 +129,13 @@ public class ErrorCodes { 
public static final ErrorCode XQST0094 = new W3CErrorCode("XQST0094", "The name of each grouping variable must be equal (by the eq operator on expanded QNames) to the name of a variable in the input tuple stream."); + public static final ErrorCode XQST0097 = new W3CErrorCode("XQST0097", + "It is a static error to have more than one decimal-format declaration with the same name, " + + "or more than one default decimal-format declaration, in the same module."); + public static final ErrorCode XQST0098 = new W3CErrorCode("XQST0098", + "It is a static error if the properties representing characters used in a picture string " + + "do not each have distinct values, or if a property value is not valid for its property."); + public static final ErrorCode XQDY0101 = new W3CErrorCode("XQDY0101", "An error is raised if a computed namespace constructor attempts to do any of the following:\n" + "Bind the prefix xml to some namespace URI other than http://www.w3.org/XML/1998/namespace.\n" + "Bind a prefix other than xml to the namespace URI http://www.w3.org/XML/1998/namespace.\n" + @@ -135,10 +144,38 @@ public class ErrorCodes { "Bind any prefix (including the empty prefix) to a zero-length namespace URI."); public static final ErrorCode XQDY0102 = new W3CErrorCode("XQDY0102", "If the name of an element in an element constructor is in no namespace, creating a default namespace for that element using a computed namespace constructor is an error."); public static final ErrorCode XQST0103 = new W3CErrorCode("XQST0103", "All variables in a window clause must have distinct names."); + public static final ErrorCode XQST0106 = new W3CErrorCode("XQST0106", "It is a static error if a function's annotations contain more than one annotation named %public or %private, or if they contain both %public and %private."); + public static final ErrorCode XQST0116 = new W3CErrorCode("XQST0116", "It is a static error if a variable declaration's annotations contain more than one annotation named 
%public or %private, or if they contain both %public and %private."); public static final ErrorCode XQDY0137 = new W3CErrorCode("XQDY0137", "No two keys in a map may have the same key value"); public static final ErrorCode XQDY0138 = new W3CErrorCode("XQDY0138", "Position n does not exist in this array"); + /* W3C XQuery Update Facility 3.0 error codes */ + public static final ErrorCode XUDY0009 = new W3CErrorCode("XUDY0009", "It is a dynamic error if the target node of a replace expression is a node without a parent."); + public static final ErrorCode XUDY0014 = new W3CErrorCode("XUDY0014", "It is a dynamic error if the result of applying all update primitives on a single document node would result in that document having more than one element or text child."); + public static final ErrorCode XUDY0015 = new W3CErrorCode("XUDY0015", "It is a dynamic error if more than one rename primitive is applied to the same target node."); + public static final ErrorCode XUDY0016 = new W3CErrorCode("XUDY0016", "It is a dynamic error if more than one replace primitive is applied to the same target node."); + public static final ErrorCode XUDY0017 = new W3CErrorCode("XUDY0017", "It is a dynamic error if two or more upd:replaceValue primitives in a PUL have the same target node."); + public static final ErrorCode XUDY0021 = new W3CErrorCode("XUDY0021", "It is a dynamic error if an insert, replace, or rename expression affects an element node by introducing an attribute node with a namespace binding that conflicts with a namespace binding of the element node."); public static final ErrorCode XUDY0023 = new W3CErrorCode("XUDY0023", "It is a dynamic error if an insert, replace, or rename expression affects an element node by introducing a new namespace binding that conflicts with one of its existing namespace bindings."); + public static final ErrorCode XUDY0024 = new W3CErrorCode("XUDY0024", "It is a dynamic error if the new namespace bindings added to an element by an update 
conflict with its existing namespace bindings."); + public static final ErrorCode XUDY0027 = new W3CErrorCode("XUDY0027", "It is a dynamic error if the target of an insert before or insert after expression is a root element or root text node of a document."); + public static final ErrorCode XUDY0029 = new W3CErrorCode("XUDY0029", "It is a dynamic error if the target of an insert into, insert as first into, insert as last into, or replace expression is not an element or document node."); + public static final ErrorCode XUDY0030 = new W3CErrorCode("XUDY0030", "It is a dynamic error if the target of an insert attributes expression is not an element node."); + public static final ErrorCode XUDY0031 = new W3CErrorCode("XUDY0031", "It is a dynamic error if two or more fn:put primitives have the same URI."); + public static final ErrorCode XUST0001 = new W3CErrorCode("XUST0001", "It is a static error if an updating expression is used in a context where it is not allowed."); + public static final ErrorCode XUST0002 = new W3CErrorCode("XUST0002", "It is a static error if a non-updating expression other than an empty sequence is used where an updating expression is expected."); + public static final ErrorCode XUST0003 = new W3CErrorCode("XUST0003", "It is a static error if a revalidation declaration specifies a revalidation mode that is not supported by the implementation."); + public static final ErrorCode XUST0028 = new W3CErrorCode("XUST0028", "It is a static error if a function declaration is declared as updating and also declares a return type."); + public static final ErrorCode XUTY0004 = new W3CErrorCode("XUTY0004", "It is a type error if the content sequence of an insert expression with into, as first into, or as last into contains an attribute node following a node that is not an attribute node."); + public static final ErrorCode XUTY0005 = new W3CErrorCode("XUTY0005", "It is a type error if the target expression of an insert expression with into, as first into, or 
as last into does not return a single element or document node."); + public static final ErrorCode XUTY0006 = new W3CErrorCode("XUTY0006", "It is a type error if the target expression of an insert expression with before or after does not return a single element, text, comment, or processing instruction node with a parent."); + public static final ErrorCode XUTY0007 = new W3CErrorCode("XUTY0007", "It is a type error if the target expression of a replace value of expression does not return a single element, attribute, text, comment, or processing instruction node."); + public static final ErrorCode XUTY0008 = new W3CErrorCode("XUTY0008", "It is a type error if the target expression of a replace expression returns a document node."); + public static final ErrorCode XUTY0010 = new W3CErrorCode("XUTY0010", "It is a type error if in a replace expression where the target is an element, text, comment, or processing instruction node, the content expression does not return a sequence of zero or more element, text, comment, or processing instruction nodes."); + public static final ErrorCode XUTY0011 = new W3CErrorCode("XUTY0011", "It is a type error if in a replace expression where the target is an attribute node, the content expression does not return a sequence of zero or more attribute nodes."); + public static final ErrorCode XUTY0012 = new W3CErrorCode("XUTY0012", "It is a type error if the target expression of a rename expression does not return a single element, attribute, or processing instruction node."); + public static final ErrorCode XUTY0013 = new W3CErrorCode("XUTY0013", "It is a type error if the source expression of a copy expression does not return a single node."); + public static final ErrorCode XUTY0022 = new W3CErrorCode("XUTY0022", "It is a type error if an insert expression specifies the insertion of an attribute node into a document node."); /* XQuery 1.0 and XPath 2.0 Functions and Operators http://www.w3.org/TR/xpath-functions/#error-summary */ 
public static final ErrorCode FOER0000 = new W3CErrorCode("FOER0000", "Unidentified error."); @@ -161,6 +198,10 @@ public class ErrorCodes { public static final ErrorCode FODT0001 = new W3CErrorCode("FODT0001", "Overflow/underflow in date/time operation."); public static final ErrorCode FODT0002 = new W3CErrorCode("FODT0002", "Overflow/underflow in duration operation."); public static final ErrorCode FODT0003 = new W3CErrorCode("FODT0003", "Invalid timezone value."); + // --- XQuery 4.0 fn:build-dateTime error codes --- + public static final ErrorCode FODT0005 = new W3CErrorCode("FODT0005", "Missing required date/time component."); + public static final ErrorCode FODT0006 = new W3CErrorCode("FODT0006", "Invalid date/time component value."); + // --- End XQuery 4.0 fn:build-dateTime error codes --- public static final ErrorCode FONS0004 = new W3CErrorCode("FONS0004", "No namespace found for prefix."); public static final ErrorCode FONS0005 = new W3CErrorCode("FONS0005", "Base-uri not defined in the static context."); public static final ErrorCode FORG0001 = new W3CErrorCode("FORG0001", "Invalid value for cast/constructor."); @@ -176,6 +217,10 @@ public class ErrorCodes { public static final ErrorCode FORX0002 = new W3CErrorCode("FORX0002", "Invalid regular expression."); public static final ErrorCode FORX0003 = new W3CErrorCode("FORX0003", "Regular expression matches zero-length string."); public static final ErrorCode FORX0004 = new W3CErrorCode("FORX0004", "Invalid replacement string."); + public static final ErrorCode FOCV0001 = new W3CErrorCode("FOCV0001", "CSV quote error."); + public static final ErrorCode FOCV0002 = new W3CErrorCode("FOCV0002", "Invalid CSV delimiter."); + public static final ErrorCode FOCV0003 = new W3CErrorCode("FOCV0003", "Conflicting CSV delimiters."); + public static final ErrorCode FOCV0004 = new W3CErrorCode("FOCV0004", "CSV field not found."); public static final ErrorCode FOTY0012 = new W3CErrorCode("FOTY0012", "Argument node does 
not have a typed value."); public static final ErrorCode FOTY0013 = new W3CErrorCode("FOTY0013", "The argument to fn:data() contains a function item."); @@ -211,11 +256,13 @@ public class ErrorCodes { public static final ErrorCode FTDY0020 = new W3CErrorCode("FTDY0020", ""); public static final ErrorCode FODC0006 = new W3CErrorCode("FODC0006", "String passed to fn:parse-xml is not a well-formed XML document."); + public static final ErrorCode FODC0011 = new W3CErrorCode("FODC0011", "HTML parsing error."); public static final ErrorCode FOAP0001 = new W3CErrorCode("FOAP0001", "Wrong number of arguments"); /* XQuery 3.1 */ public static final ErrorCode XQTY0105 = new W3CErrorCode("XQTY0105", "It is a type error if the content sequence in an element constructor contains a function."); + public static final ErrorCode XQTY0153 = new W3CErrorCode("XQTY0153", "It is a type error if the finally clause of a try/catch expression evaluates to a non-empty sequence."); public static final ErrorCode FOAY0001 = new W3CErrorCode("FOAY0001", "Array index out of bounds."); public static final ErrorCode FOAY0002 = new W3CErrorCode("FOAY0002", "Negative array length."); @@ -225,6 +272,7 @@ public class ErrorCodes { public static final ErrorCode FOJS0005 = new W3CErrorCode("FOJS0005", "Invalid options."); public static final ErrorCode FOJS0006 = new W3CErrorCode("FOJS0006", "Invalid XML representation of JSON."); public static final ErrorCode FOJS0007 = new W3CErrorCode("FOJS0007", "Bad JSON escape sequence."); + public static final ErrorCode FOJS0008 = new W3CErrorCode("FOJS0008", "Element cannot be converted with the specified layout."); public static final ErrorCode FOUT1170 = new W3CErrorCode("FOUT1170", "Invalid $href argument to fn:unparsed-text() (etc.)"); public static final ErrorCode FOUT1190 = new W3CErrorCode("FOUT1190", "Cannot decode resource retrieved by fn:unparsed-text() (etc.)"); @@ -241,8 +289,25 @@ public class ErrorCodes { public static final ErrorCode FOXT0004 = new 
W3CErrorCode("FOXT0004", "XSLT transformation has been disabled"); public static final ErrorCode FOXT0006 = new W3CErrorCode("FOXT0006", "XSLT output contains non-accepted characters"); + // Invisible XML errors + public static final ErrorCode FOIX0001 = new W3CErrorCode("FOIX0001", "Invalid ixml grammar"); + public static final ErrorCode FOIX0002 = new W3CErrorCode("FOIX0002", "ixml parse error"); + public static final ErrorCode XTSE0165 = new W3CErrorCode("XTSE0165","It is a static error if the processor is not able to retrieve the resource identified by the URI reference [ in the href attribute of xsl:include or xsl:import] , or if the resource that is retrieved does not contain a stylesheet module conforming to this specification."); + // W3C XQuery and XPath Full Text 3.0 error codes + public static final ErrorCode FTST0001 = new W3CErrorCode("FTST0001", "It is a static error if an operand of mild not (not in) contains ftnot or occurs."); + public static final ErrorCode FTST0003 = new W3CErrorCode("FTST0003", "It is a static error if a tokenizer for the language specified by the language option is not available."); + public static final ErrorCode FTST0004 = new W3CErrorCode("FTST0004", "It is a static error if sentence/paragraph boundaries are required but not supported by the tokenizer."); + public static final ErrorCode FTST0006 = new W3CErrorCode("FTST0006", "It is a static error if a stop word list cannot be found."); + public static final ErrorCode FTST0008 = new W3CErrorCode("FTST0008", "It is a static error if a stop word list is not in the correct format."); + public static final ErrorCode FTST0009 = new W3CErrorCode("FTST0009", "It is a static error if the specified language is not supported."); + public static final ErrorCode FTDY0016 = new W3CErrorCode("FTDY0016", "It is a dynamic error if a weight value is not within the required range."); + public static final ErrorCode FTDY0017 = new W3CErrorCode("FTDY0017", "It is a dynamic error if the 
right-hand match of mild not has any include-matches matching tokens not matched by include-matches of the left-hand match."); + public static final ErrorCode FTST0013 = new W3CErrorCode("FTST0013", "It is a static error if, in an implementation which does not support the Stop Word Languages feature, a stop word option includes a language specification."); + public static final ErrorCode FTST0018 = new W3CErrorCode("FTST0018", "It is a static error if a thesaurus is not available."); + public static final ErrorCode FTST0019 = new W3CErrorCode("FTST0019", "It is a static error if match options in a single contains text expression conflict with each other."); + /* eXist specific XQuery and XPath errors * * Codes have the format [EX][XQ|XP][DY|SE|ST][nnnn] diff --git a/exist-core/src/main/java/org/exist/xquery/Expression.java b/exist-core/src/main/java/org/exist/xquery/Expression.java index de4afa9c099..6b4bbc04c5b 100644 --- a/exist-core/src/main/java/org/exist/xquery/Expression.java +++ b/exist-core/src/main/java/org/exist/xquery/Expression.java @@ -76,6 +76,14 @@ public interface Expression extends Materializable { */ public final static int UNORDERED = 1024; + /** + * Indicates that the expression is in a context where updating expressions + * (insert, delete, replace, rename) are not allowed. + * Per W3C XQuery Update Facility 3.0, XUST0001 should be raised if an + * updating expression appears in such a context. + */ + public final static int NON_UPDATING_CONTEXT = 2048; + /** * Indicates that no context id is supplied to an expression. */ @@ -107,6 +115,31 @@ public interface Expression extends Materializable { */ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException; + /** + * Optimize this expression and return the (possibly different) replacement. + * + * Called once after {@link #analyze(AnalyzeContextInfo)} for the whole + * tree. Implementations may return: + *
<ul> + * <li>{@code this} (no rewrite — the default),</li> + * <li>one of this expression's children (e.g. constant-condition fold),</li> + * <li>a freshly constructed expression replacing this one.</li> + * </ul>
+ * + * Implementations are responsible for recursing into their own + * sub-expressions: each child slot becomes + * {@code this.child = this.child.optimize(cc)}. The default returns + * {@code this} so existing classes that do not override this method + * continue to work unchanged. + * + * @param cc compilation context + * @return the optimized expression — the caller must capture and use it + * @throws XPathException if a static error is detected + */ + default Expression optimize(CompileContext cc) throws XPathException { + return this; + } + public void setPrimaryAxis(int axis); public int getPrimaryAxis(); @@ -203,6 +236,30 @@ public interface Expression extends Materializable { public boolean allowMixedNodesInReturn(); + /** + * Returns true if this expression is an updating expression per the + * W3C XQuery Update Facility 3.0 specification. + * Updating expressions include: insert, delete, replace, rename expressions, + * calls to updating functions, and composite expressions where all branches + * are updating. + * + * @return true if this is an updating expression + */ + default boolean isUpdating() { + return false; + } + + /** + * Returns true if this expression is vacuous — neither updating nor producing + * a non-empty result. A vacuous expression is compatible with both updating + * and non-updating contexts per W3C XQuery Update Facility 3.0. 
+ * + * @return true if this expression is vacuous + */ + default boolean isVacuous() { + return !isUpdating() && getCardinality() == Cardinality.EMPTY_SEQUENCE; + } + public Expression getParent(); /** diff --git a/exist-core/src/main/java/org/exist/xquery/ExternalModuleImpl.java b/exist-core/src/main/java/org/exist/xquery/ExternalModuleImpl.java index 72742be3f1e..ae131640893 100644 --- a/exist-core/src/main/java/org/exist/xquery/ExternalModuleImpl.java +++ b/exist-core/src/main/java/org/exist/xquery/ExternalModuleImpl.java @@ -48,6 +48,9 @@ public class ExternalModuleImpl implements ExternalModule { final private TreeMap mGlobalVariables = new TreeMap<>(); final private TreeMap mStaticVariables = new TreeMap<>(); + /** Guards against recursive variable analysis/evaluation */ + final private Set variablesBeingResolved = new java.util.HashSet<>(); + private Source mSource = null; private XQueryContext mContext = null; @@ -223,17 +226,25 @@ public boolean isVarDeclared(QName qname) { Variable var = mStaticVariables.get(qname); if (isReady && decl != null && (var == null || var.getValue() == null)) { - // Make sure Analyze has been called, see - https://github.com/eXist-db/exist/issues/4096 - final AnalyzeContextInfo declContextInfo; - if (contextInfo != null) { - declContextInfo = new AnalyzeContextInfo(contextInfo); - } else { - declContextInfo = new AnalyzeContextInfo(); + // Guard against recursive variable resolution (circular dependencies) + if (!variablesBeingResolved.add(qname)) { + return var; + } + try { + // Make sure Analyze has been called, see - https://github.com/eXist-db/exist/issues/4096 + final AnalyzeContextInfo declContextInfo; + if (contextInfo != null) { + declContextInfo = new AnalyzeContextInfo(contextInfo); + } else { + declContextInfo = new AnalyzeContextInfo(); + } + decl.analyze(declContextInfo); + + decl.eval(getContext().getContextItem(), null); + var = mStaticVariables.get(qname); + } finally { + variablesBeingResolved.remove(qname); } 
- decl.analyze(declContextInfo); - - decl.eval(getContext().getContextItem(), null); - var = mStaticVariables.get(qname); } if (var == null) { // external variable may be defined in root context importing this module @@ -249,6 +260,17 @@ public boolean isVarDeclared(QName qname) { return var; } + /** + * Check if a variable declared in this module has a %private annotation. + * + * @param qname the variable name + * @return true if the variable is declared private + */ + public boolean isVariablePrivate(final QName qname) { + final VariableDeclaration decl = mGlobalVariables.get(qname); + return decl != null && decl.isPrivate(); + } + public void analyzeGlobalVars() throws XPathException { for (final VariableDeclaration decl : mGlobalVariables.values()) { decl.analyzeExpression(new AnalyzeContextInfo()); diff --git a/exist-core/src/main/java/org/exist/xquery/FLWORClause.java b/exist-core/src/main/java/org/exist/xquery/FLWORClause.java index d56ed4777d2..ea632d51e17 100644 --- a/exist-core/src/main/java/org/exist/xquery/FLWORClause.java +++ b/exist-core/src/main/java/org/exist/xquery/FLWORClause.java @@ -34,7 +34,8 @@ public interface FLWORClause extends Expression { enum ClauseType { - FOR, LET, GROUPBY, ORDERBY, WHERE, SOME, EVERY, COUNT, WINDOW + FOR, LET, GROUPBY, ORDERBY, WHERE, WHILE, SOME, EVERY, COUNT, WINDOW, FOR_MEMBER, FOR_KEY, FOR_VALUE, FOR_KEY_VALUE, + LET_SEQ_DESTRUCTURE, LET_ARRAY_DESTRUCTURE, LET_MAP_DESTRUCTURE } /** diff --git a/exist-core/src/main/java/org/exist/xquery/FieldAccessor.java b/exist-core/src/main/java/org/exist/xquery/FieldAccessor.java new file mode 100644 index 00000000000..1ce377912bd --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/FieldAccessor.java @@ -0,0 +1,108 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the 
GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.util.Objects; + +/** + * XQuery 4.0 field accessor expression: {@code $expr.fieldName}. + * + *

Syntactic sugar for {@code map:get($expr, "fieldName")}. The parser-next + * branch will wire the {@code .NCName} postfix syntax to this class. Until then, + * it can be used programmatically or via {@code fn:get} on record-typed values.

+ * + *

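If the base expression has a declared record type, the field name is + * meant to be validated against the record's field declarations at analysis time. + * NOTE(review): {@code analyze} in this class only analyzes the base expression and + * performs no such validation itself — confirm where the check is implemented.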

+ */ +public class FieldAccessor extends AbstractExpression { + + private final Expression baseExpr; + private final String fieldName; + + public FieldAccessor(final XQueryContext context, final Expression baseExpr, final String fieldName) { + super(context); + this.baseExpr = baseExpr; + this.fieldName = fieldName; + } + + public Expression getBaseExpression() { + return baseExpr; + } + + public String getFieldName() { + return fieldName; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + baseExpr.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence baseResult = baseExpr.eval(contextSequence, contextItem); + + if (baseResult.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final Item item = baseResult.itemAt(0); + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Field accessor ." + fieldName + " requires a map, got " + Type.getTypeName(item.getType())); + } + + final AbstractMapType map = (AbstractMapType) item; + final Sequence value = map.get(new StringValue(this, fieldName)); + return Objects.requireNonNullElse(value, Sequence.EMPTY_SEQUENCE); + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public int getDependencies() { + return baseExpr.getDependencies(); + } + + @Override + public void dump(final ExpressionDumper dumper) { + baseExpr.dump(dumper); + dumper.display("."); + dumper.display(fieldName); + } + + @Override + public String toString() { + return baseExpr.toString() + "." 
+ fieldName; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java b/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java new file mode 100644 index 00000000000..c59fea9d9c9 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/FilterExprAM.java @@ -0,0 +1,152 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import io.lacuna.bifurcan.IEntry; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * XQuery 4.0 FilterExprAM — the array/map filter expression {@code ?[expr]}. + * + *

Filters array members or map entries by evaluating a predicate expression + * with each member/value as the context item. Only items where the predicate's + * effective boolean value is true are kept in the result.

+ * + *
+ * [1, 2, 3, 4, 5]?[. > 3]         → [4, 5]
+ * map{"a":1, "b":2, "c":3}?[. > 1] → map{"b":2, "c":3}
+ * 
+ * + * @see + * QT4 spec: FilterExprAM + */ +public class FilterExprAM extends AbstractExpression { + + private final Expression target; + private final Expression predicate; + + public FilterExprAM(final XQueryContext context, final Expression target, final Expression predicate) { + super(context); + this.target = target; + this.predicate = predicate; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + target.analyze(contextInfo); + + // The predicate runs with each member/value as context item + final AnalyzeContextInfo predInfo = new AnalyzeContextInfo(contextInfo); + predInfo.setStaticType(Type.ITEM); + predicate.analyze(predInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + // Evaluate the target expression + final Sequence targetSeq = target.eval(contextSequence, contextItem); + + if (targetSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + // Process each item in the target sequence + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator iter = targetSeq.iterate(); iter.hasNext(); ) { + final Item item = iter.nextItem(); + + if (item.getType() == Type.ARRAY_ITEM) { + result.add(filterArray((ArrayType) item)); + } else if (Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + result.add(filterMap((AbstractMapType) item)); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "FilterExprAM (?[]) requires an array or map, got " + + Type.getTypeName(item.getType())); + } + } + + return result; + } + + private ArrayType filterArray(final ArrayType array) throws XPathException { + final ArrayType filtered = new ArrayType(context, Sequence.EMPTY_SEQUENCE); + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + + // Evaluate predicate with member as context item + final Sequence predResult = predicate.eval(member, null); + if 
(predResult.effectiveBooleanValue()) { + filtered.add(member); + } + } + return filtered; + } + + private AbstractMapType filterMap(final AbstractMapType map) throws XPathException { + final MapType filtered = new MapType(this, context); + for (final IEntry entry : map) { + final Sequence value = entry.value(); + + // Evaluate predicate with value as context item + final Sequence predResult = predicate.eval(value, null); + if (predResult.effectiveBooleanValue()) { + filtered.add(entry.key(), value); + } + } + return filtered; + } + + @Override + public int returnsType() { + return target.returnsType(); + } + + @Override + public Cardinality getCardinality() { + return target.getCardinality(); + } + + @Override + public void dump(final ExpressionDumper dumper) { + target.dump(dumper); + dumper.display("?["); + predicate.dump(dumper); + dumper.display("]"); + } + + @Override + public String toString() { + return target.toString() + "?[" + predicate.toString() + "]"; + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + target.resetState(postOptimization); + predicate.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/FocusFunction.java b/exist-core/src/main/java/org/exist/xquery/FocusFunction.java new file mode 100644 index 00000000000..28d930a3102 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/FocusFunction.java @@ -0,0 +1,140 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayDeque; +import java.util.List; + +/** + * Implements XQuery 4.0 focus functions: {@code fn { expr }} and {@code function { expr }}. + * + *

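A focus function is an inline function with an implicit single parameter + * of type {@code item()*}. When called, the argument is bound as the context + * value for the body expression: a single-item argument also becomes the + * context item, while an empty or multi-item argument is passed as the + * context sequence with no context item.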

+ * + *

Formally: {@code fn { EXPR }} is equivalent to + * {@code function($dot as item()*) as item()* { EXPR }} where EXPR is + * evaluated with the context value set to {@code $dot}.

+ */ +public class FocusFunction extends AbstractExpression { + + public static final String FOCUS_PARAM_NAME = ".focus"; + + private final UserDefinedFunction function; + private final ArrayDeque calls = new ArrayDeque<>(); + private AnalyzeContextInfo cachedContextInfo; + + public FocusFunction(final XQueryContext context, final UserDefinedFunction function) { + super(context); + this.function = function; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + cachedContextInfo = new AnalyzeContextInfo(contextInfo); + cachedContextInfo.addFlag(SINGLE_STEP_EXECUTION); + cachedContextInfo.setParent(this); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("fn "); + function.dump(dumper); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) + throws XPathException { + final List closureVars = context.getLocalStack(); + + final FunctionCall call = new FocusFunctionCall(context, function); + call.getFunction().setClosureVariables(closureVars); + call.setLocation(function.getLine(), function.getColumn()); + call.analyze(new AnalyzeContextInfo(cachedContextInfo)); + + calls.push(call); + + return new FunctionReference(this, call); + } + + @Override + public int returnsType() { + return Type.FUNCTION; + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + calls.clear(); + function.resetState(postOptimization); + } + + /** + * A specialized FunctionCall that sets the argument as context item + * before evaluating the function body. 
+ */ + public static class FocusFunctionCall extends FunctionCall { + + public FocusFunctionCall(final XQueryContext context, final UserDefinedFunction function) { + super(context, function); + } + + @Override + public Sequence evalFunction(final Sequence contextSequence, final Item contextItem, + final Sequence[] seq, final DocumentSet[] contextDocs) throws XPathException { + // The focus function's single argument becomes the context item + // for the body evaluation. + final Sequence focusArg = (seq != null && seq.length > 0) ? seq[0] : Sequence.EMPTY_SEQUENCE; + + context.stackEnter(this); + final LocalVariable mark = context.markLocalVariables(true); + if (getFunction().getClosureVariables() != null) { + context.restoreStack(getFunction().getClosureVariables()); + } + try { + // Bind the implicit parameter + final UserDefinedFunction func = getFunction(); + if (!func.getParameters().isEmpty()) { + final LocalVariable var = new LocalVariable( + func.getParameters().get(0)); + var.setValue(focusArg); + context.declareVariableBinding(var); + } + + // Evaluate the body with the argument as context + final Expression body = func.getFunctionBody(); + if (focusArg.getItemCount() == 1) { + return body.eval(focusArg, focusArg.itemAt(0)); + } else { + return body.eval(focusArg, null); + } + } finally { + context.popLocalVariables(mark); + context.stackLeave(this); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ForExpr.java b/exist-core/src/main/java/org/exist/xquery/ForExpr.java index 1a5eab2f4dd..4870537e8ad 100644 --- a/exist-core/src/main/java/org/exist/xquery/ForExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/ForExpr.java @@ -28,6 +28,8 @@ import java.util.HashSet; import java.util.Set; +import org.exist.xquery.Constants.Comparison; +import org.exist.xquery.Constants.StringTruncationOperator; /** * Represents an XQuery "for" expression. 
@@ -37,6 +39,7 @@ public class ForExpr extends BindingExpression { private QName positionalVariable = null; + private QName scoreVariable = null; private boolean allowEmpty = false; private boolean isOuterFor = true; @@ -50,6 +53,408 @@ public ClauseType getType() { return ClauseType.FOR; } + /** + * FLWOR loop-invariant input hoisting via rewrite-into-let. + * + * If this {@code for}'s {@code in} expression is loop-invariant relative + * to all enclosing FLWOR-bound variables, rewrite it as a reference to a + * new {@code let} binding inserted before the outermost enclosing FLWOR + * head. The hoisted expression is evaluated once instead of once per + * outer iteration, turning O(N×M) nested-loop joins into O(N+M) for the + * input materialization. + * + * Order matters: this clause's own variables are added to the current + * scope AFTER recursing into the input (the binding is not in scope for + * its own initializer) and BEFORE recursing into the return expression + * (the rest of the chain). + */ + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + final boolean enteredScope = getPreviousClause() == null; + if (enteredScope) { + cc.enterFlworChain(); + } + + // Recurse input first — gives any inner FLWORs a chance to hoist + // themselves out of US (their hoists target our scope's outermost). + if (inputSequence != null) { + inputSequence = inputSequence.optimize(cc); + } + + // Hoist OUR input, if invariant against outer chain scopes. + tryHoistInputSequence(cc); + + // Record THIS for-clause as a potential hoist insertion point on the + // current scope (the first such call wins). Must precede addVisibleVar + // so subsequent vars are tagged loop-body, not let-prefix. + cc.recordForClause(this); + + // Now this clause's vars become visible to the rest of the chain. 
+ cc.addVisibleFlworVar(varName); + if (positionalVariable != null) { + cc.addVisibleFlworVar(positionalVariable); + } + if (scoreVariable != null) { + cc.addVisibleFlworVar(scoreVariable); + } + + if (returnExpr != null) { + returnExpr = returnExpr.optimize(cc); + } + + // After hoisting+chain wiring, check if this for/where shape can be + // rewritten into a hash-join. Detection runs AFTER the inner returnExpr + // has been optimized so the WhereClause's whereExpr is in its final + // (post-fold, post-rewrite) form. + Expression result = tryHashJoinRewrite(cc); + if (enteredScope) { + result = cc.applyHoistsAndExitChain(result); + } + return result; + } + + /** + * Gates hash-join recognition. Default is {@code true} — XMark + * factor-0.01 measurements (interleaved 4 runs each, ANTLR parser): + * heavy tier (Q8/Q9/Q11/Q12) median 4.47s vs 6.42s with hash-join off + * (~30% reduction); Q8 specifically goes 1.57s → 0.34s (~4.6x). Disable + * with {@code -Dexist.optimizer.hashjoin=false} as an emergency switch + * if a workload regresses; per-test override via + * {@link #setHashJoinEnabledForTest(boolean)}. + */ + private static volatile boolean hashJoinEnabled = + Boolean.parseBoolean(System.getProperty("exist.optimizer.hashjoin", "true")); + + /** Test hook — flip hash-join on/off without restarting the JVM. */ + static void setHashJoinEnabledForTest(final boolean enabled) { + hashJoinEnabled = enabled; + } + + /** + * Detect the hash-join eligible shape and rewrite if eligible. + *

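+ * Eligible shape: {@code for $i in INNER where $i/key = PROBE + * return BODY} (exactly one operand of the comparison references + * {@code $i}; the other is the probe), with no positional/score variables, no + * {@code allowing empty}, no truncation/collation on the comparison, + * and a non-FLWOR body. + *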

+ * Returns either a {@link HashJoinForExpr} replacement (and logs the + * rewrite) or {@code this} when the shape doesn't match. + */ + private Expression tryHashJoinRewrite(final CompileContext cc) { + if (!hashJoinEnabled) { + return this; + } + if (positionalVariable != null || scoreVariable != null || allowEmpty) { + return this; + } + if (!(returnExpr instanceof WhereClause wc)) { + return this; + } + // Body of the where (after the predicate) must not be a FLWOR clause — + // ordering/grouping/chained for-let semantics are not preserved by the + // hash-join's match-iteration model. Unwrap the parser's debug + // wrapper before checking. + if (unwrapForDetection(wc.getReturnExpression()) instanceof FLWORClause) { + return this; + } + // The parser wraps the where expression in DebuggableExpression and may + // also nest it in a single-step PathExpr; peel both layers to see the + // underlying comparison. + final Expression rawWhere = unwrapForDetection(wc.getWhereExpr()); + if (!(rawWhere instanceof GeneralComparison cmp)) { + return this; + } + if (cmp.getRelation() != Comparison.EQ) { + return this; + } + if (cmp.getTruncation() != StringTruncationOperator.NONE) { + return this; // contains()/starts-with/ends-with + } + // Probe the operands: exactly one side must reference $i (and not + // through any sub-expression class the walker can't traverse). + final QName myVar = varName; + if (myVar == null) { + return this; + } + final boolean leftRefs = referencesVar(cmp.getLeft(), myVar); + final boolean rightRefs = referencesVar(cmp.getRight(), myVar); + if (leftRefs == rightRefs) { + // Both reference $i, or neither does — not a single-variable join. + return this; + } + final int innerSide = leftRefs ? 0 : 1; + final Expression probeExpr = leftRefs ? cmp.getRight() : cmp.getLeft(); + // Probe must NOT reference $i (already implied by leftRefs/rightRefs + // exclusivity, but check the walk didn't abort). 
+ final RefCheck probeCheck = new RefCheck(myVar); + probeCheck.check(probeExpr); + if (probeCheck.aborted) { + return this; // can't prove probe is independent of $i — bail + } + // Build the replacement. + final HashJoinForExpr replacement = new HashJoinForExpr(context, this, innerSide); + replacement.setLocation(line, column); + // Repair clause-chain links: previousClause stays unchanged + // (HashJoinForExpr is_a ForExpr); the returnExpr (WhereClause) keeps + // pointing back to the new for via setPreviousClause. + replacement.setPreviousClause(getPreviousClause()); + if (returnExpr instanceof FLWORClause innerClause) { + innerClause.setPreviousClause(replacement); + } + cc.replaceWith(this, replacement, + "hash-join for/where (innerSide=" + innerSide + ")"); + return replacement; + } + + /** + * Strips parser-inserted wrappers so structural pattern checks see the + * underlying expression. {@link DebuggableExpression} wraps return bodies + * and where conditions for debugger support, and a single-step + * {@link PathExpr} can wrap any operand. + */ + private static Expression unwrapForDetection(Expression e) { + while (true) { + if (e instanceof DebuggableExpression d) { + e = d.getFirst(); + } else if (e instanceof PathExpr p && p.getLength() == 1) { + e = p.getExpression(0); + } else { + return e; + } + } + } + + /** + * True if {@code expr} contains a {@link VariableReference} to + * {@code target}. Returns false ONLY when the walker can prove no such + * reference exists; if it encounters an expression class it cannot + * reliably traverse, returns true to err on the side of refusing the + * rewrite. + */ + private static boolean referencesVar(final Expression expr, final QName target) { + final RefCheck check = new RefCheck(target); + check.check(expr); + return check.aborted || check.found; + } + + /** + * Walks an expression subtree to determine whether it references a + * specific variable QName. 
Sets {@link #aborted} on unrecognised classes + * the walker cannot prove safe — callers should treat that as "may + * reference any variable" and refuse to optimize. + */ + private static final class RefCheck { + final QName target; + boolean found = false; + boolean aborted = false; + + RefCheck(final QName target) { this.target = target; } + + void check(final Expression expr) { + if (expr == null || aborted || found) { + return; + } + if (expr instanceof VariableReference vr) { + final QName name = vr.getName(); + if (name != null && name.equals(target)) { + found = true; + } + return; + } + if (expr instanceof LiteralValue) { + return; + } + if (expr instanceof BindingExpression be) { + check(be.getInputSequence()); + if (be instanceof AbstractFLWORClause flwor) { + check(flwor.getReturnExpression()); + } + return; + } + if (expr instanceof FilteredExpression fe) { + check(fe.getExpression()); + for (final Predicate p : fe.getPredicates()) { + check(p); + } + return; + } + if (expr instanceof LocationStep ls) { + final Predicate[] preds = ls.getPredicates(); + if (preds != null) { + for (final Predicate p : preds) { + check(p); + } + } + return; + } + if (expr instanceof WhereClause wc) { + check(wc.getWhereExpr()); + check(wc.getReturnExpression()); + return; + } + if (expr instanceof AbstractFLWORClause) { + aborted = true; + return; + } + final int count = expr.getSubExpressionCount(); + if (count == 0) { + if (!(expr instanceof LiteralValue)) { + aborted = true; + } + return; + } + for (int i = 0; i < count; i++) { + check(expr.getSubExpression(i)); + if (aborted || found) { + return; + } + } + } + } + + /** + * Decide whether this for-clause's input is loop-invariant relative to + * outer FLWOR scopes and, if so, queue a hoist on the outermost scope + * and replace the input with a reference to the synthesized variable. + * + * Conservative gates: + *

    + *
  • requires at least one outer FLWOR scope (otherwise nothing to + * hoist over);
  • + *
  • skips trivial inputs (literals, bare variable references) where + * hoisting yields no benefit;
  • + *
  • skips updating expressions (W3C XQUF — must not move side + * effects);
  • + *
  • refuses to hoist when reference collection encountered an + * expression shape the walker cannot traverse, and so cannot prove + * free of outer-variable references.
  • + *
+ */ + private void tryHoistInputSequence(final CompileContext cc) { + if (inputSequence == null + || inputSequence instanceof VariableReference + || inputSequence instanceof LiteralValue + || cc.flworChainDepth() < 2) { + return; + } + if (inputSequence.isUpdating()) { + return; + } + + final Set outerVars = cc.getOuterLoopBodyVars(); + if (outerVars.isEmpty()) { + return; + } + + final RefCollector refs = new RefCollector(); + refs.collect(inputSequence); + if (refs.aborted) { + return; + } + for (final QName name : refs.referenced) { + if (outerVars.contains(name)) { + return; + } + } + + final QName hoistedName = cc.generateHoistedVarName(); + cc.addPendingHoistToOutermost(hoistedName, inputSequence); + + final VariableReference ref = new VariableReference(context, hoistedName); + ref.setLocation(line, column); + inputSequence = ref; + } + + /** + * Walks an expression subtree to collect the QNames of in-scope variables + * it references. Sets {@link #aborted} to true if it encounters a class + * shape it cannot reliably traverse — callers must treat that as + * "may reference any var" and refuse to hoist. + * + * The subtree walk explicitly handles classes whose children are NOT + * exposed via {@code getSubExpression}: {@link BindingExpression}'s + * {@code inputSequence} / {@code returnExpr}, {@link FilteredExpression}'s + * expression and predicates, {@link LocationStep}'s predicates, and the + * {@link AbstractFLWORClause} chain. For everything else it falls back to + * {@code getSubExpression}; an unrecognized class with no advertised + * children aborts the walk. 
+ */ + private static final class RefCollector { + final Set referenced = new HashSet<>(); + boolean aborted = false; + + void collect(final Expression expr) { + if (expr == null || aborted) { + return; + } + if (expr instanceof VariableReference vr) { + final QName name = vr.getName(); + if (name != null) { + referenced.add(name); + } + return; + } + if (expr instanceof LiteralValue) { + return; + } + if (expr instanceof BindingExpression be) { + collect(be.getInputSequence()); + if (be instanceof AbstractFLWORClause flwor) { + collect(flwor.getReturnExpression()); + } + return; + } + if (expr instanceof FilteredExpression fe) { + collect(fe.getExpression()); + for (final Predicate p : fe.getPredicates()) { + collect(p); + } + return; + } + if (expr instanceof LocationStep ls) { + final Predicate[] preds = ls.getPredicates(); + if (preds != null) { + for (final Predicate p : preds) { + collect(p); + } + } + return; + } + if (expr instanceof WhereClause wc) { + collect(wc.getWhereExpr()); + collect(wc.getReturnExpression()); + return; + } + if (expr instanceof AbstractFLWORClause flwor) { + // OrderByClause, GroupByClause, CountClause, ReturnClause-like. + // Their non-returnExpr children aren't uniformly accessible; + // bail conservatively. + aborted = true; + return; + } + final int count = expr.getSubExpressionCount(); + if (count == 0) { + // Unknown leaf with no advertised children: cannot prove + // it has no var references. Conservative bail. + if (!isKnownSafeLeaf(expr)) { + aborted = true; + } + return; + } + for (int i = 0; i < count; i++) { + collect(expr.getSubExpression(i)); + if (aborted) { + return; + } + } + } + + private static boolean isKnownSafeLeaf(final Expression expr) { + return expr instanceof LiteralValue; + } + } + /** * A "for" expression may have an optional positional variable whose * QName can be set via this method. 
@@ -60,6 +465,17 @@ public void setPositionalVariable(final QName variable) { positionalVariable = variable; } + /** + * XQFT 3.0 §2.3: A "for" expression may have an optional score variable + * whose QName can be set via this method. The score variable is bound to + * an xs:double value representing the relevance score for each item. + * + * @param variable the name of the score variable + */ + public void setScoreVariable(final QName variable) { + scoreVariable = variable; + } + /* (non-Javadoc) * @see org.exist.xquery.Expression#analyze(org.exist.xquery.Expression) */ @@ -70,6 +486,7 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { try { contextInfo.setParent(this); final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + varContextInfo.addFlag(NON_UPDATING_CONTEXT); inputSequence.analyze(varContextInfo); // Declare the iteration variable final LocalVariable inVar = new LocalVariable(varName); @@ -83,6 +500,13 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { posVar.setStaticType(Type.INTEGER); context.declareVariableBinding(posVar); } + // Declare score variable (XQFT 3.0 §2.3) + if (scoreVariable != null) { + final LocalVariable scoreVar = new LocalVariable(scoreVariable); + scoreVar.setSequenceType(new SequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE)); + scoreVar.setStaticType(Type.DOUBLE); + context.declareVariableBinding(scoreVar); + } final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); newContextInfo.addFlag(SINGLE_STEP_EXECUTION); @@ -135,6 +559,15 @@ public Sequence eval(Sequence contextSequence, Item contextItem) at.setSequenceType(POSITIONAL_VAR_TYPE); context.declareVariableBinding(at); } + // Declare score variable (XQFT 3.0 §2.3) + LocalVariable score = null; + if (scoreVariable != null) { + score = new LocalVariable(scoreVariable); + score.setSequenceType(new SequenceType(Type.DOUBLE, Cardinality.EXACTLY_ONE)); + 
context.declareVariableBinding(score); + // Naive implementation: always bind score to 1.0 + score.setValue(new DoubleValue(this, 1.0)); + } // Assign the whole input sequence to the bound variable. // This is required if we process the "where" or "order by" clause // in one step. @@ -176,15 +609,23 @@ public Sequence eval(Sequence contextSequence, Item contextItem) // Loop through each variable binding int p = 0; - if (in.isEmpty() && allowEmpty) { - processItem(var, AtomicValue.EMPTY_VALUE, Sequence.EMPTY_SEQUENCE, resultSequence, at, p); - } else { - for (final SequenceIterator i = in.iterate(); i.hasNext(); p++) { - processItem(var, i.nextItem(), in, resultSequence, at, p); + try { + if (in.isEmpty() && allowEmpty) { + processItem(var, AtomicValue.EMPTY_VALUE, Sequence.EMPTY_SEQUENCE, resultSequence, at, p); + } else { + for (final SequenceIterator i = in.iterate(); i.hasNext() && !WhileClause.isTerminated(); p++) { + processItem(var, i.nextItem(), in, resultSequence, at, p); + } } + } catch (final WhileClause.WhileTerminationException e) { + // while clause signaled end of iteration for this for loop + } + // clear terminated flag if this is the outermost for + if (isOuterFor && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); } } finally { - // restore the local variable stack + // restore the local variable stack context.popLocalVariables(mark, resultSequence); } @@ -238,6 +679,8 @@ private boolean callPostEval() { case ORDERBY: case GROUPBY: return true; + default: + break; } prev = prev.getPreviousClause(); } @@ -264,6 +707,8 @@ public void dump(ExpressionDumper dumper) { } if (positionalVariable != null) {dumper.display(" at ").display(positionalVariable);} + if (scoreVariable != null) + {dumper.display(" score ").display(scoreVariable);} dumper.display(" in "); inputSequence.dump(dumper); dumper.endIndent().nl(); @@ -290,6 +735,9 @@ public String toString() { if (positionalVariable != null) { result.append(" at 
").append(positionalVariable); } + if (scoreVariable != null) { + result.append(" score ").append(scoreVariable); + } result.append(" in "); result.append(inputSequence.toString()); result.append(" "); @@ -313,6 +761,9 @@ public Set getTupleStreamVariables() { if (positionalVariable != null) { variables.add(positionalVariable); } + if (scoreVariable != null) { + variables.add(scoreVariable); + } final QName variable = getVariable(); if (variable != null) { diff --git a/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java b/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java new file mode 100644 index 00000000000..e2956b36d3b --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/ForKeyValueExpr.java @@ -0,0 +1,308 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.value.*; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 "for key", "for value", and "for key/value" clauses. 
+ *
+ * <p>{@code for key $k in map-expr} iterates over the keys of a map.</p>
+ * <p>{@code for value $v in map-expr} iterates over the values of a map.</p>
+ * <p>{@code for key $k value $v in map-expr} iterates over key-value pairs.</p>

+ */
+public class ForKeyValueExpr extends BindingExpression {
+
+    /** Which of the three clause forms this instance represents. */
+    private final ClauseType clauseType;
+    private QName positionalVariable = null;
+    private QName valueVariable = null;
+    private SequenceType valueSequenceType = null;
+
+    public ForKeyValueExpr(final XQueryContext context, final ClauseType clauseType) {
+        super(context);
+        this.clauseType = clauseType;
+    }
+
+    /**
+     * Sets the optional positional ("at") variable.
+     *
+     * @param variable the name of the positional variable
+     */
+    public void setPositionalVariable(final QName variable) {
+        positionalVariable = variable;
+    }
+
+    /**
+     * Sets the second ("value") variable used by the key/value form.
+     *
+     * @param variable the name of the value variable
+     */
+    public void setValueVariable(final QName variable) {
+        valueVariable = variable;
+    }
+
+    /**
+     * Sets the declared type of the value variable.
+     *
+     * @param type the sequence type checked against each bound value
+     */
+    public void setValueSequenceType(final SequenceType type) {
+        valueSequenceType = type;
+    }
+
+    @Override
+    public ClauseType getType() {
+        return clauseType;
+    }
+
+    /**
+     * Analyzes the input expression, declares the bound variables and then
+     * analyzes the return expression with those variables in scope.
+     *
+     * @param contextInfo the static analysis context
+     * @throws XPathException if analysis of a sub-expression fails
+     */
+    @Override
+    public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+        super.analyze(contextInfo);
+        final LocalVariable mark = context.markLocalVariables(false);
+        try {
+            contextInfo.setParent(this);
+            final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo);
+            // Same flag ForExpr sets on its input sequence, so that a call to an
+            // updating function in the binding expression is reported (XUST0001).
+            varContextInfo.addFlag(NON_UPDATING_CONTEXT);
+            inputSequence.analyze(varContextInfo);
+            final LocalVariable inVar = new LocalVariable(varName);
+            inVar.setSequenceType(sequenceType);
+            inVar.setStaticType(Type.ITEM);
+            context.declareVariableBinding(inVar);
+            if (valueVariable != null) {
+                final LocalVariable valVar = new LocalVariable(valueVariable);
+                valVar.setSequenceType(valueSequenceType);
+                valVar.setStaticType(Type.ITEM);
+                context.declareVariableBinding(valVar);
+            }
+            if (positionalVariable != null) {
+                final LocalVariable posVar = new LocalVariable(positionalVariable);
+                posVar.setSequenceType(POSITIONAL_VAR_TYPE);
+                posVar.setStaticType(Type.INTEGER);
+                context.declareVariableBinding(posVar);
+            }
+
+            final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo);
+            newContextInfo.addFlag(SINGLE_STEP_EXECUTION);
+            returnExpr.analyze(newContextInfo);
+        } finally {
+            context.popLocalVariables(mark);
+        }
+    }
+
+    /**
+     * Evaluates the input expression, which must yield exactly one map (or the
+     * empty sequence), and evaluates the return expression once per key.
+     *
+     * @param contextSequence the context sequence passed to the input expression
+     * @param contextItem     not used by this clause
+     * @return the concatenated results, post-processed when {@link #callPostEval()} says so
+     * @throws XPathException XPTY0004 if the input is non-empty and not a single map,
+     *                        or if a bound value fails its declared type
+     */
+    @Override
+    public Sequence eval(Sequence contextSequence, final Item contextItem)
+            throws XPathException {
+        if (context.getProfiler().isEnabled()) {
+            context.getProfiler().start(this);
+            context.getProfiler().message(this, Profiler.DEPENDENCIES,
+                    "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
+            if (contextSequence != null) {
+                context.getProfiler().message(this, Profiler.START_SEQUENCES,
+                        "CONTEXT SEQUENCE", contextSequence);
+            }
+        }
+        context.expressionStart(this);
+
+        final LocalVariable mark = context.markLocalVariables(false);
+        final Sequence resultSequence = new ValueSequence(unordered);
+        try {
+            final Sequence in = inputSequence.eval(contextSequence, null);
+
+            if (in.isEmpty()) {
+                // An empty input sequence produces no iterations (an empty map is a
+                // single item and takes the branch below, iterating zero keys).
+                // NOTE(review): ForMemberExpr raises XPTY0004 for an empty input;
+                // confirm which of the two behaviours is intended.
+            } else if (in.getItemCount() != 1 || !(in.itemAt(0) instanceof AbstractMapType)) {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "for " + clauseLabel()
+                                + " expression requires a single map, got "
+                                + Type.getTypeName(in.getItemType()));
+            } else {
+                final AbstractMapType map = (AbstractMapType) in.itemAt(0);
+                final LocalVariable var = createVariable(varName);
+                var.setSequenceType(sequenceType);
+                context.declareVariableBinding(var);
+
+                LocalVariable valVar = null;
+                if (valueVariable != null) {
+                    valVar = new LocalVariable(valueVariable);
+                    valVar.setSequenceType(valueSequenceType);
+                    context.declareVariableBinding(valVar);
+                }
+
+                LocalVariable at = null;
+                if (positionalVariable != null) {
+                    at = new LocalVariable(positionalVariable);
+                    at.setSequenceType(POSITIONAL_VAR_TYPE);
+                    context.declareVariableBinding(at);
+                }
+
+                final Sequence keys = map.keys();
+                int pos = 0;
+                try {
+                    for (final SequenceIterator i = keys.iterate(); i.hasNext() && !WhileClause.isTerminated(); ) {
+                        context.proceed(this);
+                        final AtomicValue key = (AtomicValue) i.nextItem();
+                        pos++;
+
+                        final Sequence bindValue;
+                        if (clauseType == ClauseType.FOR_VALUE) {
+                            bindValue = map.get(key);
+                        } else {
+                            // FOR_KEY or FOR_KEY_VALUE: primary var is key
+                            bindValue = key;
+                        }
+                        var.setValue(bindValue);
+
+                        if (valVar != null) {
+                            valVar.setValue(map.get(key));
+                        }
+
+                        if (at != null) {
+                            at.setValue(new IntegerValue(this, pos));
+                        }
+                        if (sequenceType != null) {
+                            var.checkType();
+                        }
+                        if (valVar != null && valueSequenceType != null) {
+                            valVar.checkType();
+                        }
+
+                        final Sequence returnResult;
+                        if (returnExpr instanceof OrderByClause) {
+                            returnResult = returnExpr.eval(bindValue, null);
+                        } else {
+                            returnResult = returnExpr.eval(null, null);
+                        }
+                        resultSequence.addAll(returnResult);
+                        var.destroy(context, resultSequence);
+                    }
+                } catch (final WhileClause.WhileTerminationException e) {
+                    // while clause signaled end of iteration
+                }
+                // Only the first clause of the chain resets the termination flag.
+                if (getPreviousClause() == null && WhileClause.isTerminated()) {
+                    WhileClause.clearTerminated();
+                }
+            }
+        } finally {
+            context.popLocalVariables(mark, resultSequence);
+        }
+
+        if (callPostEval()) {
+            final Sequence postResult = postEval(resultSequence);
+            context.expressionEnd(this);
+            if (context.getProfiler().isEnabled()) {
+                context.getProfiler().end(this, "", postResult);
+            }
+            return postResult;
+        }
+
+        context.expressionEnd(this);
+        if (context.getProfiler().isEnabled()) {
+            context.getProfiler().end(this, "", resultSequence);
+        }
+        return resultSequence;
+    }
+
+    /** Returns the keyword(s) following "for" for this clause form, used in messages and dumps. */
+    private String clauseLabel() {
+        switch (clauseType) {
+            case FOR_KEY: return "key";
+            case FOR_VALUE: return "value";
+            case FOR_KEY_VALUE: return "key/value";
+            default: return "key";
+        }
+    }
+
+    /**
+     * Decides whether this clause runs {@code postEval} itself: false when an
+     * earlier binding clause precedes it, true when the nearest relevant earlier
+     * clause is an order-by/group-by or when there is none.
+     */
+    private boolean callPostEval() {
+        FLWORClause prev = getPreviousClause();
+        while (prev != null) {
+            switch (prev.getType()) {
+                case LET:
+                case FOR:
+                case FOR_MEMBER:
+                case FOR_KEY:
+                case FOR_VALUE:
+                case FOR_KEY_VALUE:
+                    return false;
+                case ORDERBY:
+                case GROUPBY:
+                    return true;
+                default:
+                    break;
+            }
+            prev = prev.getPreviousClause();
+        }
+        return true;
+    }
+
+    @Override
+    public void dump(final ExpressionDumper dumper) {
+        dumper.display("for " + clauseLabel() + " ", line);
+        dumper.startIndent();
+        dumper.display("$").display(varName);
+        if (valueVariable != null) {
+            dumper.display(" value $").display(valueVariable);
+        }
+        if (sequenceType != null) {
+            dumper.display(" as ").display(sequenceType);
+        }
+        if (positionalVariable != null) {
+            dumper.display(" at $").display(positionalVariable);
+        }
+        dumper.display(" in ");
+        inputSequence.dump(dumper);
+        dumper.endIndent().nl();
+        if (returnExpr instanceof LetExpr) {
+            dumper.display(" ", returnExpr.getLine());
+        } else {
+            dumper.display("return", returnExpr.getLine());
+        }
+        dumper.startIndent();
+        returnExpr.dump(dumper);
+        dumper.endIndent().nl();
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder result = new StringBuilder();
+        result.append("for ").append(clauseLabel()).append(" ");
+        result.append("$").append(varName);
+        if (valueVariable != null) {
+            result.append(" value $").append(valueVariable);
+        }
+        if (sequenceType != null) {
+            result.append(" as ").append(sequenceType);
+        }
+        if (positionalVariable != null) {
+            result.append(" at $").append(positionalVariable);
+        }
+        result.append(" in ");
+        result.append(inputSequence.toString());
+        result.append(" ");
+        if (returnExpr instanceof LetExpr) {
+            result.append(" ");
+        } else {
+            result.append("return ");
+        }
+        result.append(returnExpr.toString());
+        return result.toString();
+    }
+
+    /**
+     * Lists every variable this clause contributes to the tuple stream: the
+     * primary variable, the value variable, the positional variable and the
+     * start variable, each when present.
+     *
+     * @return a new mutable set of variable names
+     */
+    @Override
+    public Set<QName> getTupleStreamVariables() {
+        final Set<QName> variables = new HashSet<>();
+        final QName variable = getVariable();
+        if (variable != null) {
+            variables.add(variable);
+        }
+        if (valueVariable != null) {
+            variables.add(valueVariable);
+        }
+        // The positional variable is bound per tuple as well (ForExpr reports it too).
+        if (positionalVariable != null) {
+            variables.add(positionalVariable);
+        }
+        final LocalVariable startVar = getStartVariable();
+        if (startVar != null) {
+            variables.add(startVar.getQName());
+        }
+        return variables;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java b/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java
new file mode 100644
index 00000000000..522cb213331
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/ForMemberExpr.java
@@ -0,0 +1,239 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ *
This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.value.*; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 "for member" clause in FLWOR expressions. + * + *

<p>{@code for member $m in $array-expr} iterates over the members of an array,
+ * binding each member (which is a sequence) to the variable.</p>

+ */
+public class ForMemberExpr extends BindingExpression {
+
+    private QName positionalVariable = null;
+
+    public ForMemberExpr(final XQueryContext context) {
+        super(context);
+    }
+
+    /**
+     * Sets the optional positional ("at") variable.
+     *
+     * @param variable the name of the positional variable
+     */
+    public void setPositionalVariable(final QName variable) {
+        positionalVariable = variable;
+    }
+
+    @Override
+    public ClauseType getType() {
+        return ClauseType.FOR_MEMBER;
+    }
+
+    /**
+     * Analyzes the input expression, declares the bound variables and then
+     * analyzes the return expression with those variables in scope.
+     *
+     * @param contextInfo the static analysis context
+     * @throws XPathException if analysis of a sub-expression fails
+     */
+    @Override
+    public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+        super.analyze(contextInfo);
+        final LocalVariable mark = context.markLocalVariables(false);
+        try {
+            contextInfo.setParent(this);
+            final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo);
+            // Same flag ForExpr sets on its input sequence, so that a call to an
+            // updating function in the binding expression is reported (XUST0001).
+            varContextInfo.addFlag(NON_UPDATING_CONTEXT);
+            inputSequence.analyze(varContextInfo);
+            final LocalVariable inVar = new LocalVariable(varName);
+            inVar.setSequenceType(sequenceType);
+            inVar.setStaticType(Type.ITEM);
+            context.declareVariableBinding(inVar);
+            if (positionalVariable != null) {
+                final LocalVariable posVar = new LocalVariable(positionalVariable);
+                posVar.setSequenceType(POSITIONAL_VAR_TYPE);
+                posVar.setStaticType(Type.INTEGER);
+                context.declareVariableBinding(posVar);
+            }
+
+            final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo);
+            newContextInfo.addFlag(SINGLE_STEP_EXECUTION);
+            returnExpr.analyze(newContextInfo);
+        } finally {
+            context.popLocalVariables(mark);
+        }
+    }
+
+    /**
+     * Evaluates the input expression, which must yield exactly one array, and
+     * evaluates the return expression once per array member.
+     *
+     * @param contextSequence the context sequence passed to the input expression
+     * @param contextItem     not used by this clause
+     * @return the concatenated results, post-processed when {@link #callPostEval()} says so
+     * @throws XPathException XPTY0004 if the input is not a single array, or if a
+     *                        member fails the declared type of the variable
+     */
+    @Override
+    public Sequence eval(Sequence contextSequence, final Item contextItem)
+            throws XPathException {
+        if (context.getProfiler().isEnabled()) {
+            context.getProfiler().start(this);
+            context.getProfiler().message(this, Profiler.DEPENDENCIES,
+                    "DEPENDENCIES", Dependency.getDependenciesName(this.getDependencies()));
+            if (contextSequence != null) {
+                context.getProfiler().message(this, Profiler.START_SEQUENCES,
+                        "CONTEXT SEQUENCE", contextSequence);
+            }
+        }
+        context.expressionStart(this);
+
+        final LocalVariable mark = context.markLocalVariables(false);
+        final Sequence resultSequence = new ValueSequence(unordered);
+        try {
+            final Sequence in = inputSequence.eval(contextSequence, null);
+
+            // Accept the array either as the sequence object itself or as the only
+            // item of a wrapping sequence (ForKeyValueExpr looks at itemAt(0) too).
+            final ArrayType array;
+            if (in instanceof ArrayType) {
+                array = (ArrayType) in;
+            } else if (in.getItemCount() == 1 && in.itemAt(0) instanceof ArrayType) {
+                array = (ArrayType) in.itemAt(0);
+            } else {
+                throw new XPathException(this, ErrorCodes.XPTY0004,
+                        "for member expression requires an array, got "
+                                + Type.getTypeName(in.getItemType()));
+            }
+
+            final LocalVariable var = createVariable(varName);
+            var.setSequenceType(sequenceType);
+            context.declareVariableBinding(var);
+
+            LocalVariable at = null;
+            if (positionalVariable != null) {
+                at = new LocalVariable(positionalVariable);
+                at.setSequenceType(POSITIONAL_VAR_TYPE);
+                context.declareVariableBinding(at);
+            }
+
+            try {
+                for (int i = 0; i < array.getSize() && !WhileClause.isTerminated(); i++) {
+                    context.proceed(this);
+                    final Sequence member = array.get(i);
+                    var.setValue(member);
+                    if (at != null) {
+                        at.setValue(new IntegerValue(this, i + 1));
+                    }
+                    // Enforce the declared "as" type; the check was previously
+                    // guarded by "== null" and therefore never ran when a type
+                    // had been declared.
+                    if (sequenceType != null) {
+                        var.checkType();
+                    }
+
+                    final Sequence returnResult;
+                    if (returnExpr instanceof OrderByClause) {
+                        returnResult = returnExpr.eval(member, null);
+                    } else {
+                        returnResult = returnExpr.eval(null, null);
+                    }
+                    resultSequence.addAll(returnResult);
+                    var.destroy(context, resultSequence);
+                }
+            } catch (final WhileClause.WhileTerminationException e) {
+                // while clause signaled end of iteration
+            }
+            // Only the first clause of the chain resets the termination flag.
+            if (getPreviousClause() == null && WhileClause.isTerminated()) {
+                WhileClause.clearTerminated();
+            }
+        } finally {
+            context.popLocalVariables(mark, resultSequence);
+        }
+
+        if (callPostEval()) {
+            final Sequence postResult = postEval(resultSequence);
+            context.expressionEnd(this);
+            if (context.getProfiler().isEnabled()) {
+                context.getProfiler().end(this, "", postResult);
+            }
+            return postResult;
+        }
+
+        context.expressionEnd(this);
+        if (context.getProfiler().isEnabled()) {
+            context.getProfiler().end(this, "", resultSequence);
+        }
+        return resultSequence;
+    }
+
+    /**
+     * Decides whether this clause runs {@code postEval} itself: false when an
+     * earlier binding clause precedes it, true when the nearest relevant earlier
+     * clause is an order-by/group-by or when there is none. The set of binding
+     * clause types matches the one used by {@code ForKeyValueExpr}.
+     */
+    private boolean callPostEval() {
+        FLWORClause prev = getPreviousClause();
+        while (prev != null) {
+            switch (prev.getType()) {
+                case LET:
+                case FOR:
+                case FOR_MEMBER:
+                case FOR_KEY:
+                case FOR_VALUE:
+                case FOR_KEY_VALUE:
+                    return false;
+                case ORDERBY:
+                case GROUPBY:
+                    return true;
+                default:
+                    break;
+            }
+            prev = prev.getPreviousClause();
+        }
+        return true;
+    }
+
+    @Override
+    public void dump(final ExpressionDumper dumper) {
+        dumper.display("for member ", line);
+        dumper.startIndent();
+        dumper.display("$").display(varName);
+        if (sequenceType != null) {
+            dumper.display(" as ").display(sequenceType);
+        }
+        if (positionalVariable != null) {
+            dumper.display(" at $").display(positionalVariable);
+        }
+        dumper.display(" in ");
+        inputSequence.dump(dumper);
+        dumper.endIndent().nl();
+        if (returnExpr instanceof LetExpr) {
+            dumper.display(" ", returnExpr.getLine());
+        } else {
+            dumper.display("return", returnExpr.getLine());
+        }
+        dumper.startIndent();
+        returnExpr.dump(dumper);
+        dumper.endIndent().nl();
+    }
+
+    @Override
+    public String toString() {
+        final StringBuilder result = new StringBuilder();
+        result.append("for member ");
+        result.append("$").append(varName);
+        if (sequenceType != null) {
+            result.append(" as ").append(sequenceType);
+        }
+        if (positionalVariable != null) {
+            result.append(" at $").append(positionalVariable);
+        }
+        result.append(" in ");
+        result.append(inputSequence.toString());
+        result.append(" ");
+        if (returnExpr instanceof LetExpr) {
+            result.append(" ");
+        } else {
+            result.append("return ");
+        }
+        result.append(returnExpr.toString());
+        return result.toString();
+    }
+
+    /**
+     * Lists every variable this clause contributes to the tuple stream: the
+     * member variable, the positional variable and the start variable, each
+     * when present.
+     *
+     * @return a new mutable set of variable names
+     */
+    @Override
+    public Set<QName> getTupleStreamVariables() {
+        final Set<QName> variables = new HashSet<>();
+        final QName variable = getVariable();
+        if (variable != null) {
+            variables.add(variable);
+        }
+        // The positional variable is bound per tuple as well (ForExpr reports it too).
+        if (positionalVariable != null) {
+            variables.add(positionalVariable);
+        }
+        final LocalVariable startVar = getStartVariable();
+        if (startVar != null) {
+            variables.add(startVar.getQName());
+        }
+        return variables;
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/Function.java b/exist-core/src/main/java/org/exist/xquery/Function.java
index 161cba2957b..4b490c81b9e 100644
--- a/exist-core/src/main/java/org/exist/xquery/Function.java
+++ b/exist-core/src/main/java/org/exist/xquery/Function.java
@@ -212,10 +212,29 @@ public void setParent(final Expression parent) {
      * @throws
XPathException if an error occurs setting the arguments */ public void setArguments(final List arguments) throws XPathException { - if ((!mySignature.isVariadic()) && arguments.size() != mySignature.getArgumentCount()) { - throw new XPathException(this, ErrorCodes.XPST0017, - "Number of arguments of function " + getName() + " doesn't match function signature (expected " - + mySignature.getArgumentCount() + ", got " + arguments.size() + ')'); + final int argCount = mySignature.getArgumentCount(); + if ((!mySignature.isVariadic()) && arguments.size() != argCount) { + // XQ4: Allow fewer arguments if trailing params have default values + if (arguments.size() < argCount) { + boolean hasDefaults = true; + final SequenceType[] argTypes = mySignature.getArgumentTypes(); + for (int i = arguments.size(); i < argCount; i++) { + if (!(argTypes[i] instanceof FunctionParameterSequenceType) || + !((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + hasDefaults = false; + break; + } + } + if (!hasDefaults) { + throw new XPathException(this, ErrorCodes.XPST0017, + "Number of arguments of function " + getName() + " doesn't match function signature (expected " + + argCount + ", got " + arguments.size() + ')'); + } + } else { + throw new XPathException(this, ErrorCodes.XPST0017, + "Number of arguments of function " + getName() + " doesn't match function signature (expected " + + argCount + ", got " + arguments.size() + ')'); + } } steps.clear(); @@ -305,7 +324,9 @@ private Expression checkArgumentType( if (returnType != Type.ITEM && !Type.subTypeOf(returnType, argType.getPrimaryType())) { if (!(Type.subTypeOf(argType.getPrimaryType(), returnType) || //Because () is seen as a node - (argType.getCardinality().isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE) && returnType == Type.NODE))) { + (argType.getCardinality().isSuperCardinalityOrEqualOf(Cardinality.EMPTY_SEQUENCE) && returnType == Type.NODE) || + // XQuery 4.0: allow implicit casts and relabeling + 
(context.getXQueryVersion() >= 40 && (DynamicTypeCheck.isXQ4ImplicitCast(returnType, argType.getPrimaryType()) || DynamicTypeCheck.isXQ4Relabeling(returnType, argType.getPrimaryType()))))) { LOG.debug(ExpressionDumper.dump(argument)); throw new XPathException(this, ErrorCodes.XPTY0004, Messages.getMessage(Error.FUNC_PARAM_TYPE_STATIC, String.valueOf(argPosition), mySignature, argType.toString(), Type.getTypeName(returnType))); @@ -422,6 +443,7 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException if (arg != null) { // call analyze for each argument final AnalyzeContextInfo argContextInfo = new AnalyzeContextInfo(contextInfo); + argContextInfo.addFlag(NON_UPDATING_CONTEXT); arg.analyze(argContextInfo); if (!argumentsChecked) { diff --git a/exist-core/src/main/java/org/exist/xquery/FunctionCall.java b/exist-core/src/main/java/org/exist/xquery/FunctionCall.java index 466739d798f..df0656bb0a1 100644 --- a/exist-core/src/main/java/org/exist/xquery/FunctionCall.java +++ b/exist-core/src/main/java/org/exist/xquery/FunctionCall.java @@ -119,6 +119,12 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException // check that FunctionCall#resolveForwardReference(UserDefinedFunction) has been called first! 
if (functionDef != null) { + // XUST0001: calling an updating function in a non-updating context + if (functionDef.getSignature().isUpdating() && contextInfo.hasFlag(NON_UPDATING_CONTEXT)) { + throw new XPathException(this, ErrorCodes.XUST0001, + "call to updating function " + functionDef.getSignature().getName() + + " is not allowed in a non-updating context"); + } final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); newContextInfo.setParent(this); newContextInfo.removeFlag(IN_NODE_CONSTRUCTOR); @@ -451,6 +457,11 @@ protected void setRecursive(boolean recursive) { this.recursive = recursive; } + @Override + public boolean isUpdating() { + return functionDef != null && functionDef.getSignature().isUpdating(); + } + public boolean isRecursive(){ return recursive; } diff --git a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java index adcf7d3d5cb..8002853aaed 100644 --- a/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java +++ b/exist-core/src/main/java/org/exist/xquery/FunctionFactory.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.List; +import java.util.Set; import org.exist.Namespaces; import org.exist.dom.QName; @@ -47,6 +48,17 @@ public class FunctionFactory { public static final String PROPERTY_DISABLE_DEPRECATED_FUNCTIONS = "xquery.disable-deprecated-functions"; public static final boolean DISABLE_DEPRECATED_FUNCTIONS_BY_DEFAULT = false; + /** + * Reserved function names per XQuery 3.1/4.0 spec. + * These names must not be used as unprefixed function calls (XPST0003). 
+ */ + private static final Set RESERVED_FUNCTION_NAMES = Set.of( + "array", "attribute", "comment", "document-node", "element", + "function", "if", "item", "map", "namespace-node", "node", + "processing-instruction", "schema-attribute", "schema-element", + "switch", "text", "typeswitch" + ); + public static Expression createFunction(XQueryContext context, XQueryAST ast, PathExpr parent, List params) throws XPathException { QName qname = null; try { @@ -54,6 +66,29 @@ public static Expression createFunction(XQueryContext context, XQueryAST ast, Pa } catch(final QName.IllegalQNameException xpe) { throw new XPathException(ast, ErrorCodes.XPST0081, "Invalid qname " + ast.getText() + ". " + xpe.getMessage()); } + // Check for reserved function names — unprefixed reserved names cannot be + // used as function calls (XPST0003). Prefixed names like fn:item() are not + // subject to the reserved name restriction (they just won't be found → XPST0017). + final String rawName = ast.getText(); + if (rawName != null && !rawName.contains(":") && !rawName.contains("{")) { + final String local = qname.getLocalPart(); + if (RESERVED_FUNCTION_NAMES.contains(local)) { + throw new XPathException(ast.getLine(), ast.getColumn(), ErrorCodes.XPST0003, + "'" + local + "' is a reserved function name and cannot be used as a function call"); + } + } + + // XQ4 (PR2200): for unprefixed function calls, check if there's a + // no-namespace user-defined function that should override fn: + if (context.getXQueryVersion() >= 40 + && !ast.getText().contains(":") + && Namespaces.XPATH_FUNCTIONS_NS.equals(qname.getNamespaceURI())) { + final QName noNsName = new QName(ast.getText(), ""); + final UserDefinedFunction noNsFunc = context.resolveFunction(noNsName, params.size()); + if (noNsFunc != null) { + qname = noNsName; + } + } return createFunction(context, qname, ast, parent, params); } @@ -85,7 +120,8 @@ public static Expression createFunction(XQueryContext context, QName qname, XQue final String 
local = qname.getLocalPart(); final String uri = qname.getNamespaceURI(); Expression step = null; - if (optimizeStrFuncs && (Namespaces.XPATH_FUNCTIONS_NS.equals(uri) || Namespaces.XSL_NS.equals(uri))) { + final boolean allPathExprParams = params.stream().allMatch(p -> p instanceof PathExpr); + if (optimizeStrFuncs && allPathExprParams && (Namespaces.XPATH_FUNCTIONS_NS.equals(uri) || Namespaces.XSL_NS.equals(uri))) { if("starts-with".equals(local)) { step = startsWith(context, ast, parent, params); } else if("ends-with".equals(local)) { @@ -240,12 +276,25 @@ private static GeneralComparison equals(XQueryContext context, XQueryAST ast, private static CastExpression castExpression(XQueryContext context, XQueryAST ast, List params, QName qname) throws XPathException { - if (params.size() != 1) { + final Expression arg; + if (params.size() == 1) { + arg = params.getFirst(); + } else if (params.isEmpty() && context.getXQueryVersion() >= 31) { + // XQ4 focus constructor: xs:type() uses context item as argument + arg = new ContextItemExpression(context); + ((ContextItemExpression) arg).setLocation(ast.getLine(), ast.getColumn()); + } else { throw new XPathException(ast.getLine(), ast.getColumn(), ErrorCodes.XPST0017, "Wrong number of arguments for constructor function"); } - final Expression arg = params.getFirst(); - final int code = Type.getType(qname); + final int code; + try { + code = Type.getType(qname); + } catch (final XPathException e) { + // Unknown type name in xs: namespace → XPST0017 (no such function) + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0017, "Unknown constructor function: " + qname.getStringValue()); + } final CastExpression castExpr = new CastExpression(context, arg, code, Cardinality.ZERO_OR_ONE); castExpr.setLocation(ast.getLine(), ast.getColumn()); return castExpr; @@ -305,10 +354,34 @@ private static Function functionCall(final XQueryContext context, * @param throwOnNotFound true to throw an XPST0017 if the 
functions is not found, false to just return null */ private static @Nullable Function getInternalModuleFunction(final XQueryContext context, - final XQueryAST ast, final List params, QName qname, Module module, + final XQueryAST ast, List params, QName qname, Module module, final boolean throwOnNotFound) throws XPathException { //For internal modules: create a new function instance from the class - FunctionDef def = ((InternalModule) module).getFunctionDef(qname, params.size()); + final boolean hasKeywordArgs = hasKeywordArguments(params); + FunctionDef def = null; + + // When keyword args are present, skip the initial arity-based lookup because + // params.size() may not match the correct overload. Instead, resolve keyword + // args against all signatures (largest arity first) to find the right one. + if (hasKeywordArgs) { + final List funcs = ((InternalModule) module).getFunctionsByName(qname); + // Sort by arity descending — keyword args typically target the largest overload + funcs.sort((a, b) -> b.getArgumentCount() - a.getArgumentCount()); + for (final FunctionSignature sig : funcs) { + final List resolved = resolveKeywordArguments(context, params, sig, ast); + if (resolved != null) { + def = ((InternalModule) module).getFunctionDef(qname, sig.getArgumentCount()); + if (def != null) { + params = resolved; + break; + } + } + } + } + + if (def == null && !hasKeywordArgs) { + def = ((InternalModule) module).getFunctionDef(qname, params.size()); + } //TODO: rethink: xsl namespace function should search xpath one too if (def == null && Namespaces.XSL_NS.equals(qname.getNamespaceURI())) { //Search xpath namespace @@ -360,7 +433,12 @@ private static Function functionCall(final XQueryContext context, "Access to deprecated functions is not allowed. Call to '" + qname.getStringValue() + "()' denied. 
" + def.getSignature().getDeprecated()); } final Function fn = Function.createFunction(context, ast, module, def); - fn.setArguments(params); + if (hasKeywordArgs) { + final List resolved = resolveKeywordArguments(context, params, def.getSignature(), ast); + fn.setArguments(resolved != null ? resolved : params); + } else { + fn.setArguments(params); + } fn.setASTNode(ast); return new InternalFunctionCall(fn); } @@ -370,11 +448,36 @@ private static Function functionCall(final XQueryContext context, */ private static FunctionCall getUserDefinedFunction(XQueryContext context, XQueryAST ast, List params, QName qname) throws XPathException { final FunctionCall fc; - final UserDefinedFunction func = context.resolveFunction(qname, params.size()); + final boolean hasKeywordArgs = hasKeywordArguments(params); + + // Count positional arguments to determine resolution arity + int positionalCount = params.size(); + if (hasKeywordArgs) { + positionalCount = 0; + for (final Expression param : params) { + if (param instanceof KeywordArgumentExpression) { + break; + } + positionalCount++; + } + } + + UserDefinedFunction func = context.resolveFunction(qname, params.size()); + + // If keyword args and no exact match, try resolving with positional count + if (func == null && hasKeywordArgs && positionalCount != params.size()) { + func = context.resolveFunction(qname, positionalCount); + } + if (func != null) { fc = new FunctionCall(context, func); fc.setLocation(ast.getLine(), ast.getColumn()); - fc.setArguments(params); + if (hasKeywordArgs) { + final List resolved = resolveKeywordArguments(context, params, func.getSignature(), ast); + fc.setArguments(resolved != null ? 
resolved : params); + } else { + fc.setArguments(params); + } } else { //Create a forward reference which will be resolved later fc = new FunctionCall(context, qname, params); @@ -470,16 +573,133 @@ public static FunctionCall wrap(XQueryContext context, Function call) throws XPa newSignature.setArgumentTypes(newParamArray); final UserDefinedFunction func = new UserDefinedFunction(context, newSignature); + func.setPassContextToBody(true); for (final QName varName: variables) { func.addVariable(varName); } - + call.setArguments(innerArgs); - + func.setFunctionBody(call); final FunctionCall wrappedCall = new FunctionCall(context, func); wrappedCall.setArguments(wrapperArgs); return wrappedCall; } + + /** + * Check if any parameter is a keyword argument. + */ + private static boolean hasKeywordArguments(final List params) { + for (final Expression param : params) { + if (param instanceof KeywordArgumentExpression) { + return true; + } + } + return false; + } + + /** + * Resolve keyword arguments to positional arguments using the function signature. + * + * Keyword arguments (name := value) are matched to the corresponding parameter + * position in the function signature. Positional arguments must come before + * keyword arguments. Gaps between positional and keyword arguments are filled + * with empty sequence expressions for optional parameters. Returns null if + * resolution fails. 
+ */ + private static @Nullable List resolveKeywordArguments( + final XQueryContext context, + final List params, final FunctionSignature signature, + final XQueryAST ast) throws XPathException { + final SequenceType[] argTypes = signature.getArgumentTypes(); + if (argTypes == null) { + return null; + } + + // Find where keyword arguments start + int firstKeyword = -1; + for (int i = 0; i < params.size(); i++) { + if (params.get(i) instanceof KeywordArgumentExpression) { + firstKeyword = i; + break; + } + } + if (firstKeyword < 0) { + return params; // no keyword args + } + + // Build the resolved argument list + final List resolved = new ArrayList<>(argTypes.length); + + // Copy positional arguments + for (int i = 0; i < firstKeyword; i++) { + resolved.add(params.get(i)); + } + + // Fill remaining positions with nulls (to be filled by keyword args) + for (int i = firstKeyword; i < argTypes.length; i++) { + resolved.add(null); + } + + // Match keyword arguments to parameter positions + for (int i = firstKeyword; i < params.size(); i++) { + final Expression param = params.get(i); + if (!(param instanceof KeywordArgumentExpression)) { + throw new XPathException(ast.getLine(), ast.getColumn(), + ErrorCodes.XPST0003, + "Positional arguments must not follow keyword arguments"); + } + final KeywordArgumentExpression kwArg = (KeywordArgumentExpression) param; + final String kwName = kwArg.getKeywordName(); + + // Find matching parameter by name + int matchPos = -1; + for (int j = firstKeyword; j < argTypes.length; j++) { + if (argTypes[j] instanceof org.exist.xquery.value.FunctionParameterSequenceType) { + final String paramName = ((org.exist.xquery.value.FunctionParameterSequenceType) argTypes[j]) + .getAttributeName(); + if (kwName.equals(paramName)) { + matchPos = j; + break; + } + } + } + + if (matchPos < 0) { + return null; // no matching parameter found — signature mismatch + } + if (resolved.get(matchPos) != null) { + throw new XPathException(ast.getLine(), 
ast.getColumn(), + ErrorCodes.XPST0003, + "Duplicate keyword argument: " + kwName); + } + resolved.set(matchPos, kwArg.getArgument()); + } + + // Fill gaps: for parameters that allow empty sequences or have defaults, + // supply an empty sequence expression. This enables keyword arguments to + // skip optional positional parameters in overloaded built-in functions. + for (int i = 0; i < resolved.size(); i++) { + if (resolved.get(i) == null) { + if (argTypes[i] instanceof org.exist.xquery.value.FunctionParameterSequenceType) { + final org.exist.xquery.value.FunctionParameterSequenceType pst = + (org.exist.xquery.value.FunctionParameterSequenceType) argTypes[i]; + if (pst.hasDefaultValue()) { + resolved.set(i, pst.getDefaultValue()); + } else if (pst.getCardinality().isSuperCardinalityOrEqualOf( + org.exist.xquery.Cardinality.EMPTY_SEQUENCE)) { + // Parameter allows empty — fill with empty sequence + resolved.set(i, new PathExpr(context)); + } else { + return null; // required parameter missing + } + } else { + return null; // can't determine if parameter is optional + } + } + } + + return resolved; + } } diff --git a/exist-core/src/main/java/org/exist/xquery/FunctionSignature.java b/exist-core/src/main/java/org/exist/xquery/FunctionSignature.java index 44202735804..c220e1d5ec8 100644 --- a/exist-core/src/main/java/org/exist/xquery/FunctionSignature.java +++ b/exist-core/src/main/java/org/exist/xquery/FunctionSignature.java @@ -59,6 +59,7 @@ public class FunctionSignature { private SequenceType[] arguments; private SequenceType returnType; private boolean isVariadic; + private boolean isUpdating; private String description; private String deprecated = null; private Map metadata = null; @@ -69,6 +70,7 @@ public FunctionSignature(final FunctionSignature other) { this.returnType = other.returnType; this.annotations = other.annotations != null ? 
Arrays.copyOf(other.annotations, other.annotations.length) : null; this.isVariadic = other.isVariadic; + this.isUpdating = other.isUpdating; this.deprecated = other.deprecated; this.description = other.description; this.metadata = other.metadata != null ? new HashMap<>(other.metadata) : null; @@ -129,6 +131,14 @@ public QName getName() { return name; } + public boolean isUpdating() { + return isUpdating; + } + + public void setUpdating(final boolean updating) { + this.isUpdating = updating; + } + public int getArgumentCount() { if (isVariadic) { return -1; diff --git a/exist-core/src/main/java/org/exist/xquery/GeneralComparison.java b/exist-core/src/main/java/org/exist/xquery/GeneralComparison.java index b705fe3ef3d..b3dbadbe327 100644 --- a/exist-core/src/main/java/org/exist/xquery/GeneralComparison.java +++ b/exist-core/src/main/java/org/exist/xquery/GeneralComparison.java @@ -128,17 +128,17 @@ public GeneralComparison( XQueryContext context, Expression left, Expression rig this.relation = relation; this.truncation = truncation; - if( ( left instanceof PathExpr ) && ( ( ( PathExpr )left ).getLength() == 1 ) ) { + if( isSimplifiablePathExpr( left ) ) { left = ( ( PathExpr )left ).getExpression( 0 ); didLeftSimplification = true; } - add( left ); + addOperand( left ); - if( ( right instanceof PathExpr ) && ( ( ( PathExpr )right ).getLength() == 1 ) ) { + if( isSimplifiablePathExpr( right ) ) { right = ( ( PathExpr )right ).getExpression( 0 ); didRightSimplification = true; } - add( right ); + addOperand( right ); //TODO : should we also use simplify() here ? -pb if( didLeftSimplification ) { @@ -150,6 +150,49 @@ public GeneralComparison( XQueryContext context, Expression left, Expression rig } } + /** + * Check if an expression is a plain PathExpr container that can be safely unwrapped. 
+ * Function, BinaryOp, and other PathExpr subclasses that use steps for their own + * purposes must NOT be unwrapped — doing so would replace the expression with its + * operands/arguments. + */ + private static boolean isSimplifiablePathExpr( final Expression expr ) { + return expr instanceof PathExpr + && expr.getClass() == PathExpr.class + && ( ( PathExpr )expr ).getLength() == 1; + } + + /** + * Add an operand expression using the Expression overload (not PathExpr) + * to prevent flattening of Function/BinaryOp subclasses of PathExpr. + */ + private void addOperand( final Expression expr ) { + steps.add( expr ); + } + + @Override + public Expression optimize( final CompileContext cc ) throws XPathException + { + // Recurse into left/right via PathExpr.optimize (sub-expressions live in steps[]). + super.optimize( cc ); + + // Both operands are literal atomic values with no dependencies — fold to the result. + // Skipped if either side has a function call, variable reference, or context dependency. + final Expression left = getLeft(); + final Expression right = getRight(); + if ( left instanceof LiteralValue && right instanceof LiteralValue + && left.getDependencies() == Dependency.NO_DEPENDENCY + && right.getDependencies() == Dependency.NO_DEPENDENCY ) { + try { + return cc.preEval( this ); + } catch ( final XPathException e ) { + // Fall through if pre-evaluation raises an error (e.g. type mismatch + // that should be surfaced at runtime, not at compile time). 
+ } + } + return this; + } + /* (non-Javadoc) * @see org.exist.xquery.BinaryOp#analyze(org.exist.xquery.AnalyzeContextInfo) */ @@ -240,6 +283,17 @@ public void visitCastExpr( CastExpression expression ) } } } + + // Log optimization decisions + if (LOG.isDebugEnabled()) { + if (optimizeSelf || optimizeChild) { + LOG.debug("Optimizer: {} can use index optimization on {} (self={}, child={}, qname={})", + ExpressionDumper.dump(this), contextQName, optimizeSelf, optimizeChild, contextQName); + } else if (!steps.isEmpty()) { + LOG.debug("Optimizer: {} skipped index optimization — no suitable index path found", + ExpressionDumper.dump(this)); + } + } } @Override @@ -1080,9 +1134,20 @@ private AtomicValue convertForValueComparison(final AtomicValue value, final int } /* - * d. Otherwise, a type error is raised [err:XPTY0004]. + * d. (XQuery 4.0) If each operand is an instance of one of the types + * xs:hexBinary or xs:base64Binary, then both operands are cast to + * type xs:base64Binary. + */ + if ((thisType == Type.HEX_BINARY || thisType == Type.BASE64_BINARY) + && (otherType == Type.HEX_BINARY || otherType == Type.BASE64_BINARY)) { + return value.convertTo(Type.BASE64_BINARY); + } + + /* + * e. Otherwise, a type error is raised [err:XPTY0004]. 
*/ - throw new XPathException(this, ErrorCodes.XPTY0004, "Incompatible primitive types"); + throw new XPathException(this, ErrorCodes.XPTY0004, + "Incompatible primitive types: " + Type.getTypeName(thisType) + " vs " + Type.getTypeName(otherType)); } return value; @@ -1100,6 +1165,11 @@ private AtomicValue convertForValueComparison(final AtomicValue value, final int * @throws XPathException if an error occurs during the comparison */ private boolean compareAtomic(final Collator collator, AtomicValue lv, AtomicValue rv) throws XPathException { + // Propagate expression context to atomized values so version-gated + // comparisons (e.g., xs:duration ordering) can check the XQuery version + if (lv.getExpression() == null) { lv.setExpression(this); } + if (rv.getExpression() == null) { rv.setExpression(this); } + // get types locally as convertForCompareAtomic may change the types of the AtomicValue itself int ltype = lv.getType(); int rtype = rv.getType(); diff --git a/exist-core/src/main/java/org/exist/xquery/HashJoinForExpr.java b/exist-core/src/main/java/org/exist/xquery/HashJoinForExpr.java new file mode 100644 index 00000000000..1115efe14fd --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/HashJoinForExpr.java @@ -0,0 +1,250 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.NodeSet; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.AtomicValue; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.ValueSequence; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Hash-join replacement for the {@code for $i in where $i/key + * = $outer/key return ...} pattern. + * + * Produced by {@link ForExpr#optimize(CompileContext)} when it recognises an + * inner FOR whose body is a {@link WhereClause} containing a single + * equality {@link GeneralComparison} between an inner-scope path + * (referencing this FOR's variable) and a probe expression that does NOT + * reference any inner-scope variable. Replaces the linear scan of the input + * sequence with an O(N+M) build/probe: a hash map keyed by the join value + * is built once on first eval (or whenever the input {@link Sequence} + * reference changes — handles fresh function invocations), and probed per + * outer iteration with the outer key. + * + * Intentional restrictions for v1: + *
    + *
  • Operator must be {@code =} (general equality). Other operators + * require sorted structures, not hash maps.
  • + *
  • The body after the where clause must NOT be a FLWOR clause + * (no order-by / group-by / for-let chaining), so the per-match + * evaluation matches the original semantics.
  • + *
  • The FOR must have no positional, score, or {@code allowing empty} + * extras — these affect iteration semantics that hash join skips.
  • + *
  • Hash keys are normalised to the atomized value's + * {@code stringValue()}. This is provably correct when both sides + * atomize to {@code xs:string} or {@code xs:untypedAtomic} (the XMark + * case). The detection in {@link ForExpr} restricts to this case.
  • + *
+ * + * Cache lifetime: the hash is keyed by the {@link Sequence} reference of + * the input, mirroring BaseX's {@code CmpHashG}/{@code CmpCache} pattern. + * Multiple per-outer-iteration calls within the same query share the hash; + * a fresh function invocation produces a new input Sequence (fresh + * let-bindings), so the cache is rebuilt — sidestepping the cross-call + * lifetime bug that broke the earlier eval-time-cache attempt + * (see {@code joe-vault/Claude/exist/query-optimizer-overhaul.md}). + */ +public class HashJoinForExpr extends ForExpr { + + /** Side of the comparison referencing this FOR's variable: 0 = left, 1 = right. */ + private final int innerSide; + + /** Last-seen input {@link Sequence} reference; rebuilds {@link #hashIndex} on change. */ + private Sequence cachedInputRef; + + /** key → matching items (in input order). */ + private Map> hashIndex; + + public HashJoinForExpr(final XQueryContext context, final ForExpr original, + final int innerSide) { + super(context, false /* allowingEmpty — gated off by detection */); + setVariable(original.getVariable()); + // sequenceType is a protected field on BindingExpression; copy directly + this.sequenceType = original.sequenceType; + setInputSequence(original.getInputSequence()); + setReturnExpression(original.getReturnExpression()); + this.innerSide = innerSide; + } + + /** Already in hash-join form — no further structural rewrite. 
*/ + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + return this; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + if (context.getProfiler().isEnabled()) { + context.getProfiler().start(this); + context.getProfiler().message(this, Profiler.OPTIMIZATIONS, + "OPTIMIZATION", "Hash-join FLWOR"); + } + context.expressionStart(this); + + final Sequence resultSequence = new ValueSequence(unordered); + final LocalVariable mark = context.markLocalVariables(false); + try { + // Evaluate input — if hoisted to a let, this is a constant-time + // VariableReference resolution. + final Sequence in = inputSequence.eval(contextSequence, null); + clearContext(getExpressionId(), in); + registerUpdateListener(in); + + // Declare $i — bound transiently while building the hash and + // again per match while evaluating the return body. + final LocalVariable var = createVariable(getVariable()); + var.setSequenceType(sequenceType); + context.declareVariableBinding(var); + if (in instanceof NodeSet) { + var.setContextDocs(in.getDocumentSet()); + } else { + var.setContextDocs(null); + } + + // (Re)build hash if input identity changed since last eval. + if (cachedInputRef != in) { + buildHash(var, in); + cachedInputRef = in; + } + + // Probe: evaluate the outer-side expression in OUTER scope. + // $i is currently bound; the probeExpr does not reference it + // (verified in ForExpr.optimize when the rewrite was decided). + final Expression probeExpr = getProbeExpr(); + final Sequence probeSeq = probeExpr.eval(contextSequence, contextItem); + + if (!probeSeq.isEmpty() && hashIndex != null && !hashIndex.isEmpty()) { + // LinkedHashSet: dedupe across probe keys, preserve first-encounter order. 
+ final Set matches = new LinkedHashSet<>(); + for (final SequenceIterator probeIter = probeSeq.iterate(); probeIter.hasNext();) { + final Item probeItem = probeIter.nextItem(); + final AtomicValue probeKey = probeItem.atomize(); + final String keyStr = probeKey.getStringValue(); + final List bucket = hashIndex.get(keyStr); + if (bucket != null) { + matches.addAll(bucket); + } + } + + final Expression body = getBodyExpr(); + for (final Item match : matches) { + var.setValue(match.toSequence()); + var.checkType(); + final Sequence sub = body.eval(null, null); + resultSequence.addAll(sub); + } + } + } finally { + context.popLocalVariables(mark, resultSequence); + } + + setActualReturnType(resultSequence.getItemType()); + context.expressionEnd(this); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", resultSequence); + } + return resultSequence; + } + + /** + * Build the hash by binding {@code var} to each input item and + * evaluating the inner-side expression. Multi-key items (rare in + * practice — XMark attribute joins are 1-key) are inserted under each + * key; per-match dedupe in {@link #eval} handles the join semantics. + */ + private void buildHash(final LocalVariable var, final Sequence in) throws XPathException { + hashIndex = new HashMap<>(); + final Expression keyExtractor = getKeyExtractor(); + for (final SequenceIterator it = in.iterate(); it.hasNext();) { + final Item item = it.nextItem(); + var.setValue(item.toSequence()); + final Sequence keySeq = keyExtractor.eval(null, null); + for (final SequenceIterator ki = keySeq.iterate(); ki.hasNext();) { + final AtomicValue key = ki.nextItem().atomize(); + final String keyStr = key.getStringValue(); + hashIndex.computeIfAbsent(keyStr, k -> new ArrayList<>()).add(item); + } + } + } + + private Expression getKeyExtractor() { + final GeneralComparison cmp = getComparison(); + return innerSide == 0 ? 
cmp.getLeft() : cmp.getRight(); + } + + private Expression getProbeExpr() { + final GeneralComparison cmp = getComparison(); + return innerSide == 0 ? cmp.getRight() : cmp.getLeft(); + } + + private GeneralComparison getComparison() { + final WhereClause wc = (WhereClause) getReturnExpression(); + Expression w = wc.getWhereExpr(); + // Unwrap parser-inserted DebuggableExpression / single-step PathExpr. + while (true) { + if (w instanceof DebuggableExpression d) { + w = d.getFirst(); + } else if (w instanceof PathExpr p && p.getLength() == 1) { + w = p.getExpression(0); + } else { + break; + } + } + return (GeneralComparison) w; + } + + private Expression getBodyExpr() { + // Body is the WhereClause's return expression — kept as-is (the parser's + // DebuggableExpression wrapper retains debugger fidelity at runtime). + return ((WhereClause) getReturnExpression()).getReturnExpression(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + if (!postOptimization) { + cachedInputRef = null; + hashIndex = null; + } + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("(* hash-join *) ", line); + super.dump(dumper); + } + + @Override + public String toString() { + return "(* hash-join *) " + super.toString(); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java b/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java new file mode 100644 index 00000000000..6bd237072a9 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/KeywordArgumentExpression.java @@ -0,0 +1,85 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * 
version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +/** + * Wraps a function argument expression with a keyword name for XQuery 4.0 + * keyword argument syntax: {@code fn:slice($input, start := 3)}. + * + *

This is a transient wrapper used during function call construction. + * The keyword name is used to match the argument to the correct parameter + * position in the function signature.

+ */ +public class KeywordArgumentExpression extends AbstractExpression { + + private final String keywordName; + private final Expression argument; + + public KeywordArgumentExpression(final XQueryContext context, final String keywordName, + final Expression argument) { + super(context); + this.keywordName = keywordName; + this.argument = argument; + } + + public String getKeywordName() { + return keywordName; + } + + public Expression getArgument() { + return argument; + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) + throws XPathException { + return argument.eval(contextSequence, contextItem); + } + + @Override + public int returnsType() { + return argument.returnsType(); + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + argument.analyze(contextInfo); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display(keywordName); + dumper.display(" := "); + argument.dump(dumper); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + argument.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java b/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java new file mode 100644 index 00000000000..9aedcb6f144 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/LetDestructureExpr.java @@ -0,0 +1,335 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.functions.array.ArrayType; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Implements XQuery 4.0 let destructuring: + *
    + *
  • {@code let $($x, $y) := (1, 2)} — sequence destructuring
  • + *
  • {@code let $[$x, $y] := [1, 2]} — array destructuring
  • + *
  • {@code let ${$x, $y} := map{'x':1,'y':2}} — map destructuring
  • + *
+ */ +public class LetDestructureExpr extends AbstractFLWORClause { + + public enum DestructureMode { + SEQUENCE, ARRAY, MAP + } + + private final DestructureMode mode; + private final List varNames; + private final List varTypes; + private Expression inputSequence; + + public LetDestructureExpr(final XQueryContext context, final DestructureMode mode) { + super(context); + this.mode = mode; + this.varNames = new ArrayList<>(); + this.varTypes = new ArrayList<>(); + } + + public void addVariable(final QName name, final SequenceType type) { + varNames.add(name); + varTypes.add(type); + } + + public void setInputSequence(final Expression seq) { + this.inputSequence = seq.simplify(); + } + + public void setOverallType(final SequenceType type) { + // Reserved for future type checking of overall destructure type + } + + @Override + public ClauseType getType() { + switch (mode) { + case SEQUENCE: return ClauseType.LET_SEQ_DESTRUCTURE; + case ARRAY: return ClauseType.LET_ARRAY_DESTRUCTURE; + case MAP: return ClauseType.LET_MAP_DESTRUCTURE; + default: return ClauseType.LET; + } + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + final LocalVariable mark = context.markLocalVariables(false); + try { + contextInfo.setParent(this); + final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + inputSequence.analyze(varContextInfo); + + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = new LocalVariable(varNames.get(i)); + if (varTypes.get(i) != null) { + var.setSequenceType(varTypes.get(i)); + } + context.declareVariableBinding(var); + } + + context.setContextSequencePosition(0, null); + returnExpr.analyze(contextInfo); + } finally { + context.popLocalVariables(mark); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + context.expressionStart(this); + context.pushDocumentContext(); + try { + final LocalVariable mark = 
context.markLocalVariables(false); + Sequence resultSequence = null; + try { + final Sequence input = inputSequence.eval(contextSequence, null); + + switch (mode) { + case SEQUENCE: + bindSequenceVars(input); + break; + case ARRAY: + bindArrayVars(input); + break; + case MAP: + bindMapVars(input); + break; + default: + throw new XPathException(this, ErrorCodes.ERROR, "Unknown destructure mode: " + mode); + } + + resultSequence = returnExpr.eval(contextSequence, null); + } finally { + context.popLocalVariables(mark, resultSequence); + } + if (resultSequence == null) { + return Sequence.EMPTY_SEQUENCE; + } + if (getPreviousClause() == null) { + resultSequence = postEval(resultSequence); + } + return resultSequence; + } finally { + context.popDocumentContext(); + context.expressionEnd(this); + } + } + + private void bindSequenceVars(final Sequence input) throws XPathException { + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = createVariable(varNames.get(i)); + final SequenceType type = varTypes.get(i); + if (type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + if (i < input.getItemCount()) { + var.setValue(input.itemAt(i).toSequence()); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void bindArrayVars(final Sequence input) throws XPathException { + if (input.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Array destructuring requires an array, got empty sequence"); + } + final Item item = input.itemAt(0); + if (!Type.subTypeOf(item.getType(), Type.ARRAY_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Array destructuring requires an array, got " + + Type.getTypeName(item.getType())); + } + final ArrayType array = (ArrayType) item; + for (int i = 0; i < varNames.size(); i++) { + final LocalVariable var = createVariable(varNames.get(i)); + final SequenceType type = varTypes.get(i); + if 
(type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + if (i < array.getSize()) { + var.setValue(array.get(i)); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void bindMapVars(final Sequence input) throws XPathException { + if (input.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Map destructuring requires a map, got empty sequence"); + } + final Item item = input.itemAt(0); + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Map destructuring requires a map, got " + + Type.getTypeName(item.getType())); + } + final AbstractMapType map = (AbstractMapType) item; + for (int i = 0; i < varNames.size(); i++) { + final QName qn = varNames.get(i); + final LocalVariable var = createVariable(qn); + final SequenceType type = varTypes.get(i); + if (type != null) { + var.setSequenceType(type); + } + context.declareVariableBinding(var); + + final Sequence value = map.get(new StringValue(this, qn.getLocalPart())); + if (value != null && !value.isEmpty()) { + var.setValue(value); + } else { + var.setValue(Sequence.EMPTY_SEQUENCE); + } + if (type != null) { + checkVarType(var, type); + } + } + } + + private void checkVarType(final LocalVariable var, final SequenceType type) throws XPathException { + final Sequence val = var.getValue(); + if (val == null) { + return; + } + final Cardinality actualCard; + if (val.isEmpty()) { + actualCard = Cardinality.EMPTY_SEQUENCE; + } else if (val.hasMany()) { + actualCard = Cardinality._MANY; + } else { + actualCard = Cardinality.EXACTLY_ONE; + } + if (!type.getCardinality().isSuperCardinalityOrEqualOf(actualCard)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Invalid cardinality for variable $" + var.getQName() + + ". 
Expected " + type.getCardinality().getHumanDescription() + + ", got " + actualCard.getHumanDescription(), val); + } + if (!Type.subTypeOf(type.getPrimaryType(), Type.NODE) && + !val.isEmpty() && + !Type.subTypeOf(val.getItemType(), type.getPrimaryType())) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Invalid type for variable $" + var.getQName() + + ". Expected " + Type.getTypeName(type.getPrimaryType()) + + ", got " + Type.getTypeName(val.getItemType()), val); + } + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("let "); + switch (mode) { + case SEQUENCE: dumper.display("$("); break; + case ARRAY: dumper.display("$["); break; + case MAP: dumper.display("${"); break; + default: break; + } + for (int i = 0; i < varNames.size(); i++) { + if (i > 0) dumper.display(", "); + dumper.display("$").display(varNames.get(i).getLocalPart()); + } + switch (mode) { + case SEQUENCE: dumper.display(")"); break; + case ARRAY: dumper.display("]"); break; + case MAP: dumper.display("}"); break; + default: break; + } + dumper.display(" := "); + inputSequence.dump(dumper); + dumper.nl().display("return "); + returnExpr.dump(dumper); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("let "); + switch (mode) { + case SEQUENCE: sb.append("$("); break; + case ARRAY: sb.append("$["); break; + case MAP: sb.append("${"); break; + default: break; + } + for (int i = 0; i < varNames.size(); i++) { + if (i > 0) sb.append(", "); + sb.append("$").append(varNames.get(i).getLocalPart()); + } + switch (mode) { + case SEQUENCE: sb.append(")"); break; + case ARRAY: sb.append("]"); break; + case MAP: sb.append("}"); break; + default: break; + } + sb.append(" := ").append(inputSequence.toString()); + sb.append(" return ").append(returnExpr.toString()); + return sb.toString(); + } + + @Override + public void accept(final ExpressionVisitor visitor) { + // No specific visitor method for destructure - use default + } 
+ + @Override + public boolean allowMixedNodesInReturn() { + return true; + } + + @Override + public Set getTupleStreamVariables() { + return new HashSet<>(varNames); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + inputSequence.resetState(postOptimization); + } + + @Override + public int getDependencies() { + return Dependency.CONTEXT_SET; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/LetExpr.java b/exist-core/src/main/java/org/exist/xquery/LetExpr.java index 278e7d18295..091fbda6868 100644 --- a/exist-core/src/main/java/org/exist/xquery/LetExpr.java +++ b/exist-core/src/main/java/org/exist/xquery/LetExpr.java @@ -37,15 +37,117 @@ */ public class LetExpr extends BindingExpression { + private boolean scoreBinding = false; + public LetExpr(XQueryContext context) { super(context); } + /** + * XQFT 3.0 §2.3: Mark this let binding as a score variable binding. + * When true, the variable is bound to the score (xs:double in [0,1]) + * of the input expression rather than the expression's value. + */ + public void setScoreBinding(final boolean scoreBinding) { + this.scoreBinding = scoreBinding; + } + + public boolean isScoreBinding() { + return scoreBinding; + } + @Override public ClauseType getType() { return ClauseType.LET; } + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + // Chain-head lets push a fresh FLWOR scope (continuing clauses share it). + // The scope tracks visible variables and accumulates hoist actions + // queued by inner FLWOR optimize() passes. + final boolean enteredScope = getPreviousClause() == null; + if (enteredScope) { + cc.enterFlworChain(); + } + + // Recurse the input first (the let-variable is NOT yet in scope for + // its own initializer, per XQuery semantics). This also gives any + // inner FLWORs in the input a chance to register hoists targeting + // outer scopes. 
+ if (inputSequence != null) { + inputSequence = inputSequence.optimize(cc); + } + + // Now make this let's variable visible to the rest of the chain. + // Score bindings (XQFT 3.0 §2.3) bind a synthesized double rather than + // the input value — exclude from FLWOR-scope tracking to keep the + // hoist invariance check honest. + if (varName != null && !scoreBinding) { + cc.addVisibleFlworVar(varName); + } + + if (returnExpr != null) { + returnExpr = returnExpr.optimize(cc); + } + + // Drop the let if its body is a literal — the variable is by definition + // unreferenced, and the input sequence is side-effect-free. Guards: + // - score binding stays (XQFT 3.0 §2.3), + // - no previous clause (avoid having to repair clause-chain + // previousClause pointers), + // - inputSequence is a LiteralValue (no side effects to preserve), + // - the unwrapped returnExpr is a LiteralValue (so it cannot + // reference any variable). + // + // This is intentionally narrow for v1: it captures `let $x := 1 return + // 42` but not `let $x := 1 return $y + 1`. We avoid a structural + // variable-reference scan because BasicExpressionVisitor does not + // traverse through FilteredExpression / GeneralComparison / OpNumeric, + // and the eXist Dependency flags are not reliable after the analyze() + // pass has popped the let's variable from scope. A precise unused-var + // check belongs in a follow-up. + Expression result = this; + if (!scoreBinding + && varName != null + && !(returnExpr instanceof FLWORClause) + && getPreviousClause() == null + && (inputSequence instanceof LiteralValue) + && (unwrap(returnExpr) instanceof LiteralValue)) { + result = cc.replaceWith(this, returnExpr, "unused let-binding $" + varName); + } + + // Inline a node-typed path binding referenced exactly once at the + // source of a FilteredExpression with an Optimizable predicate + // (closes GH-873). Skip if the literal-drop above already fired. 
+ if (result == this) { + result = LetInliner.tryInline(this, cc); + } + + if (enteredScope) { + result = cc.applyHoistsAndExitChain(result); + } + return result; + } + + /** + * Strips {@link DebuggableExpression} and single-step {@link PathExpr} + * wrappers to expose the underlying expression. Used to recognise a + * trivially literal return body even when the parser has wrapped it for + * debugger support. + */ + private static Expression unwrap(Expression e) { + while (true) { + if (e instanceof DebuggableExpression d) { + e = d.getFirst(); + } else if (e instanceof PathExpr p && p.getLength() == 1) { + e = p.getExpression(0); + } else { + return e; + } + } + } + @Override public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { super.analyze(contextInfo); @@ -54,6 +156,7 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException try { contextInfo.setParent(this); final AnalyzeContextInfo varContextInfo = new AnalyzeContextInfo(contextInfo); + varContextInfo.addFlag(NON_UPDATING_CONTEXT); inputSequence.analyze(varContextInfo); //Declare the iteration variable final LocalVariable inVar = new LocalVariable(varName); @@ -102,13 +205,26 @@ public Sequence eval(Sequence contextSequence, Item contextItem) var = createVariable(varName); var.setSequenceType(sequenceType); context.declareVariableBinding(var); - var.setValue(in); + if (scoreBinding) { + // XQFT 3.0 §2.3: score binding — bind variable to the score + // of the expression. Naive implementation: 1.0 if non-empty, 0.0 if empty. + var.setValue(new DoubleValue(this, in.isEmpty() ? 0.0 : 1.0)); + } else { + var.setValue(in); + } if (sequenceType == null) - {var.checkType();} //Just because it makes conversions ! + {var.checkType();} //Just because it makes conversions ! 
var.setContextDocs(inputSequence.getContextDocSet()); registerUpdateListener(in); - resultSequence = returnExpr.eval(contextSequence, null); + try { + resultSequence = returnExpr.eval(contextSequence, null); + } catch (final WhileClause.WhileTerminationException e) { + resultSequence = Sequence.EMPTY_SEQUENCE; + } + if (getPreviousClause() == null && WhileClause.isTerminated()) { + WhileClause.clearTerminated(); + } if (sequenceType != null) { Cardinality actualCardinality; diff --git a/exist-core/src/main/java/org/exist/xquery/LetInliner.java b/exist-core/src/main/java/org/exist/xquery/LetInliner.java new file mode 100644 index 00000000000..35e8f60a9cd --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/LetInliner.java @@ -0,0 +1,238 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.value.Type; + +import javax.annotation.Nullable; +import java.util.List; + +/** + * Implements the inline-for-index-pre-select rewrite invoked from + * {@link LetExpr#optimize(CompileContext)}. + * + *

<p>Rewrites + * <pre>{@code
+ *   let $v := <persistent path> return $v[Optimizable-pred]
+ * }</pre>
+ * into + * <pre>{@code
+ *   <persistent path>[Optimizable-pred]
+ * }</pre>
+ * by attaching the predicate to the last LocationStep of the input path. The + * resulting tree is identical in shape to the direct form, so the legacy + * {@link Optimizer} pass that runs immediately after the {@code optimize(cc)} + * pass wraps it in the {@code (#exist:optimize#)} pragma and routes the + * predicate through the index pre-select machinery — closing GH-873. + * + *

<p>Without this rewrite, the indirect form runs about 167x slower than the + * direct form because {@code Optimize.eval} computes a pre-selected node set + * and then {@code FilteredExpression.eval} re-evaluates {@code $v} from the + * variable stack (ignoring the pre-selected context), so the index hit-set is + * thrown away and the predicate runs once per node in the full input. + * + * <p>
The gate predicates are intentionally narrow in v1: each one corresponds + * to a soundness or scope concern documented in the design doc accompanying + * the change. See the comments on {@link #tryInline(LetExpr, CompileContext)} + * for the precise list. + */ +final class LetInliner { + + private LetInliner() { + } + + /** + * Attempt to inline the binding. Returns the LetExpr's replacement if all + * gates pass, or {@code let} unchanged if any gate fails. + * + *

<p>Gates (all must hold): + * <ol>
+ *   <li>{@code !scoreBinding} and a non-null variable name -- score + * bindings (XQFT 3.0 §2.3) bind a synthesised double, not the + * input value, so inlining changes semantics;</li>
+ *   <li>{@code getPreviousClause() == null} and the body is not itself + * another FLWOR clause -- limits v1 to standalone lets;</li>
+ *   <li>no static type declared on the binding -- typed declarations + * impose a runtime check on the variable's value that inlining + * would silently bypass;</li>
+ *   <li>the input is a node-typed expression -- only node sequences + * benefit from a downstream index pre-select;</li>
+ *   <li>the input contains at least one non-wildcard LocationStep -- + * this is what an index can attach to;</li>
+ *   <li>the body is exactly {@code FilteredExpression} (or a + * length-1 PathExpr wrapping one) whose source is the bound + * variable, with exactly one predicate that contains an + * {@link Optimizable}, and the variable is not referenced + * anywhere else in the body.</li>
+ * </ol>
+ */ + static Expression tryInline(final LetExpr let, final CompileContext cc) { + // Gate 1 + final QName varName = let.getVariable(); + if (varName == null || let.isScoreBinding()) { + return let; + } + // Gate 2 + if (let.getPreviousClause() != null) { + return let; + } + final Expression returnExpr = let.getReturnExpression(); + if (returnExpr == null || returnExpr instanceof FLWORClause) { + return let; + } + // Gate 3 -- BindingExpression.sequenceType is protected; same-package access. + if (let.sequenceType != null) { + return let; + } + // Gate 4 + final Expression inputSequence = let.getInputSequence(); + if (inputSequence == null + || !Type.subTypeOf(inputSequence.returnsType(), Type.NODE)) { + return let; + } + // Gate 5: at least one non-wildcard LocationStep in the input. Pick + // the last one as the predicate-attachment site -- semantically the + // predicate filters the OUTPUT of the path, which is what the last + // step yields. + final LocationStep lastStep = findLastNamedStep(inputSequence); + if (lastStep == null) { + return let; + } + + // Gate 6: body must be a FilteredExpression (or length-1 PathExpr + // wrapping one) whose source is $varName, with exactly one + // Optimizable predicate, and $varName must not appear anywhere + // else in the body. + final FilteredExpression fe = unwrapFilteredExpression(returnExpr); + if (fe == null) { + return let; + } + final Expression feSrc = fe.getExpression(); + if (!(feSrc instanceof final VariableReference vr) + || !varName.equals(vr.getName())) { + return let; + } + final List preds = fe.getPredicates(); + if (preds.size() != 1) { + return let; + } + final Predicate pred = preds.get(0); + if (!hasOptimizable(pred)) { + return let; + } + final RefCounter counter = new RefCounter(varName); + returnExpr.accept(counter); + if (counter.count != 1) { + // The variable appears outside the FilteredExpression source -- + // a literal substitution would leave dangling references. 
+ return let; + } + + // Substitute: attach the predicate to the input's last named step. + // The legacy Optimizer pass that runs next will detect the + // Optimizable predicate, wrap the rewritten path in the + // (#exist:optimize#) pragma, and route through the index pre-select + // (Optimizer.visitLocationStep / Optimize.before). + lastStep.addPredicate(pred); + return cc.replaceWith(let, inputSequence, + "inline let $" + varName.getLocalPart() + " for index pre-select"); + } + + /** + * Return the last non-wildcard LocationStep in {@code expr}, or null if + * none. Order matches document-order traversal of the path. + */ + private static @Nullable LocationStep findLastNamedStep(final Expression expr) { + final List steps = BasicExpressionVisitor.findLocationSteps(expr); + LocationStep last = null; + for (final LocationStep s : steps) { + if (s != null && !s.getTest().isWildcardTest()) { + last = s; + } + } + return last; + } + + /** + * Unwrap {@link DebuggableExpression} and length-1 {@link PathExpr} + * containers to expose a {@link FilteredExpression}, or return null if + * the underlying shape isn't one. Mirrors the unwrap rule used + * elsewhere in the engine to look past parser-introduced wrappers. + */ + private static @Nullable FilteredExpression unwrapFilteredExpression(final Expression expr) { + Expression current = expr; + while (true) { + if (current instanceof final FilteredExpression filtered) { + return filtered; + } else if (current instanceof final DebuggableExpression debug) { + current = debug.getFirst(); + } else if (current instanceof final PathExpr pathExpr && pathExpr.getLength() == 1) { + current = pathExpr.getExpression(0); + } else { + return null; + } + } + } + + /** + * Reuses the engine's existing Optimizable-detection visitor so the + * "is this predicate index-eligible?" question gets the same answer + * the legacy Optimizer pass would give. 
+ */ + private static boolean hasOptimizable(final Predicate predicate) { + final Optimizer.FindOptimizable visitor = new Optimizer.FindOptimizable(); + predicate.accept(visitor); + final Optimizable[] optimizables = visitor.getOptimizables(); + return optimizables != null && optimizables.length > 0; + } + + /** + * Counts {@link VariableReference} nodes referring to a target name + * across an expression tree. Descends explicitly into + * {@link FilteredExpression} (BasicExpressionVisitor's default does + * not), so a $v that sits as the source of a FE is still counted. + */ + private static final class RefCounter extends DefaultExpressionVisitor { + private final QName target; + int count = 0; + + RefCounter(final QName target) { + this.target = target; + } + + @Override + public void visitVariableReference(final VariableReference ref) { + if (target.equals(ref.getName())) { + count++; + } + } + + @Override + public void visitFilteredExpr(final FilteredExpression filtered) { + filtered.getExpression().accept(this); + for (final Predicate p : filtered.getPredicates()) { + p.accept(this); + } + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/LocationStep.java b/exist-core/src/main/java/org/exist/xquery/LocationStep.java index 624795add20..326c8615db8 100644 --- a/exist-core/src/main/java/org/exist/xquery/LocationStep.java +++ b/exist-core/src/main/java/org/exist/xquery/LocationStep.java @@ -95,11 +95,15 @@ public LocationStep(final XQueryContext context, final int axis, final NodeTest public int getDependencies() { int deps = Dependency.CONTEXT_SET; - // self axis has an obvious dependency on the context item - // likewise we depend on the context item if this is a single path step (outside a predicate) - if (!this.inPredicate && - (this.axis == Constants.SELF_AXIS || - (parent != null && parent.getSubExpressionCount() > 0 && parent.getSubExpression(0) == this))) { + // self axis always has a dependency on the context item — that is its definition. 
+ // Previously this was suppressed inside predicates as a node-set optimization, + // but that caused XPDY0002 when predicates operated on atomic values from + // map/array navigation (e.g., map{"a":1}/a[. <= 1]). + if (this.axis == Constants.SELF_AXIS) { + deps = deps | Dependency.CONTEXT_ITEM; + } else if (!this.inPredicate && + (parent != null && parent.getSubExpressionCount() > 0 && parent.getSubExpression(0) == this)) { + // single path step (outside a predicate) also depends on context item deps = deps | Dependency.CONTEXT_ITEM; } @@ -383,6 +387,43 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) + this + "'"); } + // === XQuery 4.0 JNode / Map / Array axis navigation === + // Check if any item in the context sequence is navigable (JNode, map, array) + if (contextSequence.getItemCount() > 0) { + boolean hasJNode = false; + boolean hasMapArray = false; + for (int ci = 0; ci < Math.min(contextSequence.getItemCount(), 10); ci++) { + final Item item = contextSequence.itemAt(ci); + if (item instanceof org.exist.xquery.value.jnode.JNode) { + hasJNode = true; + break; + } + if (item instanceof org.exist.xquery.functions.map.MapType + || item instanceof org.exist.xquery.functions.array.ArrayType) { + hasMapArray = true; + } + } + if (hasJNode) { + result = evalJNodeAxis(contextSequence); + // Apply predicates and return + result = applyPredicate(contextSequence, result); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", result); + } + return result; + } + if (hasMapArray) { + result = evalMapArrayAxis(contextSequence); + // Apply predicates and return + result = applyPredicate(contextSequence, result); + if (context.getProfiler().isEnabled()) { + context.getProfiler().end(this, "", result); + } + return result; + } + } + // === End JNode / Map / Array axis navigation === + try { switch (axis) { @@ -443,6 +484,21 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) result = 
getSiblings(context, contextSequence); break; + // --- XQuery 4.0 combined axes --- + case Constants.FOLLOWING_OR_SELF_AXIS: + case Constants.PRECEDING_OR_SELF_AXIS: + result = getOrSelfAxis(context, contextSequence, + axis == Constants.FOLLOWING_OR_SELF_AXIS + ? Constants.FOLLOWING_AXIS : Constants.PRECEDING_AXIS); + break; + + case Constants.FOLLOWING_SIBLING_OR_SELF_AXIS: + case Constants.PRECEDING_SIBLING_OR_SELF_AXIS: + result = getOrSelfAxis(context, contextSequence, + axis == Constants.FOLLOWING_SIBLING_OR_SELF_AXIS + ? Constants.FOLLOWING_SIBLING_AXIS : Constants.PRECEDING_SIBLING_AXIS); + break; + default: throw new IllegalArgumentException("Unsupported axis specified"); } @@ -486,6 +542,12 @@ private boolean needsComputation() { if (nodeTestType == null) { nodeTestType = test.getType(); } + // JNode, map, and array types always need computation + if (Type.subTypeOf(nodeTestType, Type.JSON_NODE) + || Type.subTypeOf(nodeTestType, Type.MAP_ITEM) + || Type.subTypeOf(nodeTestType, Type.ARRAY_ITEM)) { + return true; + } if (nodeTestType != Type.DOCUMENT && nodeTestType != Type.NODE && nodeTestType != Type.ELEMENT @@ -912,6 +974,63 @@ protected Sequence getSiblings(final XQueryContext context, final Sequence conte * * @throws XPathException if an error occurs */ + /** + * Evaluates an XQuery 4.0 combined axis (e.g., following-or-self, preceding-sibling-or-self). + * Returns the union of the self axis result and the base axis result, preserving document order. 
+ * + * @param context the XQuery context + * @param contextSequence the context sequence + * @param baseAxis the base axis constant (e.g., Constants.FOLLOWING_AXIS) + * @return the combined result in document order + */ + private Sequence getOrSelfAxis(final XQueryContext context, final Sequence contextSequence, + final int baseAxis) throws XPathException { + // Save and temporarily switch axis to get results + final int savedAxis = this.axis; + try { + final Sequence selfOrRelatedResult; + final Sequence baseResult; + + if (baseAxis == Constants.FOLLOWING_AXIS) { + // following-or-self = descendant-or-self | following + // (all nodes at or after context node in document order) + this.axis = Constants.DESCENDANT_SELF_AXIS; + selfOrRelatedResult = getDescendants(context, contextSequence); + this.axis = Constants.FOLLOWING_AXIS; + baseResult = getPrecedingOrFollowing(context, contextSequence); + } else if (baseAxis == Constants.PRECEDING_AXIS) { + // preceding-or-self = ancestor-or-self | preceding + // (all nodes at or before context node in document order) + this.axis = Constants.ANCESTOR_SELF_AXIS; + selfOrRelatedResult = getAncestors(context, contextSequence); + this.axis = Constants.PRECEDING_AXIS; + baseResult = getPrecedingOrFollowing(context, contextSequence); + } else { + // following-sibling-or-self / preceding-sibling-or-self = self | sibling + this.axis = Constants.SELF_AXIS; + selfOrRelatedResult = getSelf(context, contextSequence); + this.axis = baseAxis; + baseResult = getSiblings(context, contextSequence); + } + + // Union preserving document order + if (selfOrRelatedResult.isEmpty()) { + return baseResult; + } + if (baseResult.isEmpty()) { + return selfOrRelatedResult; + } + final ValueSequence combined = new ValueSequence(); + combined.addAll(selfOrRelatedResult); + combined.addAll(baseResult); + combined.removeDuplicates(); + combined.sortInDocumentOrder(); + return combined; + } finally { + this.axis = savedAxis; + } + } + private Sequence 
getPrecedingOrFollowing(final XQueryContext context, final Sequence contextSequence) throws XPathException { final int position = computeLimit(); @@ -1487,4 +1606,395 @@ public boolean accept(final XMLStreamReader reader) { } } + // === XQuery 4.0 Map/Array path navigation === + + /** + * Evaluate axis navigation when the context contains maps or arrays. + * Implements XQuery 4.0 path navigation on maps/arrays: + * $map/key → map lookup by key name + * $map/* → all map values + * $array/* → all array members + * $map//key → recursive descent lookup + */ + private Sequence evalMapArrayAxis(final Sequence contextSequence) throws XPathException { + final ValueSequence result = new ValueSequence(); + final org.exist.dom.QName stepName = test != null ? test.getName() : null; + final boolean isWildcard = stepName == null; // TypeTest like node() or * → wildcard + + for (final SequenceIterator it = contextSequence.iterate(); it.hasNext(); ) { + final Item item = it.nextItem(); + switch (axis) { + case Constants.CHILD_AXIS: + navigateChildren(item, stepName, isWildcard, result); + break; + case Constants.DESCENDANT_AXIS: + navigateDescendants(item, stepName, isWildcard, false, result); + break; + case Constants.DESCENDANT_SELF_AXIS: + navigateDescendants(item, stepName, isWildcard, true, result); + break; + case Constants.SELF_AXIS: + // Self on map/array returns the item itself + result.add(item); + break; + default: + // Other axes (parent, ancestor, sibling) don't apply to raw maps/arrays + break; + } + } + return result; + } + + /** + * Navigate children of a map or array item. + * For maps: lookup by name or return all values. + * For arrays: return all members. 
+ */ + private void navigateChildren(final Item item, @Nullable final org.exist.dom.QName stepName, + final boolean isWildcard, + final ValueSequence result) throws XPathException { + if (item instanceof org.exist.xquery.functions.map.MapType) { + final org.exist.xquery.functions.map.MapType map = + (org.exist.xquery.functions.map.MapType) item; + if (isWildcard) { + // Return all map values + for (final io.lacuna.bifurcan.IEntry entry : map) { + addSequenceItems(entry.value(), result); + } + } else { + // Lookup by name + final String key = stepName.getLocalPart(); + final Sequence value = map.get(new StringValue(this, key)); + if (value != null && !value.isEmpty()) { + addSequenceItems(value, result); + } + } + } else if (item instanceof org.exist.xquery.functions.array.ArrayType) { + final org.exist.xquery.functions.array.ArrayType array = + (org.exist.xquery.functions.array.ArrayType) item; + // Arrays: wildcard returns all members, named step returns nothing + if (isWildcard) { + for (int i = 0; i < array.getSize(); i++) { + addSequenceItems(array.get(i), result); + } + } + // Named steps on arrays: navigate into each member that is a map + if (!isWildcard) { + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + if (member.getItemCount() == 1 && + member.itemAt(0) instanceof org.exist.xquery.functions.map.MapType) { + navigateChildren(member.itemAt(0), stepName, false, result); + } + } + } + } + } + + /** + * Recursive descent into maps/arrays for descendant axis. 
+ */ + private void navigateDescendants(final Item item, @Nullable final org.exist.dom.QName stepName, + final boolean isWildcard, final boolean includeSelf, + final ValueSequence result) throws XPathException { + if (includeSelf) { + result.add(item); + } + + if (item instanceof org.exist.xquery.functions.map.MapType) { + final org.exist.xquery.functions.map.MapType map = + (org.exist.xquery.functions.map.MapType) item; + for (final io.lacuna.bifurcan.IEntry entry : map) { + final String key = entry.key().getStringValue(); + // If name matches, add the value + if (!isWildcard && stepName != null && key.equals(stepName.getLocalPart())) { + addSequenceItems(entry.value(), result); + } else if (isWildcard) { + addSequenceItems(entry.value(), result); + } + // Recurse into nested maps/arrays + final Sequence value = entry.value(); + for (int i = 0; i < value.getItemCount(); i++) { + final Item child = value.itemAt(i); + if (child instanceof org.exist.xquery.functions.map.MapType + || child instanceof org.exist.xquery.functions.array.ArrayType) { + navigateDescendants(child, stepName, isWildcard, false, result); + } + } + } + } else if (item instanceof org.exist.xquery.functions.array.ArrayType) { + final org.exist.xquery.functions.array.ArrayType array = + (org.exist.xquery.functions.array.ArrayType) item; + for (int i = 0; i < array.getSize(); i++) { + final Sequence member = array.get(i); + for (int j = 0; j < member.getItemCount(); j++) { + final Item child = member.itemAt(j); + if (isWildcard) { + result.add(child); + } + if (child instanceof org.exist.xquery.functions.map.MapType + || child instanceof org.exist.xquery.functions.array.ArrayType) { + navigateDescendants(child, stepName, isWildcard, false, result); + } + } + } + } + } + + /** + * Add all items from a sequence to the result. 
+ */ + private void addSequenceItems(final Sequence seq, final ValueSequence result) throws XPathException { + for (int i = 0; i < seq.getItemCount(); i++) { + result.add(seq.itemAt(i)); + } + } + + // === XQuery 4.0 JNode axis navigation === + + /** + * Evaluate axis navigation when the context contains JNodes. + * Handles child, parent, self, descendant, descendant-or-self, + * ancestor, ancestor-or-self, following-sibling, and preceding-sibling axes. + */ + private Sequence evalJNodeAxis(final Sequence contextSequence) throws XPathException { + final ValueSequence result = new ValueSequence(); + for (final SequenceIterator it = contextSequence.iterate(); it.hasNext(); ) { + final Item item = it.nextItem(); + if (!(item instanceof org.exist.xquery.value.jnode.JNode)) { + continue; + } + final org.exist.xquery.value.jnode.JNode jnode = (org.exist.xquery.value.jnode.JNode) item; + + switch (axis) { + case Constants.CHILD_AXIS: + for (final org.exist.xquery.value.jnode.JNode child : jnode.getChildren()) { + if (matchesJNode(child)) { + result.add(child); + } + } + break; + + case Constants.PARENT_AXIS: + if (jnode.getParent() != null && matchesJNode(jnode.getParent())) { + result.add(jnode.getParent()); + } + break; + + case Constants.SELF_AXIS: + if (matchesJNode(jnode)) { + result.add(jnode); + } + break; + + case Constants.DESCENDANT_AXIS: + addDescendants(jnode, result, false); + break; + + case Constants.DESCENDANT_SELF_AXIS: + addDescendants(jnode, result, true); + break; + + case Constants.ANCESTOR_AXIS: + addAncestors(jnode, result, false); + break; + + case Constants.ANCESTOR_SELF_AXIS: + addAncestors(jnode, result, true); + break; + + case Constants.FOLLOWING_SIBLING_AXIS: + for (final org.exist.xquery.value.jnode.JNode sibling : jnode.getFollowingSiblings()) { + if (matchesJNode(sibling)) { + result.add(sibling); + } + } + break; + + case Constants.PRECEDING_SIBLING_AXIS: + for (final org.exist.xquery.value.jnode.JNode sibling : 
jnode.getPrecedingSiblings()) { + if (matchesJNode(sibling)) { + result.add(sibling); + } + } + break; + + case Constants.FOLLOWING_AXIS: + for (final org.exist.xquery.value.jnode.JNode following : jnode.getFollowing()) { + if (matchesJNode(following)) { + result.add(following); + } + } + break; + + case Constants.PRECEDING_AXIS: + for (final org.exist.xquery.value.jnode.JNode preceding : jnode.getPreceding()) { + if (matchesJNode(preceding)) { + result.add(preceding); + } + } + break; + + // XQuery 4.0 *-or-self axis variants + case Constants.FOLLOWING_OR_SELF_AXIS: + if (matchesJNode(jnode)) { + result.add(jnode); + } + for (final org.exist.xquery.value.jnode.JNode following : jnode.getFollowing()) { + if (matchesJNode(following)) { + result.add(following); + } + } + break; + + case Constants.PRECEDING_OR_SELF_AXIS: + if (matchesJNode(jnode)) { + result.add(jnode); + } + for (final org.exist.xquery.value.jnode.JNode preceding : jnode.getPreceding()) { + if (matchesJNode(preceding)) { + result.add(preceding); + } + } + break; + + case Constants.FOLLOWING_SIBLING_OR_SELF_AXIS: + if (matchesJNode(jnode)) { + result.add(jnode); + } + for (final org.exist.xquery.value.jnode.JNode sibling : jnode.getFollowingSiblings()) { + if (matchesJNode(sibling)) { + result.add(sibling); + } + } + break; + + case Constants.PRECEDING_SIBLING_OR_SELF_AXIS: + if (matchesJNode(jnode)) { + result.add(jnode); + } + for (final org.exist.xquery.value.jnode.JNode sibling : jnode.getPrecedingSiblings()) { + if (matchesJNode(sibling)) { + result.add(sibling); + } + } + break; + + default: + throw new XPathException(this, ErrorCodes.XPTY0019, + "Axis " + axis + " is not supported for JNodes"); + } + } + + // Apply predicates + if (result.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + Sequence filtered = result; + final Predicate[] predicates = getPredicates(); + if (predicates != null) { + for (final Predicate pred : predicates) { + filtered = pred.evalPredicate(contextSequence, 
filtered, axis);
+            }
+        }
+        return filtered;
+    }
+
+    /**
+     * Checks whether a JNode satisfies this step's node test.
+     *
+     * <ul>
+     *   <li>no test, or an {@code AnyNodeTest}: every JNode matches</li>
+     *   <li>{@code NameTest}: a {@code *} (or missing) local name matches every JNode;
+     *       otherwise the local name is compared with the string value of the JNode's key</li>
+     *   <li>{@code TypeTest}: {@code NODE}, {@code JSON_NODE} and {@code ELEMENT} act as
+     *       wildcards; any other type must be a supertype of the JNode's own type</li>
+     * </ul>
+     *
+     * @param jnode the candidate JNode
+     * @return {@code true} if the JNode is selected by the node test
+     */
+    private boolean matchesJNode(final org.exist.xquery.value.jnode.JNode jnode) {
+        // A missing test or an explicit any-node test accepts every JNode
+        if (test == null || test instanceof AnyNodeTest) {
+            return true;
+        }
+        // NameTest MUST be checked before TypeTest (NameTest extends TypeTest)
+        if (test instanceof NameTest) {
+            final org.exist.dom.QName name = test.getName();
+            if (name != null) {
+                final String local = name.getLocalPart();
+                // Wildcard * matches any JNode
+                if ("*".equals(local) || local == null) {
+                    return true;
+                }
+                // Named test: match against JNode key
+                try {
+                    final AtomicValue jnodeKey = jnode.getKey();
+                    if (jnodeKey != null) {
+                        return local.equals(jnodeKey.getStringValue());
+                    }
+                } catch (final XPathException e) {
+                    // A key whose string value cannot be obtained cannot match a name
+                    return false;
+                }
+            }
+            // No name on the test, or the JNode has no key
+            return false;
+        }
+        if (test instanceof TypeTest) {
+            final int testType = ((TypeTest) test).getType();
+            // TypeTest with NODE or ELEMENT means any node — wildcard for JNodes
+            // (child::* produces TypeTest(ELEMENT) which should match JNodes)
+            if (testType == Type.NODE || testType == Type.JSON_NODE
+                    || testType == Type.ELEMENT) {
+                return true;
+            }
+            return Type.subTypeOf(jnode.getType(), testType);
+        }
+        // Any other kind of node test never selects a JNode
+        return false;
+    }
+
+    private void 
addDescendants(final org.exist.xquery.value.jnode.JNode jnode, + final ValueSequence result, final boolean includeSelf) throws XPathException { + if (includeSelf && matchesJNode(jnode)) { + result.add(jnode); + } + for (final org.exist.xquery.value.jnode.JNode child : jnode.getChildren()) { + if (matchesJNode(child)) { + result.add(child); + } + addDescendants(child, result, false); + } + } + + private void addAncestors(final org.exist.xquery.value.jnode.JNode jnode, + final ValueSequence result, final boolean includeSelf) { + if (includeSelf && matchesJNode(jnode)) { + result.add(jnode); + } + org.exist.xquery.value.jnode.JNode parent = jnode.getParent(); + while (parent != null) { + if (matchesJNode(parent)) { + result.add(parent); + } + parent = parent.getParent(); + } + } + } diff --git a/exist-core/src/main/java/org/exist/xquery/Lookup.java b/exist-core/src/main/java/org/exist/xquery/Lookup.java index 2640d16076d..b54ebd19f36 100644 --- a/exist-core/src/main/java/org/exist/xquery/Lookup.java +++ b/exist-core/src/main/java/org/exist/xquery/Lookup.java @@ -83,18 +83,11 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } else { leftSeq = contextExpression.eval(contextSequence, null); } - final int contextType = leftSeq.getItemType(); - // Make compatible with baseX and Saxon if (leftSeq.isEmpty()) { return Sequence.EMPTY_SEQUENCE; } - - if (!(Type.subTypeOf(contextType, Type.MAP_ITEM) || Type.subTypeOf(contextType, Type.ARRAY_ITEM))) { - throw new XPathException(this, ErrorCodes.XPTY0004, - "expression to the left of a lookup operator needs to be a sequence of maps or arrays"); - } if (keyExpression != null) { keys = keyExpression.eval(contextSequence, null); if (keys.isEmpty()) { @@ -104,7 +97,12 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc try { final ValueSequence result = new ValueSequence(); for (SequenceIterator i = leftSeq.iterate(); i.hasNext(); ) { - final LookupSupport item = 
(LookupSupport) i.nextItem(); + final Item nextItem = i.nextItem(); + if (!(nextItem instanceof LookupSupport)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "expression to the left of a lookup operator needs to be a sequence of maps or arrays"); + } + final LookupSupport item = (LookupSupport) nextItem; if (keys != null) { for (SequenceIterator j = keys.iterate(); j.hasNext(); ) { final AtomicValue key = j.nextItem().atomize(); diff --git a/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java b/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java new file mode 100644 index 00000000000..1e738abc9c4 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/MappingArrowOperator.java @@ -0,0 +1,207 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.dom.QName.IllegalQNameException; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.FunctionReference; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements the XQuery 4.0 mapping arrow operator (=!>). + * + * Unlike the fat arrow (=>), which passes the entire left-hand sequence + * as the first argument, the mapping arrow iterates over each item in + * the sequence and passes each one individually, concatenating the results. + * + * {@code (1, 2, 3) =!> string()} is equivalent to {@code (1, 2, 3) ! string(.)}. 
+ */ +public class MappingArrowOperator extends AbstractExpression { + + private QName qname = null; + private Expression leftExpr; + private FunctionCall fcall = null; + private Expression funcSpec = null; + private List parameters; + private AnalyzeContextInfo cachedContextInfo; + + public MappingArrowOperator(final XQueryContext context, final Expression leftExpr) throws XPathException { + super(context); + this.leftExpr = leftExpr; + } + + public void setArrowFunction(final String fname, final List params) throws XPathException { + try { + this.qname = QName.parse(context, fname, context.getDefaultFunctionNamespace()); + this.parameters = params; + } catch (final IllegalQNameException e) { + throw new XPathException(this, ErrorCodes.XPST0081, "No namespace defined for prefix " + fname); + } + } + + public void setArrowFunction(final PathExpr funcSpec, final List params) { + this.funcSpec = funcSpec.simplify(); + this.parameters = params; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + if (qname != null) { + fcall = NamedFunctionReference.lookupFunction(this, context, qname, parameters.size() + 1); + } + this.cachedContextInfo = contextInfo; + leftExpr.analyze(contextInfo); + if (fcall != null) { + fcall.analyze(contextInfo); + } + if (funcSpec != null) { + funcSpec.analyze(contextInfo); + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence inputSeq = leftExpr.eval(contextSequence, null); + + if (inputSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < inputSeq.getItemCount(); i++) { + final Item item = inputSeq.itemAt(i); + final Sequence itemSeq = item.toSequence(); + + final FunctionReference fref; + if (fcall != null) { + fref = new FunctionReference(this, fcall); + } 
else { + final Sequence funcSeq = funcSpec.eval(itemSeq, null); + if (funcSeq.getCardinality() != Cardinality.EXACTLY_ONE) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Expected exactly one item for the function to be called, got " + funcSeq.getItemCount() + + ". Expression: " + ExpressionDumper.dump(funcSpec)); + } + final Item item0 = funcSeq.itemAt(0); + if (!Type.subTypeOf(item0.getType(), Type.FUNCTION)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Type error: expected function, got " + Type.getTypeName(item0.getType())); + } + fref = (FunctionReference) item0; + } + try { + final List fparams = new ArrayList<>(parameters.size() + 1); + fparams.add(new ContextParam(context, itemSeq)); + fparams.addAll(parameters); + + fref.setArguments(fparams); + fref.analyze(new AnalyzeContextInfo(cachedContextInfo)); + result.addAll(fref.eval(null)); + } finally { + fref.close(); + } + } + return result; + } + + @Override + public int returnsType() { + return fcall == null ? 
Type.ITEM : fcall.returnsType();
+    }
+
+    @Override
+    public Cardinality getCardinality() {
+        return Cardinality.ZERO_OR_MORE;
+    }
+
+    /**
+     * Writes this expression as {@code left =!> function(arg1, arg2, ...)}.
+     *
+     * The function is shown by name when it was resolved statically, otherwise
+     * the dynamic function expression is dumped. Every additional argument is
+     * dumped, separated by commas, inside one balanced pair of parentheses.
+     *
+     * @param dumper the dumper receiving the textual form
+     */
+    @Override
+    public void dump(final ExpressionDumper dumper) {
+        leftExpr.dump(dumper);
+        dumper.display(" =!> ");
+        if (fcall != null) {
+            dumper.display(fcall.getFunction().getName());
+        } else {
+            funcSpec.dump(dumper);
+        }
+        // Open the argument list for both forms so it always pairs with the
+        // closing parenthesis emitted after the loop
+        dumper.display('(');
+        for (int i = 0; i < parameters.size(); i++) {
+            // The separator goes between arguments; the argument itself is always dumped
+            if (i > 0) {
+                dumper.display(", ");
+            }
+            parameters.get(i).dump(dumper);
+        }
+        dumper.display(')');
+    }
+
+    @Override
+    public void resetState(boolean postOptimization) {
+        super.resetState(postOptimization);
+        leftExpr.resetState(postOptimization);
+        if (fcall != null) {
+            fcall.resetState(postOptimization);
+        }
+        if (funcSpec != null) {
+            funcSpec.resetState(postOptimization);
+        }
+        for (Expression param : parameters) {
+            param.resetState(postOptimization);
+        }
+    }
+
+    private class ContextParam extends Function.Placeholder {
+        private final Sequence sequence;
+
+        ContextParam(XQueryContext context, Sequence sequence) {
+            super(context);
+            this.sequence = sequence;
+        }
+
+        @Override
+        public void analyze(AnalyzeContextInfo contextInfo) throws XPathException {
+            // no-op: context param is pre-evaluated
+        }
+
+        @Override
+        public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException {
+            return sequence;
+        }
+
+        @Override
+        public int returnsType() {
+            return sequence.getItemType();
+        }
+
+        @Override
+        public void dump(ExpressionDumper dumper) {
+            // no-op: context param has no source representation
+        }
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java b/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java
new file mode 100644
index 00000000000..b302c8bdac3
--- /dev/null
+++ b/exist-core/src/main/java/org/exist/xquery/MethodCallOperator.java
@@ -0,0 +1,211 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org 
+ * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +import java.util.ArrayList; +import java.util.List; + +/** + * Implements the XQuery 4.0 method call operator (=?>). + * + * {@code $map =?> method(args)} looks up the key "method" in the map, + * retrieves the function stored there, and calls it with the map as + * the first argument followed by any additional arguments. + * + * For each item in the left-hand sequence: + *
 <ol>
+ *   <li>The item must be a map (XPTY0004 otherwise)</li>
+ *   <li>The method name is looked up as a key in the map</li>
+ *   <li>The value must be exactly one function (XPTY0004 otherwise)</li>
+ *   <li>The function is called with the map as first argument + additional args</li>
+ * </ol>
+ * + * Like the mapping arrow (=!>), it processes each item individually + * and concatenates results. + */ +public class MethodCallOperator extends AbstractExpression { + + private Expression leftExpr; + private String methodName; + private List parameters; + private AnalyzeContextInfo cachedContextInfo; + + public MethodCallOperator(final XQueryContext context, final Expression leftExpr) throws XPathException { + super(context); + this.leftExpr = leftExpr; + } + + public void setMethod(final String methodName, final List params) { + this.methodName = methodName; + this.parameters = params; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + this.cachedContextInfo = contextInfo; + leftExpr.analyze(contextInfo); + if (parameters != null) { + for (final Expression param : parameters) { + param.analyze(contextInfo); + } + } + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence inputSeq = leftExpr.eval(contextSequence, null); + + if (inputSeq.isEmpty()) { + return Sequence.EMPTY_SEQUENCE; + } + + final ValueSequence result = new ValueSequence(); + for (int i = 0; i < inputSeq.getItemCount(); i++) { + final Item item = inputSeq.itemAt(i); + + // The item must be a map + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method call operator (=?>) requires a map, got " + + Type.getTypeName(item.getType())); + } + + final AbstractMapType map = (AbstractMapType) item; + + // Look up the method name as a key in the map + final Sequence methodValue = map.get(new StringValue(this, methodName)); + if (methodValue == null || methodValue.isEmpty()) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' not found in map"); + } + + if (methodValue.getItemCount() != 1) { + throw new 
XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' must be a single function, got " + + methodValue.getItemCount() + " items"); + } + + final Item methodItem = methodValue.itemAt(0); + if (!Type.subTypeOf(methodItem.getType(), Type.FUNCTION)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' is not a function, got " + + Type.getTypeName(methodItem.getType())); + } + + final FunctionReference fref = (FunctionReference) methodItem; + + // Check arity: function must accept at least 1 argument (the map itself) + final int expectedArity = (parameters != null ? parameters.size() : 0) + 1; + if (fref.getSignature().getArgumentCount() == 0) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Method '" + methodName + "' has arity 0 and cannot accept the map as first argument"); + } + + try { + final List fparams = new ArrayList<>(expectedArity); + fparams.add(new ContextParam(context, item.toSequence())); + if (parameters != null) { + fparams.addAll(parameters); + } + + fref.setArguments(fparams); + fref.analyze(new AnalyzeContextInfo(cachedContextInfo)); + result.addAll(fref.eval(null)); + } finally { + fref.close(); + } + } + return result; + } + + @Override + public int returnsType() { + return Type.ITEM; + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + leftExpr.dump(dumper); + dumper.display(" =?> ").display(methodName).display('('); + if (parameters != null) { + for (int i = 0; i < parameters.size(); i++) { + if (i > 0) { + dumper.display(", "); + } + parameters.get(i).dump(dumper); + } + } + dumper.display(')'); + } + + @Override + public void resetState(boolean postOptimization) { + super.resetState(postOptimization); + leftExpr.resetState(postOptimization); + if (parameters != null) { + for (Expression param : parameters) { + param.resetState(postOptimization); + } + } + } + + private 
class ContextParam extends Function.Placeholder { + private final Sequence sequence; + + ContextParam(XQueryContext context, Sequence sequence) { + super(context); + this.sequence = sequence; + } + + @Override + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { + // no-op: context param is pre-evaluated + } + + @Override + public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathException { + return sequence; + } + + @Override + public int returnsType() { + return sequence.getItemType(); + } + + @Override + public void dump(ExpressionDumper dumper) { + // no-op: context param has no source representation + } + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/ModuleContext.java b/exist-core/src/main/java/org/exist/xquery/ModuleContext.java index fd63f4f0b6c..a9c2ffe8cea 100644 --- a/exist-core/src/main/java/org/exist/xquery/ModuleContext.java +++ b/exist-core/src/main/java/org/exist/xquery/ModuleContext.java @@ -297,6 +297,17 @@ public Module[] getRootModules(final String namespaceURI) { return parentContext.getRootModules(namespaceURI); } + @Override + public String getModuleLocation(final String namespaceURI) { + // Check local dynamic locations first, then delegate to parent + final String local = super.getModuleLocation(namespaceURI); + if (local != null) { + return local; + } + // Check parent context's dynamic locations + return parentContext.getModuleLocation(namespaceURI); + } + @Override final protected XPathException moduleLoadException(final String message, final String moduleLocation) throws XPathException { return moduleLoadException(message, moduleLocation, null); @@ -357,6 +368,11 @@ public MemTreeBuilder getDocumentBuilder(final boolean explicitCreation) { return parentContext.getDocumentBuilder(explicitCreation); } + @Override + public MemTreeBuilder getCurrentDocumentBuilder() { + return parentContext.getCurrentDocumentBuilder(); + } + @Override public void pushDocumentContext() { 
parentContext.pushDocumentContext(); @@ -523,6 +539,11 @@ public String getInheritedNamespace(final String prefix) { return parentContext.getInheritedNamespace(prefix); } + @Override + public Map getAllInheritedNamespaces() { + return parentContext.getAllInheritedNamespaces(); + } + @Override public String getInheritedPrefix(final String uri) { return parentContext.getInheritedPrefix(uri); diff --git a/exist-core/src/main/java/org/exist/xquery/NamedFunctionReference.java b/exist-core/src/main/java/org/exist/xquery/NamedFunctionReference.java index e3e222d0dcd..96e6424871c 100644 --- a/exist-core/src/main/java/org/exist/xquery/NamedFunctionReference.java +++ b/exist-core/src/main/java/org/exist/xquery/NamedFunctionReference.java @@ -24,6 +24,8 @@ import java.util.ArrayList; import java.util.List; +import java.util.Set; + import org.exist.dom.QName; import org.exist.xquery.parser.XQueryAST; import org.exist.xquery.util.ExpressionDumper; @@ -52,7 +54,29 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { resolvedFunction.analyze(contextInfo); } + /** + * Reserved function names per XQuery 3.1/4.0 spec. + * These names must not be used as unprefixed named function references (XPST0003). 
+ */ + private static final Set RESERVED_FUNCTION_NAMES = Set.of( + "array", "attribute", "comment", "document-node", "element", + "function", "if", "item", "map", "namespace-node", "node", + "processing-instruction", "schema-attribute", "schema-element", + "switch", "text", "typeswitch" + ); + public static FunctionCall lookupFunction(Expression self, XQueryContext context, QName funcName, int arity) throws XPathException { + // Check for reserved function names — these cannot be used as named function references + final String localPart = funcName.getLocalPart(); + final String nsURI = funcName.getNamespaceURI(); + if (RESERVED_FUNCTION_NAMES.contains(localPart) && + (nsURI == null || nsURI.isEmpty() || + Function.BUILTIN_FUNCTION_NS.equals(nsURI) || + context.getDefaultFunctionNamespace().equals(nsURI))) { + throw new XPathException(self, ErrorCodes.XPST0003, + "'" + localPart + "' is a reserved function name and cannot be used as a named function reference"); + } + if (Function.BUILTIN_FUNCTION_NS.equals(funcName.getNamespaceURI()) && "concat".equals(funcName.getLocalPart()) && arity < 2) { diff --git a/exist-core/src/main/java/org/exist/xquery/NodeComparison.java b/exist-core/src/main/java/org/exist/xquery/NodeComparison.java index 5ba49aea9b6..de029e62257 100644 --- a/exist-core/src/main/java/org/exist/xquery/NodeComparison.java +++ b/exist-core/src/main/java/org/exist/xquery/NodeComparison.java @@ -90,11 +90,11 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc final Sequence ls = getLeft().eval(contextSequence, contextItem); final Sequence rs = getRight().eval(contextSequence, contextItem); if (!ls.isEmpty() && !rs.isEmpty()) { - if (!Type.subTypeOf(ls.itemAt(0).getType(), Type.NODE)) + if (!Type.isNodeType(ls.itemAt(0).getType())) {throw new XPathException(this, ErrorCodes.XPTY0004, "left item is not a node; got '" + Type.getTypeName(ls.itemAt(0).getType()) + "'");} - if (!Type.subTypeOf(rs.itemAt(0).getType(), Type.NODE)) 
+ if (!Type.isNodeType(rs.itemAt(0).getType())) {throw new XPathException(this, ErrorCodes.XPTY0004, "right item is not a node; got '" + Type.getTypeName(rs.itemAt(0).getType()) + "'");} @@ -106,9 +106,11 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } else { result = switch (relation) { case IS -> lv.equals(rv) ? BooleanValue.TRUE : BooleanValue.FALSE; + case IS_NOT -> !lv.equals(rv) ? BooleanValue.TRUE : BooleanValue.FALSE; case BEFORE -> lv.before(rv, false) ? BooleanValue.TRUE : BooleanValue.FALSE; case AFTER -> lv.after(rv, false) ? BooleanValue.TRUE : BooleanValue.FALSE; - default -> throw new XPathException(this, "Illegal argument: unknown relation"); + case FOLLOWS_OR_IS -> (lv.equals(rv) || lv.after(rv, false)) ? BooleanValue.TRUE : BooleanValue.FALSE; + case PRECEDES_OR_IS -> (lv.equals(rv) || lv.before(rv, false)) ? BooleanValue.TRUE : BooleanValue.FALSE; }; } } else { diff --git a/exist-core/src/main/java/org/exist/xquery/OpSimpleMap.java b/exist-core/src/main/java/org/exist/xquery/OpSimpleMap.java index 7a4e31e6ee0..55fba78f1bb 100644 --- a/exist-core/src/main/java/org/exist/xquery/OpSimpleMap.java +++ b/exist-core/src/main/java/org/exist/xquery/OpSimpleMap.java @@ -44,7 +44,22 @@ public OpSimpleMap(XQueryContext context, PathExpr left, PathExpr right) { @Override public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { left.analyze(new AnalyzeContextInfo(contextInfo)); - right.analyze(new AnalyzeContextInfo(contextInfo)); + // The right side of "!" evaluates with each left-side item as the new + // context item. Strip IN_PREDICATE so that sub-expressions (e.g. + // GeneralComparison) do not apply the node-set-intersection optimisation + // that is only valid for the outer predicate's direct children. 
+ final AnalyzeContextInfo rightContextInfo = new AnalyzeContextInfo(contextInfo); + rightContextInfo.removeFlag(IN_PREDICATE); + right.analyze(rightContextInfo); + } + + @Override + public int getDependencies() { + // The simple map's external dependencies come from the left operand, + // which evaluates in the caller's context. The right operand evaluates + // in a per-item context derived from the left, so its dependencies are + // internal to the operator. + return left.getDependencies(); } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/Option.java b/exist-core/src/main/java/org/exist/xquery/Option.java index 27f8615dfdb..32c38e67dd7 100644 --- a/exist-core/src/main/java/org/exist/xquery/Option.java +++ b/exist-core/src/main/java/org/exist/xquery/Option.java @@ -60,7 +60,9 @@ public Option(QName qname, String contents) throws XPathException { } public Option(final Expression expression, QName qname, String contents) throws XPathException { - if (qname.getPrefix() == null || qname.getPrefix().isEmpty()) + // Options must be in a namespace: either via prefix or via URIQualifiedName Q{uri}local + if ((qname.getPrefix() == null || qname.getPrefix().isEmpty()) + && (qname.getNamespaceURI() == null || qname.getNamespaceURI().isEmpty())) {throw new XPathException(expression, "XPST0081: options must have a prefix");} this.qname = qname; this.contents = contents; diff --git a/exist-core/src/main/java/org/exist/xquery/OrderSpec.java b/exist-core/src/main/java/org/exist/xquery/OrderSpec.java index 1a31dfc9dd9..1b0b59dd65e 100644 --- a/exist-core/src/main/java/org/exist/xquery/OrderSpec.java +++ b/exist-core/src/main/java/org/exist/xquery/OrderSpec.java @@ -48,7 +48,9 @@ public OrderSpec(XQueryContext context, Expression sortExpr) { } public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { - expression.analyze(contextInfo); + final AnalyzeContextInfo orderInfo = new AnalyzeContextInfo(contextInfo); + 
orderInfo.addFlag(Expression.NON_UPDATING_CONTEXT); + expression.analyze(orderInfo); } public void setModifiers(int modifiers) { diff --git a/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java b/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java new file mode 100644 index 00000000000..760ab147c54 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/OtherwiseExpression.java @@ -0,0 +1,90 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Item; + +/** + * Implements the XQuery 4.0 "otherwise" operator. + * + * {@code E1 otherwise E2} returns E1 if it is non-empty, otherwise E2. 
+ */
+public class OtherwiseExpression extends AbstractExpression {
+
+    /** Operand whose result is returned whenever it is non-empty. */
+    private final Expression left;
+    /** Fallback operand, evaluated only when the left operand yields nothing. */
+    private final Expression right;
+
+    /**
+     * @param context the query context this expression belongs to
+     * @param left the preferred operand
+     * @param right the fallback operand
+     */
+    public OtherwiseExpression(final XQueryContext context, final Expression left, final Expression right) {
+        super(context);
+        this.left = left;
+        this.right = right;
+    }
+
+    /**
+     * Analyzes both operands, each with its own copy of the context info.
+     */
+    @Override
+    public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException {
+        left.analyze(new AnalyzeContextInfo(contextInfo));
+        right.analyze(new AnalyzeContextInfo(contextInfo));
+    }
+
+    /**
+     * Evaluates the left operand and returns its result if it is non-empty;
+     * otherwise evaluates and returns the right operand. The right operand is
+     * not evaluated when the left operand produces a result.
+     */
+    @Override
+    public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException {
+        if (contextItem != null) {
+            contextSequence = contextItem.toSequence();
+        }
+        final Sequence leftResult = left.eval(contextSequence, null);
+        if (leftResult != null && !leftResult.isEmpty()) {
+            return leftResult;
+        }
+        return right.eval(contextSequence, null);
+    }
+
+    /**
+     * The result can come from either operand, so the reported type is the
+     * common supertype of both operand types rather than the left type alone.
+     */
+    @Override
+    public int returnsType() {
+        return org.exist.xquery.value.Type.getCommonSuperType(left.returnsType(), right.returnsType());
+    }
+
+    @Override
+    public Cardinality getCardinality() {
+        return Cardinality.ZERO_OR_MORE;
+    }
+
+    @Override
+    public void dump(final ExpressionDumper dumper) {
+        left.dump(dumper);
+        dumper.display(" otherwise ");
+        right.dump(dumper);
+    }
+
+    @Override
+    public String toString() {
+        return left.toString() + " otherwise " + right.toString();
+    }
+
+    @Override
+    public void resetState(final boolean postOptimization) {
+        super.resetState(postOptimization);
+        left.resetState(postOptimization);
+        right.resetState(postOptimization);
+    }
+}
diff --git a/exist-core/src/main/java/org/exist/xquery/PathExpr.java b/exist-core/src/main/java/org/exist/xquery/PathExpr.java
index 8e096376cc5..82a97239929 100644
--- a/exist-core/src/main/java/org/exist/xquery/PathExpr.java
+++ b/exist-core/src/main/java/org/exist/xquery/PathExpr.java
@@ -53,6 +53,14 @@ public class PathExpr extends AbstractExpression implements CompiledXQuery,
     protected boolean inPredicate = false;
+    /**
+     * Set to true when this PathExpr represents an actual XPath path 
+ * expression with '/' or '//' steps, as opposed to a generic expression + * container. When true, duplicate node elimination is applied per + * XPath 3.1 §3.3.1.1. + */ + private boolean hasSlash = false; + protected Expression parent; public PathExpr(final XQueryContext context) { @@ -155,6 +163,14 @@ public Expression getParent() { return this.parent; } + @Override + public Expression optimize(final CompileContext cc) throws XPathException { + for (int i = 0; i < steps.size(); i++) { + steps.set(i, steps.get(i).optimize(cc)); + } + return this; + } + @Override public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { this.parent = contextInfo.getParent(); @@ -179,7 +195,7 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException } } - if (i > 1) { + if (i >= 1) { contextInfo.setContextStep(steps.get(i - 1)); } contextInfo.setParent(this); @@ -235,9 +251,11 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP expr = step; context.getWatchDog().proceed(expr); //TODO : maybe this could be detected by the parser ? -pb - if (gotAtomicResult && !Type.subTypeOf(expr.returnsType(), Type.NODE) + if (gotAtomicResult && !Type.isNavigable(expr.returnsType()) //Ugly workaround to allow preceding *text* nodes. - && !(expr instanceof EnclosedExpr)) { + && !(expr instanceof EnclosedExpr) + // XQ4: allow path navigation on results containing maps/arrays + && (result == null || !containsNavigableItem(result))) { throw new XPathException(this, ErrorCodes.XPTY0019, "left operand of '/' must be a node. 
Got '" + Type.getTypeName(result.getItemType()) + " " + @@ -247,7 +265,7 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP expr.setContextDocSet(contextDocs); // switch into single step mode if we are processing in-memory nodes only final boolean inMemProcessing = currentContext != null && - Type.subTypeOf(currentContext.getItemType(), Type.NODE) && + Type.isNavigable(currentContext.getItemType()) && !currentContext.isPersistentSet(); //DESIGN : first test the dependency then the result final int exprDeps = expr.getDependencies(); @@ -258,7 +276,7 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP //TODO : reconsider since that may be expensive (type evaluation) !(this instanceof Predicate && Type.subTypeOfUnion(this.returnsType(), Type.NUMERIC)) && currentContext != null && !currentContext.isEmpty())) { - Sequence exprResult = new ValueSequence(Type.subTypeOf(expr.returnsType(), Type.NODE)); + Sequence exprResult = new ValueSequence(Type.isNodeType(expr.returnsType())); ((ValueSequence) exprResult).keepUnOrdered(unordered); //Restore a position which may have been modified by inner expressions int p = context.getContextPosition(); @@ -295,10 +313,12 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP if (result != null) { if (steps.size() > 1 && !(result instanceof VirtualNodeSet) && !(expr instanceof EnclosedExpr) && !result.isEmpty() && - !Type.subTypeOf(result.getItemType(), Type.NODE)) { + !Type.isNavigable(result.getItemType()) && + !containsNavigableItem(result)) { gotAtomicResult = true; } - if (steps.size() > 1 && getLastExpression() instanceof Step) { + if (hasSlash && !result.isEmpty() + && Type.isNodeType(result.getItemType())) { // remove duplicate nodes if this is a path // expression with more than one step result.removeDuplicates(); @@ -332,7 +352,8 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP } if 
(gotAtomicResult && result != null && !allowMixedNodesInReturn && - !Type.subTypeOf(result.getItemType(), Type.ANY_ATOMIC_TYPE)) { + !Type.subTypeOf(result.getItemType(), Type.ANY_ATOMIC_TYPE) && + !containsNavigableItem(result)) { throw new XPathException(this, ErrorCodes.XPTY0018, "Cannot mix nodes and atomic values in the result of a path expression."); } @@ -343,6 +364,24 @@ public Sequence eval(Sequence contextSequence, final Item contextItem) throws XP return result; } + /** + * Check if the sequence contains any navigable items (nodes, maps, or arrays). + * Used for XQuery 4.0 path navigation on maps/arrays where the declared + * item type may be item() but actual items are navigable. + */ + private static boolean containsNavigableItem(final Sequence seq) { + if (seq == null || seq.isEmpty()) { + return false; + } + for (int i = 0; i < Math.min(seq.getItemCount(), 5); i++) { + final Item item = seq.itemAt(i); + if (Type.isNavigable(item.getType())) { + return true; + } + } + return false; + } + @Override public XQueryContext getContext() { return context; @@ -375,6 +414,14 @@ public Expression getLastExpression() { return steps.isEmpty() ? null : steps.getLast(); } + /** + * Marks this PathExpr as containing a '/' or '//' path operator. + * Called from the grammar tree walker when SLASH or DSLASH is encountered. + */ + public void setHasSlash() { + this.hasSlash = true; + } + /** * Get the length. 
* @@ -392,6 +439,36 @@ public int getSubExpressionCount() { return steps.size(); } + @Override + public boolean isVacuous() { + if (steps.isEmpty()) { + return true; + } + if (steps.size() == 1) { + return steps.getFirst().isVacuous(); + } + // For multi-step paths, use default logic + return !isUpdating() && getCardinality() == Cardinality.EMPTY_SEQUENCE; + } + + @Override + public boolean isUpdating() { + if (steps.isEmpty()) { + return false; + } + // A PathExpr with one step delegates to that step + if (steps.size() == 1) { + return steps.getFirst().isUpdating(); + } + // For multi-step paths, check if any step is updating + for (final Expression step : steps) { + if (step.isUpdating()) { + return true; + } + } + return false; + } + @Override public boolean allowMixedNodesInReturn() { if (steps.size() == 1) { diff --git a/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java b/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java new file mode 100644 index 00000000000..5c746c1127f --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/PipelineExpression.java @@ -0,0 +1,106 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.ValueSequence; + +/** + * Implements the XQuery 4.0 pipeline operator "->". + * + * The expression {@code E1 -> E2} evaluates E1, then evaluates E2 with the + * result of E1 as the context value, position 1, and last 1. + */ +public class PipelineExpression extends AbstractExpression { + + private Expression left; + private Expression right; + + public PipelineExpression(final XQueryContext context, final Expression left, final Expression right) { + super(context); + this.left = left; + this.right = right; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + left.analyze(new AnalyzeContextInfo(contextInfo)); + right.analyze(new AnalyzeContextInfo(contextInfo)); + } + + @Override + public Sequence eval(Sequence contextSequence, final Item contextItem) throws XPathException { + if (contextItem != null) { + contextSequence = contextItem.toSequence(); + } + final Sequence leftResult = left.eval(contextSequence, null); + + // Pipeline: set context position=0 (position()=1) and a single-item + // context sequence so last()=1, per XQ4 spec. 
+ final Sequence singletonContext; + if (leftResult.isEmpty()) { + singletonContext = Sequence.EMPTY_SEQUENCE; + } else { + singletonContext = new ValueSequence(1); + singletonContext.add(leftResult.itemAt(0)); + } + final int savedPos = context.getContextPosition(); + final Sequence savedSeq = context.getContextSequence(); + context.setContextSequencePosition(0, singletonContext); + try { + return right.eval(leftResult, null); + } finally { + context.setContextSequencePosition(savedPos, savedSeq); + } + } + + @Override + public int returnsType() { + return right.returnsType(); + } + + @Override + public Cardinality getCardinality() { + return Cardinality.ZERO_OR_MORE; + } + + @Override + public void dump(final ExpressionDumper dumper) { + left.dump(dumper); + dumper.display(" -> "); + right.dump(dumper); + } + + @Override + public String toString() { + return left.toString() + " -> " + right.toString(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + left.resetState(postOptimization); + right.resetState(postOptimization); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/Predicate.java b/exist-core/src/main/java/org/exist/xquery/Predicate.java index 986de11bb8a..3dc963ab12d 100644 --- a/exist-core/src/main/java/org/exist/xquery/Predicate.java +++ b/exist-core/src/main/java/org/exist/xquery/Predicate.java @@ -72,7 +72,9 @@ public Predicate(final XQueryContext context) { @Override public void addPath(final PathExpr path) { - if (path.getSubExpressionCount() == 1) { + // Only unwrap plain PathExpr containers, not Function/BinaryOp subclasses + // which use steps for their own purposes (arguments, operands) + if (path.getClass() == PathExpr.class && path.getSubExpressionCount() == 1) { add(path.getSubExpression(0)); } else { super.addPath(path); @@ -129,6 +131,7 @@ private AnalyzeContextInfo createContext(final AnalyzeContextInfo contextInfo) { final AnalyzeContextInfo newContextInfo = 
new AnalyzeContextInfo(contextInfo); // set flag to signal subexpression that we are in a predicate newContextInfo.addFlag(IN_PREDICATE); + newContextInfo.addFlag(NON_UPDATING_CONTEXT); newContextInfo.removeFlag(IN_WHERE_CLAUSE); // remove where clause flag newContextInfo.removeFlag(DOT_TEST); outerContextId = newContextInfo.getContextId(); @@ -514,13 +517,21 @@ private Sequence selectByPosition(final Sequence outerSequence, temp = contextSet.selectPrecedingSiblings(p, Expression.IGNORE_CONTEXT); break; case Constants.FOLLOWING_SIBLING_AXIS: + case Constants.FOLLOWING_SIBLING_OR_SELF_AXIS: temp = contextSet.selectFollowingSiblings(p, Expression.IGNORE_CONTEXT); reverseAxis = false; break; case Constants.FOLLOWING_AXIS: + case Constants.FOLLOWING_OR_SELF_AXIS: temp = contextSet.selectFollowing(p, Expression.IGNORE_CONTEXT); reverseAxis = false; break; + case Constants.PRECEDING_OR_SELF_AXIS: + temp = contextSet.selectPreceding(p, Expression.IGNORE_CONTEXT); + break; + case Constants.PRECEDING_SIBLING_OR_SELF_AXIS: + temp = contextSet.selectPrecedingSiblings(p, Expression.IGNORE_CONTEXT); + break; case Constants.SELF_AXIS: temp = p; reverseAxis = false; diff --git a/exist-core/src/main/java/org/exist/xquery/Profiler.java b/exist-core/src/main/java/org/exist/xquery/Profiler.java index aab76e1a050..95767629082 100644 --- a/exist-core/src/main/java/org/exist/xquery/Profiler.java +++ b/exist-core/src/main/java/org/exist/xquery/Profiler.java @@ -81,6 +81,17 @@ public class Profiler { private PerformanceStats stats; + /** + * Returns the performance statistics collected by this profiler instance. + * Each XQueryContext has its own Profiler with its own stats, enabling + * per-query profiling isolation. 
+ * + * @return the performance stats for this profiler + */ + public PerformanceStats getPerformanceStats() { + return stats; + } + private long queryStart = 0; private Database db; diff --git a/exist-core/src/main/java/org/exist/xquery/QuantifiedExpression.java b/exist-core/src/main/java/org/exist/xquery/QuantifiedExpression.java index e2e58d64f17..a0435bbd897 100644 --- a/exist-core/src/main/java/org/exist/xquery/QuantifiedExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/QuantifiedExpression.java @@ -69,8 +69,12 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { context.declareVariableBinding(new LocalVariable(varName)); contextInfo.setParent(this); - inputSequence.analyze(contextInfo); - returnExpr.analyze(contextInfo); + final AnalyzeContextInfo inputInfo = new AnalyzeContextInfo(contextInfo); + inputInfo.addFlag(NON_UPDATING_CONTEXT); + inputSequence.analyze(inputInfo); + final AnalyzeContextInfo satisfiesInfo = new AnalyzeContextInfo(contextInfo); + satisfiesInfo.addFlag(NON_UPDATING_CONTEXT); + returnExpr.analyze(satisfiesInfo); } finally { context.popLocalVariables(mark); } diff --git a/exist-core/src/main/java/org/exist/xquery/RangeExpression.java b/exist-core/src/main/java/org/exist/xquery/RangeExpression.java index 3dece7515e2..43ecb085c74 100644 --- a/exist-core/src/main/java/org/exist/xquery/RangeExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/RangeExpression.java @@ -67,8 +67,13 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { inPredicate = (contextInfo.getFlags() & IN_PREDICATE) > 0; contextId = contextInfo.getContextId(); contextInfo.setParent(this); - start.analyze(contextInfo); - end.analyze(contextInfo); + // Operands of range expression are non-updating contexts + final AnalyzeContextInfo startInfo = new AnalyzeContextInfo(contextInfo); + startInfo.addFlag(NON_UPDATING_CONTEXT); + start.analyze(startInfo); + final AnalyzeContextInfo endInfo = new 
AnalyzeContextInfo(contextInfo); + endInfo.addFlag(NON_UPDATING_CONTEXT); + end.analyze(endInfo); } diff --git a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java index c23c663067e..eb3ecfa6507 100644 --- a/exist-core/src/main/java/org/exist/xquery/RangeSequence.java +++ b/exist-core/src/main/java/org/exist/xquery/RangeSequence.java @@ -21,8 +21,6 @@ */ package org.exist.xquery; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; import org.exist.dom.persistent.NodeSet; import org.exist.xquery.value.AbstractSequence; import org.exist.xquery.value.IntegerValue; @@ -32,18 +30,40 @@ import org.exist.xquery.value.SequenceIterator; import org.exist.xquery.value.Type; -import java.math.BigInteger; - +/** + * An immutable, lazy sequence representing an integer range (start to end). + * Stores only the start and end values as primitive longs — no intermediate + * IntegerValue objects are created until accessed. Operations like count(), + * isEmpty(), itemAt(), and subsequence() are O(1). + */ public class RangeSequence extends AbstractSequence { - private final static Logger LOG = LogManager.getLogger(AbstractSequence.class); - - private final IntegerValue start; - private final IntegerValue end; + private final long start; + private final long end; + private final long size; public RangeSequence(final IntegerValue start, final IntegerValue end) { + this(start.getLong(), end.getLong()); + } + + public RangeSequence(final long start, final long end) { this.start = start; this.end = end; + if (start <= end) { + final long diff = end - start; + // Overflow protection: if diff < 0, the range is too large + this.size = (diff >= 0) ? 
diff + 1 : Long.MAX_VALUE; + } else { + this.size = 0; + } + } + + public long getStart() { + return start; + } + + public long getEnd() { + return end; } @Override @@ -62,16 +82,16 @@ public int getItemType() { @Override public SequenceIterator iterate() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } @Override public SequenceIterator unorderedIterator() { - return new RangeSequenceIterator(start.getLong(), end.getLong()); + return new RangeSequenceIterator(start, end); } public SequenceIterator iterateInReverse() { - return new ReverseRangeSequenceIterator(start.getLong(), end.getLong()); + return new ReverseRangeSequenceIterator(start, end); } private static class RangeSequenceIterator implements SequenceIterator { @@ -148,39 +168,30 @@ public long skip(final long n) { @Override public long getItemCountLong() { - if (start.compareTo(end) > 0) { - return 0; - } - try { - return ((IntegerValue) end.minus(start)).getLong() + 1; - } catch (final XPathException e) { - LOG.warn("Unexpected exception when processing result of range expression: {}", e.getMessage(), e); - return 0; - } + return size; } @Override public boolean isEmpty() { - return getItemCountLong() == 0; + return size == 0; } @Override public boolean hasOne() { - return getItemCountLong() == 1; + return size == 1; } @Override public boolean hasMany() { - return getItemCountLong() > 1; + return size > 1; } @Override public Cardinality getCardinality() { - final long itemCount = getItemCountLong(); - if (itemCount <= 0) { + if (size == 0) { return Cardinality.EMPTY_SEQUENCE; } - if (itemCount == 1) { + if (size == 1) { return Cardinality.EXACTLY_ONE; } return Cardinality._MANY; @@ -188,12 +199,26 @@ public Cardinality getCardinality() { @Override public Item itemAt(final int pos) { - if (pos < getItemCountLong()) { - return new IntegerValue(start.getLong() + pos); + if (pos >= 0 && pos < size) { + return new IntegerValue(start + pos); 
} return null; } + @Override + public boolean contains(final Item item) { + if (item instanceof IntegerValue) { + final long val = ((IntegerValue) item).getLong(); + return val >= start && val <= end; + } + return false; + } + + @Override + public boolean containsReference(final Item item) { + return false; // primitives don't have reference identity + } + @Override public NodeSet toNodeSet() throws XPathException { throw new XPathException(this, "Type error: the sequence cannot be converted into" + @@ -211,37 +236,7 @@ public void removeDuplicates() { } @Override - public boolean containsReference(final Item item) { - return start == item || end == item; - } - - @Override - public boolean contains(final Item item) { - if (item instanceof IntegerValue) { - try { - final BigInteger other = item.toJavaObject(BigInteger.class); - return other.compareTo(start.toJavaObject(BigInteger.class)) >= 0 - && other.compareTo(end.toJavaObject(BigInteger.class)) <= 0; - } catch (final XPathException e) { - LOG.warn(e.getMessage(), e); - return false; - } - } - return false; + public String toString() { + return "Range(" + start + " to " + end + ")"; } - - /** - * Generates a string representation of the Range Sequence. - * - * Range sequences can potentially be - * very large, so we generate a summary here - * rather than evaluating to generate a (possibly) - * huge sequence of objects. - * - * @return a string representation of the range sequence. 
- */ - @Override - public String toString() { - return "Range(" + start + " to " + end + ")"; - } } diff --git a/exist-core/src/main/java/org/exist/xquery/RecordTypeCheck.java b/exist-core/src/main/java/org/exist/xquery/RecordTypeCheck.java new file mode 100644 index 00000000000..927ee022a82 --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/RecordTypeCheck.java @@ -0,0 +1,230 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.persistent.DocumentSet; +import org.exist.xquery.functions.map.AbstractMapType; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.*; + +/** + * Runtime check that a function argument matches a declared record type. + * + *

<p>When a function parameter is declared as {@code $param as record(name as xs:string, ...)}, + * the argument expression is wrapped in a {@code RecordTypeCheck}. At runtime, + * the check verifies the argument is a map that matches all required fields and + * field types declared in the {@link RecordType}.</p>
+ * + *
<p>Modeled on {@link DynamicTypeCheck} and {@link FunctionTypeCheck}.</p>

+ */ +public class RecordTypeCheck extends AbstractExpression { + + private final Expression expression; + private final RecordType recordType; + + public RecordTypeCheck(final XQueryContext context, final RecordType recordType, final Expression expr) { + super(context); + this.recordType = recordType; + this.expression = expr; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + expression.analyze(contextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence seq = expression.eval(contextSequence, contextItem); + + if (seq.isEmpty()) { + return seq; + } + + // Single item: coerce directly + if (seq.getItemCount() == 1) { + return coerce(seq.itemAt(0)); + } + + // Multiple items: coerce each + final ValueSequence result = new ValueSequence(seq.getItemCount()); + for (final SequenceIterator i = seq.iterate(); i.hasNext(); ) { + final Sequence coerced = coerce(i.nextItem()); + result.addAll(coerced); + } + return result; + } + + /** + * Coerce a single item to this record type. + * + *

<p>Per XQuery 4.0, record coercion:</p> + *
<ul> + *
<li>Validates that the item is a map with all required fields</li> + *
<li>Drops undeclared fields (non-extensible records)</li> + *
<li>Coerces field values to declared types</li> + *
<li>Builds the result map with fields in declaration order</li> + *
</ul>

+ */ + private Sequence coerce(final Item item) throws XPathException { + if (!Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + "Expected " + recordType + " but got " + Type.getTypeName(item.getType())); + } + + final AbstractMapType sourceMap = (AbstractMapType) item; + final java.util.List fields = recordType.getFieldDeclarations(); + + // Build a new map with only declared fields, in declaration order + final MapType coercedMap = new MapType(expression, context); + + for (final RecordType.FieldDeclaration field : fields) { + final StringValue key = new StringValue(expression, field.getName()); + final Sequence value = sourceMap.get(key); + + if (value == null || value.isEmpty()) { + if (!field.isOptional()) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + "Missing required field '" + field.getName() + "' in " + recordType); + } + // Optional field not present — omit from result + continue; + } + + // Coerce value to declared type if specified + if (field.getType() != null) { + final Sequence coerced = coerceFieldValue(field, value); + coercedMap.add(key, coerced); + } else { + coercedMap.add(key, value); + } + } + + return coercedMap; + } + + /** + * Coerce a field value to the declared type. 
+ */ + private Sequence coerceFieldValue(final RecordType.FieldDeclaration field, + final Sequence value) throws XPathException { + final SequenceType declaredType = field.getType(); + final int targetType = declaredType.getPrimaryType(); + + // If already the right type, return as-is + if (Type.subTypeOf(value.getItemType(), targetType)) { + return value; + } + + // Attempt type promotion/casting for atomic types + if (Type.subTypeOf(targetType, Type.ANY_ATOMIC_TYPE) && value.getItemCount() > 0) { + final ValueSequence result = new ValueSequence(value.getItemCount()); + for (final SequenceIterator it = value.iterate(); it.hasNext(); ) { + final Item item = it.nextItem(); + if (item instanceof AtomicValue) { + try { + result.add(((AtomicValue) item).convertTo(targetType)); + } catch (final XPathException e) { + throw new XPathException(expression, ErrorCodes.XPTY0004, + "Cannot coerce field '" + field.getName() + "' value to " + + Type.getTypeName(targetType) + ": " + e.getMessage()); + } + } else { + result.add(item); + } + } + return result; + } + + return value; + } + + @Override + public int returnsType() { + return Type.MAP_ITEM; + } + + @Override + public int getDependencies() { + return expression.getDependencies(); + } + + @Override + public void dump(final ExpressionDumper dumper) { + if (dumper.verbosity() > 1) { + dumper.display("record-type-check["); + dumper.display(recordType.toString()); + dumper.display(", "); + } + expression.dump(dumper); + if (dumper.verbosity() > 1) { + dumper.display("]"); + } + } + + @Override + public String toString() { + return expression.toString(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + expression.resetState(postOptimization); + } + + @Override + public void setContextDocSet(final DocumentSet contextSet) { + super.setContextDocSet(contextSet); + expression.setContextDocSet(contextSet); + } + + @Override + public int getLine() { + return 
expression.getLine(); + } + + @Override + public int getColumn() { + return expression.getColumn(); + } + + @Override + public void accept(final ExpressionVisitor visitor) { + expression.accept(visitor); + } + + @Override + public int getSubExpressionCount() { + return 1; + } + + @Override + public Expression getSubExpression(final int index) { + if (index == 0) { + return expression; + } + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + getSubExpressionCount()); + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/SequenceConstructor.java b/exist-core/src/main/java/org/exist/xquery/SequenceConstructor.java index b03ada44fc6..4079ffb740c 100644 --- a/exist-core/src/main/java/org/exist/xquery/SequenceConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/SequenceConstructor.java @@ -57,6 +57,24 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException } } contextInfo.setStaticReturnType(staticType); + + // XUST0001: check compatibility of items in the comma expression. + // All must be updating, or all must be non-updating (vacuous items are allowed either way). 
+ if (steps.size() > 1) { + boolean hasUpdating = false; + boolean hasNonUpdating = false; + for (final Expression expr : steps) { + if (expr.isUpdating()) { + hasUpdating = true; + } else if (!expr.isVacuous()) { + hasNonUpdating = true; + } + } + if (hasUpdating && hasNonUpdating) { + throw new XPathException(this, ErrorCodes.XUST0001, + "comma expression mixes updating and non-updating expressions"); + } + } } @Override @@ -141,6 +159,17 @@ public void addPathIfNotFunction(final PathExpr path) throws XPathException { super.addPath(path); } + @Override + public boolean isUpdating() { + boolean anyUpdating = false; + for (final Expression step : steps) { + if (step.isUpdating()) { + anyUpdating = true; + } + } + return anyUpdating; + } + @Override public int returnsType() { return Type.ITEM; @@ -151,6 +180,16 @@ public Cardinality getCardinality() { return Cardinality.ZERO_OR_MORE; } + @Override + public boolean isVacuous() { + for (final Expression step : steps) { + if (!step.isVacuous()) { + return false; + } + } + return true; + } + @Override public boolean allowMixedNodesInReturn() { return true; diff --git a/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java b/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java index 682be4dfff1..260c5c6bda9 100644 --- a/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java +++ b/exist-core/src/main/java/org/exist/xquery/StaticXQueryException.java @@ -21,6 +21,8 @@ */ package org.exist.xquery; +import org.exist.xquery.ErrorCodes.ErrorCode; + public class StaticXQueryException extends XPathException { private static final long serialVersionUID = -8229758099980343418L; @@ -30,19 +32,19 @@ public StaticXQueryException(String message) { } public StaticXQueryException(final Expression expression, String message) { - super(expression, message); + super(expression, ErrorCodes.XPST0003, message); } public StaticXQueryException(int line, int column, String message) { - super(line, 
column, message); + super(line, column, ErrorCodes.XPST0003, message); } - + public StaticXQueryException(Throwable cause) { this((Expression) null, cause); } - + public StaticXQueryException(final Expression expression, Throwable cause) { - super(expression, cause); + super(expression, ErrorCodes.XPST0003, cause.getMessage(), cause); } public StaticXQueryException(String message, Throwable cause) { @@ -50,11 +52,20 @@ public StaticXQueryException(String message, Throwable cause) { } public StaticXQueryException(final Expression expression, String message, Throwable cause) { - super(expression, message, cause); + super(expression, ErrorCodes.XPST0003, message, cause); + } + + public StaticXQueryException(int line, int column, ErrorCode errorCode, String message) { + super(line, column, errorCode, message); + } + + public StaticXQueryException(int line, int column, ErrorCode errorCode, String message, Throwable cause) { + super(line, column, errorCode, message); + initCause(cause); } - //TODO add in ErrorCode and ErrorVal public StaticXQueryException(int line, int column, String message, Throwable cause) { - super(line, column, message, cause); + super(line, column, ErrorCodes.XPST0003, message); + initCause(cause); } } \ No newline at end of file diff --git a/exist-core/src/main/java/org/exist/xquery/StringConstructor.java b/exist-core/src/main/java/org/exist/xquery/StringConstructor.java index 3d725e63c66..ba3b0fce492 100644 --- a/exist-core/src/main/java/org/exist/xquery/StringConstructor.java +++ b/exist-core/src/main/java/org/exist/xquery/StringConstructor.java @@ -159,9 +159,13 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException public String eval(final Sequence contextSequence) throws XPathException { final Sequence result = expression.eval(contextSequence, null); + // Atomize the result per spec: string constructor interpolation + // atomizes its content, joining with spaces + final Sequence atomized = Atomize.atomize(result); 
+ final StringBuilder out = new StringBuilder(); boolean gotOne = false; - for(final SequenceIterator i = result.iterate(); i.hasNext(); ) { + for(final SequenceIterator i = atomized.iterate(); i.hasNext(); ) { final Item next = i.nextItem(); if (gotOne) { out.append(' '); diff --git a/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java b/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java index d75361bf784..351ca857228 100644 --- a/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/SwitchExpression.java @@ -56,11 +56,20 @@ public Case(List caseOperands, Expression caseClause) { private Expression operand; private Case defaultClause = null; private List cases = new ArrayList<>(5); - + private boolean booleanMode = false; + public SwitchExpression(XQueryContext context, Expression operand) { super(context); this.operand = operand; } + + /** + * Set boolean mode for XQ4 omitted comparand: switch () { case boolExpr return ... } + * In boolean mode, each case operand is evaluated and its effective boolean value determines the match. + */ + public void setBooleanMode(boolean booleanMode) { + this.booleanMode = booleanMode; + } /** * Add case clause(s) with a return. 
@@ -88,34 +97,58 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc if (contextItem != null) {contextSequence = contextItem.toSequence();} + + if (booleanMode) { + // XQ4 omitted comparand: evaluate each case operand as boolean + return evalBooleanMode(contextSequence, contextItem); + } + final Sequence opSeq = operand.eval(contextSequence, null); - Sequence result = null; + if (opSeq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch operand ", opSeq); + } + final Collator defaultCollator = context.getDefaultCollator(); if (opSeq.isEmpty()) { - result = defaultClause.returnClause.eval(contextSequence, null); + // XQ4: empty comparand can match case () (empty case operand) + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (caseSeq.isEmpty()) { + return next.returnClause.eval(contextSequence, null); + } + } + } } else { - if (opSeq.hasMany()) { - throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch operand ", opSeq); + final AtomicValue opVal = opSeq.itemAt(0).atomize(); + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (context.getXQueryVersion() <= 30 && caseSeq.hasMany()) { + throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch case operand ", caseSeq); + } + // XQ4: case operand may be a sequence; match if any item equals the comparand + for (int i = 0; i < caseSeq.getItemCount(); i++) { + final AtomicValue caseVal = caseSeq.itemAt(i).atomize(); + if (FunDeepEqual.deepEquals(caseVal, opVal, defaultCollator)) { + return next.returnClause.eval(contextSequence, null); + } + } + } } - final AtomicValue opVal = opSeq.itemAt(0).atomize(); - final Collator defaultCollator = context.getDefaultCollator(); 
- for (final Case next : cases) { - for (final Expression caseOperand : next.operands) { - final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); - if (caseSeq.hasMany()) { - throw new XPathException(this, ErrorCodes.XPTY0004, "Cardinality error in switch case operand ", caseSeq); - } - final AtomicValue caseVal = caseSeq.isEmpty() ? AtomicValue.EMPTY_VALUE : caseSeq.itemAt(0).atomize(); - if (FunDeepEqual.deepEquals(caseVal, opVal, defaultCollator)) { - return next.returnClause.eval(contextSequence, null); - } - } - } } - if (result == null) { - result = defaultClause.returnClause.eval(contextSequence, null); + return defaultClause.returnClause.eval(contextSequence, null); + } + + private Sequence evalBooleanMode(Sequence contextSequence, Item contextItem) throws XPathException { + for (final Case next : cases) { + for (final Expression caseOperand : next.operands) { + final Sequence caseSeq = caseOperand.eval(contextSequence, contextItem); + if (caseSeq.effectiveBooleanValue()) { + return next.returnClause.eval(contextSequence, null); + } + } } - - return result; + return defaultClause.returnClause.eval(contextSequence, null); } public int returnsType() { @@ -131,13 +164,67 @@ public Cardinality getCardinality() { return Cardinality.ZERO_OR_MORE; } + @Override + public boolean isUpdating() { + for (final Case c : cases) { + if (c.returnClause.isUpdating()) { + return true; + } + } + return defaultClause != null && defaultClause.returnClause.isUpdating(); + } + + @Override + public boolean isVacuous() { + for (final Case c : cases) { + if (!c.returnClause.isVacuous()) { + return false; + } + } + return defaultClause == null || defaultClause.returnClause.isVacuous(); + } + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { - contextInfo.setParent(this); - operand.analyze(contextInfo); + final AnalyzeContextInfo myContextInfo = new AnalyzeContextInfo(contextInfo); + myContextInfo.setParent(this); + + // Operand and case 
operands are non-updating contexts + final AnalyzeContextInfo operandInfo = new AnalyzeContextInfo(myContextInfo); + operandInfo.addFlag(NON_UPDATING_CONTEXT); + operand.analyze(operandInfo); for (final Case next : cases) { - next.returnClause.analyze(contextInfo); + for (final Expression caseOperand : next.operands) { + final AnalyzeContextInfo caseOpInfo = new AnalyzeContextInfo(myContextInfo); + caseOpInfo.addFlag(NON_UPDATING_CONTEXT); + caseOperand.analyze(caseOpInfo); + } + myContextInfo.setParent(this); + next.returnClause.analyze(myContextInfo); + } + myContextInfo.setParent(this); + defaultClause.returnClause.analyze(myContextInfo); + + // XUST0001: check branch compatibility + boolean hasUpdating = false; + boolean hasNonUpdating = false; + for (final Case c : cases) { + if (c.returnClause.isUpdating()) { + hasUpdating = true; + } else if (!c.returnClause.isVacuous()) { + hasNonUpdating = true; + } + } + if (defaultClause != null) { + if (defaultClause.returnClause.isUpdating()) { + hasUpdating = true; + } else if (!defaultClause.returnClause.isVacuous()) { + hasNonUpdating = true; + } + } + if (hasUpdating && hasNonUpdating) { + throw new XPathException(this, ErrorCodes.XUST0001, + "switch branches mix updating and non-updating expressions"); } - defaultClause.returnClause.analyze(contextInfo); } public void setContextDocSet(DocumentSet contextSet) { diff --git a/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java b/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java index ab90c1245a4..3cf503b72e1 100644 --- a/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/TreatAsExpression.java @@ -63,7 +63,7 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { expression = new DynamicCardinalityCheck(context, type.getCardinality(), expression, new Error("XPDY0050", type.toString())); - expression = new DynamicTypeCheck(context, type.getPrimaryType(), 
expression); + expression = new DynamicTypeCheck(context, type.getPrimaryType(), expression, ErrorCodes.XPDY0050); } public void dump(ExpressionDumper dumper) { diff --git a/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java b/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java index c11a2acf065..0712770b636 100644 --- a/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/TryCatchExpression.java @@ -63,6 +63,7 @@ public class TryCatchExpression extends AbstractExpression { private final Expression tryTargetExpr; private final List catchClauses = new ArrayList<>(); + private Expression finallyExpr; /** * Constructor. @@ -88,6 +89,10 @@ public void addCatchClause(final List catchErrorList, final List c catchClauses.add( new CatchClause(catchErrorList, catchVars, catchExpr) ); } + public void setFinallyExpr(final Expression finallyExpr) { + this.finallyExpr = finallyExpr; + } + @Override public int getDependencies() { return Dependency.CONTEXT_SET | Dependency.CONTEXT_ITEM; @@ -126,6 +131,9 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException for (final CatchClause catchClause : catchClauses) { catchClause.getCatchExpr().analyze(contextInfo); } + if (finallyExpr != null) { + finallyExpr.analyze(contextInfo); + } } finally { // restore the local variable stack context.popLocalVariables(mark); @@ -141,107 +149,136 @@ public Sequence eval(final Sequence contextSequence, final Item contextItem) thr throw new XPathException(this, ErrorCodes.EXXQDY0003, "The try-catch expression is only available in xquery version \"3.0\" and later."); } + Sequence result = null; + Throwable pendingError = null; + try { // Evaluate 'try' expression - final Sequence tryTargetSeq = tryTargetExpr.eval(contextSequence, contextItem); - return tryTargetSeq; + result = tryTargetExpr.eval(contextSequence, contextItem); - } catch (final Throwable throwable) { + } catch (final 
Throwable throwable) { - final ErrorCode errorCode; + // If no catch clauses (try/finally only), re-throw after finally + if (catchClauses.isEmpty()) { + pendingError = throwable; + } else { - // fn:error throws an XPathException - if(throwable instanceof XPathException xpe){ - // Get errorcode from nicely thrown xpathexception + final ErrorCode errorCode; - if(xpe.getErrorCode() != null) { - if(xpe.getErrorCode() == ErrorCodes.ERROR) { - errorCode = extractErrorCode(xpe); + // fn:error throws an XPathException + if (throwable instanceof XPathException xpe) { + // Get errorcode from nicely thrown xpathexception + + if (xpe.getErrorCode() != null) { + if (xpe.getErrorCode() == ErrorCodes.ERROR) { + errorCode = extractErrorCode(xpe); + } else { + errorCode = xpe.getErrorCode(); + } } else { - errorCode = xpe.getErrorCode(); + // if no errorcode is found, reconstruct by parsing the error text. + errorCode = extractErrorCode(xpe); } } else { - // if no errorcode is found, reconstruct by parsing the error text. - errorCode = extractErrorCode(xpe); + // Get errorcode from all other errors and exceptions + errorCode = new JavaErrorCode(throwable); } - } else { - // Get errorcode from all other errors and exceptions - errorCode = new JavaErrorCode(throwable); - } - // We need the qname in the end - final QName errorCodeQname = errorCode.getErrorQName(); - - // Exception in thrown, catch expression will be evaluated. - // catchvars (CatchErrorCode (, CatchErrorDesc (, CatchErrorVal)?)? ) - // need to be retrieved as variables - Sequence catchResultSeq = null; - final LocalVariable mark0 = context.markLocalVariables(false); // DWES: what does this do? 
- - // DWES: should I use popLocalVariables - context.declareInScopeNamespace(Namespaces.W3C_XQUERY_XPATH_ERROR_PREFIX, Namespaces.W3C_XQUERY_XPATH_ERROR_NS); - context.declareInScopeNamespace(Namespaces.EXIST_XQUERY_XPATH_ERROR_PREFIX, Namespaces.EXIST_XQUERY_XPATH_ERROR_NS); - - //context.declareInScopeNamespace(null, null); - - try { - // flag used to escape loop when errorcode has matched - boolean errorMatched = false; - - // Iterate on all catch clauses - for (final CatchClause catchClause : catchClauses) { - - if (isErrorInList(errorCodeQname, catchClause.getCatchErrorList()) && !errorMatched) { - - errorMatched = true; - - // Get catch variables - final LocalVariable mark1 = context.markLocalVariables(false); // DWES: what does this do? - - try { - // Add std errors - addErrCode(errorCodeQname); - addErrDescription(throwable, errorCode); - addErrValue(throwable); - addErrModule(throwable); - addErrLineNumber(throwable); - addErrColumnNumber(throwable); - addErrAdditional(throwable); - addFunctionTrace(throwable); - addJavaTrace(throwable); - - // Evaluate catch expression - catchResultSeq = ((Expression) catchClause.getCatchExpr()).eval(contextSequence, contextItem); - - - } finally { - context.popLocalVariables(mark1, catchResultSeq); + // We need the qname in the end + final QName errorCodeQname = errorCode.getErrorQName(); + + // Exception in thrown, catch expression will be evaluated. + // catchvars (CatchErrorCode (, CatchErrorDesc (, CatchErrorVal)?)? 
) + // need to be retrieved as variables + Sequence catchResultSeq = null; + final LocalVariable mark0 = context.markLocalVariables(false); + + context.declareInScopeNamespace(Namespaces.W3C_XQUERY_XPATH_ERROR_PREFIX, Namespaces.W3C_XQUERY_XPATH_ERROR_NS); + context.declareInScopeNamespace(Namespaces.EXIST_XQUERY_XPATH_ERROR_PREFIX, Namespaces.EXIST_XQUERY_XPATH_ERROR_NS); + + try { + // flag used to escape loop when errorcode has matched + boolean errorMatched = false; + + // Iterate on all catch clauses + for (final CatchClause catchClause : catchClauses) { + + if (isErrorInList(errorCodeQname, catchClause.getCatchErrorList()) && !errorMatched) { + + errorMatched = true; + + // Get catch variables + final LocalVariable mark1 = context.markLocalVariables(false); + + try { + // Add std errors + addErrCode(errorCodeQname); + addErrDescription(throwable, errorCode); + addErrValue(throwable); + addErrModule(throwable); + addErrLineNumber(throwable); + addErrColumnNumber(throwable); + addErrAdditional(throwable); + addFunctionTrace(throwable); + addJavaTrace(throwable); + + // Evaluate catch expression + catchResultSeq = ((Expression) catchClause.getCatchExpr()).eval(contextSequence, contextItem); + + + } finally { + context.popLocalVariables(mark1, catchResultSeq); + } + + } else { + // if in the end nothing is set, rethrow after loop } + } // for catch clauses + // If an error hasn't been caught, store for re-throw after finally + if (!errorMatched) { + pendingError = throwable; } else { - // if in the end nothing is set, rethrow after loop + result = catchResultSeq; } - } // for catch clauses - // If an error hasn't been caught, throw new one - if (!errorMatched) { - if (throwable instanceof XPathException) { - throw throwable; - } else { - LOG.error(throwable); - throw new XPathException(this, throwable); + } finally { + context.popLocalVariables(mark0, catchResultSeq); + } + } + } finally { + // XQ4: Evaluate finally clause (always, even if try/catch succeeded or 
failed) + if (finallyExpr != null) { + try { + final Sequence finallyResult = finallyExpr.eval(contextSequence, contextItem); + // If finally produces a non-empty sequence, raise XQTY0153 + if (finallyResult != null && !finallyResult.isEmpty()) { + throw new XPathException(this, ErrorCodes.XQTY0153, + "The finally clause must evaluate to an empty sequence, got " + + finallyResult.getItemCount() + " item(s)"); } + } catch (final XPathException finallyError) { + // Finally error replaces any pending error or result + context.expressionEnd(this); + throw finallyError; } - - } finally { - context.popLocalVariables(mark0, catchResultSeq); } - return catchResultSeq; + // Re-throw pending error from try body (if not caught) + if (pendingError != null) { + context.expressionEnd(this); + if (pendingError instanceof XPathException) { + throw (XPathException) pendingError; + } else { + LOG.error(pendingError); + throw new XPathException(this, pendingError); + } + } - } finally { context.expressionEnd(this); } + + return result; } @@ -384,6 +421,13 @@ public void dump(final ExpressionDumper dumper) { dumper.nl().display("}"); dumper.endIndent(); } + if (finallyExpr != null) { + dumper.nl().display("} finally {"); + dumper.startIndent(); + finallyExpr.dump(dumper); + dumper.nl().display("}"); + dumper.endIndent(); + } } /** @@ -428,6 +472,11 @@ public String toString() { result.append(catchExpr.toString()); result.append("}"); } + if (finallyExpr != null) { + result.append(" finally { "); + result.append(finallyExpr.toString()); + result.append("}"); + } return result.toString(); } @@ -436,8 +485,10 @@ public String toString() { */ @Override public int returnsType() { - // fixme! 
/ljo - return ((Expression) catchClauses.getFirst().getCatchExpr()).returnsType(); + if (!catchClauses.isEmpty()) { + return ((Expression) catchClauses.getFirst().getCatchExpr()).returnsType(); + } + return tryTargetExpr.returnsType(); } /* (non-Javadoc) @@ -451,6 +502,9 @@ public void resetState(final boolean postOptimization) { final Expression catchExpr = (Expression) catchClause.getCatchExpr(); catchExpr.resetState(postOptimization); } + if (finallyExpr != null) { + finallyExpr.resetState(postOptimization); + } } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/TypeswitchExpression.java b/exist-core/src/main/java/org/exist/xquery/TypeswitchExpression.java index edfc79469db..53dbcaac8b3 100644 --- a/exist-core/src/main/java/org/exist/xquery/TypeswitchExpression.java +++ b/exist-core/src/main/java/org/exist/xquery/TypeswitchExpression.java @@ -161,12 +161,37 @@ public Cardinality getCardinality() { return Cardinality.ZERO_OR_MORE; } + @Override + public boolean isUpdating() { + for (final Case c : cases) { + if (c.returnClause.isUpdating()) { + return true; + } + } + return defaultClause != null && defaultClause.returnClause.isUpdating(); + } + + @Override + public boolean isVacuous() { + for (final Case c : cases) { + if (!c.returnClause.isVacuous()) { + return false; + } + } + return defaultClause == null || defaultClause.returnClause.isVacuous(); + } + public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { - contextInfo.setParent(this); - operand.analyze(contextInfo); - + final AnalyzeContextInfo myContextInfo = new AnalyzeContextInfo(contextInfo); + myContextInfo.setParent(this); + + // Operand is a non-updating context + final AnalyzeContextInfo operandInfo = new AnalyzeContextInfo(myContextInfo); + operandInfo.addFlag(NON_UPDATING_CONTEXT); + operand.analyze(operandInfo); + final LocalVariable mark0 = context.markLocalVariables(false); - + try { for (final Case next : cases) { final LocalVariable mark1 = 
context.markLocalVariables(false); @@ -178,7 +203,8 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { } context.declareVariableBinding(var); } - next.returnClause.analyze(contextInfo); + myContextInfo.setParent(this); + next.returnClause.analyze(myContextInfo); } finally { context.popLocalVariables(mark1); } @@ -187,10 +213,34 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { final LocalVariable var = new LocalVariable(defaultClause.variable); context.declareVariableBinding(var); } - defaultClause.returnClause.analyze(contextInfo); + myContextInfo.setParent(this); + defaultClause.returnClause.analyze(myContextInfo); } finally { context.popLocalVariables(mark0); } + + // XUST0001: check branch compatibility + // All branches must be either all updating, all non-updating, or vacuous + boolean hasUpdating = false; + boolean hasNonUpdating = false; + for (final Case c : cases) { + if (c.returnClause.isUpdating()) { + hasUpdating = true; + } else if (!c.returnClause.isVacuous()) { + hasNonUpdating = true; + } + } + if (defaultClause != null) { + if (defaultClause.returnClause.isUpdating()) { + hasUpdating = true; + } else if (!defaultClause.returnClause.isVacuous()) { + hasNonUpdating = true; + } + } + if (hasUpdating && hasNonUpdating) { + throw new XPathException(this, ErrorCodes.XUST0001, + "typeswitch branches mix updating and non-updating expressions"); + } } @Override diff --git a/exist-core/src/main/java/org/exist/xquery/UntypedValueCheck.java b/exist-core/src/main/java/org/exist/xquery/UntypedValueCheck.java index 5e0775574f7..15362a1e7be 100644 --- a/exist-core/src/main/java/org/exist/xquery/UntypedValueCheck.java +++ b/exist-core/src/main/java/org/exist/xquery/UntypedValueCheck.java @@ -114,7 +114,10 @@ private Item convert(Item item) throws XPathException { if (Type.subTypeOf(item.getType(), requiredType)) { return item; } - if (item.getType() == Type.INTEGER && requiredType == Type.POSITIVE_INTEGER) { 
+ // In XQuery 3.1, reject integer→positiveInteger conversion. + // In XQuery 4.0, relabeling allows this if the value is positive (§3.4.1 item 6). + if (item.getType() == Type.INTEGER && requiredType == Type.POSITIVE_INTEGER + && context.getXQueryVersion() < 40) { throw new XPathException(this, ErrorCodes.FORG0001, "cannot convert '" + Type.getTypeName(item.getType()) diff --git a/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java b/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java index a56db1a200b..e3872892656 100644 --- a/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java +++ b/exist-core/src/main/java/org/exist/xquery/UserDefinedFunction.java @@ -24,8 +24,13 @@ import org.exist.dom.persistent.DocumentSet; import org.exist.dom.QName; import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.FunctionParameterSequenceType; import org.exist.xquery.value.Item; import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceIterator; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.Type; +import org.exist.xquery.functions.map.AbstractMapType; import java.util.ArrayList; import java.util.List; @@ -44,6 +49,7 @@ public class UserDefinedFunction extends Function implements Cloneable { private FunctionCall call; private boolean hasBeenReset = false; private List closureVariables = null; + private boolean passContextToBody = false; public UserDefinedFunction(XQueryContext context, FunctionSignature signature) { super(context, signature); @@ -57,6 +63,17 @@ public void setFunctionBody(Expression body) { this.body = body.simplify(); } + /** + * Mark this UDF as a wrapper for an internal function (created by + * {@link FunctionFactory#wrap}). Wrapper functions pass the evaluation + * context through to their body so that context-dependent built-in + * functions (like fn:id, fn:idref, fn:string, etc.) can access the + * focus when called via function references. 
+ */ + public void setPassContextToBody(boolean passContext) { + this.passContextToBody = passContext; + } + public void addVariable(final String varName) throws XPathException { try { final QName qname = QName.parse(context, varName, null); @@ -101,7 +118,22 @@ public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { newContextInfo.setParent(this); if (!bodyAnalyzed) { if (body != null) { + if (!getSignature().isUpdating()) { + // Non-updating function body: updating expressions not allowed + newContextInfo.addFlag(NON_UPDATING_CONTEXT); + } else { + // Updating function body: updating expressions are allowed + newContextInfo.removeFlag(NON_UPDATING_CONTEXT); + } body.analyze(newContextInfo); + + // XUST0002: updating function body must be updating (or vacuous) + if (getSignature().isUpdating() && !body.isUpdating() + && !body.isVacuous()) { + throw new XPathException(this, ErrorCodes.XUST0002, + "body of updating function " + getName() + + " must be an updating expression or an empty sequence"); + } } bodyAnalyzed = true; } @@ -125,34 +157,80 @@ public Sequence eval(Sequence contextSequence, Item contextItem) throws XPathExc } Sequence result = null; try { - QName varName; - LocalVariable var; - int j = 0; - for (int i = 0; i < parameters.size(); i++, j++) { - varName = parameters.get(i); - var = new LocalVariable(varName); - var.setValue(currentArguments[j]); - if (contextDocs != null) { + final SequenceType[] argTypes = getSignature().getArgumentTypes(); + + // Evaluate all argument values first, BEFORE declaring any parameters. + // Default value expressions must be evaluated in the prolog's variable scope, + // not the function body scope (XQ4 spec: default sees variables in scope at + // the function declaration, not other parameters). Context is passed so that + // default values like "." can access the context item at the call site. 
+ final Sequence[] argValues = new Sequence[parameters.size()]; + for (int i = 0; i < parameters.size(); i++) { + if (i < currentArguments.length) { + argValues[i] = currentArguments[i]; + } else if (argTypes[i] instanceof FunctionParameterSequenceType && + ((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + argValues[i] = ((FunctionParameterSequenceType) argTypes[i]) + .getDefaultValue().eval(contextSequence, contextItem); + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Missing required argument $" + parameters.get(i)); + } + } + + // Now declare all parameters with their resolved values + for (int i = 0; i < parameters.size(); i++) { + final QName varName = parameters.get(i); + final LocalVariable var = new LocalVariable(varName); + + var.setValue(argValues[i]); + if (contextDocs != null && i < contextDocs.length) { var.setContextDocs(contextDocs[i]); } context.declareVariableBinding(var); Cardinality actualCardinality; - if (currentArguments[j].isEmpty()) { + if (argValues[i].isEmpty()) { actualCardinality = Cardinality.EMPTY_SEQUENCE; - } else if (currentArguments[j].hasMany()) { + } else if (argValues[i].hasMany()) { actualCardinality = Cardinality._MANY; } else { actualCardinality = Cardinality.EXACTLY_ONE; } - if (!getSignature().getArgumentTypes()[j].getCardinality().isSuperCardinalityOrEqualOf(actualCardinality)) { + if (!argTypes[i].getCardinality().isSuperCardinalityOrEqualOf(actualCardinality)) { throw new XPathException(this, ErrorCodes.XPTY0004, "Invalid cardinality for parameter $" + varName + - ". Expected " + getSignature().getArgumentTypes()[j].getCardinality().getHumanDescription() + - ", got " + currentArguments[j].getItemCount()); + ". 
Expected " + argTypes[i].getCardinality().getHumanDescription() + + ", got " + argValues[i].getItemCount()); } + + // XQuery 4.0: record type validation at runtime + final SequenceType argType = argTypes[i]; + if (argType.isRecordType() && argType.getRecordType() != null && !argValues[i].isEmpty()) { + for (final SequenceIterator iter = argValues[i].iterate(); iter.hasNext(); ) { + final Item item = iter.nextItem(); + if (Type.subTypeOf(item.getType(), Type.MAP_ITEM)) { + if (!argType.getRecordType().matches((AbstractMapType) item)) { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Argument $" + varName + " does not match " + argType.getRecordType()); + } + } else { + throw new XPathException(this, ErrorCodes.XPTY0004, + "Argument $" + varName + " expected " + argType.getRecordType() + + " but got " + Type.getTypeName(item.getType())); + } + } + } + } + // For wrapper functions (created by FunctionFactory.wrap for internal + // function references), pass the context through so context-dependent + // built-in functions can access the focus. For regular user-declared + // functions, the focus is absent per the XQuery spec. 
+ if (passContextToBody) { + result = body.eval(contextSequence, contextItem); + } else { + result = body.eval(null, null); } - result = body.eval(null, null); return result; } finally { // restore the local variable stack diff --git a/exist-core/src/main/java/org/exist/xquery/ValueComparison.java b/exist-core/src/main/java/org/exist/xquery/ValueComparison.java index 80c916a3f26..a870226b665 100644 --- a/exist-core/src/main/java/org/exist/xquery/ValueComparison.java +++ b/exist-core/src/main/java/org/exist/xquery/ValueComparison.java @@ -77,6 +77,10 @@ protected Sequence genericCompare(Sequence contextSequence, Item contextItem) th if (ls.hasOne() && rs.hasOne()) { final AtomicValue lv = ls.itemAt(0).atomize(); final AtomicValue rv = rs.itemAt(0).atomize(); + // Propagate expression context to atomized values so version-gated + // comparisons (e.g., xs:duration ordering) can check the XQuery version + if (lv.getExpression() == null) { lv.setExpression(this); } + if (rv.getExpression() == null) { rv.setExpression(this); } final Collator collator = getCollator(contextSequence); return BooleanValue.valueOf(compareAtomic(collator, lv, rv, StringTruncationOperator.NONE, relation)); } diff --git a/exist-core/src/main/java/org/exist/xquery/VariableDeclaration.java b/exist-core/src/main/java/org/exist/xquery/VariableDeclaration.java index f92f55ad378..ab839aa6af5 100644 --- a/exist-core/src/main/java/org/exist/xquery/VariableDeclaration.java +++ b/exist-core/src/main/java/org/exist/xquery/VariableDeclaration.java @@ -44,6 +44,7 @@ public class VariableDeclaration extends AbstractExpression implements Rewritabl Optional expression; SequenceType sequenceType = null; boolean analyzeDone = false; + private boolean isPrivate = false; public VariableDeclaration(final XQueryContext context, final QName qname, final Expression expr) { super(context); @@ -68,6 +69,14 @@ public SequenceType getSequenceType() { return sequenceType; } + public void setPrivate(final boolean isPrivate) 
{ + this.isPrivate = isPrivate; + } + + public boolean isPrivate() { + return isPrivate; + } + @Override public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { contextInfo.setParent(this); @@ -120,7 +129,10 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException */ public void analyzeExpression(final AnalyzeContextInfo contextInfo) throws XPathException { if (expression.isPresent()) { - expression.get().analyze(contextInfo); + // Variable initializers are non-updating contexts + final AnalyzeContextInfo exprInfo = new AnalyzeContextInfo(contextInfo); + exprInfo.addFlag(NON_UPDATING_CONTEXT); + expression.get().analyze(exprInfo); } } diff --git a/exist-core/src/main/java/org/exist/xquery/VariableImpl.java b/exist-core/src/main/java/org/exist/xquery/VariableImpl.java index c5a600c8ae2..ebd3c31698e 100644 --- a/exist-core/src/main/java/org/exist/xquery/VariableImpl.java +++ b/exist-core/src/main/java/org/exist/xquery/VariableImpl.java @@ -120,26 +120,26 @@ public void setSequenceType(SequenceType type) throws XPathException { else {actualCardinality = Cardinality.EXACTLY_ONE;} //Type.EMPTY is *not* a subtype of other types ; checking cardinality first if (!getSequenceType().getCardinality().isSuperCardinalityOrEqualOf(actualCardinality)) - {throw new XPathException(getValue(), "XPTY0004: Invalid cardinality for variable $" + getQName() + + {throw new XPathException(getValue(), ErrorCodes.XPTY0004, "Invalid cardinality for variable $" + getQName() + ". 
Expected " + getSequenceType().getCardinality().getHumanDescription() + ", got " + actualCardinality.getHumanDescription());} //TODO : ignore nodes right now ; they are returned as xs:untypedAtomicType if (!Type.subTypeOf(getSequenceType().getPrimaryType(), Type.NODE)) { if (!getValue().isEmpty() && !Type.subTypeOf(getValue().getItemType(), getSequenceType().getPrimaryType())) - {throw new XPathException(getValue(), "XPTY0004: Invalid type for variable $" + getQName() + + {throw new XPathException(getValue(), ErrorCodes.XPTY0004, "Invalid type for variable $" + getQName() + ". Expected " + Type.getTypeName(getSequenceType().getPrimaryType()) + ", got " +Type.getTypeName(getValue().getItemType()));} //Here is an attempt to process the nodes correctly } else { - //Same as above : we probably may factorize + //Same as above : we probably may factorize if (!getValue().isEmpty() && !Type.subTypeOf(getValue().getItemType(), getSequenceType().getPrimaryType())) - {throw new XPathException(getValue(), "XPTY0004: Invalid type for variable $" + getQName() + + {throw new XPathException(getValue(), ErrorCodes.XPTY0004, "Invalid type for variable $" + getQName() + ". 
Expected " + Type.getTypeName(getSequenceType().getPrimaryType()) + ", got " +Type.getTypeName(getValue().getItemType()));} - + } } diff --git a/exist-core/src/main/java/org/exist/xquery/VariableReference.java b/exist-core/src/main/java/org/exist/xquery/VariableReference.java index 41c87ba7a99..2beb0ccb292 100644 --- a/exist-core/src/main/java/org/exist/xquery/VariableReference.java +++ b/exist-core/src/main/java/org/exist/xquery/VariableReference.java @@ -64,7 +64,7 @@ public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException "Variable '$" + qname + "' is not declared."); } if (!var.isInitialized()) { - throw new XPathException(this, ErrorCodes.XQST0054, + throw new XPathException(this, ErrorCodes.XQDY0054, "variable declaration of '$" + qname + "' cannot " + "be executed because of a circularity."); } diff --git a/exist-core/src/main/java/org/exist/xquery/WhereClause.java b/exist-core/src/main/java/org/exist/xquery/WhereClause.java index 47178b7045a..21913278d53 100644 --- a/exist-core/src/main/java/org/exist/xquery/WhereClause.java +++ b/exist-core/src/main/java/org/exist/xquery/WhereClause.java @@ -59,7 +59,7 @@ public Expression getWhereExpr() { public void analyze(AnalyzeContextInfo contextInfo) throws XPathException { contextInfo.setParent(this); AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); - newContextInfo.setFlags(contextInfo.getFlags() | IN_PREDICATE | IN_WHERE_CLAUSE); + newContextInfo.setFlags(contextInfo.getFlags() | IN_PREDICATE | IN_WHERE_CLAUSE | NON_UPDATING_CONTEXT); newContextInfo.setContextId(getExpressionId()); whereExpr.analyze(newContextInfo); diff --git a/exist-core/src/main/java/org/exist/xquery/WhileClause.java b/exist-core/src/main/java/org/exist/xquery/WhileClause.java new file mode 100644 index 00000000000..e2c9d7a41df --- /dev/null +++ b/exist-core/src/main/java/org/exist/xquery/WhileClause.java @@ -0,0 +1,136 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 
The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery; + +import org.exist.dom.QName; +import org.exist.xquery.util.ExpressionDumper; +import org.exist.xquery.value.Item; +import org.exist.xquery.value.Sequence; + +import java.util.HashSet; +import java.util.Set; + +/** + * Implements the XQuery 4.0 while clause in FLWOR expressions. + * + *

<p>The while clause evaluates a condition for each tuple in the stream. + * If the condition is true, the tuple is retained; if false, the tuple + * and all subsequent tuples are discarded (iteration stops).</p>

+ */ +public class WhileClause extends AbstractFLWORClause { + + /** + * Thread-local flag that signals all enclosing binding expressions + * in the same FLWOR to stop iteration after the current item. + */ + private static final ThreadLocal terminated = ThreadLocal.withInitial(() -> false); + + private final Expression whileExpr; + + /** + * Lightweight control-flow exception used to signal the immediately + * enclosing for/let binding expression to stop iteration. + */ + public static class WhileTerminationException extends XPathException { + public WhileTerminationException() { + super((Expression) null, "while clause terminated"); + } + } + + public static boolean isTerminated() { + return terminated.get(); + } + + public static void clearTerminated() { + terminated.set(false); + } + + public WhileClause(final XQueryContext context, final Expression whileExpr) { + super(context); + this.whileExpr = whileExpr; + } + + @Override + public ClauseType getType() { + return ClauseType.WHILE; + } + + public Expression getWhileExpr() { + return whileExpr; + } + + @Override + public void analyze(final AnalyzeContextInfo contextInfo) throws XPathException { + contextInfo.setParent(this); + final AnalyzeContextInfo newContextInfo = new AnalyzeContextInfo(contextInfo); + newContextInfo.setFlags(contextInfo.getFlags() | IN_PREDICATE | IN_WHERE_CLAUSE); + newContextInfo.setContextId(getExpressionId()); + whileExpr.analyze(newContextInfo); + + final AnalyzeContextInfo returnContextInfo = new AnalyzeContextInfo(contextInfo); + returnContextInfo.addFlag(SINGLE_STEP_EXECUTION); + returnExpr.analyze(returnContextInfo); + } + + @Override + public Sequence eval(final Sequence contextSequence, final Item contextItem) throws XPathException { + final Sequence condResult = whileExpr.eval(null, null); + if (condResult.effectiveBooleanValue()) { + return returnExpr.eval(null, null); + } + terminated.set(true); + throw new WhileTerminationException(); + } + + @Override + public Sequence 
postEval(final Sequence seq) throws XPathException { + if (returnExpr instanceof FLWORClause flworClause) { + return flworClause.postEval(seq); + } + return super.postEval(seq); + } + + @Override + public void dump(final ExpressionDumper dumper) { + dumper.display("while", whileExpr.getLine()); + dumper.startIndent(); + whileExpr.dump(dumper); + dumper.endIndent().nl(); + } + + @Override + public void resetState(final boolean postOptimization) { + super.resetState(postOptimization); + whileExpr.resetState(postOptimization); + returnExpr.resetState(postOptimization); + } + + @Override + public Set getTupleStreamVariables() { + final Set vars = new HashSet<>(); + final LocalVariable startVar = getStartVariable(); + if (startVar != null) { + vars.add(startVar.getQName()); + } + return vars; + } +} diff --git a/exist-core/src/main/java/org/exist/xquery/XQuery.java b/exist-core/src/main/java/org/exist/xquery/XQuery.java index 5eba728708b..de3a8bf139b 100644 --- a/exist-core/src/main/java/org/exist/xquery/XQuery.java +++ b/exist-core/src/main/java/org/exist/xquery/XQuery.java @@ -45,6 +45,7 @@ import org.exist.source.Source; import org.exist.source.StringSource; import org.exist.storage.DBBroker; +import org.exist.util.LockException; import org.exist.xquery.parser.XQueryLexer; import org.exist.xquery.parser.XQueryParser; import org.exist.xquery.parser.XQueryTreeParser; @@ -195,12 +196,28 @@ public CompiledXQuery compile(final XQueryContext context, final Source source, * @throws XPathException if an error occurs during compilation * @throws PermissionDeniedException if the caller is not permitted to compile the XQuery */ + /** + * System property to select the XQuery parser implementation. + * Set to "rd" to use the hand-written recursive descent parser. + * Default is "antlr2" (the ANTLR 2 generated parser). 
+ */ + public static final String PROPERTY_PARSER = "exist.parser"; + + public static boolean useRdParser() { + return "rd".equalsIgnoreCase(System.getProperty(PROPERTY_PARSER, "antlr2")); + } + private CompiledXQuery compile(final XQueryContext context, final Reader reader, final boolean xpointer) throws XPathException, PermissionDeniedException { //check read permission if (context.getSource() instanceof DBSource) { ((DBSource) context.getSource()).validate(Permission.READ); } + + // Feature flag: use hand-written recursive descent parser if enabled + if (useRdParser() && !xpointer) { + return compileWithRdParser(context, reader); + } //TODO: move XQueryContext.getUserFromHttpSession() here, have to check if servlet.jar is in the classpath @@ -288,7 +305,7 @@ private CompiledXQuery compile(final XQueryContext context, final Reader reader, if (msg.endsWith(", found 'null'")) { msg = msg.substring(0, msg.length() - ", found 'null'".length()); } - throw new StaticXQueryException(e.getLine(), e.getColumn(), msg); + throw new StaticXQueryException(e.getLine(), e.getColumn(), ErrorCodes.XPST0003, msg); } catch(final TokenStreamException e) { final String es = e.toString(); if(es.matches("^line \\d+:\\d+:.+")) { @@ -298,7 +315,7 @@ private CompiledXQuery compile(final XQueryContext context, final Reader reader, final int line = Integer.parseInt(es.substring(5, es.indexOf(':'))); final String tmpColumn = es.substring(es.indexOf(':') + 1); final int column = Integer.parseInt(tmpColumn.substring(0, tmpColumn.indexOf(':'))); - throw new StaticXQueryException(line, column, e.getMessage(), e); + throw new StaticXQueryException(line, column, ErrorCodes.XPST0003, e.getMessage(), e); } else { if (LOG.isDebugEnabled()) { LOG.debug("Error compiling query: {}", e.getMessage(), e); @@ -316,6 +333,60 @@ private CompiledXQuery compile(final XQueryContext context, final Reader reader, * * @return true if this is a library module, false otherwise */ + private CompiledXQuery 
compileWithRdParser(final XQueryContext context, final Reader reader) + throws XPathException { + final long start = System.currentTimeMillis(); + try { + final String source = readFully(reader); + final org.exist.xquery.parser.next.XQueryParser rdParser = + new org.exist.xquery.parser.next.XQueryParser(context, source); + + final Expression rootExpr = rdParser.parse(); + + // Set root expression on context — required for resetState() during concurrent execution + context.setRootExpression(rootExpr); + context.getRootContext().resolveForwardReferences(); + + // For library modules, return LibraryModuleRoot so execute() can + // dispatch function calls by name (triggers, fn:load-xquery-module) + final PathExpr result; + if (rdParser.isLibraryModule()) { + result = new LibraryModuleRoot(context); + if (rootExpr instanceof PathExpr) { + for (int i = 0; i < ((PathExpr) rootExpr).getLength(); i++) { + result.add(((PathExpr) rootExpr).getExpression(i)); + } + } + } else if (rootExpr instanceof PathExpr) { + result = (PathExpr) rootExpr; + } else { + result = new PathExpr(context); + result.add(rootExpr); + } + + context.analyzeAndOptimizeIfModulesChanged(result); + + if (LOG.isDebugEnabled()) { + final NumberFormat nf = NumberFormat.getNumberInstance(); + LOG.debug("Recursive descent parser compilation took {} ms", nf.format(System.currentTimeMillis() - start)); + } + + return result; + } catch (final IOException e) { + throw new XPathException(context.getRootExpression(), "Error reading query source: " + e.getMessage(), e); + } + } + + private static String readFully(final Reader reader) throws IOException { + final StringBuilder sb = new StringBuilder(4096); + final char[] buf = new char[4096]; + int n; + while ((n = reader.read(buf)) != -1) { + sb.append(buf, 0, n); + } + return sb.toString(); + } + static boolean isLibraryModule(AST ast) { while (ast != null) { if (ast.getType() == XQueryTreeParser.MODULE_DECL) { @@ -381,7 +452,10 @@ public Sequence execute(final 
DBBroker broker, final CompiledXQuery expression, //do any preparation before execution context.prepareForExecution(); - + + // BaseX-style preclaiming: collect lock targets from compiled expression tree + context.collectLockTargets(context.getRootExpression()); + final Subject callingUser = broker.getCurrentSubject(); //if setUid or setGid, become Effective User @@ -412,6 +486,15 @@ public Sequence execute(final DBBroker broker, final CompiledXQuery expression, context.getProfiler().traceQueryStart(); broker.getBrokerPool().getProcessMonitor().queryStarted(context.getWatchDog()); + // Preclaim locks before evaluation if lock targets were collected + if (context.hasPreclaimTargets()) { + try { + context.preclaimLocks(); + } catch (final LockException e) { + throw new XPathException((Expression) null, ErrorCodes.ERROR, "Failed to preclaim locks: " + e.getMessage(), e); + } + } + FunctionCall call = null; try { @@ -451,6 +534,13 @@ public Sequence execute(final DBBroker broker, final CompiledXQuery expression, result = expression.eval(contextSequence, null); } + // W3C XQuery Update Facility 3.0: apply Pending Update List at snapshot boundary + final org.exist.xquery.xquf.PendingUpdateList pul = context.getPendingUpdateList(); + if (!pul.isEmpty()) { + pul.apply(context); + pul.clear(); + } + if(LOG.isDebugEnabled()) { final NumberFormat nf = NumberFormat.getNumberInstance(); LOG.debug("Execution took {} ms", nf.format(System.currentTimeMillis() - start)); @@ -462,6 +552,11 @@ public Sequence execute(final DBBroker broker, final CompiledXQuery expression, return result; } finally { + // Release preclaimed locks after PUL has been applied + if (context.hasPreclaimedLocks()) { + context.releasePreclaimedLocks(); + } + context.getProfiler().traceQueryEnd(context); // track query stats before context is reset broker.getBrokerPool().getProcessMonitor().queryCompleted(context.getWatchDog()); diff --git a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java 
b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java index 6e8105ec786..0b88b067b87 100644 --- a/exist-core/src/main/java/org/exist/xquery/XQueryContext.java +++ b/exist-core/src/main/java/org/exist/xquery/XQueryContext.java @@ -30,6 +30,8 @@ import java.net.URISyntaxException; import java.nio.charset.Charset; import java.nio.file.Path; + +import org.exist.xquery.ft.FTMatchOptions; import java.nio.file.Paths; import java.util.*; import java.util.concurrent.CopyOnWriteArrayList; @@ -93,6 +95,7 @@ import org.exist.xquery.pragmas.*; import org.exist.xquery.update.Modification; import org.exist.xquery.util.SerializerUtils; +import org.exist.xquery.xquf.PendingUpdateList; import org.exist.xquery.value.*; import org.jgrapht.Graph; import org.jgrapht.alg.interfaces.ShortestPathAlgorithm; @@ -238,6 +241,14 @@ public class XQueryContext implements BinaryValueManager, Context { private @Nullable Graph modulesDependencyGraph; private @Nullable ThreadPoolExecutor modulesDependencyGraphSPExecutor; + /** + * Per-context module location hints, mapping namespace URIs to file locations. + * These are checked by {@link #getModuleLocation(String)} before the global + * static module map in the Configuration. This allows external tools (like the + * XQTS test runner) to register module locations without eagerly loading them. + */ + private Map dynamicModuleLocations = null; + /** * Used to save current state when modules are imported dynamically */ @@ -287,6 +298,25 @@ public class XQueryContext implements BinaryValueManager, Context { */ protected MutableDocumentSet modifiedDocuments = null; + /** + * W3C XQuery Update Facility 3.0 Pending Update List. + * Accumulates update primitives during query evaluation and is applied + * at snapshot boundaries. + */ + private PendingUpdateList pendingUpdateList = new PendingUpdateList(); + + /** + * Tracks whether the current module uses the legacy eXist-db update syntax + * (update insert/delete/replace/rename/value). 
Set during tree walking. + */ + private boolean hasLegacyUpdate = false; + + /** + * Tracks whether the current module uses W3C XQuery Update Facility 3.0 syntax + * (insert node, delete node, replace node, etc.). Set during tree walking. + */ + private boolean hasXQUFUpdate = false; + /** * A general-purpose map to set attributes in the current query context. */ @@ -307,6 +337,18 @@ public class XQueryContext implements BinaryValueManager, Context { */ private String defaultCollation = Collations.UNICODE_CODEPOINT_COLLATION_URI; + /** + * XQFT 3.0: default full-text match options declared via "declare ft-option". + */ + private FTMatchOptions defaultFTMatchOptions; + + /** + * XQFT 3.0: thesaurus URI-to-file mapping. + * Maps thesaurus URIs (e.g., "http://bstore1.example.com/UsabilityThesaurus.xml") + * to local file paths. + */ + private final Map thesaurusRegistry = new HashMap<>(); + /** * The default language */ @@ -371,6 +413,12 @@ public class XQueryContext implements BinaryValueManager, Context { private LockedDocumentMap protectedDocuments = null; + // --- Preclaiming lock targets (BaseX-style two-phase locking) --- + private Set preclaimDocumentTargets; + private Set preclaimCollectionTargets; + private boolean preclaimRequiresGlobalLock = false; + private final List preclaimedLocks = new ArrayList<>(); + /** * The profiler instance used by this context. */ @@ -383,6 +431,11 @@ public class XQueryContext implements BinaryValueManager, Context { private boolean enableOptimizer = true; + /** CompileContext from the last optimize() pass. Exposed for diagnostics + * and tests. May be {@code null} if compilation has not yet happened or + * the optimizer was disabled. */ + private CompileContext lastCompileContext = null; + private boolean raiseErrorOnFailedRetrieval = XQUERY_RAISE_ERROR_ON_FAILED_RETRIEVAL_DEFAULT; private boolean isShared = false; @@ -420,6 +473,9 @@ public class XQueryContext implements BinaryValueManager, Context { * HTTP context. 
*/ private @Nullable HttpContext httpContext = null; + /** + * Sentinel QName for the default (unnamed) decimal format per XQuery 3.1 §4.10. + */ private static final QName UNNAMED_DECIMAL_FORMAT = new QName("__UNNAMED__", Function.BUILTIN_FUNCTION_NS); private final Map staticDecimalFormats = hashMap(Tuple(UNNAMED_DECIMAL_FORMAT, DecimalFormat.UNNAMED)); @@ -969,6 +1025,18 @@ public String getInheritedNamespace(final String prefix) { return inheritedInScopeNamespaces == null ? null : inheritedInScopeNamespaces.get(prefix); } + public Map getAllInheritedNamespaces() { + return inheritedInScopeNamespaces; + } + + public Map getInScopeNamespaces() { + return inScopeNamespaces; + } + + public MemTreeBuilder getCurrentDocumentBuilder() { + return documentBuilder; + } + @Override public String getInheritedPrefix(final String uri) { return inheritedInScopePrefixes == null ? null : inheritedInScopePrefixes.get(uri); @@ -1049,6 +1117,13 @@ public String getDefaultElementNamespace() { @Override public void setDefaultElementNamespace(final String uri, @Nullable final String schema) throws XPathException { + // XQST0070: It is a static error if a namespace URI is bound to the predefined + // prefix xmlns, or if a namespace URI other than http://www.w3.org/XML/1998/namespace + // is bound to the prefix xml. + if (Namespaces.XMLNS_NS.equals(uri)) { + throw new XPathException(rootExpression, ErrorCodes.XQST0070, + "The namespace URI 'http://www.w3.org/2000/xmlns/' cannot be used as the default element namespace"); + } // eXist forces the empty element NS as default. 
if (!defaultElementNamespace.equals(AnyURIValue.EMPTY_URI)) { throw new XPathException(rootExpression, ErrorCodes.XQST0066, @@ -1090,6 +1165,22 @@ public String getDefaultCollation() { return defaultCollation; } + public void setDefaultFTMatchOptions(final FTMatchOptions opts) { + this.defaultFTMatchOptions = opts; + } + + public FTMatchOptions getDefaultFTMatchOptions() { + return defaultFTMatchOptions; + } + + public void registerThesaurus(final String uri, final Path file) { + thesaurusRegistry.put(uri, file); + } + + public Path resolveThesaurusURI(final String uri) { + return thesaurusRegistry.get(uri); + } + @Override public Collator getCollator(String uri) throws XPathException { return getCollator(uri, ErrorCodes.XQST0076); @@ -1300,6 +1391,31 @@ public DocumentSet getStaticDocs() { return textResourceSupplier.apply(getBroker(), getBroker().getCurrentTransaction(), uri, charset); } + /** + * Gets a text resource from the "Available text resources" of the + * dynamic context, matching by URI only. This is used when no encoding + * is specified, allowing the resource to be found regardless of what + * charset it was registered with. + * + * @param uri the URI of the resource to retrieve + * @return a reader to read the resource content from, or null if not found + * @throws XPathException in case of a dynamic error + */ + public @Nullable Reader getDynamicallyAvailableTextResourceByUri(final String uri) + throws XPathException { + if (dynamicTextResources == null) { + return null; + } + + for (final Map.Entry, QuadFunctionE> entry : dynamicTextResources.entrySet()) { + if (entry.getKey()._1.equals(uri)) { + final Charset registeredCharset = entry.getKey()._2; + return entry.getValue().apply(getBroker(), getBroker().getCurrentTransaction(), uri, registeredCharset); + } + } + return null; + } + /** * Gets a collection from the "Available collections" of the * dynamic context. 
@@ -1356,6 +1472,90 @@ public boolean lockDocumentsOnLoad() { return false; } + /** + * Collect lock targets from the compiled expression tree using + * a {@link org.exist.xquery.lock.LockTargetCollector}. + * + * @param root the compiled expression tree root + */ + public void collectLockTargets(final Expression root) { + if (root == null) { + return; + } + final org.exist.xquery.lock.LockTargetCollector collector = + new org.exist.xquery.lock.LockTargetCollector(); + collector.collect(root); + this.preclaimDocumentTargets = collector.getDocumentTargets(); + this.preclaimCollectionTargets = collector.getCollectionTargets(); + this.preclaimRequiresGlobalLock = collector.requiresGlobalLock(); + } + + /** + * Returns true if lock targets have been collected and preclaiming + * should be performed before evaluation. + */ + public boolean hasPreclaimTargets() { + return preclaimDocumentTargets != null && + (preclaimRequiresGlobalLock || + !preclaimDocumentTargets.isEmpty() || + !preclaimCollectionTargets.isEmpty()); + } + + /** + * Acquire preclaimed locks on all collected document and collection + * targets. If static analysis could not determine all targets, + * acquires a global collection write lock on /db as a safe fallback. + * + *

<p>Locks are acquired in a consistent order (TreeSet natural ordering) + * to prevent deadlocks.</p>

+ * + * @throws LockException if lock acquisition fails + */ + public void preclaimLocks() throws LockException { + if (preclaimDocumentTargets == null) { + return; + } + final org.exist.storage.lock.LockManager lockManager = + getBroker().getBrokerPool().getLockManager(); + + if (preclaimRequiresGlobalLock) { + // Fall back to global collection write lock on /db + preclaimedLocks.add(lockManager.acquireCollectionWriteLock(XmldbURI.ROOT_COLLECTION_URI)); + } else { + // Acquire collection write locks first (sorted order) + for (final XmldbURI collectionUri : preclaimCollectionTargets) { + preclaimedLocks.add(lockManager.acquireCollectionWriteLock(collectionUri)); + } + // Then acquire document write locks (sorted order) + for (final XmldbURI docUri : preclaimDocumentTargets) { + preclaimedLocks.add(lockManager.acquireDocumentWriteLock(docUri)); + } + } + } + + /** + * Release all preclaimed locks. Should be called in a finally block + * after query evaluation completes. + */ + public void releasePreclaimedLocks() { + // Release in reverse order of acquisition + for (int i = preclaimedLocks.size() - 1; i >= 0; i--) { + try { + preclaimedLocks.get(i).close(); + } catch (final Exception e) { + LOG.warn("Error releasing preclaimed lock", e); + } + } + preclaimedLocks.clear(); + } + + /** + * Returns true if preclaimed locks are currently held. + */ + public boolean hasPreclaimedLocks() { + return !preclaimedLocks.isEmpty(); + } + @Override public void setShared(final boolean shared) { isShared = shared; @@ -1374,6 +1574,59 @@ public void addModifiedDoc(final DocumentImpl document) { modifiedDocuments.add(document); } + /** + * Get the W3C XQuery Update Facility 3.0 Pending Update List for this context. + * + * @return the current pending update list + */ + public PendingUpdateList getPendingUpdateList() { + return pendingUpdateList; + } + + /** + * Set the Pending Update List. Used by copy-modify expressions to create + * a nested PUL scope. 
+ * + * @param pul the new pending update list + */ + public void setPendingUpdateList(final PendingUpdateList pul) { + this.pendingUpdateList = pul; + } + + /** + * Mark that the current module uses the legacy eXist-db update syntax. + * Called during tree walking when a legacy update expression is encountered. + * + * @param ast the AST node for error reporting + * @throws XPathException if this module already uses W3C XQUF syntax + */ + public void markLegacyUpdate(final XQueryAST ast) throws XPathException { + if (hasXQUFUpdate) { + throw new XPathException(ast, ErrorCodes.XPST0003, + "Cannot mix legacy 'update' syntax with W3C XQuery Update Facility expressions " + + "in the same module. Migrate all updates to W3C syntax " + + "(insert node, delete node, replace node, replace value of node, rename node)."); + } + hasLegacyUpdate = true; + } + + /** + * Mark that the current module uses W3C XQuery Update Facility 3.0 syntax. + * Called during tree walking when a XQUF expression is encountered. + * + * @param ast the AST node for error reporting + * @throws XPathException if this module already uses legacy update syntax + */ + public void markXQUFUpdate(final XQueryAST ast) throws XPathException { + if (hasLegacyUpdate) { + throw new XPathException(ast, ErrorCodes.XPST0003, + "Cannot mix W3C XQuery Update Facility expressions with legacy 'update' syntax " + + "in the same module. 
Migrate all updates to W3C syntax " + + "(insert node, delete node, replace node, replace value of node, rename node)."); + } + hasXQUFUpdate = true; + } + @Override public void reset() { reset(false); @@ -1407,6 +1660,13 @@ public void reset(final boolean keepGlobals) { modifiedDocuments = null; } + // Reset the W3C XQuery Update Facility PUL + pendingUpdateList = new PendingUpdateList(); + + // Reset update syntax tracking flags + hasLegacyUpdate = false; + hasXQUFUpdate = false; + calendar = null; implicitTimeZone = null; @@ -1694,10 +1954,18 @@ public void analyzeAndOptimizeIfModulesChanged(final Expression expr) throws XPa expr.analyze(new AnalyzeContextInfo()); if (optimizationsEnabled()) { + // Per-expression optimize() pass: each Expression subclass may + // return a replacement (constant fold, branch select, etc.). Runs + // before the legacy visitor so replacements happen first; the + // visitor then operates on the reduced tree. + final CompileContext cc = new CompileContext(this); + expr.optimize(cc); + this.lastCompileContext = cc; + final Optimizer optimizer = new Optimizer(this); expr.accept(optimizer); - if (optimizer.hasOptimized()) { + if (optimizer.hasOptimized() || cc.hasOptimized()) { reset(true); expr.resetState(true); expr.analyze(new AnalyzeContextInfo()); @@ -1840,7 +2108,7 @@ public void declareFunction(final UserDefinedFunction function) throws XPathExce final QName name = function.getSignature().getName(); final String uri = name.getNamespaceURI(); - if (uri.isEmpty()) { + if (uri.isEmpty() && getXQueryVersion() < 40) { throw new XPathException(function, ErrorCodes.XQST0060, "Every declared function name must have a non-null namespace URI, " + "but function '" + name + "' does not meet this requirement."); @@ -1865,7 +2133,31 @@ public void declareFunction(final UserDefinedFunction function) throws XPathExce @Override public @Nullable UserDefinedFunction resolveFunction(final QName name, final int argCount) { final FunctionId id = new 
FunctionId(name, argCount); - return declaredFunctions.get(id); + final UserDefinedFunction exact = declaredFunctions.get(id); + if (exact != null) { + return exact; + } + // XQ4: Try to find a function with more params where trailing params have defaults + for (final UserDefinedFunction func : declaredFunctions.values()) { + if (func.getName().equals(name)) { + final SequenceType[] argTypes = func.getSignature().getArgumentTypes(); + if (argTypes.length > argCount) { + // Check that all params from argCount onwards have defaults + boolean allDefaulted = true; + for (int i = argCount; i < argTypes.length; i++) { + if (!(argTypes[i] instanceof FunctionParameterSequenceType) || + !((FunctionParameterSequenceType) argTypes[i]).hasDefaultValue()) { + allDefaulted = false; + break; + } + } + if (allDefaulted) { + return func; + } + } + } + } + return null; } @Override @@ -2001,6 +2293,14 @@ public Variable resolveVariable(@Nullable final AnalyzeContextInfo contextInfo, if (modules != null) { for (final Module module : modules) { + // Check %private visibility: if the variable is private and + // we're resolving from outside the module, skip it + if (module instanceof ExternalModuleImpl) { + final ExternalModuleImpl extModule = (ExternalModuleImpl) module; + if (extModule.isVariablePrivate(qname) && extModule.getContext() != this) { + continue; + } + } var = module.resolveVariable(contextInfo, qname); if (var != null) { break; @@ -2542,6 +2842,11 @@ public boolean tailRecursiveCall(final FunctionSignature signature) { @Override public @Nullable Module[] importModule(@Nullable String namespaceURI, @Nullable String prefix, @Nullable AnyURIValue[] locationHints) throws XPathException { + // Normalize whitespace in namespace URI per XQuery spec section 4.12 + if (namespaceURI != null) { + namespaceURI = namespaceURI.trim(); + } + if (XML_NS_PREFIX.equals(prefix) || XMLNS_ATTRIBUTE.equals(prefix)) { throw new XPathException(rootExpression, ErrorCodes.XQST0070, "The prefix 
declared for a module import must not be 'xml' or 'xmlns'."); @@ -2702,9 +3007,31 @@ protected XPathException moduleLoadException(final String message, final String return new XPathException(rootExpression, ErrorCodes.XQST0059, message, new ValueSequence(new StringValue(moduleLocation)), e); } + /** + * Registers a module location hint for the given namespace URI without + * eagerly loading the module. The location will be used when the query + * compiler encounters an {@code import module} statement for this namespace. + * + * @param namespaceURI the module namespace URI + * @param location the location (file URI or path) where the module can be found + */ + public void addModuleLocationHint(final String namespaceURI, final String location) { + if (dynamicModuleLocations == null) { + dynamicModuleLocations = new HashMap<>(); + } + dynamicModuleLocations.put(namespaceURI, location); + } + @SuppressWarnings("unchecked") @Override public String getModuleLocation(final String namespaceURI) { + // Check per-context dynamic locations first + if (dynamicModuleLocations != null) { + final String location = dynamicModuleLocations.get(namespaceURI); + if (location != null) { + return location; + } + } final Map moduleMap = (Map) getConfiguration().getProperty(PROPERTY_STATIC_MODULE_MAP); return moduleMap.get(namespaceURI); @@ -2750,6 +3077,13 @@ private ExternalModule compileOrBorrowModule(final String namespaceURI, final St * @return The compiled module, or null if the source is not a module * @throws XPathException if the module could not be loaded (XQST0059) or compiled (XPST0003) */ + /** + * Compile a module from a Source. Public wrapper for fn:load-xquery-module content option. 
+ */ + public @Nullable ExternalModule compileModuleFromSource(final String namespaceURI, final Source source) throws XPathException { + return compileModule(namespaceURI, null, "content", source); + } + private @Nullable ExternalModule compileModule(String namespaceURI, final String prefix, final String location, final Source source) throws XPathException { if (LOG.isDebugEnabled()) { @@ -2778,6 +3112,82 @@ private ExternalModule compileOrBorrowModule(final String namespaceURI, final St final XQueryContext modContext = new ModuleContext(this, namespaceURI, prefix, location); modExternal.setContext(modContext); + // rd parser compileModule routing: GeneralComparison PathExpr unwrapping + // bug is fixed. Remaining blocker: rd parser fails on inline functions + // inside parenthesized sequences — e.g., (function ($a) {1}, ...) in + // bang.xql line 258. The parser doesn't recognize `function` as starting + // an inline function in this context. This is a general rd parser bug, + // not compileModule-specific. Re-enable once inline function parsing is fixed. 
+ if (false && XQuery.useRdParser()) { + try { + final StringBuilder sb = new StringBuilder(4096); + final char[] buf = new char[4096]; + int n; + while ((n = reader.read(buf)) != -1) sb.append(buf, 0, n); + final String sourceText = sb.toString(); + if (LOG.isTraceEnabled()) { + LOG.trace("compileModule rd-parser: source length={}, namespace={}, first200={}", + sourceText.length(), namespaceURI, + sourceText.substring(0, Math.min(200, sourceText.length())).replace("\n", "\\n")); + } + final org.exist.xquery.parser.next.XQueryParser rdParser = + new org.exist.xquery.parser.next.XQueryParser(modContext, sourceText); + final Expression parsedExpr = rdParser.parse(); + // Wrap in LibraryModuleRoot for function dispatch + final Expression rootExpr; + if (rdParser.isLibraryModule()) { + final LibraryModuleRoot libRoot = new LibraryModuleRoot(modContext); + if (parsedExpr instanceof PathExpr) { + for (int ii = 0; ii < ((PathExpr) parsedExpr).getLength(); ii++) { + libRoot.add(((PathExpr) parsedExpr).getExpression(ii)); + } + } + rootExpr = libRoot; + } else { + rootExpr = parsedExpr; + } + modContext.setRootExpression(rootExpr); + modContext.resolveForwardReferences(); + + for (final java.util.Iterator it = modContext.localFunctions(); it.hasNext(); ) { + modExternal.declareFunction(it.next()); + } + // Register module-level variables from the parsed expression tree. + // The rd parser adds VariableDeclaration expressions to rootExpr, + // which need to be registered on the module (like ANTLR 2's + // myModule.declareVariable(qn, decl) during tree walking). 
+ if (parsedExpr instanceof PathExpr) { + final PathExpr rootPath = (PathExpr) parsedExpr; + for (int vi = 0; vi < rootPath.getLength(); vi++) { + final Expression step = rootPath.getExpression(vi); + if (step instanceof VariableDeclaration) { + final VariableDeclaration decl = (VariableDeclaration) step; + modExternal.declareVariable(decl.getName(), decl); + } + } + } + // Also register any variables already in the context + for (final Variable var : modContext.getVariables().values()) { + if (var.getQName().getNamespaceURI().equals(namespaceURI)) { + modExternal.declareVariable(var); + } + } + modExternal.setRootExpression(rootExpr); + + if (namespaceURI != null && !modExternal.getNamespaceURI().equals(namespaceURI)) { + throw new XPathException(rootExpression, ErrorCodes.XQST0059, + "namespace URI declared by module (" + modExternal.getNamespaceURI() + + ") does not match namespace URI in import statement, which was: " + namespaceURI); + } + modExternal.setSource(source); + modContext.setSource(source); + modExternal.setIsReady(true); + return modExternal; + } catch (final XPathException e) { + e.prependMessage("Error while loading module " + location + ": "); + throw e; + } + } final XQueryLexer lexer = new XQueryLexer(modContext, reader); final XQueryParser parser = new XQueryParser(lexer); final XQueryTreeParser astParser = new XQueryTreeParser(modContext, modExternal); @@ -2807,12 +3217,6 @@ private ExternalModule compileOrBorrowModule(final String namespaceURI, final St throw new XPathException(rootExpression, ErrorCodes.XQST0059, "namespace URI declared by module (" + modExternal.getNamespaceURI() + ") does not match namespace URI in import statement, which was: " + namespaceURI); } - // Set source information on module context -// String sourceClassName = source.getClass().getName(); -// modContext.setSourceKey(source.getKey().toString()); - // Extract the source type from the classname by removing the package prefix and the "Source" suffix -// 
modContext.setSourceType( sourceClassName.substring( 17, sourceClassName.length() - 6 ) ); - modExternal.setSource(source); modContext.setSource(source); modExternal.setIsReady(true); @@ -2855,7 +3259,45 @@ public void resolveForwardReferences() throws XPathException { final UserDefinedFunction func = call.getContext().resolveFunction(call.getQName(), call.getArgumentCount()); if (func == null) { - throw new XPathException(call, ErrorCodes.XPST0017, "Call to undeclared function: " + call.getQName().getStringValue()); + // Check if function exists at other arities to give a better error message + final QName qname = call.getQName(); + final int argCount = call.getArgumentCount(); + final XQueryContext callContext = call.getContext(); + + // Check local declared functions + final Iterator localSigs = callContext.getSignaturesForFunction(qname); + + // Also check external modules + final List allSignatures = new ArrayList<>(); + while (localSigs.hasNext()) { + allSignatures.add(localSigs.next()); + } + + final Module[] modules = callContext.getModules(qname.getNamespaceURI()); + if (modules != null) { + for (final Module module : modules) { + if (module != null) { + final Iterator modSigs = module.getSignaturesForFunction(qname); + while (modSigs.hasNext()) { + allSignatures.add(modSigs.next()); + } + } + } + } + + if (!allSignatures.isEmpty()) { + final StringBuilder msg = new StringBuilder(); + msg.append("Unexpectedly received ").append(argCount) + .append(" parameter(s) in call to function '") + .append(qname.getStringValue()).append("()'. 
"); + msg.append("Defined function signatures are:\r\n"); + for (final FunctionSignature sig : allSignatures) { + msg.append(sig.toString()).append("\r\n"); + } + throw new XPathException(call, ErrorCodes.XPST0017, msg.toString()); + } + + throw new XPathException(call, ErrorCodes.XPST0017, "Call to undeclared function: " + qname.getStringValue()); } call.resolveForwardReference(func); } @@ -2922,6 +3364,10 @@ public void setStaticDecimalFormat(final QName qnDecimalFormat, final DecimalFor staticDecimalFormats.put(qnDecimalFormat, decimalFormat); } + public void setDefaultStaticDecimalFormat(final DecimalFormat decimalFormat) { + staticDecimalFormats.put(UNNAMED_DECIMAL_FORMAT, decimalFormat); + } + public Map getCachedUriCollectionResults() { return cachedUriCollectionResults; } @@ -3005,6 +3451,16 @@ public boolean optimizationsEnabled() { return enableOptimizer; } + /** + * Returns the {@link CompileContext} from the most recent optimize() pass, + * or {@code null} if the optimizer hasn't run yet or was disabled. Exposed + * for diagnostics and tests; will be replaced by a proper {@code util:explain} + * function in a follow-up. 
+ */ + public CompileContext getLastCompileContext() { + return lastCompileContext; + } + @Override public void addOption(final String name, final String value) throws XPathException { if (staticOptions == null) { @@ -3276,9 +3732,16 @@ protected void clearUpdateListeners() { @Override public void checkOptions(final Properties properties) throws XPathException { checkLegacyOptions(properties); + + // Phase 1: Process parameter-document first (provides base settings) + processParameterDocument(dynamicOptions, properties); + processParameterDocument(staticOptions, properties); + + // Phase 2: Process inline options (override parameter-document settings) if (dynamicOptions != null) { for (final Option option : dynamicOptions) { - if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI())) { + if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI()) + && !"parameter-document".equals(option.getQName().getLocalPart())) { SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties, inScopeNamespaces::get); } @@ -3288,6 +3751,7 @@ public void checkOptions(final Properties properties) throws XPathException { if (staticOptions != null) { for (final Option option : staticOptions) { if (Namespaces.XSLT_XQUERY_SERIALIZATION_NS.equals(option.getQName().getNamespaceURI()) + && !"parameter-document".equals(option.getQName().getLocalPart()) && !properties.containsKey(option.getQName().getLocalPart())) { SerializerUtils.setProperty(option.getQName().getLocalPart(), option.getContents(), properties, inScopeNamespaces::get); @@ -3296,6 +3760,55 @@ public void checkOptions(final Properties properties) throws XPathException { } } + /** + * Process the parameter-document serialization option if present. + * Loads the referenced XML file and extracts serialization parameters. + */ + private void processParameterDocument(final java.util.List