From 9cf2f79db7529ede70618c3f941b54db60fe849d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 29 Mar 2026 20:35:30 +0000 Subject: [PATCH] Bump github.com/antchfx/xpath from 1.2.4 to 1.3.6 Bumps [github.com/antchfx/xpath](https://github.com/antchfx/xpath) from 1.2.4 to 1.3.6. - [Release notes](https://github.com/antchfx/xpath/releases) - [Commits](https://github.com/antchfx/xpath/compare/v1.2.4...v1.3.6) --- updated-dependencies: - dependency-name: github.com/antchfx/xpath dependency-version: 1.3.6 dependency-type: indirect ... Signed-off-by: dependabot[bot] --- go.mod | 2 +- go.sum | 3 +- vendor/github.com/antchfx/xpath/.travis.yml | 12 - vendor/github.com/antchfx/xpath/README.md | 141 ++--- vendor/github.com/antchfx/xpath/build.go | 492 +++++++++++------ vendor/github.com/antchfx/xpath/func.go | 368 +++++++----- vendor/github.com/antchfx/xpath/operator.go | 86 +-- vendor/github.com/antchfx/xpath/parse.go | 100 ++-- vendor/github.com/antchfx/xpath/query.go | 584 ++++++++++++++++++-- vendor/github.com/antchfx/xpath/xpath.go | 12 +- vendor/modules.txt | 2 +- 11 files changed, 1257 insertions(+), 545 deletions(-) delete mode 100644 vendor/github.com/antchfx/xpath/.travis.yml diff --git a/go.mod b/go.mod index ebb7419c59..6c16c2d0fa 100644 --- a/go.mod +++ b/go.mod @@ -137,7 +137,7 @@ require ( github.com/andybalholm/cascadia v1.3.2 // indirect github.com/antchfx/htmlquery v1.3.0 // indirect github.com/antchfx/xmlquery v1.3.17 // indirect - github.com/antchfx/xpath v1.2.4 // indirect + github.com/antchfx/xpath v1.3.6 // indirect github.com/containerd/containerd v1.7.11 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dsnet/compress v0.0.1 // indirect diff --git a/go.sum b/go.sum index 83c60ce9b0..3efb6603d2 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,9 @@ github.com/antchfx/xmlquery v1.3.17/go.mod h1:Afkq4JIeXut75taLSuI31ISJ/zeq+3jG7T github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= -github.com/antchfx/xpath v1.2.4 h1:dW1HB/JxKvGtJ9WyVGJ0sIoEcqftV3SqIstujI+B9XY= github.com/antchfx/xpath v1.2.4/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/antchfx/xpath v1.3.6 h1:s0y+ElRRtTQdfHP609qFu0+c6bglDv20pqOViQjjdPI= +github.com/antchfx/xpath v1.3.6/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI= github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= diff --git a/vendor/github.com/antchfx/xpath/.travis.yml b/vendor/github.com/antchfx/xpath/.travis.yml deleted file mode 100644 index 6b63957a8c..0000000000 --- a/vendor/github.com/antchfx/xpath/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: go - -go: - - 1.6 - - 1.9 - - '1.10' - -install: - - go get github.com/mattn/goveralls - -script: - - $HOME/gopath/bin/goveralls -service=travis-ci \ No newline at end of file diff --git a/vendor/github.com/antchfx/xpath/README.md b/vendor/github.com/antchfx/xpath/README.md index 3435fa9152..733c4c8727 100644 --- a/vendor/github.com/antchfx/xpath/README.md +++ b/vendor/github.com/antchfx/xpath/README.md @@ -1,14 +1,13 @@ -XPath -==== +# XPath + [![GoDoc](https://godoc.org/github.com/antchfx/xpath?status.svg)](https://godoc.org/github.com/antchfx/xpath) [![Coverage Status](https://coveralls.io/repos/github/antchfx/xpath/badge.svg?branch=master)](https://coveralls.io/github/antchfx/xpath?branch=master) -[![Build Status](https://travis-ci.org/antchfx/xpath.svg?branch=master)](https://travis-ci.org/antchfx/xpath) +[![Build Status](https://github.com/antchfx/xpath/actions/workflows/testing.yml/badge.svg)](https://github.com/antchfx/xpath/actions/workflows/testing.yml) [![Go Report Card](https://goreportcard.com/badge/github.com/antchfx/xpath)](https://goreportcard.com/report/github.com/antchfx/xpath) XPath is Go package provides selecting nodes from XML, HTML or other documents using XPath expression. -Implementation -=== +# Implementation - [htmlquery](https://github.com/antchfx/htmlquery) - an XPath query package for HTML document @@ -16,8 +15,7 @@ Implementation - [jsonquery](https://github.com/antchfx/jsonquery) - an XPath query package for JSON document -Supported Features -=== +# Supported Features #### The basic XPath patterns. @@ -49,7 +47,7 @@ Supported Features - `a/b` : For each node matching a, add the nodes matching b to the result. -- `a//b` : For each node matching a, add the descendant nodes matching b to the result. +- `a//b` : For each node matching a, add the descendant nodes matching b to the result. - `//b` : Returns elements in the entire document matching b. @@ -59,23 +57,26 @@ Supported Features - `(a/b)` : Selects all matches nodes as grouping set. -#### Node Axes +#### Node Axes - `child::*` : The child axis selects children of the current node. + - `child::node()`: Selects all the children of the context node. + - `child::text()`: Selects all text node children of the context node. + - `descendant::*` : The descendant axis selects descendants of the current node. It is equivalent to '//'. - `descendant-or-self::*` : Selects descendants including the current node. -- `attribute::*` : Selects attributes of the current element. It is equivalent to @* +- `attribute::*` : Selects attributes of the current element. It is equivalent to @\* - `following-sibling::*` : Selects nodes after the current node. - `preceding-sibling::*` : Selects nodes before the current node. -- `following::*` : Selects the first matching node following in document order, excluding descendants. +- `following::*` : Selects the first matching node following in document order, excluding descendants. -- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors. +- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors. - `parent::*` : Selects the parent if it matches. The '..' pattern from the core is equivalent to 'parent::node()'. @@ -87,27 +88,27 @@ Supported Features #### Expressions - The gxpath supported three types: number, boolean, string. +The gxpath supported three types: number, boolean, string. - `path` : Selects nodes based on the path. - `a = b` : Standard comparisons. - * a = b True if a equals b. - * a != b True if a is not equal to b. - * a < b True if a is less than b. - * a <= b True if a is less than or equal to b. - * a > b True if a is greater than b. - * a >= b True if a is greater than or equal to b. + - `a = b` : True if a equals b. + - `a != b` : True if a is not equal to b. + - `a < b` : True if a is less than b. + - `a <= b` : True if a is less than or equal to b. + - `a > b` : True if a is greater than b. + - `a >= b` : True if a is greater than or equal to b. - `a + b` : Arithmetic expressions. - * `- a` Unary minus - * a + b Add - * a - b Substract - * a * b Multiply - * a div b Divide - * a mod b Floating point mod, like Java. + - `- a` Unary minus + - `a + b` : Addition + - `a - b` : Subtraction + - `a * b` : Multiplication + - `a div b` : Division + - `a mod b` : Modulus (division remainder) - `a or b` : Boolean `or` operation. @@ -117,46 +118,50 @@ Supported Features - `fun(arg1, ..., argn)` : Function calls: -| Function | Supported | -| --- | --- | -`boolean()`| ✓ | -`ceiling()`| ✓ | -`choose()`| ✗ | -`concat()`| ✓ | -`contains()`| ✓ | -`count()`| ✓ | -`current()`| ✗ | -`document()`| ✗ | -`element-available()`| ✗ | -`ends-with()`| ✓ | -`false()`| ✓ | -`floor()`| ✓ | -`format-number()`| ✗ | -`function-available()`| ✗ | -`generate-id()`| ✗ | -`id()`| ✗ | -`key()`| ✗ | -`lang()`| ✗ | -`last()`| ✓ | -`local-name()`| ✓ | -`matches()`| ✓ | -`name()`| ✓ | -`namespace-uri()`| ✓ | -`normalize-space()`| ✓ | -`not()`| ✓ | -`number()`| ✓ | -`position()`| ✓ | -`replace()`| ✓ | -`reverse()`| ✓ | -`round()`| ✓ | -`starts-with()`| ✓ | -`string()`| ✓ | -`string-length()`| ✓ | -`substring()`| ✓ | -`substring-after()`| ✓ | -`substring-before()`| ✓ | -`sum()`| ✓ | -`system-property()`| ✗ | -`translate()`| ✓ | -`true()`| ✓ | -`unparsed-entity-url()` | ✗ | \ No newline at end of file +| Function | Supported | +| ----------------------- | --------- | +| `boolean()` | ✓ | +| `ceiling()` | ✓ | +| `choose()` | ✗ | +| `concat()` | ✓ | +| `contains()` | ✓ | +| `count()` | ✓ | +| `current()` | ✗ | +| `document()` | ✗ | +| `element-available()` | ✗ | +| `ends-with()` | ✓ | +| `false()` | ✓ | +| `floor()` | ✓ | +| `format-number()` | ✗ | +| `function-available()` | ✗ | +| `generate-id()` | ✗ | +| `id()` | ✗ | +| `key()` | ✗ | +| `lang()` | ✗ | +| `last()` | ✓ | +| `local-name()` | ✓ | +| `lower-case()`[^1] | ✓ | +| `matches()` | ✓ | +| `name()` | ✓ | +| `namespace-uri()` | ✓ | +| `normalize-space()` | ✓ | +| `not()` | ✓ | +| `number()` | ✓ | +| `position()` | ✓ | +| `replace()` | ✓ | +| `reverse()` | ✓ | +| `round()` | ✓ | +| `starts-with()` | ✓ | +| `string()` | ✓ | +| `string-join()`[^1] | ✓ | +| `string-length()` | ✓ | +| `substring()` | ✓ | +| `substring-after()` | ✓ | +| `substring-before()` | ✓ | +| `sum()` | ✓ | +| `system-property()` | ✗ | +| `translate()` | ✓ | +| `true()` | ✓ | +| `unparsed-entity-url()` | ✗ | + +[^1]: XPath-2.0 expression diff --git a/vendor/github.com/antchfx/xpath/build.go b/vendor/github.com/antchfx/xpath/build.go index 4129a21876..7172608cd6 100644 --- a/vendor/github.com/antchfx/xpath/build.go +++ b/vendor/github.com/antchfx/xpath/build.go @@ -7,42 +7,47 @@ import ( type flag int -const ( - noneFlag flag = iota - filterFlag -) +var flagsEnum = struct { + None flag + SmartDesc flag + PosFilter flag + Filter flag + Condition flag +}{ + None: 0, + SmartDesc: 1, + PosFilter: 2, + Filter: 4, + Condition: 8, +} + +type builderProp int + +var builderProps = struct { + None builderProp + PosFilter builderProp + HasPosition builderProp + HasLast builderProp + NonFlat builderProp +}{ + None: 0, + PosFilter: 1, + HasPosition: 2, + HasLast: 4, + NonFlat: 8, +} // builder provides building an XPath expressions. type builder struct { - depth int - flag flag + parseDepth int firstInput query } // axisPredicate creates a predicate to predicating for this axis node. func axisPredicate(root *axisNode) func(NodeNavigator) bool { - // get current axix node type. - typ := ElementNode - switch root.AxeType { - case "attribute": - typ = AttributeNode - case "self", "parent": - typ = allNode - default: - switch root.Prop { - case "comment": - typ = CommentNode - case "text": - typ = TextNode - // case "processing-instruction": - // typ = ProcessingInstructionNode - case "node": - typ = allNode - } - } nametest := root.LocalName != "" || root.Prefix != "" predicate := func(n NodeNavigator) bool { - if typ == n.NodeType() || typ == allNode { + if root.typeTest == n.NodeType() || root.typeTest == allNode { if nametest { type namespaceURL interface { NamespaceURL() string @@ -63,83 +68,88 @@ func axisPredicate(root *axisNode) func(NodeNavigator) bool { return predicate } -// processAxisNode processes a query for the XPath axis node. -func (b *builder) processAxisNode(root *axisNode) (query, error) { +// processAxis processes a query for the XPath axis node. +func (b *builder) processAxis(root *axisNode, flags flag, props *builderProp) (query, error) { var ( - err error - qyInput query - qyOutput query - predicate = axisPredicate(root) + err error + qyInput query + qyOutput query ) + b.firstInput = nil + predicate := axisPredicate(root) if root.Input == nil { qyInput = &contextQuery{} + *props = builderProps.None } else { - if root.AxeType == "child" && (root.Input.Type() == nodeAxis) { - if input := root.Input.(*axisNode); input.AxeType == "descendant-or-self" { - var qyGrandInput query - if input.Input != nil { - qyGrandInput, _ = b.processNode(input.Input) - } else { - qyGrandInput = &contextQuery{} - } - // fix #20: https://github.com/antchfx/htmlquery/issues/20 - filter := func(n NodeNavigator) bool { - v := predicate(n) - switch root.Prop { - case "text": - v = v && n.NodeType() == TextNode - case "comment": - v = v && n.NodeType() == CommentNode + inputFlags := flagsEnum.None + if (flags & flagsEnum.Filter) == 0 { + if root.AxisType == "child" && (root.Input.Type() == nodeAxis) { + if input := root.Input.(*axisNode); input.AxisType == "descendant-or-self" { + var qyGrandInput query + if input.Input != nil { + qyGrandInput, err = b.processNode(input.Input, flagsEnum.SmartDesc, props) + if err != nil { + return nil, err + } + } else { + qyGrandInput = &contextQuery{} } - return v + qyOutput = &descendantQuery{name: root.LocalName, Input: qyGrandInput, Predicate: predicate, Self: false} + *props |= builderProps.NonFlat + return qyOutput, nil } - // fix `//*[contains(@id,"food")]//*[contains(@id,"food")]`, see https://github.com/antchfx/htmlquery/issues/52 - // Skip the current node(Self:false) for the next descendants nodes. - _, ok := qyGrandInput.(*contextQuery) - qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: filter, Self: ok} - return qyOutput, nil + } + if root.AxisType == "descendant" || root.AxisType == "descendant-or-self" { + inputFlags |= flagsEnum.SmartDesc } } - qyInput, err = b.processNode(root.Input) + + qyInput, err = b.processNode(root.Input, inputFlags, props) if err != nil { return nil, err } } - switch root.AxeType { + switch root.AxisType { case "ancestor": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "ancestor-or-self": - qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate, Self: true} + qyOutput = &ancestorQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + *props |= builderProps.NonFlat case "attribute": - qyOutput = &attributeQuery{Input: qyInput, Predicate: predicate} + qyOutput = &attributeQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} case "child": - filter := func(n NodeNavigator) bool { - v := predicate(n) - switch root.Prop { - case "text": - v = v && n.NodeType() == TextNode - case "node": - v = v && (n.NodeType() == ElementNode || n.NodeType() == TextNode) - case "comment": - v = v && n.NodeType() == CommentNode - } - return v + if (*props & builderProps.NonFlat) == 0 { + qyOutput = &childQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + } else { + qyOutput = &cachedChildQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} } - qyOutput = &childQuery{Input: qyInput, Predicate: filter} case "descendant": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: false, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate} + } + *props |= builderProps.NonFlat case "descendant-or-self": - qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate, Self: true} + if (flags & flagsEnum.SmartDesc) != flagsEnum.None { + qyOutput = &descendantOverDescendantQuery{name: root.LocalName, Input: qyInput, MatchSelf: true, Predicate: predicate} + } else { + qyOutput = &descendantQuery{name: root.LocalName, Input: qyInput, Predicate: predicate, Self: true} + } + *props |= builderProps.NonFlat case "following": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "following-sibling": qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "parent": qyOutput = &parentQuery{Input: qyInput, Predicate: predicate} case "preceding": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate} + *props |= builderProps.NonFlat case "preceding-sibling": qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true} case "self": @@ -147,62 +157,189 @@ func (b *builder) processAxisNode(root *axisNode) (query, error) { case "namespace": // haha,what will you do someting?? default: - err = fmt.Errorf("unknown axe type: %s", root.AxeType) + err = fmt.Errorf("unknown axe type: %s", root.AxisType) return nil, err } return qyOutput, nil } +func canBeNumber(q query) bool { + if q.ValueType() != xpathResultType.Any { + return q.ValueType() == xpathResultType.Number + } + return true +} + // processFilterNode builds query for the XPath filter predicate. -func (b *builder) processFilterNode(root *filterNode) (query, error) { - b.flag |= filterFlag +func (b *builder) processFilter(root *filterNode, flags flag, props *builderProp) (query, error) { + first := (flags & flagsEnum.Filter) == 0 - qyInput, err := b.processNode(root.Input) + qyInput, err := b.processNode(root.Input, (flags | flagsEnum.Filter), props) if err != nil { return nil, err } - qyCond, err := b.processNode(root.Condition) + firstInput := b.firstInput + + var propsCond builderProp + cond, err := b.processNode(root.Condition, flags, &propsCond) if err != nil { return nil, err } - qyOutput := &filterQuery{Input: qyInput, Predicate: qyCond} - return qyOutput, nil + + // Checking whether is number + if canBeNumber(cond) || ((propsCond & (builderProps.HasPosition | builderProps.HasLast)) != 0) { + propsCond |= builderProps.HasPosition + flags |= flagsEnum.PosFilter + } + + if root.Input.Type() != nodeFilter { + *props &= ^builderProps.PosFilter + } + + if (propsCond & builderProps.HasPosition) != 0 { + *props |= builderProps.PosFilter + } + + if (propsCond & builderProps.HasPosition) != builderProps.None { + if (propsCond & builderProps.HasLast) != 0 { + // https://github.com/antchfx/xpath/issues/76 + // https://github.com/antchfx/xpath/issues/78 + if qyFunc, ok := cond.(*functionQuery); ok { + switch qyFunc.Input.(type) { + case *filterQuery: + cond = &lastFuncQuery{Input: qyFunc.Input} + case *groupQuery: + cond = &lastFuncQuery{Input: qyFunc.Input} + } + } + } + } + + merge := (qyInput.Properties() & queryProps.Merge) != 0 + if first && firstInput != nil { + if merge && ((*props & builderProps.PosFilter) != 0) { + var ( + rootQuery = &contextQuery{} + parent query + ) + switch axisQuery := firstInput.(type) { + case *ancestorQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *attributeQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *childQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *cachedChildQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *followingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *precedingQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *parentQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *selfQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *groupQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + case *descendantOverDescendantQuery: + if _, ok := axisQuery.Input.(*contextQuery); !ok { + parent = axisQuery.Input + axisQuery.Input = rootQuery + } + } + b.firstInput = nil + child := &filterQuery{Input: qyInput, Predicate: cond, NoPosition: false} + if parent != nil { + return &mergeQuery{Input: parent, Child: child}, nil + } + return child, nil + } + b.firstInput = nil + } + + resultQuery := &filterQuery{ + Input: qyInput, + Predicate: cond, + NoPosition: (propsCond & builderProps.HasPosition) == 0, + } + return resultQuery, nil } // processFunctionNode processes query for the XPath function node. -func (b *builder) processFunctionNode(root *functionNode) (query, error) { +func (b *builder) processFunction(root *functionNode, props *builderProp) (query, error) { + // Reset builder props + *props = builderProps.None + var qyOutput query switch root.FuncName { + case "lower-case": + arg, err := b.processNode(root.Args[0], flagsEnum.None, props) + if err != nil { + return nil, err + } + qyOutput = &functionQuery{Func: lowerCaseFunc(arg)} case "starts-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: startwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: startwithFunc(arg1, arg2)} case "ends-with": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: endwithFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: endwithFunc(arg1, arg2)} case "contains": - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - arg2, err := b.processNode(root.Args[1]) + arg2, err := b.processNode(root.Args[1], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)} + qyOutput = &functionQuery{Func: containsFunc(arg1, arg2)} case "matches": //matches(string , pattern) if len(root.Args) != 2 { @@ -212,13 +349,19 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)} + // Issue #92, testing the regular expression before. + if q, ok := arg2.(*constantQuery); ok { + if _, err = getRegexp(q.Val.(string)); err != nil { + return nil, fmt.Errorf("matches() got error. %v", err) + } + } + qyOutput = &functionQuery{Func: matchesFunc(arg1, arg2)} case "substring": //substring( string , start [, length] ) if len(root.Args) < 2 { @@ -228,18 +371,18 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } if len(root.Args) == 3 { - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } } - qyOutput = &functionQuery{Input: b.firstInput, Func: substringFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: substringFunc(arg1, arg2, arg3)} case "substring-before", "substring-after": //substring-xxxx( haystack, needle ) if len(root.Args) != 2 { @@ -249,35 +392,37 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } qyOutput = &functionQuery{ - Input: b.firstInput, - Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), + Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"), } case "string-length": // string-length( [string] ) if len(root.Args) < 1 { return nil, errors.New("xpath: string-length function must have at least one parameter") } - arg1, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: stringLengthFunc(arg1)} + qyOutput = &functionQuery{Func: stringLengthFunc(arg1)} case "normalize-space": - if len(root.Args) == 0 { - return nil, errors.New("xpath: normalize-space function must have at least one parameter") + var arg node + if len(root.Args) > 0 { + arg = root.Args[0] + } else { + arg = newAxisNode("self", allNode, "", "", "", nil) } - argQuery, err := b.processNode(root.Args[0]) + arg1, err := b.processNode(arg, flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc} + qyOutput = &functionQuery{Func: normalizespaceFunc(arg1)} case "replace": //replace( string , string, string ) if len(root.Args) != 3 { @@ -287,16 +432,16 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: replaceFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: replaceFunc(arg1, arg2, arg3)} case "translate": //translate( string , string, string ) if len(root.Args) != 3 { @@ -306,25 +451,25 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { arg1, arg2, arg3 query err error ) - if arg1, err = b.processNode(root.Args[0]); err != nil { + if arg1, err = b.processNode(root.Args[0], flagsEnum.None, props); err != nil { return nil, err } - if arg2, err = b.processNode(root.Args[1]); err != nil { + if arg2, err = b.processNode(root.Args[1], flagsEnum.None, props); err != nil { return nil, err } - if arg3, err = b.processNode(root.Args[2]); err != nil { + if arg3, err = b.processNode(root.Args[2], flagsEnum.None, props); err != nil { return nil, err } - qyOutput = &functionQuery{Input: b.firstInput, Func: translateFunc(arg1, arg2, arg3)} + qyOutput = &functionQuery{Func: translateFunc(arg1, arg2, arg3)} case "not": if len(root.Args) == 0 { return nil, errors.New("xpath: not function must have at least one parameter") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: notFunc} + qyOutput = &functionQuery{Func: notFunc(argQuery)} case "name", "local-name", "namespace-uri": if len(root.Args) > 1 { return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) @@ -334,141 +479,147 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) { err error ) if len(root.Args) == 1 { - arg, err = b.processNode(root.Args[0]) + arg, err = b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } } switch root.FuncName { case "name": - qyOutput = &functionQuery{Input: b.firstInput, Func: nameFunc(arg)} + qyOutput = &functionQuery{Func: nameFunc(arg)} case "local-name": - qyOutput = &functionQuery{Input: b.firstInput, Func: localNameFunc(arg)} + qyOutput = &functionQuery{Func: localNameFunc(arg)} case "namespace-uri": - qyOutput = &functionQuery{Input: b.firstInput, Func: namespaceFunc(arg)} + qyOutput = &functionQuery{Func: namespaceFunc(arg)} } case "true", "false": val := root.FuncName == "true" qyOutput = &functionQuery{ - Input: b.firstInput, Func: func(_ query, _ iterator) interface{} { return val }, } case "last": - switch typ := b.firstInput.(type) { - case *groupQuery, *filterQuery: - // https://github.com/antchfx/xpath/issues/76 - // https://github.com/antchfx/xpath/issues/78 - qyOutput = &lastQuery{Input: typ} - default: - qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc} - } - + qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc()} + *props |= builderProps.HasLast case "position": - qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc} + qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc()} + *props |= builderProps.HasPosition case "boolean", "number", "string": - inp := b.firstInput + var inp query if len(root.Args) > 1 { return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName) } if len(root.Args) == 1 { - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } inp = argQuery } - f := &functionQuery{Input: inp} switch root.FuncName { case "boolean": - f.Func = booleanFunc + qyOutput = &functionQuery{Func: booleanFunc(inp)} case "string": - f.Func = stringFunc + qyOutput = &functionQuery{Func: stringFunc(inp)} case "number": - f.Func = numberFunc + qyOutput = &functionQuery{Func: numberFunc(inp)} } - qyOutput = f case "count": - //if b.firstInput == nil { - // return nil, errors.New("xpath: expression must evaluate to node-set") - //} if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: countFunc} + qyOutput = &functionQuery{Func: countFunc(argQuery)} case "sum": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - qyOutput = &functionQuery{Input: argQuery, Func: sumFunc} + qyOutput = &functionQuery{Func: sumFunc(argQuery)} case "ceiling", "floor", "round": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } - f := &functionQuery{Input: argQuery} switch root.FuncName { case "ceiling": - f.Func = ceilingFunc + qyOutput = &functionQuery{Func: ceilingFunc(argQuery)} case "floor": - f.Func = floorFunc + qyOutput = &functionQuery{Func: floorFunc(argQuery)} case "round": - f.Func = roundFunc + qyOutput = &functionQuery{Func: roundFunc(argQuery)} } - qyOutput = f case "concat": if len(root.Args) < 2 { return nil, fmt.Errorf("xpath: concat() must have at least two arguments") } var args []query for _, v := range root.Args { - q, err := b.processNode(v) + q, err := b.processNode(v, flagsEnum.None, props) if err != nil { return nil, err } args = append(args, q) } - qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)} + qyOutput = &functionQuery{Func: concatFunc(args...)} case "reverse": if len(root.Args) == 0 { return nil, fmt.Errorf("xpath: reverse(node-sets) function must with have parameters node-sets") } - argQuery, err := b.processNode(root.Args[0]) + argQuery, err := b.processNode(root.Args[0], flagsEnum.None, props) if err != nil { return nil, err } qyOutput = &transformFunctionQuery{Input: argQuery, Func: reverseFunc} + case "string-join": + if len(root.Args) != 2 { + return nil, fmt.Errorf("xpath: string-join(node-sets, separator) function requires node-set and argument") + } + input, err := b.processNode(root.Args[0], flagsEnum.None, props) + if err != nil { + return nil, err + } + arg1, err := b.processNode(root.Args[1], flagsEnum.None, props) + if err != nil { + return nil, err + } + qyOutput = &functionQuery{Func: stringJoinFunc(input, arg1)} default: return nil, fmt.Errorf("not yet support this function %s()", root.FuncName) } return qyOutput, nil } -func (b *builder) processOperatorNode(root *operatorNode) (query, error) { - left, err := b.processNode(root.Left) +func (b *builder) processOperator(root *operatorNode, props *builderProp) (query, error) { + var ( + leftProp builderProp + rightProp builderProp + ) + + left, err := b.processNode(root.Left, flagsEnum.None, &leftProp) if err != nil { return nil, err } - right, err := b.processNode(root.Right) + right, err := b.processNode(root.Right, flagsEnum.None, &rightProp) if err != nil { return nil, err } + *props = leftProp | rightProp + var qyOutput query switch root.Op { case "+", "-", "*", "div", "mod": // Numeric operator - var exprFunc func(interface{}, interface{}) interface{} + var exprFunc func(iterator, interface{}, interface{}) interface{} switch root.Op { case "+": exprFunc = plusFunc @@ -506,41 +657,43 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) { } qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr} case "|": + *props |= builderProps.NonFlat qyOutput = &unionQuery{Left: left, Right: right} } return qyOutput, nil } -func (b *builder) processNode(root node) (q query, err error) { - if b.depth = b.depth + 1; b.depth > 1024 { +func (b *builder) processNode(root node, flags flag, props *builderProp) (q query, err error) { + if b.parseDepth = b.parseDepth + 1; b.parseDepth > 1024 { err = errors.New("the xpath expressions is too complex") return } - + *props = builderProps.None switch root.Type() { case nodeConstantOperand: n := root.(*operandNode) q = &constantQuery{Val: n.Val} case nodeRoot: - q = &contextQuery{Root: true} + q = &absoluteQuery{} case nodeAxis: - q, err = b.processAxisNode(root.(*axisNode)) + q, err = b.processAxis(root.(*axisNode), flags, props) b.firstInput = q case nodeFilter: - q, err = b.processFilterNode(root.(*filterNode)) + q, err = b.processFilter(root.(*filterNode), flags, props) b.firstInput = q case nodeFunction: - q, err = b.processFunctionNode(root.(*functionNode)) + q, err = b.processFunction(root.(*functionNode), props) case nodeOperator: - q, err = b.processOperatorNode(root.(*operatorNode)) + q, err = b.processOperator(root.(*operatorNode), props) case nodeGroup: - q, err = b.processNode(root.(*groupNode).Input) + q, err = b.processNode(root.(*groupNode).Input, flagsEnum.None, props) if err != nil { return } q = &groupQuery{Input: q} b.firstInput = q } + b.parseDepth-- return } @@ -560,5 +713,6 @@ func build(expr string, namespaces map[string]string) (q query, err error) { }() root := parse(expr, namespaces) b := &builder{} - return b.processNode(root) + props := builderProps.None + return b.processNode(root, flagsEnum.None, &props) } diff --git a/vendor/github.com/antchfx/xpath/func.go b/vendor/github.com/antchfx/xpath/func.go index afe5988f8f..ffbee65676 100644 --- a/vendor/github.com/antchfx/xpath/func.go +++ b/vendor/github.com/antchfx/xpath/func.go @@ -37,75 +37,83 @@ func predicate(q query) func(NodeNavigator) bool { } // positionFunc is a XPath Node Set functions position(). -func positionFunc(q query, t iterator) interface{} { - var ( - count = 1 - node = t.Current().Copy() - ) - test := predicate(q) - for node.MoveToPrevious() { - if test(node) { - count++ +func positionFunc() func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + var ( + count = 1 + node = t.Current().Copy() + ) + test := predicate(q) + for node.MoveToPrevious() { + if test(node) { + count++ + } } + return float64(count) } - return float64(count) } // lastFunc is a XPath Node Set functions last(). -func lastFunc(q query, t iterator) interface{} { - var ( - count = 0 - node = t.Current().Copy() - ) - node.MoveToFirst() - test := predicate(q) - for { - if test(node) { - count++ - } - if !node.MoveToNext() { - break +func lastFunc() func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + var ( + count = 0 + node = t.Current().Copy() + ) + test := predicate(q) + node.MoveToFirst() + for { + if test(node) { + count++ + } + if !node.MoveToNext() { + break + } } + return float64(count) } - return float64(count) } // countFunc is a XPath Node Set functions count(node-set). -func countFunc(q query, t iterator) interface{} { - var count = 0 - q = functionArgs(q) - test := predicate(q) - switch typ := q.Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if test(node) { - count++ +func countFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var count = 0 + q := functionArgs(arg) + test := predicate(q) + switch typ := q.Evaluate(t).(type) { + case query: + for node := typ.Select(t); node != nil; node = typ.Select(t) { + if test(node) { + count++ + } } } + return float64(count) } - return float64(count) } // sumFunc is a XPath Node Set functions sum(node-set). -func sumFunc(q query, t iterator) interface{} { - var sum float64 - switch typ := functionArgs(q).Evaluate(t).(type) { - case query: - for node := typ.Select(t); node != nil; node = typ.Select(t) { - if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { - sum += v +func sumFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var sum float64 + switch typ := functionArgs(arg).Evaluate(t).(type) { + case query: + for node := typ.Select(t); node != nil; node = typ.Select(t) { + if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { + sum += v + } } + case float64: + sum = typ + case string: + v, err := strconv.ParseFloat(typ, 64) + if err != nil { + panic(errors.New("sum() function argument type must be a node-set or number")) + } + sum = v } - case float64: - sum = typ - case string: - v, err := strconv.ParseFloat(typ, 64) - if err != nil { - panic(errors.New("sum() function argument type must be a node-set or number")) - } - sum = v + return sum } - return sum } func asNumber(t iterator, o interface{}) float64 { @@ -113,7 +121,7 @@ func asNumber(t iterator, o interface{}) float64 { case query: node := typ.Select(t) if node == nil { - return float64(0) + return math.NaN() } if v, err := strconv.ParseFloat(node.Value(), 64); err == nil { return v @@ -130,30 +138,36 @@ func asNumber(t iterator, o interface{}) float64 { } // ceilingFunc is a XPath Node Set functions ceiling(node-set). -func ceilingFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - // if math.IsNaN(val) { - // panic(errors.New("ceiling() function argument type must be a valid number")) - // } - return math.Ceil(val) +func ceilingFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + // if math.IsNaN(val) { + // panic(errors.New("ceiling() function argument type must be a valid number")) + // } + return math.Ceil(val) + } } // floorFunc is a XPath Node Set functions floor(node-set). -func floorFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - return math.Floor(val) +func floorFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + return math.Floor(val) + } } // roundFunc is a XPath Node Set functions round(node-set). -func roundFunc(q query, t iterator) interface{} { - val := asNumber(t, functionArgs(q).Evaluate(t)) - //return math.Round(val) - return round(val) +func roundFunc(arg query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + val := asNumber(t, functionArgs(arg).Evaluate(t)) + //return math.Round(val) + return round(val) + } } // nameFunc is a XPath functions name([node-set]). func nameFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var v NodeNavigator if arg == nil { v = t.Current() @@ -173,7 +187,7 @@ func nameFunc(arg query) func(query, iterator) interface{} { // localNameFunc is a XPath functions local-name([node-set]). func localNameFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var v NodeNavigator if arg == nil { v = t.Current() @@ -189,7 +203,7 @@ func localNameFunc(arg query) func(query, iterator) interface{} { // namespaceFunc is a XPath functions namespace-uri([node-set]). func namespaceFunc(arg query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var v NodeNavigator if arg == nil { v = t.Current() @@ -256,26 +270,35 @@ func asString(t iterator, v interface{}) string { } // booleanFunc is a XPath functions boolean([node-set]). -func booleanFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asBool(t, v) +func booleanFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return asBool(t, v) + } } // numberFunc is a XPath functions number([node-set]). -func numberFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asNumber(t, v) +func numberFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return asNumber(t, v) + } } // stringFunc is a XPath functions string([node-set]). -func stringFunc(q query, t iterator) interface{} { - v := functionArgs(q).Evaluate(t) - return asString(t, v) +func stringFunc(arg1 query) func(query, iterator) interface{} { + return func(q query, t iterator) interface{} { + if arg1 == nil { + return t.Current().Value() + } + v := functionArgs(arg1).Evaluate(t) + return asString(t, v) + } } // startwithFunc is a XPath functions starts-with(string, string). func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool @@ -302,7 +325,7 @@ func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} { // endwithFunc is a XPath functions ends-with(string, string). func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool @@ -329,7 +352,7 @@ func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} { // containsFunc is a XPath functions contains(string or @attr, string). func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var ( m, n string ok bool @@ -360,7 +383,7 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} { // Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if // needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag. func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var s string switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: @@ -386,43 +409,45 @@ func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} { } // normalizespaceFunc is XPath functions normalize-space(string?) -func normalizespaceFunc(q query, t iterator) interface{} { - var m string - switch typ := functionArgs(q).Evaluate(t).(type) { - case string: - m = typ - case query: - node := typ.Select(t) - if node == nil { - return "" +func normalizespaceFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var m string + switch typ := functionArgs(arg1).Evaluate(t).(type) { + case string: + m = typ + case query: + node := typ.Select(t) + if node == nil { + return "" + } + m = node.Value() } - m = node.Value() - } - var b = builderPool.Get().(stringBuilder) - b.Grow(len(m)) - - runeStr := []rune(strings.TrimSpace(m)) - l := len(runeStr) - for i := range runeStr { - r := runeStr[i] - isSpace := unicode.IsSpace(r) - if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) { - if isSpace { - r = ' ' + var b = builderPool.Get().(stringBuilder) + b.Grow(len(m)) + + runeStr := []rune(strings.TrimSpace(m)) + l := len(runeStr) + for i := range runeStr { + r := runeStr[i] + isSpace := unicode.IsSpace(r) + if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) { + if isSpace { + r = ' ' + } + b.WriteRune(r) } - b.WriteRune(r) } - } - result := b.String() - b.Reset() - builderPool.Put(b) + result := b.String() + b.Reset() + builderPool.Put(b) - return result + return result + } } // substringFunc is XPath functions substring function returns a part of a given string. func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var m string switch typ := functionArgs(arg1).Evaluate(t).(type) { case string: @@ -437,31 +462,48 @@ func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { var start, length float64 var ok bool - if start, ok = functionArgs(arg2).Evaluate(t).(float64); !ok { - panic(errors.New("substring() function first argument type must be int")) - } else if start < 1 { - panic(errors.New("substring() function first argument type must be >= 1")) - } - start-- - if arg3 != nil { - if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok { - panic(errors.New("substring() function second argument type must be int")) + panic(errors.New("substring() function first argument type must be number")) + } + // fix https://github.com/antchfx/xpath/issues/109 + start = math.Round(start) + if start > float64(len(m)) { + return "" + } + if arg3 == nil { + if start <= 0 { + return m } + return m[int(start)-1:] } - if (len(m) - int(start)) < int(length) { - panic(errors.New("substring() function start and length argument out of range")) + + if length, ok = functionArgs(arg3).Evaluate(t).(float64); !ok { + panic(errors.New("substring() function second argument type must be number")) + } + length = math.Round(length) + if length <= 0 { + return "" + } + if length > float64(len(m)) { + length = float64(len(m)) } - if length > 0 { - return m[int(start):int(length+start)] + if start < 0 { + length = length - math.Abs(start) + if length <= 1 { + return "" + } + return m[:int(length-1)] } - return m[int(start):] + if start == 0 { + return m[:int(length-1)] + } + return m[int(start-1):int(length+start-1)] } } // substringIndFunc is XPath functions substring-before/substring-after function returns a part of a given string. func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { var str string switch v := functionArgs(arg1).Evaluate(t).(type) { case string: @@ -502,7 +544,7 @@ func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interf // stringLengthFunc is XPATH string-length( [string] ) function that returns a number // equal to the number of characters in a given string. func stringLengthFunc(arg1 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { switch v := functionArgs(arg1).Evaluate(t).(type) { case string: return float64(len(v)) @@ -519,7 +561,7 @@ func stringLengthFunc(arg1 query) func(query, iterator) interface{} { // translateFunc is XPath functions translate() function returns a replaced string. func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { str := asString(t, functionArgs(arg1).Evaluate(t)) src := asString(t, functionArgs(arg2).Evaluate(t)) dst := asString(t, functionArgs(arg3).Evaluate(t)) @@ -538,25 +580,36 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { // replaceFunc is XPath functions replace() function returns a replaced string. func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { str := asString(t, functionArgs(arg1).Evaluate(t)) src := asString(t, functionArgs(arg2).Evaluate(t)) dst := asString(t, functionArgs(arg3).Evaluate(t)) + e, err := getRegexp(src) + if err != nil { + panic(fmt.Errorf("replace() function second argument is not a valid regexp pattern, err: %s", err.Error())) + } + + // replace all $i to ${i} for golang regexp.Expand + for idx := e.NumSubexp(); idx > 0; idx-- { + dst = strings.ReplaceAll(dst, fmt.Sprintf("$%d", idx), fmt.Sprintf("${%d}", idx)) + } - return strings.Replace(str, src, dst, -1) + return e.ReplaceAllString(str, dst) } } // notFunc is XPATH functions not(expression) function operation. -func notFunc(q query, t iterator) interface{} { - switch v := functionArgs(q).Evaluate(t).(type) { - case bool: - return !v - case query: - node := v.Select(t) - return node == nil - default: - return false +func notFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + switch v := functionArgs(arg1).Evaluate(t).(type) { + case bool: + return !v + case query: + node := v.Select(t) + return node == nil + default: + return false + } } } @@ -564,7 +617,7 @@ func notFunc(q query, t iterator) interface{} { // strings and returns the resulting string. // concat( string1 , string2 [, stringn]* ) func concatFunc(args ...query) func(query, iterator) interface{} { - return func(q query, t iterator) interface{} { + return func(_ query, t iterator) interface{} { b := builderPool.Get().(stringBuilder) for _, v := range args { v = functionArgs(v) @@ -614,3 +667,42 @@ func reverseFunc(q query, t iterator) func() NodeNavigator { return node } } + +// string-join is a XPath Node Set functions string-join(node-set, separator). +func stringJoinFunc(q, arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + var separator string + switch v := functionArgs(arg1).Evaluate(t).(type) { + case string: + separator = v + case query: + node := v.Select(t) + if node != nil { + separator = node.Value() + } + } + + q = functionArgs(q) + test := predicate(q) + var parts []string + switch v := q.Evaluate(t).(type) { + case string: + return v + case query: + for node := v.Select(t); node != nil; node = v.Select(t) { + if test(node) { + parts = append(parts, node.Value()) + } + } + } + return strings.Join(parts, separator) + } +} + +// lower-case is XPATH function that converts a string to lower case. +func lowerCaseFunc(arg1 query) func(query, iterator) interface{} { + return func(_ query, t iterator) interface{} { + v := functionArgs(arg1).Evaluate(t) + return strings.ToLower(asString(t, v)) + } +} diff --git a/vendor/github.com/antchfx/xpath/operator.go b/vendor/github.com/antchfx/xpath/operator.go index eb38ac6d73..2820152b36 100644 --- a/vendor/github.com/antchfx/xpath/operator.go +++ b/vendor/github.com/antchfx/xpath/operator.go @@ -1,40 +1,11 @@ package xpath import ( - "fmt" - "reflect" "strconv" ) // The XPath number operator function list. -// valueType is a return value type. -type valueType int - -const ( - booleanType valueType = iota - numberType - stringType - nodeSetType -) - -func getValueType(i interface{}) valueType { - v := reflect.ValueOf(i) - switch v.Kind() { - case reflect.Float64: - return numberType - case reflect.String: - return stringType - case reflect.Bool: - return booleanType - default: - if _, ok := i.(query); ok { - return nodeSetType - } - } - panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) -} - type logical func(iterator, string, interface{}, interface{}) bool var logicalFuncs = [][]logical{ @@ -228,91 +199,90 @@ func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool { // eqFunc is an `=` operator. func eqFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "=", m, n) } // gtFunc is an `>` operator. func gtFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">", m, n) } // geFunc is an `>=` operator. func geFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, ">=", m, n) } // ltFunc is an `<` operator. func ltFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<", m, n) } // leFunc is an `<=` operator. func leFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "<=", m, n) } // neFunc is an `!=` operator. func neFunc(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "!=", m, n) } // orFunc is an `or` operator. var orFunc = func(t iterator, m, n interface{}) interface{} { - t1 := getValueType(m) - t2 := getValueType(n) + t1 := getXPathType(m) + t2 := getXPathType(n) return logicalFuncs[t1][t2](t, "or", m, n) } -func numericExpr(m, n interface{}, cb func(float64, float64) float64) float64 { - typ := reflect.TypeOf(float64(0)) - a := reflect.ValueOf(m).Convert(typ) - b := reflect.ValueOf(n).Convert(typ) - return cb(a.Float(), b.Float()) +func numericExpr(t iterator, m, n interface{}, cb func(float64, float64) float64) float64 { + a := asNumber(t, m) + b := asNumber(t, n) + return cb(a, b) } // plusFunc is an `+` operator. -var plusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var plusFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a + b }) } // minusFunc is an `-` operator. -var minusFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var minusFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a - b }) } // mulFunc is an `*` operator. -var mulFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var mulFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a * b }) } // divFunc is an `DIV` operator. -var divFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var divFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return a / b }) } // modFunc is an 'MOD' operator. -var modFunc = func(m, n interface{}) interface{} { - return numericExpr(m, n, func(a, b float64) float64 { +var modFunc = func(t iterator, m, n interface{}) interface{} { + return numericExpr(t, m, n, func(a, b float64) float64 { return float64(int(a) % int(b)) }) } diff --git a/vendor/github.com/antchfx/xpath/parse.go b/vendor/github.com/antchfx/xpath/parse.go index cbd289acab..5393125911 100644 --- a/vendor/github.com/antchfx/xpath/parse.go +++ b/vendor/github.com/antchfx/xpath/parse.go @@ -6,6 +6,7 @@ import ( "fmt" "strconv" "unicode" + "unicode/utf8" ) // A XPath expression token type. @@ -85,12 +86,13 @@ func newOperandNode(v interface{}) node { } // newAxisNode returns new axis node AxisNode. -func newAxisNode(axeTyp, localName, prefix, prop string, n node, opts ...func(p *axisNode)) node { +func newAxisNode(axisType string, typeTest NodeType, localName, prefix, prop string, n node, opts ...func(p *axisNode)) node { a := axisNode{ nodeType: nodeAxis, + typeTest: typeTest, LocalName: localName, Prefix: prefix, - AxeType: axeTyp, + AxisType: axisType, Prop: prop, Input: n, } @@ -228,8 +230,9 @@ Loop: } // RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr -// | RelationalExpr '<=' AdditiveExpr -// | RelationalExpr '>=' AdditiveExpr +// +// | RelationalExpr '<=' AdditiveExpr +// | RelationalExpr '>=' AdditiveExpr func (p *parser) parseRelationalExpr(n node) node { opnd := p.parseAdditiveExpr(n) Loop: @@ -274,7 +277,8 @@ Loop: } // MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator(*) UnaryExpr -// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr +// +// | MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr func (p *parser) parseMultiplicativeExpr(n node) node { opnd := p.parseUnaryExpr(n) Loop: @@ -308,7 +312,7 @@ func (p *parser) parseUnaryExpr(n node) node { return opnd } -// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr +// UnionExpr ::= PathExpr | UnionExpr '|' PathExpr func (p *parser) parseUnionExpr(n node) node { opnd := p.parsePathExpr(n) Loop: @@ -335,7 +339,7 @@ func (p *parser) parsePathExpr(n node) node { opnd = p.parseRelativeLocationPath(opnd) case itemSlashSlash: p.next() - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) + opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", allNode, "", "", "", opnd)) } } else { opnd = p.parseLocationPath(nil) @@ -352,7 +356,7 @@ func (p *parser) parseFilterExpr(n node) node { return opnd } -// Predicate ::= '[' PredicateExpr ']' +// Predicate ::= '[' PredicateExpr ']' func (p *parser) parsePredicate(n node) node { p.skipItem(itemLBracket) opnd := p.parseExpression(n) @@ -372,7 +376,7 @@ func (p *parser) parseLocationPath(n node) (opnd node) { case itemSlashSlash: p.next() opnd = newRootNode("//") - opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", "", "", "", opnd)) + opnd = p.parseRelativeLocationPath(newAxisNode("descendant-or-self", allNode, "", "", "", opnd)) default: opnd = p.parseRelativeLocationPath(n) } @@ -388,7 +392,7 @@ Loop: switch p.r.typ { case itemSlashSlash: p.next() - opnd = newAxisNode("descendant-or-self", "", "", "", opnd) + opnd = newAxisNode("descendant-or-self", allNode, "", "", "", opnd) case itemSlash: p.next() default: @@ -400,30 +404,33 @@ Loop: // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep func (p *parser) parseStep(n node) (opnd node) { - axeTyp := "child" // default axes value. if p.r.typ == itemDot || p.r.typ == itemDotDot { if p.r.typ == itemDot { - axeTyp = "self" + opnd = newAxisNode("self", allNode, "", "", "", n) } else { - axeTyp = "parent" + opnd = newAxisNode("parent", allNode, "", "", "", n) } p.next() - opnd = newAxisNode(axeTyp, "", "", "", n) if p.r.typ != itemLBracket { return opnd } } else { + axisType := "child" // default axes value. switch p.r.typ { case itemAt: + axisType = "attribute" p.next() - axeTyp = "attribute" case itemAxe: - axeTyp = p.r.name + axisType = p.r.name p.next() case itemLParens: return p.parseSequence(n) } - opnd = p.parseNodeTest(n, axeTyp) + matchType := ElementNode + if axisType == "attribute" { + matchType = AttributeNode + } + opnd = p.parseNodeTest(n, axisType, matchType) } for p.r.typ == itemLBracket { opnd = newFilterNode(opnd, p.parsePredicate(opnd)) @@ -447,8 +454,8 @@ func (p *parser) parseSequence(n node) (opnd node) { return opnd } -// NodeTest ::= NameTest | nodeType '(' ')' | 'processing-instruction' '(' Literal ')' -func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { +// NodeTest ::= NameTest | nodeType '(' ')' | 'processing-instruction' '(' Literal ')' +func (p *parser) parseNodeTest(n node, axeTyp string, matchType NodeType) (opnd node) { switch p.r.typ { case itemName: if p.r.canBeFunc && isNodeType(p.r) { @@ -466,7 +473,19 @@ func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { p.next() } p.skipItem(itemRParens) - opnd = newAxisNode(axeTyp, name, "", prop, n) + switch prop { + case "comment": + matchType = CommentNode + case "text": + matchType = TextNode + case "processing-instruction": + case "node": + matchType = allNode + default: + matchType = RootNode + } + + opnd = newAxisNode(axeTyp, matchType, name, "", prop, n) } else { prefix := p.r.prefix name := p.r.name @@ -474,7 +493,7 @@ func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { if p.r.name == "*" { name = "" } - opnd = newAxisNode(axeTyp, name, prefix, "", n, func(a *axisNode) { + opnd = newAxisNode(axeTyp, matchType, name, prefix, "", n, func(a *axisNode) { if prefix != "" && p.namespaces != nil { if ns, ok := p.namespaces[prefix]; ok { a.hasNamespaceURI = true @@ -486,7 +505,7 @@ func (p *parser) parseNodeTest(n node, axeTyp string) (opnd node) { }) } case itemStar: - opnd = newAxisNode(axeTyp, "", "", "", n) + opnd = newAxisNode(axeTyp, matchType, "", "", "", n) p.next() default: panic("expression must evaluate to a node-set") @@ -579,17 +598,18 @@ type axisNode struct { nodeType Input node Prop string // node-test name.[comment|text|processing-instruction|node] - AxeType string // name of the axes.[attribute|ancestor|child|....] + AxisType string // name of the axis.[attribute|ancestor|child|....] LocalName string // local part name of node. Prefix string // prefix name of node. namespaceURI string // namespace URI of node hasNamespaceURI bool // if namespace URI is set (can be "") + typeTest NodeType } func (a *axisNode) String() string { var b bytes.Buffer - if a.AxeType != "" { - b.Write([]byte(a.AxeType + "::")) + if a.AxisType != "" { + b.Write([]byte(a.AxisType + "::")) } if a.Prefix != "" { b.Write([]byte(a.Prefix + ":")) @@ -672,6 +692,7 @@ type scanner struct { pos int curr rune + currSize int typ itemType strval string // text value at current pos numval float64 // number value at current pos @@ -681,10 +702,18 @@ type scanner struct { func (s *scanner) nextChar() bool { if s.pos >= len(s.text) { s.curr = rune(0) + s.currSize = 1 return false } - s.curr = rune(s.text[s.pos]) - s.pos++ + + r, size := rune(s.text[s.pos]), 1 + if r >= 0x80 { // handle multi-byte runes + r, size = utf8.DecodeRuneInString(s.text[s.pos:]) + } + + s.curr = r + s.currSize = size + s.pos += size return true } @@ -839,31 +868,36 @@ func (s *scanner) scanNumber() float64 { func (s *scanner) scanString() string { var ( - c = 0 end = s.curr ) s.nextChar() - i := s.pos - 1 + i := s.pos - s.currSize + c := s.currSize for s.curr != end { if !s.nextChar() { panic(errors.New("xpath: scanString got unclosed string")) } - c++ + c += s.currSize } + c -= 1 s.nextChar() return s.text[i : i+c] } func (s *scanner) scanName() string { var ( - c int - i = s.pos - 1 + c = s.currSize - 1 + i = s.pos - s.currSize ) + + // Detect current rune size + for isName(s.curr) { - c++ if !s.nextChar() { + c += s.currSize break } + c += s.currSize } return s.text[i : i+c] } diff --git a/vendor/github.com/antchfx/xpath/query.go b/vendor/github.com/antchfx/xpath/query.go index 4e6c6348b9..8c5535e182 100644 --- a/vendor/github.com/antchfx/xpath/query.go +++ b/vendor/github.com/antchfx/xpath/query.go @@ -5,8 +5,47 @@ import ( "fmt" "hash/fnv" "reflect" + "strconv" ) +// The return type of the XPath expression. +type resultType int + +var xpathResultType = struct { + Boolean resultType + // A numeric value + Number resultType + String resultType + // A node collection. + NodeSet resultType + // Any of the XPath node types. + Any resultType +}{ + Boolean: 0, + Number: 1, + String: 2, + NodeSet: 3, + Any: 4, +} + +type queryProp int + +var queryProps = struct { + None queryProp + Position queryProp + Count queryProp + Cached queryProp + Reverse queryProp + Merge queryProp +}{ + None: 0, + Position: 1, + Count: 2, + Cached: 4, + Reverse: 8, + Merge: 16, +} + type iterator interface { Current() NodeNavigator } @@ -20,12 +59,15 @@ type query interface { Evaluate(iterator) interface{} Clone() query + + // ValueType returns the value type of the current query. + ValueType() resultType + + Properties() queryProp } // nopQuery is an empty query that always return nil for any query. -type nopQuery struct { - query -} +type nopQuery struct{} func (nopQuery) Select(iterator) NodeNavigator { return nil } @@ -33,21 +75,23 @@ func (nopQuery) Evaluate(iterator) interface{} { return nil } func (nopQuery) Clone() query { return nopQuery{} } +func (nopQuery) ValueType() resultType { return xpathResultType.NodeSet } + +func (nopQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + // contextQuery is returns current node on the iterator object query. type contextQuery struct { count int - Root bool // Moving to root-level node in the current context iterator. } -func (c *contextQuery) Select(t iterator) (n NodeNavigator) { - if c.count == 0 { - c.count++ - n = t.Current().Copy() - if c.Root { - n.MoveToRoot() - } +func (c *contextQuery) Select(t iterator) NodeNavigator { + if c.count > 0 { + return nil } - return n + c.count++ + return t.Current().Copy() } func (c *contextQuery) Evaluate(iterator) interface{} { @@ -56,12 +100,54 @@ func (c *contextQuery) Evaluate(iterator) interface{} { } func (c *contextQuery) Clone() query { - return &contextQuery{Root: c.Root} + return &contextQuery{} +} + +func (c *contextQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *contextQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached +} + +type absoluteQuery struct { + count int +} + +func (a *absoluteQuery) Select(t iterator) (n NodeNavigator) { + if a.count > 0 { + return + } + a.count++ + n = t.Current().Copy() + n.MoveToRoot() + return +} + +func (a *absoluteQuery) Evaluate(t iterator) interface{} { + a.count = 0 + return a +} + +func (a *absoluteQuery) Clone() query { + return &absoluteQuery{} +} + +func (a *absoluteQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *absoluteQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Position | queryProps.Count | queryProps.Cached } // ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*) type ancestorQuery struct { + name string iterator func() NodeNavigator + table map[uint64]bool + pos int Self bool Input query @@ -69,33 +155,44 @@ type ancestorQuery struct { } func (a *ancestorQuery) Select(t iterator) NodeNavigator { + if a.table == nil { + a.table = make(map[uint64]bool) + } + for { if a.iterator == nil { node := a.Input.Select(t) if node == nil { return nil } + // Reset position for a new input context node + a.pos = 0 first := true node = node.Copy() a.iterator = func() NodeNavigator { - if first && a.Self { + if first { first = false - if a.Predicate(node) { + if a.Self && a.Predicate(node) { return node } } for node.MoveToParent() { - if !a.Predicate(node) { - continue + if a.Predicate(node) { + return node } - return node } return nil } } - if node := a.iterator(); node != nil { - return node + for node := a.iterator(); node != nil; node = a.iterator() { + node_id := getHashCode(node.Copy()) + if _, ok := a.table[node_id]; !ok { + a.table[node_id] = true + // Increase position for each matched node in current input context + a.pos++ + return node + } } a.iterator = nil } @@ -104,6 +201,8 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator { func (a *ancestorQuery) Evaluate(t iterator) interface{} { a.Input.Evaluate(t) a.iterator = nil + // Reset the table when re-evaluating to ensure clean state + a.table = nil return a } @@ -112,11 +211,27 @@ func (a *ancestorQuery) Test(n NodeNavigator) bool { } func (a *ancestorQuery) Clone() query { - return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} + return &ancestorQuery{name: a.name, Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *ancestorQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *ancestorQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge | queryProps.Reverse +} + +// position returns the ordinal of the current matched node within the axis +// traversal for the current input context node. This is required so numeric +// predicates like [1] or [2] on the ancestor axis resolve in axis order. +func (a *ancestorQuery) position() int { + return a.pos } // attributeQuery is an XPath attribute node query.(@*) type attributeQuery struct { + name string iterator func() NodeNavigator Input query @@ -162,11 +277,20 @@ func (a *attributeQuery) Test(n NodeNavigator) bool { } func (a *attributeQuery) Clone() query { - return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate} + return &attributeQuery{name: a.name, Input: a.Input.Clone(), Predicate: a.Predicate} +} + +func (a *attributeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (a *attributeQuery) Properties() queryProp { + return queryProps.Merge } // childQuery is an XPath child node query.(child::*) type childQuery struct { + name string posit int iterator func() NodeNavigator @@ -216,7 +340,15 @@ func (c *childQuery) Test(n NodeNavigator) bool { } func (c *childQuery) Clone() query { - return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate} + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *childQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *childQuery) Properties() queryProp { + return queryProps.Merge } // position returns a position of current NodeNavigator. @@ -224,8 +356,75 @@ func (c *childQuery) position() int { return c.posit } +type cachedChildQuery struct { + name string + posit int + iterator func() NodeNavigator + + Input query + Predicate func(NodeNavigator) bool +} + +func (c *cachedChildQuery) Select(t iterator) NodeNavigator { + for { + if c.iterator == nil { + c.posit = 0 + node := c.Input.Select(t) + if node == nil { + return nil + } + node = node.Copy() + first := true + c.iterator = func() NodeNavigator { + for { + if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) { + return nil + } + first = false + if c.Predicate(node) { + return node + } + } + } + } + + if node := c.iterator(); node != nil { + c.posit++ + return node + } + c.iterator = nil + } +} + +func (c *cachedChildQuery) Evaluate(t iterator) interface{} { + c.Input.Evaluate(t) + c.iterator = nil + return c +} + +func (c *cachedChildQuery) position() int { + return c.posit +} + +func (c *cachedChildQuery) Test(n NodeNavigator) bool { + return c.Predicate(n) +} + +func (c *cachedChildQuery) Clone() query { + return &childQuery{name: c.name, Input: c.Input.Clone(), Predicate: c.Predicate} +} + +func (c *cachedChildQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (c *cachedChildQuery) Properties() queryProp { + return queryProps.Merge +} + // descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*) type descendantQuery struct { + name string iterator func() NodeNavigator posit int level int @@ -245,14 +444,11 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } node = node.Copy() d.level = 0 - positmap := make(map[int]int) first := true d.iterator = func() NodeNavigator { - if first && d.Self { + if first { first = false - if d.Predicate(node) { - d.posit = 1 - positmap[d.level] = 1 + if d.Self && d.Predicate(node) { return node } } @@ -260,7 +456,6 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { for { if node.MoveToChild() { d.level = d.level + 1 - positmap[d.level] = 0 } else { for { if d.level == 0 { @@ -274,8 +469,6 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } } if d.Predicate(node) { - positmap[d.level]++ - d.posit = positmap[d.level] return node } } @@ -283,6 +476,7 @@ func (d *descendantQuery) Select(t iterator) NodeNavigator { } if node := d.iterator(); node != nil { + d.posit++ return node } d.iterator = nil @@ -309,7 +503,15 @@ func (d *descendantQuery) depth() int { } func (d *descendantQuery) Clone() query { - return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} + return &descendantQuery{name: d.name, Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} +} + +func (d *descendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantQuery) Properties() queryProp { + return queryProps.Merge } // followingQuery is an XPath following node query.(following::*|following-sibling::*) @@ -390,6 +592,14 @@ func (f *followingQuery) Clone() query { return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} } +func (f *followingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *followingQuery) Properties() queryProp { + return queryProps.Merge +} + func (f *followingQuery) position() int { return f.posit } @@ -471,6 +681,14 @@ func (p *precedingQuery) Clone() query { return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} } +func (p *precedingQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *precedingQuery) Properties() queryProp { + return queryProps.Merge | queryProps.Reverse +} + func (p *precedingQuery) position() int { return p.posit } @@ -503,6 +721,14 @@ func (p *parentQuery) Clone() query { return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate} } +func (p *parentQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (p *parentQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + func (p *parentQuery) Test(n NodeNavigator) bool { return p.Predicate(n) } @@ -539,12 +765,22 @@ func (s *selfQuery) Clone() query { return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate} } +func (s *selfQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (s *selfQuery) Properties() queryProp { + return queryProps.Merge +} + // filterQuery is an XPath query for predicate filter. type filterQuery struct { - Input query - Predicate query - posit int - positmap map[int]int + Input query + Predicate query + NoPosition bool + + posit int + positmap map[int]int } func (f *filterQuery) do(t iterator) bool { @@ -595,6 +831,8 @@ func (f *filterQuery) Select(t iterator) NodeNavigator { func (f *filterQuery) Evaluate(t iterator) interface{} { f.Input.Evaluate(t) + // Reset the position map when re-evaluating to ensure clean state + f.positmap = nil return f } @@ -602,6 +840,14 @@ func (f *filterQuery) Clone() query { return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} } +func (f *filterQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (f *filterQuery) Properties() queryProp { + return (queryProps.Position | f.Input.Properties()) & (queryProps.Reverse | queryProps.Merge) +} + // functionQuery is an XPath function that returns a computed value for // the Evaluate call of the current NodeNavigator node. Select call isn't // applicable for functionQuery. @@ -621,9 +867,20 @@ func (f *functionQuery) Evaluate(t iterator) interface{} { } func (f *functionQuery) Clone() query { + if f.Input == nil { + return &functionQuery{Func: f.Func} + } return &functionQuery{Input: f.Input.Clone(), Func: f.Func} } +func (f *functionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *functionQuery) Properties() queryProp { + return queryProps.Merge +} + // transformFunctionQuery diffs from functionQuery where the latter computes a scalar // value (number,string,boolean) for the current NodeNavigator node while the former // (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator @@ -652,6 +909,14 @@ func (f *transformFunctionQuery) Clone() query { return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func} } +func (f *transformFunctionQuery) ValueType() resultType { + return xpathResultType.Any +} + +func (f *transformFunctionQuery) Properties() queryProp { + return queryProps.Merge +} + // constantQuery is an XPath constant operand. type constantQuery struct { Val interface{} @@ -669,6 +934,14 @@ func (c *constantQuery) Clone() query { return c } +func (c *constantQuery) ValueType() resultType { + return getXPathType(c.Val) +} + +func (c *constantQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge +} + type groupQuery struct { posit int @@ -692,6 +965,14 @@ func (g *groupQuery) Clone() query { return &groupQuery{Input: g.Input.Clone()} } +func (g *groupQuery) ValueType() resultType { + return g.Input.ValueType() +} + +func (g *groupQuery) Properties() queryProp { + return queryProps.Position +} + func (g *groupQuery) position() int { return g.posit } @@ -704,15 +985,6 @@ type logicalQuery struct { } func (l *logicalQuery) Select(t iterator) NodeNavigator { - // When a XPath expr is logical expression. - node := t.Current().Copy() - val := l.Evaluate(t) - switch val.(type) { - case bool: - if val.(bool) == true { - return node - } - } return nil } @@ -726,11 +998,19 @@ func (l *logicalQuery) Clone() query { return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do} } +func (l *logicalQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (l *logicalQuery) Properties() queryProp { + return queryProps.Merge +} + // numericQuery is an XPath numeric operator expression. type numericQuery struct { Left, Right query - Do func(interface{}, interface{}) interface{} + Do func(iterator, interface{}, interface{}) interface{} } func (n *numericQuery) Select(t iterator) NodeNavigator { @@ -740,13 +1020,21 @@ func (n *numericQuery) Select(t iterator) NodeNavigator { func (n *numericQuery) Evaluate(t iterator) interface{} { m := n.Left.Evaluate(t) k := n.Right.Evaluate(t) - return n.Do(m, k) + return n.Do(t, m, k) } func (n *numericQuery) Clone() query { return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do} } +func (n *numericQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (n *numericQuery) Properties() queryProp { + return queryProps.Merge +} + type booleanQuery struct { IsOr bool Left, Right query @@ -837,6 +1125,14 @@ func (b *booleanQuery) Clone() query { return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()} } +func (b *booleanQuery) ValueType() resultType { + return xpathResultType.Boolean +} + +func (b *booleanQuery) Properties() queryProp { + return queryProps.Merge +} + type unionQuery struct { Left, Right query iterator func() NodeNavigator @@ -894,18 +1190,26 @@ func (u *unionQuery) Clone() query { return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()} } -type lastQuery struct { +func (u *unionQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (u *unionQuery) Properties() queryProp { + return queryProps.Merge +} + +type lastFuncQuery struct { buffer []NodeNavigator counted bool Input query } -func (q *lastQuery) Select(t iterator) NodeNavigator { +func (q *lastFuncQuery) Select(t iterator) NodeNavigator { return nil } -func (q *lastQuery) Evaluate(t iterator) interface{} { +func (q *lastFuncQuery) Evaluate(t iterator) interface{} { if !q.counted { for { node := q.Input.Select(t) @@ -919,27 +1223,172 @@ func (q *lastQuery) Evaluate(t iterator) interface{} { return float64(len(q.buffer)) } -func (q *lastQuery) Clone() query { - return &lastQuery{Input: q.Input.Clone()} +func (q *lastFuncQuery) Clone() query { + return &lastFuncQuery{Input: q.Input.Clone()} +} + +func (q *lastFuncQuery) ValueType() resultType { + return xpathResultType.Number +} + +func (q *lastFuncQuery) Properties() queryProp { + return queryProps.Merge +} + +type descendantOverDescendantQuery struct { + name string + level int + posit int + currentNode NodeNavigator + + Input query + MatchSelf bool + Predicate func(NodeNavigator) bool +} + +func (d *descendantOverDescendantQuery) moveToFirstChild() bool { + if d.currentNode.MoveToChild() { + d.level++ + return true + } + return false +} + +func (d *descendantOverDescendantQuery) moveUpUntilNext() bool { + for !d.currentNode.MoveToNext() { + d.level-- + if d.level == 0 { + return false + } + d.currentNode.MoveToParent() + } + return true +} + +func (d *descendantOverDescendantQuery) Select(t iterator) NodeNavigator { + for { + if d.level == 0 { + node := d.Input.Select(t) + if node == nil { + return nil + } + d.currentNode = node.Copy() + d.posit = 0 + if d.MatchSelf && d.Predicate(d.currentNode) { + d.posit = 1 + return d.currentNode + } + d.moveToFirstChild() + } else if !d.moveUpUntilNext() { + continue + } + for ok := true; ok; ok = d.moveToFirstChild() { + if d.Predicate(d.currentNode) { + d.posit++ + return d.currentNode + } + } + } +} + +func (d *descendantOverDescendantQuery) Evaluate(t iterator) interface{} { + d.Input.Evaluate(t) + return d +} + +func (d *descendantOverDescendantQuery) Clone() query { + return &descendantOverDescendantQuery{Input: d.Input.Clone(), Predicate: d.Predicate, MatchSelf: d.MatchSelf} +} + +func (d *descendantOverDescendantQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (d *descendantOverDescendantQuery) Properties() queryProp { + return queryProps.Merge +} + +func (d *descendantOverDescendantQuery) position() int { + return d.posit +} + +type mergeQuery struct { + Input query + Child query + + iterator func() NodeNavigator +} + +func (m *mergeQuery) Select(t iterator) NodeNavigator { + for { + if m.iterator == nil { + root := m.Input.Select(t) + if root == nil { + return nil + } + m.Child.Evaluate(t) + root = root.Copy() + t.Current().MoveTo(root) + var list []NodeNavigator + for node := m.Child.Select(t); node != nil; node = m.Child.Select(t) { + list = append(list, node.Copy()) + } + i := 0 + m.iterator = func() NodeNavigator { + if i >= len(list) { + return nil + } + result := list[i] + i++ + return result + } + } + + if node := m.iterator(); node != nil { + return node + } + m.iterator = nil + } +} + +func (m *mergeQuery) Evaluate(t iterator) interface{} { + m.Input.Evaluate(t) + return m +} + +func (m *mergeQuery) Clone() query { + return &mergeQuery{Input: m.Input.Clone(), Child: m.Child.Clone()} +} + +func (m *mergeQuery) ValueType() resultType { + return xpathResultType.NodeSet +} + +func (m *mergeQuery) Properties() queryProp { + return queryProps.Position | queryProps.Count | queryProps.Cached | queryProps.Merge } func getHashCode(n NodeNavigator) uint64 { var sb bytes.Buffer switch n.NodeType() { case AttributeNode, TextNode, CommentNode: - sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value())) + sb.WriteString(n.LocalName()) + sb.WriteByte('=') + sb.WriteString(n.Value()) // https://github.com/antchfx/htmlquery/issues/25 d := 1 for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) for n.MoveToParent() { d = 1 for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) } case ElementNode: sb.WriteString(n.Prefix() + n.LocalName()) @@ -947,18 +1396,20 @@ func getHashCode(n NodeNavigator) uint64 { for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) for n.MoveToParent() { d = 1 for n.MoveToPrevious() { d++ } - sb.WriteString(fmt.Sprintf("-%d", d)) + sb.WriteByte('-') + sb.WriteString(strconv.Itoa(d)) } } h := fnv.New64a() - h.Write([]byte(sb.String())) + h.Write(sb.Bytes()) return h.Sum64() } @@ -981,3 +1432,20 @@ func getNodeDepth(q query) int { } return 0 } + +func getXPathType(i interface{}) resultType { + v := reflect.ValueOf(i) + switch v.Kind() { + case reflect.Float64: + return xpathResultType.Number + case reflect.String: + return xpathResultType.String + case reflect.Bool: + return xpathResultType.Boolean + default: + if _, ok := i.(query); ok { + return xpathResultType.NodeSet + } + } + panic(fmt.Errorf("xpath unknown value type: %v", v.Kind())) +} diff --git a/vendor/github.com/antchfx/xpath/xpath.go b/vendor/github.com/antchfx/xpath/xpath.go index 1c0a5a2506..04bbe8d4c2 100644 --- a/vendor/github.com/antchfx/xpath/xpath.go +++ b/vendor/github.com/antchfx/xpath/xpath.go @@ -84,13 +84,13 @@ func (t *NodeIterator) Current() NodeNavigator { // MoveNext moves Navigator to the next match node. func (t *NodeIterator) MoveNext() bool { n := t.query.Select(t) - if n != nil { - if !t.node.MoveTo(n) { - t.node = n.Copy() - } - return true + if n == nil { + return false } - return false + if !t.node.MoveTo(n) { + t.node = n.Copy() + } + return true } // Select selects a node set using the specified XPath expression. diff --git a/vendor/modules.txt b/vendor/modules.txt index c96f3bd5be..415d14e33d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -21,7 +21,7 @@ github.com/antchfx/htmlquery # github.com/antchfx/xmlquery v1.3.17 ## explicit; go 1.14 github.com/antchfx/xmlquery -# github.com/antchfx/xpath v1.2.4 +# github.com/antchfx/xpath v1.3.6 ## explicit; go 1.14 github.com/antchfx/xpath # github.com/armon/go-radix v1.0.0