Skip cleaning for search by name scrape queries (#2059)

* Skip pp for search by name queries * upgrade htmlquery
2025-12-17 20:34:37 +03:00 · 2021-12-16 02:18:39 +02:00
parent 439c338049
commit 66dd239732
34 changed files with 10925 additions and 10665 deletions
--- a/vendor/github.com/antchfx/htmlquery/README.md
+++ b/vendor/github.com/antchfx/htmlquery/README.md
@@ -12,6 +12,16 @@ Overview

 `htmlquery` has a built-in query object caching feature based on [LRU](https://godoc.org/github.com/golang/groupcache/lru); it caches the recently used XPath query strings. Enabling query caching avoids re-compiling the XPath expression on each query. 

+You can visit this page to learn about the supported XPath(1.0/2.0) syntax. https://github.com/antchfx/xpath
+
+XPath query packages for Go
+===
+| Name                                              | Description                               |
+| ------------------------------------------------- | ----------------------------------------- |
+| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
+| [xmlquery](https://github.com/antchfx/xmlquery)   | XPath query package for the XML document  |
+| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
+
 Installation
 ====

@@ -60,15 +70,15 @@ list := htmlquery.Find(doc, "//a")
 #### Find all A elements that have `href` attribute.

 ```go
-list := range htmlquery.Find(doc, "//a[@href]")	
+list := htmlquery.Find(doc, "//a[@href]")	
 ```

 #### Find all A elements with `href` attribute and only return `href` value.

 ```go
-list := range htmlquery.Find(doc, "//a/@href")	
-for n := range list{
-	fmt.Println(htmlquery.InnerText(n)) // output @href value without A element.
+list := htmlquery.Find(doc, "//a/@href")	
+for _ , n := range list{
+	fmt.Println(htmlquery.SelectAttr(n, "href")) // output @href value
 }
 ```

@@ -78,6 +88,13 @@ for n := range list{
 a := htmlquery.FindOne(doc, "//a[3]")
 ```

+#### Find the child IMG element under an A element and print its `src` value.
+```go
+a := htmlquery.FindOne(doc, "//a")
+img := htmlquery.FindOne(a, "//img")
+fmt.Println(htmlquery.SelectAttr(img, "src")) // output @src value
+```
+
 #### Evaluate the number of all IMG element.

 ```go
@@ -87,6 +104,30 @@ fmt.Printf("total count is %f", v)
 ```


+Quick Starts
+===
+
+```go
+func main() {
+	doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
+	if err != nil {
+		panic(err)
+	}
+	// Find all news items.
+	list, err := htmlquery.QueryAll(doc, "//ol/li")
+	if err != nil {
+		panic(err)
+	}
+	for i, n := range list {
+		a := htmlquery.FindOne(n, "//a")
+		if a != nil {
+		    fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
+		}
+	}
+}
+```
+
+
 FAQ
 ====

@@ -117,52 +158,6 @@ BenchmarkDisableSelectorCache-4           500000              3162 ns/op
 htmlquery.DisableSelectorCache = true
 ```

-Changelogs
-===
-
-2019-11-19 
- Add built-in query object cache feature, avoid re-compilation for the same query string. [#16](https://github.com/antchfx/htmlquery/issues/16)
- Added LoadDoc [18](https://github.com/antchfx/htmlquery/pull/18)
-
-2019-10-05 
- Add new methods that compatible with invalid XPath expression error: `QueryAll` and `Query`.
- Add `QuerySelector` and `QuerySelectorAll` methods, supported reused your query object.
-
-2019-02-04
- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods.
-
-2018-12-28
- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6)
-
-Tutorial
-===
-
-```go
-func main() {
-	doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
-	if err != nil {
-		panic(err)
-	}
-	// Find all news item.
-	list, err := htmlquery.QueryAll(doc, "//ol/li")
-	if err != nil {
-		panic(err)
-	}
-	for i, n := range list {
-		a := htmlquery.FindOne(n, "//a")
-		fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
-	}
-}
-```
-
-List of supported XPath query packages
-===
-| Name                                              | Description                               |
-| ------------------------------------------------- | ----------------------------------------- |
-| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
-| [xmlquery](https://github.com/antchfx/xmlquery)   | XPath query package for the XML document  |
-| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
-
 Questions
 ===
 Please let me know if you have any questions.
--- a/vendor/github.com/antchfx/htmlquery/query.go
+++ b/vendor/github.com/antchfx/htmlquery/query.go
@@ -55,10 +55,10 @@ func QueryAll(top *html.Node, expr string) ([]*html.Node, error) {
 	return nodes, nil
 }

-// Query searches the html.Node that matches by the specified XPath expr,
-// and return the first element of matched html.Node.
+// Query runs the given XPath expression against the given html.Node and
+// returns the first matching html.Node, or nil if no matches are found.
 //
-// Return an error if the expression `expr` cannot be parsed.
+// Returns an error if the expression `expr` cannot be parsed.
 func Query(top *html.Node, expr string) (*html.Node, error) {
 	exp, err := getQuery(expr)
 	if err != nil {
@@ -83,11 +83,6 @@ func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
 	for t.MoveNext() {
 		nav := t.Current().(*NodeNavigator)
 		n := getCurrentNode(nav)
-		// avoid adding duplicate nodes.
-		if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode &&
-			nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) {
-			continue
-		}
 		elems = append(elems, n)
 	}
 	return elems
@@ -179,6 +174,19 @@ func SelectAttr(n *html.Node, name string) (val string) {
 	return
 }

+// ExistsAttr reports whether n has an attribute with the specified name.
+func ExistsAttr(n *html.Node, name string) bool {
+	if n == nil {
+		return false
+	}
+	for _, attr := range n.Attr {
+		if attr.Key == name {
+			return true
+		}
+	}
+	return false
+}
+
 // OutputHTML returns the text including tags name.
 func OutputHTML(n *html.Node, self bool) string {
 	var buf bytes.Buffer
--- a/vendor/github.com/antchfx/xpath/README.md
+++ b/vendor/github.com/antchfx/xpath/README.md
@@ -138,6 +138,7 @@ Supported Features
 `lang()`| ✗ |
 `last()`| ✓ |
 `local-name()`| ✓ |
+`matches()`| ✓ |
 `name()`| ✓ |
 `namespace-uri()`| ✓ |
 `normalize-space()`| ✓ |
--- a/vendor/github.com/antchfx/xpath/build.go
+++ b/vendor/github.com/antchfx/xpath/build.go
@@ -193,8 +193,23 @@ func (b *builder) processFunctionNode(root *functionNode) (query, error) {
 		if err != nil {
 			return nil, err
 		}
-
 		qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)}
+	case "matches":
+		//matches(string , pattern)
+		if len(root.Args) != 2 {
+			return nil, errors.New("xpath: matches function must have two parameters")
+		}
+		var (
+			arg1, arg2 query
+			err        error
+		)
+		if arg1, err = b.processNode(root.Args[0]); err != nil {
+			return nil, err
+		}
+		if arg2, err = b.processNode(root.Args[1]); err != nil {
+			return nil, err
+		}
+		qyOutput = &functionQuery{Input: b.firstInput, Func: matchesFunc(arg1, arg2)}
 	case "substring":
 		//substring( string , start [, length] )
 		if len(root.Args) < 2 {
@@ -435,13 +450,15 @@ func (b *builder) processOperatorNode(root *operatorNode) (query, error) {
 	}
 	var qyOutput query
 	switch root.Op {
-	case "+", "-", "div", "mod": // Numeric operator
+	case "+", "-", "*", "div", "mod": // Numeric operator
 		var exprFunc func(interface{}, interface{}) interface{}
 		switch root.Op {
 		case "+":
 			exprFunc = plusFunc
 		case "-":
 			exprFunc = minusFunc
+		case "*":
+			exprFunc = mulFunc
 		case "div":
 			exprFunc = divFunc
 		case "mod":
@@ -498,6 +515,12 @@ func (b *builder) processNode(root node) (q query, err error) {
 		q, err = b.processFunctionNode(root.(*functionNode))
 	case nodeOperator:
 		q, err = b.processOperatorNode(root.(*operatorNode))
+	case nodeGroup:
+		q, err = b.processNode(root.(*groupNode).Input)
+		if err != nil {
+			return
+		}
+		q = &groupQuery{Input: q}
 	}
 	return
 }
--- a/vendor/github.com/antchfx/xpath/cache.go
+++ b/vendor/github.com/antchfx/xpath/cache.go
@@ -0,0 +1,80 @@
+package xpath
+
+import (
+	"regexp"
+	"sync"
+)
+
+type loadFunc func(key interface{}) (interface{}, error)
+
+const (
+	defaultCap = 65536
+)
+
+// The reason we're building a simple capacity-resetting loading cache (when capacity reached) instead of using
+// something like github.com/hashicorp/golang-lru is primarily due to (not wanting to create) external dependency.
+// Currently this library has 0 external dep (other than go sdk), and supports go 1.6, 1.9, and 1.10 (and later).
+// Creating external lib dependencies (plus their transitive dependencies) would make things hard if not impossible.
+// We expect under most circumstances, the defaultCap is big enough for any long running services that use this
+// library if their xpath regexp cardinality is low. However, in extreme cases when the capacity is reached, we
+// simply reset the cache, taking a small subsequent perf hit (next to nothing considering amortization) in exchange
+// for avoiding a more complex and less performant LRU type of construct.
+type loadingCache struct {
+	sync.RWMutex
+	cap   int
+	load  loadFunc
+	m     map[interface{}]interface{}
+	reset int
+}
+
+// NewLoadingCache creates a new instance of a loading cache with capacity. Capacity must be >= 0, or
+// it will panic. Capacity == 0 means the cache growth is unbounded.
+func NewLoadingCache(load loadFunc, capacity int) *loadingCache {
+	if capacity < 0 {
+		panic("capacity must be >= 0")
+	}
+	return &loadingCache{cap: capacity, load: load, m: make(map[interface{}]interface{})}
+}
+
+func (c *loadingCache) get(key interface{}) (interface{}, error) {
+	c.RLock()
+	v, found := c.m[key]
+	c.RUnlock()
+	if found {
+		return v, nil
+	}
+	v, err := c.load(key)
+	if err != nil {
+		return nil, err
+	}
+	c.Lock()
+	if c.cap > 0 && len(c.m) >= c.cap {
+		c.m = map[interface{}]interface{}{key: v}
+		c.reset++
+	} else {
+		c.m[key] = v
+	}
+	c.Unlock()
+	return v, nil
+}
+
+var (
+	// RegexpCache is a loading cache for string -> *regexp.Regexp mapping. It is exported so that in rare cases
+	// client can customize load func and/or capacity.
+	RegexpCache = defaultRegexpCache()
+)
+
+func defaultRegexpCache() *loadingCache {
+	return NewLoadingCache(
+		func(key interface{}) (interface{}, error) {
+			return regexp.Compile(key.(string))
+		}, defaultCap)
+}
+
+func getRegexp(pattern string) (*regexp.Regexp, error) {
+	exp, err := RegexpCache.get(pattern)
+	if err != nil {
+		return nil, err
+	}
+	return exp.(*regexp.Regexp), nil
+}
--- a/vendor/github.com/antchfx/xpath/func.go
+++ b/vendor/github.com/antchfx/xpath/func.go
@@ -4,11 +4,26 @@ import (
 	"errors"
 	"fmt"
 	"math"
-	"regexp"
 	"strconv"
 	"strings"
+	"sync"
+	"unicode"
 )

+// stringBuilder is an interface that is compatible with both
+// strings.Builder (go 1.10+) and bytes.Buffer (< go 1.10).
+type stringBuilder interface {
+	WriteRune(r rune) (n int, err error)
+	WriteString(s string) (int, error)
+	Reset()
+	Grow(n int)
+	String() string
+}
+
+var builderPool = sync.Pool{New: func() interface{} {
+	return newStringBuilder()
+}}
+
 // The XPath function list.

 func predicate(q query) func(NodeNavigator) bool {
@@ -25,7 +40,7 @@ func predicate(q query) func(NodeNavigator) bool {
 func positionFunc(q query, t iterator) interface{} {
 	var (
 		count = 1
-		node  = t.Current()
+		node  = t.Current().Copy()
 	)
 	test := predicate(q)
 	for node.MoveToPrevious() {
@@ -40,7 +55,7 @@ func positionFunc(q query, t iterator) interface{} {
 func lastFunc(q query, t iterator) interface{} {
 	var (
 		count = 0
-		node  = t.Current()
+		node  = t.Current().Copy()
 	)
 	node.MoveToFirst()
 	test := predicate(q)
@@ -58,6 +73,7 @@ func lastFunc(q query, t iterator) interface{} {
 // countFunc is a XPath Node Set functions count(node-set).
 func countFunc(q query, t iterator) interface{} {
 	var count = 0
+	q = functionArgs(q)
 	test := predicate(q)
 	switch typ := q.Evaluate(t).(type) {
 	case query:
@@ -73,7 +89,7 @@ func countFunc(q query, t iterator) interface{} {
 // sumFunc is a XPath Node Set functions sum(node-set).
 func sumFunc(q query, t iterator) interface{} {
 	var sum float64
-	switch typ := q.Evaluate(t).(type) {
+	switch typ := functionArgs(q).Evaluate(t).(type) {
 	case query:
 		for node := typ.Select(t); node != nil; node = typ.Select(t) {
 			if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
@@ -116,19 +132,19 @@ func asNumber(t iterator, o interface{}) float64 {

 // ceilingFunc is a XPath Node Set functions ceiling(node-set).
 func ceilingFunc(q query, t iterator) interface{} {
-	val := asNumber(t, q.Evaluate(t))
+	val := asNumber(t, functionArgs(q).Evaluate(t))
 	return math.Ceil(val)
 }

 // floorFunc is a XPath Node Set functions floor(node-set).
 func floorFunc(q query, t iterator) interface{} {
-	val := asNumber(t, q.Evaluate(t))
+	val := asNumber(t, functionArgs(q).Evaluate(t))
 	return math.Floor(val)
 }

 // roundFunc is a XPath Node Set functions round(node-set).
 func roundFunc(q query, t iterator) interface{} {
-	val := asNumber(t, q.Evaluate(t))
+	val := asNumber(t, functionArgs(q).Evaluate(t))
 	//return math.Round(val)
 	return round(val)
 }
@@ -140,7 +156,7 @@ func nameFunc(arg query) func(query, iterator) interface{} {
 		if arg == nil {
 			v = t.Current()
 		} else {
-			v = arg.Select(t)
+			v = arg.Clone().Select(t)
 			if v == nil {
 				return ""
 			}
@@ -160,7 +176,7 @@ func localNameFunc(arg query) func(query, iterator) interface{} {
 		if arg == nil {
 			v = t.Current()
 		} else {
-			v = arg.Select(t)
+			v = arg.Clone().Select(t)
 			if v == nil {
 				return ""
 			}
@@ -177,7 +193,7 @@ func namespaceFunc(arg query) func(query, iterator) interface{} {
 			v = t.Current()
 		} else {
 			// Get the first node in the node-set if specified.
-			v = arg.Select(t)
+			v = arg.Clone().Select(t)
 			if v == nil {
 				return ""
 			}
@@ -201,7 +217,7 @@ func asBool(t iterator, v interface{}) bool {
 	case *NodeIterator:
 		return v.MoveNext()
 	case bool:
-		return bool(v)
+		return v
 	case float64:
 		return v != 0
 	case string:
@@ -239,19 +255,19 @@ func asString(t iterator, v interface{}) string {

 // booleanFunc is a XPath functions boolean([node-set]).
 func booleanFunc(q query, t iterator) interface{} {
-	v := q.Evaluate(t)
+	v := functionArgs(q).Evaluate(t)
 	return asBool(t, v)
 }

 // numberFunc is a XPath functions number([node-set]).
 func numberFunc(q query, t iterator) interface{} {
-	v := q.Evaluate(t)
+	v := functionArgs(q).Evaluate(t)
 	return asNumber(t, v)
 }

 // stringFunc is a XPath functions string([node-set]).
 func stringFunc(q query, t iterator) interface{} {
-	v := q.Evaluate(t)
+	v := functionArgs(q).Evaluate(t)
 	return asString(t, v)
 }

@@ -338,15 +354,39 @@ func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
 	}
 }

-var (
-	regnewline  = regexp.MustCompile(`[\r\n\t]`)
-	regseqspace = regexp.MustCompile(`\s{2,}`)
-)
+// matchesFunc is an XPath function that tests a given string against a regexp pattern.
+// Note: does not support https://www.w3.org/TR/xpath-functions-31/#func-matches 3rd optional `flags` argument; if
+// needed, directly put flags in the regexp pattern, such as `(?i)^pattern$` for `i` flag.
+func matchesFunc(arg1, arg2 query) func(query, iterator) interface{} {
+	return func(q query, t iterator) interface{} {
+		var s string
+		switch typ := functionArgs(arg1).Evaluate(t).(type) {
+		case string:
+			s = typ
+		case query:
+			node := typ.Select(t)
+			if node == nil {
+				return ""
+			}
+			s = node.Value()
+		}
+		var pattern string
+		var ok bool
+		if pattern, ok = functionArgs(arg2).Evaluate(t).(string); !ok {
+			panic(errors.New("matches() function second argument type must be string"))
+		}
+		re, err := getRegexp(pattern)
+		if err != nil {
+			panic(fmt.Errorf("matches() function second argument is not a valid regexp pattern, err: %s", err.Error()))
+		}
+		return re.MatchString(s)
+	}
+}

 // normalizespaceFunc is XPath functions normalize-space(string?)
 func normalizespaceFunc(q query, t iterator) interface{} {
 	var m string
-	switch typ := q.Evaluate(t).(type) {
+	switch typ := functionArgs(q).Evaluate(t).(type) {
 	case string:
 		m = typ
 	case query:
@@ -356,10 +396,26 @@ func normalizespaceFunc(q query, t iterator) interface{} {
 		}
 		m = node.Value()
 	}
-	m = strings.TrimSpace(m)
-	m = regnewline.ReplaceAllString(m, " ")
-	m = regseqspace.ReplaceAllString(m, " ")
-	return m
+	var b = builderPool.Get().(stringBuilder)
+	b.Grow(len(m))
+
+	runeStr := []rune(strings.TrimSpace(m))
+	l := len(runeStr)
+	for i := range runeStr {
+		r := runeStr[i]
+		isSpace := unicode.IsSpace(r)
+		if !(isSpace && (i+1 < l && unicode.IsSpace(runeStr[i+1]))) {
+			if isSpace {
+				r = ' '
+			}
+			b.WriteRune(r)
+		}
+	}
+	result := b.String()
+	b.Reset()
+	builderPool.Put(b)
+
+	return result
 }

 // substringFunc is XPath functions substring function returns a part of a given string.
@@ -466,7 +522,7 @@ func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
 		src := asString(t, functionArgs(arg2).Evaluate(t))
 		dst := asString(t, functionArgs(arg3).Evaluate(t))

-		var replace []string
+		replace := make([]string, 0, len(src))
 		for i, s := range src {
 			d := ""
 			if i < len(dst) {
@@ -491,7 +547,7 @@ func replaceFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {

 // notFunc is XPATH functions not(expression) function operation.
 func notFunc(q query, t iterator) interface{} {
-	switch v := q.Evaluate(t).(type) {
+	switch v := functionArgs(q).Evaluate(t).(type) {
 	case bool:
 		return !v
 	case query:
@@ -507,20 +563,25 @@ func notFunc(q query, t iterator) interface{} {
 // concat( string1 , string2 [, stringn]* )
 func concatFunc(args ...query) func(query, iterator) interface{} {
 	return func(q query, t iterator) interface{} {
-		var a []string
+		b := builderPool.Get().(stringBuilder)
 		for _, v := range args {
 			v = functionArgs(v)
+
 			switch v := v.Evaluate(t).(type) {
 			case string:
-				a = append(a, v)
+				b.WriteString(v)
 			case query:
 				node := v.Select(t)
 				if node != nil {
-					a = append(a, node.Value())
+					b.WriteString(node.Value())
 				}
 			}
 		}
-		return strings.Join(a, "")
+		result := b.String()
+		b.Reset()
+		builderPool.Put(b)
+
+		return result
 	}
 }

--- a/vendor/github.com/antchfx/xpath/func_go110.go
+++ b/vendor/github.com/antchfx/xpath/func_go110.go
@@ -2,8 +2,15 @@

 package xpath

-import "math"
+import (
+	"math"
+	"strings"
+)

 func round(f float64) int {
 	return int(math.Round(f))
 }
+
+func newStringBuilder() stringBuilder{ 
+	return &strings.Builder{}
+}
--- a/vendor/github.com/antchfx/xpath/func_pre_go110.go
+++ b/vendor/github.com/antchfx/xpath/func_pre_go110.go
@@ -2,7 +2,10 @@

 package xpath

-import "math"
+import (
+	"bytes"
+	"math"
+)

 // math.Round() is supported by Go 1.10+,
 // This method just compatible for version <1.10.
@@ -13,3 +16,7 @@ func round(f float64) int {
 	}
 	return int(f + math.Copysign(0.5, f))
 }
+
+func newStringBuilder() stringBuilder {
+	return &bytes.Buffer{}
+}
--- a/vendor/github.com/antchfx/xpath/operator.go
+++ b/vendor/github.com/antchfx/xpath/operator.go
@@ -173,7 +173,7 @@ func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool {
 	if y == nil {
 		return false
 	}
-	return cmpStringStringF(op,x.Value(),y.Value())
+	return cmpStringStringF(op, x.Value(), y.Value())
 }

 func cmpStringNumeric(t iterator, op string, m, n interface{}) bool {
--- a/vendor/github.com/antchfx/xpath/parse.go
+++ b/vendor/github.com/antchfx/xpath/parse.go
@@ -65,6 +65,7 @@ const (
 	nodeOperator
 	nodeVariable
 	nodeConstantOperand
+	nodeGroup
 )

 type parser struct {
@@ -104,6 +105,10 @@ func newFilterNode(n, m node) node {
 	return &filterNode{nodeType: nodeFilter, Input: n, Condition: m}
 }

+func newGroupNode(n node) node {
+	return &groupNode{nodeType: nodeGroup, Input: n}
+}
+
 // newRootNode returns a root node.
 func newRootNode(s string) node {
 	return &rootNode{nodeType: nodeRoot, slash: s}
@@ -492,6 +497,9 @@ func (p *parser) parsePrimaryExpr(n node) (opnd node) {
 	case itemLParens:
 		p.next()
 		opnd = p.parseExpression(n)
+		if opnd.Type() != nodeConstantOperand {
+			opnd = newGroupNode(opnd)
+		}
 		p.skipItem(itemRParens)
 	case itemName:
 		if p.r.canBeFunc && !isNodeType(p.r) {
@@ -587,6 +595,16 @@ func (o *operandNode) String() string {
 	return fmt.Sprintf("%v", o.Val)
 }

+// groupNode holds a parenthesized (grouped) node expression.
+type groupNode struct {
+	nodeType
+	Input node
+}
+
+func (g *groupNode) String() string {
+	return fmt.Sprintf("%s", g.Input)
+}
+
 // filterNode holds a condition filter.
 type filterNode struct {
 	nodeType
--- a/vendor/github.com/antchfx/xpath/query.go
+++ b/vendor/github.com/antchfx/xpath/query.go
@@ -76,6 +76,7 @@ func (a *ancestorQuery) Select(t iterator) NodeNavigator {
 				return nil
 			}
 			first := true
+			node = node.Copy()
 			a.iterator = func() NodeNavigator {
 				if first && a.Self {
 					first = false
@@ -668,6 +669,35 @@ func (c *constantQuery) Clone() query {
 	return c
 }

+type groupQuery struct {
+	posit int
+
+	Input query
+}
+
+func (g *groupQuery) Select(t iterator) NodeNavigator {
+	for {
+		node := g.Input.Select(t)
+		if node == nil {
+			return nil
+		}
+		g.posit++
+		return node.Copy()
+	}
+}
+
+func (g *groupQuery) Evaluate(t iterator) interface{} {
+	return g.Input.Evaluate(t)
+}
+
+func (g *groupQuery) Clone() query {
+	return &groupQuery{Input: g.Input}
+}
+
+func (g *groupQuery) position() int {
+	return g.posit
+}
+
 // logicalQuery is an XPath logical expression.
 type logicalQuery struct {
 	Left, Right query