Fix xpath comments text (#550)

This commit is contained in:
bnkai
2020-05-18 05:26:20 +03:00
committed by GitHub
parent 46746e6848
commit 0fc57ce1e0

View File

@@ -164,7 +164,7 @@ func (c xpathScraperAttrConfig) concatenateResults(nodes []*html.Node) string {
result := []string{} result := []string{}
for _, elem := range nodes { for _, elem := range nodes {
text := htmlquery.InnerText(elem) text := NodeText(elem)
text = commonPostProcess(text) text = commonPostProcess(text)
result = append(result, text) result = append(result, text)
@@ -220,7 +220,7 @@ func (c xpathScraperAttrConfig) applySubScraper(value string) string {
if subScraper.hasConcat() { if subScraper.hasConcat() {
result = subScraper.concatenateResults(found) result = subScraper.concatenateResults(found)
} else { } else {
result = htmlquery.InnerText(found[0]) result = NodeText(found[0])
result = commonPostProcess(result) result = commonPostProcess(result)
} }
@@ -277,7 +277,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
if len(found) > 0 { if len(found) > 0 {
for i, elem := range found { for i, elem := range found {
text := htmlquery.InnerText(elem) text := NodeText(elem)
text = commonPostProcess(text) text = commonPostProcess(text)
ret = ret.setKey(i, k, text) ret = ret.setKey(i, k, text)
@@ -297,7 +297,7 @@ func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) xP
ret = ret.setKey(i, k, result) ret = ret.setKey(i, k, result)
} else { } else {
for i, elem := range found { for i, elem := range found {
text := htmlquery.InnerText(elem) text := NodeText(elem)
text = commonPostProcess(text) text = commonPostProcess(text)
text = attrConfig.postProcess(text) text = attrConfig.postProcess(text)
@@ -603,3 +603,10 @@ func scrapePerformerNamesXPath(c scraperTypeConfig, name string) ([]*models.Scra
return scraper.scrapePerformers(doc) return scraper.scrapePerformers(doc)
} }
// NodeText returns the text of n for use as a scraped value.
//
// A nil node yields the empty string. Comment nodes are rendered through
// htmlquery.OutputHTML with self=true, because htmlquery.InnerText produces
// no text for them; every other node type is handed to htmlquery.InnerText.
func NodeText(n *html.Node) string {
	// Guard first: htmlquery.InnerText dereferences its argument, so a nil
	// node must never reach it.
	if n == nil {
		return ""
	}
	if n.Type == html.CommentNode {
		return htmlquery.OutputHTML(n, true)
	}
	return htmlquery.InnerText(n)
}