Feature: Support inputURL and inputHostname in scrapers (#6250)

This commit is contained in:
Gykes
2025-11-09 20:00:47 -08:00
committed by GitHub
parent f434c1f529
commit 678b3de7c8
4 changed files with 102 additions and 18 deletions

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"math"
"net/url"
"reflect"
"regexp"
"strconv"
@@ -24,6 +25,7 @@ type mappedQuery interface {
getType() QueryType
setType(QueryType)
subScrape(ctx context.Context, value string) mappedQuery
getURL() string
}
type commonMappedConfig map[string]string
@@ -43,6 +45,22 @@ func (s mappedConfig) applyCommon(c commonMappedConfig, src string) string {
return ret
}
// extractHostname parses a URL string and returns the hostname.
// Returns empty string if the URL cannot be parsed.
func extractHostname(urlStr string) string {
if urlStr == "" {
return ""
}
u, err := url.Parse(urlStr)
if err != nil {
logger.Warnf("Error parsing URL '%s': %s", urlStr, err.Error())
return ""
}
return u.Hostname()
}
type isMultiFunc func(key string) bool
func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonMappedConfig, isMulti isMultiFunc) mappedResults {
@@ -53,10 +71,16 @@ func (s mappedConfig) process(ctx context.Context, q mappedQuery, common commonM
if attrConfig.Fixed != "" {
// TODO - not sure if this needs to set _all_ indexes for the key
const i = 0
ret = ret.setSingleValue(i, k, attrConfig.Fixed)
// Support {inputURL} and {inputHostname} placeholders in fixed values
value := strings.ReplaceAll(attrConfig.Fixed, "{inputURL}", q.getURL())
value = strings.ReplaceAll(value, "{inputHostname}", extractHostname(q.getURL()))
ret = ret.setSingleValue(i, k, value)
} else {
selector := attrConfig.Selector
selector = s.applyCommon(common, selector)
// Support {inputURL} and {inputHostname} placeholders in selectors
selector = strings.ReplaceAll(selector, "{inputURL}", q.getURL())
selector = strings.ReplaceAll(selector, "{inputHostname}", extractHostname(q.getURL()))
found, err := q.runQuery(selector)
if err != nil {