mirror of
https://github.com/stashapp/stash.git
synced 2025-12-17 04:14:39 +03:00
Xpath scraping from URL (#285)
* Add xpath performer and scene scraping * Add studio scraping * Refactor code * Fix compile error * Don't overwrite performer URL during a scrape
This commit is contained in:
3
go.mod
3
go.mod
@@ -3,6 +3,8 @@ module github.com/stashapp/stash
|
||||
require (
|
||||
github.com/99designs/gqlgen v0.9.0
|
||||
github.com/PuerkitoBio/goquery v1.5.0
|
||||
github.com/antchfx/htmlquery v1.2.0
|
||||
github.com/antchfx/xpath v1.1.2 // indirect
|
||||
github.com/bmatcuk/doublestar v1.1.5
|
||||
github.com/disintegration/imaging v1.6.0
|
||||
github.com/go-chi/chi v4.0.2+incompatible
|
||||
@@ -21,6 +23,7 @@ require (
|
||||
github.com/vektah/gqlparser v1.1.2
|
||||
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4
|
||||
golang.org/x/image v0.0.0-20190118043309-183bebdce1b2 // indirect
|
||||
golang.org/x/net v0.0.0-20190522155817-f3200d17e092
|
||||
gopkg.in/yaml.v2 v2.2.2
|
||||
)
|
||||
|
||||
|
||||
5
go.sum
5
go.sum
@@ -30,6 +30,10 @@ github.com/andreyvit/diff v0.0.0-20170406064948-c7f18ee00883/go.mod h1:rCTlJbsFo
|
||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c=
|
||||
github.com/antchfx/htmlquery v1.2.0 h1:oKShnsGlnOHX6t4uj5OHgLKkABcJoqnXpqnscoi9Lpw=
|
||||
github.com/antchfx/htmlquery v1.2.0/go.mod h1:MS9yksVSQXls00iXkiMqXr0J+umL/AmxXKuP28SUJM8=
|
||||
github.com/antchfx/xpath v1.1.2 h1:YziPrtM0gEJBnhdUGxYcIVYXZ8FXbtbovxOi+UW/yWQ=
|
||||
github.com/antchfx/xpath v1.1.2/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||
github.com/apache/thrift v0.0.0-20180902110319-2566ecd5d999/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
|
||||
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
|
||||
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
|
||||
@@ -318,6 +322,7 @@ github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zV
|
||||
github.com/golang-migrate/migrate/v4 v4.3.1 h1:3eR1NY+pplX+m6yJ1fQf5dFWX3fBgUtZfDiaS/kJVu4=
|
||||
github.com/golang-migrate/migrate/v4 v4.3.1/go.mod h1:mJ89KBgbXmM3P49BqOxRL3riNF/ATlg5kMhm17GA0dE=
|
||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef h1:veQD95Isof8w9/WXiA+pa3tz3fJXkt5B7QaRBrM62gk=
|
||||
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||
github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E=
|
||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||
|
||||
@@ -19,16 +19,18 @@ type scraperAction string
|
||||
const (
|
||||
scraperActionScript scraperAction = "script"
|
||||
scraperActionStash scraperAction = "stash"
|
||||
scraperActionXPath scraperAction = "scrapeXPath"
|
||||
)
|
||||
|
||||
var allScraperAction = []scraperAction{
|
||||
scraperActionScript,
|
||||
scraperActionStash,
|
||||
scraperActionXPath,
|
||||
}
|
||||
|
||||
func (e scraperAction) IsValid() bool {
|
||||
switch e {
|
||||
case scraperActionScript:
|
||||
case scraperActionScript, scraperActionStash, scraperActionXPath:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -37,6 +39,7 @@ func (e scraperAction) IsValid() bool {
|
||||
type scraperTypeConfig struct {
|
||||
Action scraperAction `yaml:"action"`
|
||||
Script []string `yaml:"script,flow"`
|
||||
Scraper string `yaml:"scraper"`
|
||||
|
||||
scraperConfig *scraperConfig
|
||||
}
|
||||
@@ -96,6 +99,8 @@ type scrapePerformerByURLConfig struct {
|
||||
func (c *scrapePerformerByURLConfig) resolveFn() {
|
||||
if c.Action == scraperActionScript {
|
||||
c.performScrape = scrapePerformerURLScript
|
||||
} else if c.Action == scraperActionXPath {
|
||||
c.performScrape = scrapePerformerURLXpath
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,6 +129,8 @@ type scrapeSceneByURLConfig struct {
|
||||
func (c *scrapeSceneByURLConfig) resolveFn() {
|
||||
if c.Action == scraperActionScript {
|
||||
c.performScrape = scrapeSceneURLScript
|
||||
} else if c.Action == scraperActionXPath {
|
||||
c.performScrape = scrapeSceneURLXPath
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,7 +142,9 @@ type scraperConfig struct {
|
||||
PerformerByURL []*scrapePerformerByURLConfig `yaml:"performerByURL"`
|
||||
SceneByFragment *sceneByFragmentConfig `yaml:"sceneByFragment"`
|
||||
SceneByURL []*scrapeSceneByURLConfig `yaml:"sceneByURL"`
|
||||
|
||||
StashServer *stashServer `yaml:"stashServer"`
|
||||
XPathScrapers xpathScrapers `yaml:"xPathScrapers"`
|
||||
}
|
||||
|
||||
func loadScraperFromYAML(path string) (*scraperConfig, error) {
|
||||
|
||||
267
pkg/scraper/xpath.go
Normal file
267
pkg/scraper/xpath.go
Normal file
@@ -0,0 +1,267 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/antchfx/htmlquery"
|
||||
"golang.org/x/net/html"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
)
|
||||
|
||||
type commonXPathConfig map[string]string
|
||||
|
||||
func (c commonXPathConfig) applyCommon(src string) string {
|
||||
ret := src
|
||||
for commonKey, commonVal := range c {
|
||||
if strings.Contains(ret, commonKey) {
|
||||
ret = strings.Replace(ret, commonKey, commonVal, -1)
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
type xpathScraperConfig map[string]interface{}
|
||||
|
||||
func createXPathScraperConfig(src map[interface{}]interface{}) xpathScraperConfig {
|
||||
ret := make(xpathScraperConfig)
|
||||
|
||||
if src != nil {
|
||||
for k, v := range src {
|
||||
keyStr, isStr := k.(string)
|
||||
if isStr {
|
||||
ret[keyStr] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s xpathScraperConfig) process(doc *html.Node, common commonXPathConfig) []xPathResult {
|
||||
var ret []xPathResult
|
||||
|
||||
for k, v := range s {
|
||||
asStr, isStr := v.(string)
|
||||
|
||||
if isStr {
|
||||
// apply common
|
||||
if common != nil {
|
||||
asStr = common.applyCommon(asStr)
|
||||
}
|
||||
|
||||
found := htmlquery.Find(doc, asStr)
|
||||
if len(found) > 0 {
|
||||
for i, elem := range found {
|
||||
if i >= len(ret) {
|
||||
ret = append(ret, make(xPathResult))
|
||||
}
|
||||
|
||||
ret[i][k] = elem
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO - handle map type
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
type xpathScrapers map[string]*xpathScraper
|
||||
|
||||
type xpathScraper struct {
|
||||
Common commonXPathConfig `yaml:"common"`
|
||||
Scene xpathScraperConfig `yaml:"scene"`
|
||||
Performer xpathScraperConfig `yaml:"performer"`
|
||||
}
|
||||
|
||||
const (
|
||||
XPathScraperConfigSceneTags = "Tags"
|
||||
XPathScraperConfigScenePerformers = "Performers"
|
||||
XPathScraperConfigSceneStudio = "Studio"
|
||||
)
|
||||
|
||||
func (s xpathScraper) GetSceneSimple() xpathScraperConfig {
|
||||
// exclude the complex sub-configs
|
||||
ret := make(xpathScraperConfig)
|
||||
mapped := s.Scene
|
||||
|
||||
if mapped != nil {
|
||||
for k, v := range mapped {
|
||||
if k != XPathScraperConfigSceneTags && k != XPathScraperConfigScenePerformers && k != XPathScraperConfigSceneStudio {
|
||||
ret[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret
|
||||
}
|
||||
|
||||
func (s xpathScraper) getSceneSubMap(key string) xpathScraperConfig {
|
||||
var ret map[interface{}]interface{}
|
||||
mapped := s.Scene
|
||||
|
||||
if mapped != nil {
|
||||
v, ok := mapped[key]
|
||||
if ok {
|
||||
ret, _ = v.(map[interface{}]interface{})
|
||||
}
|
||||
}
|
||||
|
||||
if ret != nil {
|
||||
return createXPathScraperConfig(ret)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s xpathScraper) GetScenePerformers() xpathScraperConfig {
|
||||
return s.getSceneSubMap(XPathScraperConfigScenePerformers)
|
||||
}
|
||||
|
||||
func (s xpathScraper) GetSceneTags() xpathScraperConfig {
|
||||
return s.getSceneSubMap(XPathScraperConfigSceneTags)
|
||||
}
|
||||
|
||||
func (s xpathScraper) GetSceneStudio() xpathScraperConfig {
|
||||
return s.getSceneSubMap(XPathScraperConfigSceneStudio)
|
||||
}
|
||||
|
||||
func (s xpathScraper) scrapePerformer(doc *html.Node) (*models.ScrapedPerformer, error) {
|
||||
var ret models.ScrapedPerformer
|
||||
|
||||
performerMap := s.Performer
|
||||
if performerMap == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
results := performerMap.process(doc, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
func (s xpathScraper) scrapeScene(doc *html.Node) (*models.ScrapedScene, error) {
|
||||
var ret models.ScrapedScene
|
||||
|
||||
sceneMap := s.GetSceneSimple()
|
||||
if sceneMap == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
scenePerformersMap := s.GetScenePerformers()
|
||||
sceneTagsMap := s.GetSceneTags()
|
||||
sceneStudioMap := s.GetSceneStudio()
|
||||
|
||||
results := sceneMap.process(doc, s.Common)
|
||||
if len(results) > 0 {
|
||||
results[0].apply(&ret)
|
||||
|
||||
// now apply the performers and tags
|
||||
if scenePerformersMap != nil {
|
||||
performerResults := scenePerformersMap.process(doc, s.Common)
|
||||
|
||||
for _, p := range performerResults {
|
||||
performer := &models.ScrapedScenePerformer{}
|
||||
p.apply(performer)
|
||||
ret.Performers = append(ret.Performers, performer)
|
||||
}
|
||||
}
|
||||
|
||||
if sceneTagsMap != nil {
|
||||
tagResults := sceneTagsMap.process(doc, s.Common)
|
||||
|
||||
for _, p := range tagResults {
|
||||
tag := &models.ScrapedSceneTag{}
|
||||
p.apply(tag)
|
||||
ret.Tags = append(ret.Tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
if sceneStudioMap != nil {
|
||||
studioResults := sceneStudioMap.process(doc, s.Common)
|
||||
|
||||
if len(studioResults) > 0 {
|
||||
studio := &models.ScrapedSceneStudio{}
|
||||
studioResults[0].apply(studio)
|
||||
ret.Studio = studio
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &ret, nil
|
||||
}
|
||||
|
||||
type xPathResult map[string]*html.Node
|
||||
|
||||
func (r xPathResult) apply(dest interface{}) {
|
||||
destVal := reflect.ValueOf(dest)
|
||||
|
||||
// dest should be a pointer
|
||||
destVal = destVal.Elem()
|
||||
|
||||
for key, v := range r {
|
||||
field := destVal.FieldByName(key)
|
||||
|
||||
if field.IsValid() {
|
||||
value := htmlquery.InnerText(v)
|
||||
value = strings.TrimSpace(value)
|
||||
|
||||
// remove multiple whitespace and end lines
|
||||
re := regexp.MustCompile("\n")
|
||||
value = re.ReplaceAllString(value, "")
|
||||
re = regexp.MustCompile(" +")
|
||||
value = re.ReplaceAllString(value, " ")
|
||||
|
||||
var reflectValue reflect.Value
|
||||
if field.Kind() == reflect.Ptr {
|
||||
reflectValue = reflect.ValueOf(&value)
|
||||
} else {
|
||||
reflectValue = reflect.ValueOf(value)
|
||||
}
|
||||
|
||||
field.Set(reflectValue)
|
||||
} else {
|
||||
logger.Errorf("Field %s does not exist in %T", key, dest)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func scrapePerformerURLXpath(c scraperTypeConfig, url string) (*models.ScrapedPerformer, error) {
|
||||
scraper := c.scraperConfig.XPathScrapers[c.Scraper]
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := htmlquery.LoadURL(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return scraper.scrapePerformer(doc)
|
||||
}
|
||||
|
||||
func scrapeSceneURLXPath(c scraperTypeConfig, url string) (*models.ScrapedScene, error) {
|
||||
scraper := c.scraperConfig.XPathScrapers[c.Scraper]
|
||||
|
||||
if scraper == nil {
|
||||
return nil, errors.New("xpath scraper with name " + c.Scraper + " not found in config")
|
||||
}
|
||||
|
||||
doc, err := htmlquery.LoadURL(url)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return scraper.scrapeScene(doc)
|
||||
}
|
||||
732
pkg/scraper/xpath_test.go
Normal file
732
pkg/scraper/xpath_test.go
Normal file
@@ -0,0 +1,732 @@
|
||||
package scraper
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/antchfx/htmlquery"
|
||||
"github.com/stashapp/stash/pkg/models"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
// adapted from https://www.freeones.com/html/m_links/bio_Mia_Malkova.php
|
||||
const htmlDoc1 = `
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" dir="ltr">
|
||||
<head>
|
||||
<title>Freeones: Mia Malkova Biography</title>
|
||||
</head>
|
||||
<body data-babe="Mia Malkova">
|
||||
<div class="ContentBlock Block1">
|
||||
<div class="ContentBlockBody" style="padding: 0px;">
|
||||
<table id="biographyTable" border="0" cellspacing="0" cellpadding="0" width="100%">
|
||||
<tbody>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<div><b>Babe Name:</b></div>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
<a href="/html/m_links/Mia_Malkova/">Mia Malkova</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<div><b>Profession:</b></div>
|
||||
</td>
|
||||
<td class="paramvalue">Porn Star
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Ethnicity:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
Caucasian
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Country of Origin:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
|
||||
<span class="country-us">
|
||||
|
||||
United States
|
||||
<span>
|
||||
</span></span></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Date of Birth:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
July 1, 1992 (27 years old)
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Aliases:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Eye Color:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
Hazel
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Hair Color:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
Blonde
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Height:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
<script type="text/javascript">
|
||||
<!--
|
||||
heightcm = "171";
|
||||
morethenone = 'inch';
|
||||
feet = heightcm / 30.48;
|
||||
inches = (feet - Math.floor(feet)) * 30.48 / 2.54;
|
||||
|
||||
feet = Math.floor(feet);
|
||||
inches = inches.toFixed(0);
|
||||
|
||||
if (inches > 1) {
|
||||
morethenone = 'inches';
|
||||
}
|
||||
|
||||
if (heightcm == 0) {
|
||||
message = 'Unknown';
|
||||
} else {
|
||||
message = '171 cm - ' + feet + ' feet and ' + inches + ' ' + morethenone;
|
||||
}
|
||||
document.write(message);
|
||||
// -->
|
||||
</script>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Measurements:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
34C-26-36
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Fake boobs:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
No
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Career Start And End</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
2012 - 2019
|
||||
(7 Years In The Business)
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Tattoos:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
None
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<b>Piercings:</b>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
None
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="paramname">
|
||||
<div><b>Social Network Links:</b></div>
|
||||
</td>
|
||||
<td class="paramvalue">
|
||||
<ul id="socialmedia">
|
||||
<li class="twitter"><a href="https://twitter.com/MiaMalkova" target="_blank" alt="Mia Malkova Twitter" title="Mia Malkova Twitter">Twitter</a></li>
|
||||
<li class="facebook"><a href="https://www.facebook.com/MiaMalcove" target="_blank" alt="Mia Malkova Facebook" title="Mia Malkova Facebook">Facebook</a></li>
|
||||
<li class="youtube"><a href="https://www.youtube.com/channel/UCEPR0sZKa_ScMoyhemfB7nA" target="_blank" alt="Mia Malkova YouTube" title="Mia Malkova YouTube">YouTube</a></li>
|
||||
<li class="instagram"><a href="https://www.instagram.com/mia_malkova/" target="_blank" alt="Mia Malkova Instagram" title="Mia Malkova Instagram">Instagram</a></li>
|
||||
</ul>
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
func makeCommonXPath(attr string) string {
|
||||
return `//table[@id="biographyTable"]//tr/td[@class="paramname"]//b[text() = '` + attr + `']/ancestor::tr/td[@class="paramvalue"]`
|
||||
}
|
||||
|
||||
func makeXPathConfig() xpathScraperConfig {
|
||||
config := make(xpathScraperConfig)
|
||||
|
||||
config["Name"] = makeCommonXPath("Babe Name:") + `/a`
|
||||
config["Ethnicity"] = makeCommonXPath("Ethnicity:")
|
||||
config["Country"] = makeCommonXPath("Country of Origin:")
|
||||
config["Birthdate"] = makeCommonXPath("Date of Birth:")
|
||||
config["Aliases"] = makeCommonXPath("Aliases:")
|
||||
config["EyeColor"] = makeCommonXPath("Eye Color:")
|
||||
config["Measurements"] = makeCommonXPath("Measurements:")
|
||||
config["FakeTits"] = makeCommonXPath("Fake boobs:")
|
||||
config["Height"] = makeCommonXPath("Height:")
|
||||
// no colon in attribute header
|
||||
config["CareerLength"] = makeCommonXPath("Career Start And End")
|
||||
config["Tattoos"] = makeCommonXPath("Tattoos:")
|
||||
config["Piercings"] = makeCommonXPath("Piercings:")
|
||||
|
||||
return config
|
||||
}
|
||||
|
||||
func verifyField(t *testing.T, expected string, actual *string, field string) {
|
||||
t.Helper()
|
||||
|
||||
if actual == nil || *actual != expected {
|
||||
if actual == nil {
|
||||
t.Errorf("Expected %s to be set to %s, instead got nil", field, expected)
|
||||
} else {
|
||||
t.Errorf("Expected %s to be set to %s, instead got %s", field, expected, *actual)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestScrapePerformerXPath(t *testing.T) {
|
||||
reader := strings.NewReader(htmlDoc1)
|
||||
doc, err := htmlquery.Parse(reader)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error loading document: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
xpathConfig := makeXPathConfig()
|
||||
|
||||
scraper := xpathScraper{
|
||||
Performer: xpathConfig,
|
||||
}
|
||||
|
||||
performer, err := scraper.scrapePerformer(doc)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error scraping performer: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
const performerName = "Mia Malkova"
|
||||
const ethnicity = "Caucasian"
|
||||
const country = "United States"
|
||||
const birthdate = "July 1, 1992 (27 years old)"
|
||||
const aliases = "Mia Bliss, Madison Clover, Madison Swan, Mia Mountain, Jessica"
|
||||
const eyeColor = "Hazel"
|
||||
const measurements = "34C-26-36"
|
||||
const fakeTits = "No"
|
||||
const careerLength = "2012 - 2019"
|
||||
const tattoosPiercings = "None"
|
||||
|
||||
verifyField(t, performerName, performer.Name, "Name")
|
||||
verifyField(t, ethnicity, performer.Ethnicity, "Ethnicity")
|
||||
verifyField(t, country, performer.Country, "Country")
|
||||
verifyField(t, birthdate, performer.Birthdate, "Birthdate")
|
||||
verifyField(t, aliases, performer.Aliases, "Aliases")
|
||||
verifyField(t, eyeColor, performer.EyeColor, "EyeColor")
|
||||
verifyField(t, measurements, performer.Measurements, "Measurements")
|
||||
verifyField(t, fakeTits, performer.FakeTits, "FakeTits")
|
||||
|
||||
// TODO - this needs post-processing
|
||||
//verifyField(t, careerLength, performer.CareerLength, "CareerLength")
|
||||
|
||||
verifyField(t, tattoosPiercings, performer.Tattoos, "Tattoos")
|
||||
verifyField(t, tattoosPiercings, performer.Piercings, "Piercings")
|
||||
}
|
||||
|
||||
const sceneHTML = `
|
||||
<!DOCTYPE html>
|
||||
|
||||
<head>
|
||||
<title>Test Video - Pornhub.com</title>
|
||||
|
||||
<meta property="og:title" content="Test Video" />
|
||||
<meta property="og:description"
|
||||
content="Watch Test Video on Pornhub.com, the best hardcore porn site. Pornhub is home to the widest selection of free Babe sex videos full of the hottest pornstars. If you're craving 3some XXX movies you'll find them here." />
|
||||
<meta property="og:image"
|
||||
content="https://di.phncdn.com/videos/201910/13/254476211/thumbs_80/(m=eaAaGwObaaaa)(mh=_V1YEGdMFS1rEYoW)9.jpg" />
|
||||
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "http://schema.org/",
|
||||
"@type": "VideoObject",
|
||||
"name": "Test Video",
|
||||
"embedUrl": "https://www.pornhub.com/embed/ph5da270596459c",
|
||||
"duration": "PT00H33M27S",
|
||||
"thumbnailUrl": "https://di.phncdn.com/videos/201910/13/254476211/thumbs_80/(m=eaAaGwObaaaa)(mh=_V1YEGdMFS1rEYoW)9.jpg",
|
||||
"uploadDate": "2019-10-13T00:33:51+00:00",
|
||||
"description": "Watch Test Video on Pornhub.com, the best hardcore porn site. Pornhub is home to the widest selection of free Babe sex videos full of the hottest pornstars. If you're craving 3some XXX movies you'll find them here.",
|
||||
"author" : "Mia Malkova", "interactionStatistic": [
|
||||
{
|
||||
"@type": "InteractionCounter",
|
||||
"interactionType": "http://schema.org/WatchAction",
|
||||
"userInteractionCount": "5,908,861"
|
||||
},
|
||||
{
|
||||
"@type": "InteractionCounter",
|
||||
"interactionType": "http://schema.org/LikeAction",
|
||||
"userInteractionCount": "22,090"
|
||||
}
|
||||
]
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
|
||||
<body class="logged-out">
|
||||
<div class="container ">
|
||||
|
||||
|
||||
<div id="main-container" class="clearfix" data-delete-check="1" data-is-private="1" data-is-premium=""
|
||||
data-liu="0" data-next-shuffle="ph5da270596459c" data-pkey="" data-platform-pc="1" data-playlist-check="0"
|
||||
data-playlist-id-check="0" data-playlist-geo-check="0" data-friend="0" data-playlist-user-check="0"
|
||||
data-playlist-video-check="0" data-playlist-shuffle="0" data-shuffle-forward="ph5da270596459c"
|
||||
data-shuffle-back="ph5da270596459c" data-min-large="1350"
|
||||
data-video-title="Test Video">
|
||||
|
||||
<div id="vpContentContainer">
|
||||
<div id="hd-leftColVideoPage">
|
||||
<div class="video-wrapper">
|
||||
<div class="title-container">
|
||||
<i class="isMe tooltipTrig" data-title="Video of verified member"></i>
|
||||
<h1 class="title">
|
||||
<span class="inlineFree">Test Video</span>
|
||||
</h1>
|
||||
</div>
|
||||
|
||||
<div class="video-actions-container">
|
||||
<div class="video-actions-tabs">
|
||||
<div class="video-action-tab about-tab active">
|
||||
<div class="video-detailed-info">
|
||||
<div class="video-info-row">
|
||||
From:
|
||||
|
||||
<div class="usernameWrap clearfix" data-type="channel" data-userid="492538092"
|
||||
data-liu-user="0"
|
||||
data-json-url="/user/box?id=492538092&token=MTU3NzA1NTkzNIqATol8v_WrhmNTXkeflvG09C2U7UUT_NyoZUFa7iKq0mlzBkmdgAH1aNHZkJmIOHbbwmho1BehHDoA63K5Wn4."
|
||||
data-disable-popover="0">
|
||||
|
||||
<a rel="" href="/channels/sis-loves-me" class="bolded">Sis Loves Me</a>
|
||||
<div class="avatarPosition"></div>
|
||||
</div>
|
||||
|
||||
<span class="verified-icon flag tooltipTrig"
|
||||
data-title="Verified member"></span>
|
||||
- 87 videos
|
||||
<span class="subscribers-count"> 459466</span>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row">
|
||||
<div class="pornstarsWrapper">
|
||||
Pornstars:
|
||||
<a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
||||
data-mxptext="Alex D" data-id="251341" data-login="1"
|
||||
href="/pornstar/alex-d">Alex D <span
|
||||
class="psbox-link-container display-none"></span>
|
||||
</a>
|
||||
, <a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
||||
data-mxptext="Mia Malkova" data-id="10641" data-login="1"
|
||||
href="/pornstar/mia-malkova">Mia Malkova <span
|
||||
class="psbox-link-container display-none"></span>
|
||||
</a>
|
||||
, <a class="pstar-list-btn js-mxp" data-mxptype="Pornstar"
|
||||
data-mxptext="Riley Reid" data-id="5343" data-login="1"
|
||||
href="/pornstar/riley-reid">Riley Reid <span
|
||||
class="psbox-link-container display-none"></span>
|
||||
</a>
|
||||
<div class="tooltipTrig suggestBtn" data-title="Add a pornstar">
|
||||
<a class="add-btn-small add-pornstar-btn-2">+
|
||||
<span>Suggest</span></a>
|
||||
</div>
|
||||
<div id="deletePornstarResult" class="suggest-result"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row showLess">
|
||||
<div class="categoriesWrapper">
|
||||
Categories:
|
||||
<a href="/video?c=3"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Amateur</a>,
|
||||
<a href="/categories/babe"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Babe</a>,
|
||||
<a href="/video?c=13"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Blowjob</a>,
|
||||
<a href="/video?c=115"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Exclusive</a>,
|
||||
<a href="/hd"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">HD
|
||||
Porn</a>, <a href="/categories/pornstar"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Pornstar</a>,
|
||||
<a href="/video?c=24"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Public</a>,
|
||||
<a href="/video?c=131"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Pussy
|
||||
Licking</a>, <a href="/video?c=65"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Threesome</a>,
|
||||
<a href="/video?c=139"
|
||||
onclick="ga('send', 'event', 'Watch Page', 'click', 'Category');">Verified
|
||||
Models</a>
|
||||
<div class="tooltipTrig suggestBtn" data-title="Suggest Categories">
|
||||
<a id="categoryLink" class="add-btn-small ">+
|
||||
<span>Suggest</span></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row showLess">
|
||||
<div class="productionWrapper">
|
||||
Production:
|
||||
<a href="/video?p=professional" rel="nofollow"
|
||||
class="production">professional</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row showLess">
|
||||
<div class="tagsWrapper">
|
||||
Tags:
|
||||
<a href="/video/search?search=3some">3some</a>, <a
|
||||
href="/video?c=9">blonde</a>, <a href="/video?c=59">small tits</a>,
|
||||
<a href="/video/search?search=butt">butt</a>, <a
|
||||
href="/video/search?search=natural+tits">natural tits</a>, <a
|
||||
href="/video/search?search=petite">petite</a>, <a
|
||||
href="/video?c=24">public</a>, <a
|
||||
href="/video/search?search=outside">outside</a>, <a
|
||||
href="/video/search?search=car">car</a>, <a
|
||||
href="/video/search?search=garage">garage</a>, <a
|
||||
href="/video?c=65">threesome</a>, <a
|
||||
href="/video/search?search=bgg">bgg</a>, <a
|
||||
href="/video/search?search=girlfrien+d">girlfrien d</a>, <a
|
||||
href="/video/search?search=parking">parking</a>, <a
|
||||
href="/video/search?search=sex">sex</a>, <a
|
||||
href="/video/search?search=gagging">gagging</a>, <a
|
||||
href="/video?c=13">blowjob</a>, <a
|
||||
href="/video/search?search=bj">bj</a>, <a
|
||||
href="/video/search?search=double">double</a>, <a
|
||||
href="/video/search?search=ass">ass</a>
|
||||
<div class="tooltipTrig suggestBtn" data-title="Suggest Tags">
|
||||
<a id="tagLink" class="add-btn-small">+ <span>Suggest</span></a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row showLess">
|
||||
Added on: <span class="white">2 months ago</span>
|
||||
</div>
|
||||
|
||||
<div class="video-info-row showLess">
|
||||
Featured on: <span class="white">1 month ago</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="video-action-tab jump-to-tab">
|
||||
<div class="title">Jump to your favorite action</div>
|
||||
|
||||
<div class="filters mainFilter float-right">
|
||||
<div class="dropdownTrigger">
|
||||
<div>
|
||||
<span class="textFilter" id="tagSort">Sequence</span>
|
||||
<span class="arrowFilters"></span>
|
||||
</div>
|
||||
<ul class="filterListItem dropdownWrapper">
|
||||
<li class="active"><a class="actionTagSort"
|
||||
data-sort="seconds">Sequence</a></li>
|
||||
<li><a class="actionTagSort" data-sort="tag">Alphabetical</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="reset"></div>
|
||||
<div class="display-grid col-4 gap-row-none sortBy seconds">
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(862), ga('send', 'event', 'Video Page', 'click', 'Jump to Blowjob');">
|
||||
Blowjob </a>
|
||||
|
||||
<var>14:22</var>
|
||||
</li>
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1117), ga('send', 'event', 'Video Page', 'click', 'Jump to Reverse Cowgirl');">
|
||||
Reverse Cowgirl </a>
|
||||
|
||||
<var>18:37</var>
|
||||
</li>
|
||||
</ul>
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1182), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
||||
Cowgirl </a>
|
||||
|
||||
<var>19:42</var>
|
||||
</li>
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1625), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
||||
Cowgirl </a>
|
||||
|
||||
<var>27:05</var>
|
||||
</li>
|
||||
</ul>
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1822), ga('send', 'event', 'Video Page', 'click', 'Jump to Doggystyle');">
|
||||
Doggystyle </a>
|
||||
|
||||
<var>30:22</var>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
<div class="display-grid col-4 gap-row-none sortBy tag">
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(862), ga('send', 'event', 'Video Page', 'click', 'Jump to Blowjob');">
|
||||
Blowjob </a>
|
||||
|
||||
<var>14:22</var>
|
||||
</li>
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1117), ga('send', 'event', 'Video Page', 'click', 'Jump to Reverse Cowgirl');">
|
||||
Reverse Cowgirl </a>
|
||||
|
||||
<var>18:37</var>
|
||||
</li>
|
||||
</ul>
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1182), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
||||
Cowgirl </a>
|
||||
|
||||
<var>19:42</var>
|
||||
</li>
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1625), ga('send', 'event', 'Video Page', 'click', 'Jump to Cowgirl');">
|
||||
Cowgirl </a>
|
||||
|
||||
<var>27:05</var>
|
||||
</li>
|
||||
</ul>
|
||||
<ul class="actionTagList full-width margin-none">
|
||||
<li>
|
||||
<a class="js-triggerJumpCat"
|
||||
onclick="jumpToAction(1822), ga('send', 'event', 'Video Page', 'click', 'Jump to Doggystyle');">
|
||||
Doggystyle </a>
|
||||
|
||||
<var>30:22</var>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
func makeSceneXPathConfig() xpathScraper {
|
||||
common := make(commonXPathConfig)
|
||||
|
||||
common["$performerElem"] = `//div[@class="pornstarsWrapper"]/a[@data-mxptype="Pornstar"]`
|
||||
common["$studioElem"] = `//div[@data-type="channel"]/a`
|
||||
|
||||
config := make(xpathScraperConfig)
|
||||
|
||||
config["Title"] = `//meta[@property="og:title"]/@content`
|
||||
// this needs post-processing
|
||||
config["Date"] = `//script[@type="application/ld+json"]`
|
||||
|
||||
tagConfig := make(map[interface{}]interface{})
|
||||
tagConfig["Name"] = `//div[@class="categoriesWrapper"]//a[not(@class="add-btn-small ")]`
|
||||
config["Tags"] = tagConfig
|
||||
|
||||
performerConfig := make(map[interface{}]interface{})
|
||||
performerConfig["Name"] = `$performerElem/@data-mxptext`
|
||||
performerConfig["URL"] = `$performerElem/@href`
|
||||
config["Performers"] = performerConfig
|
||||
|
||||
studioConfig := make(map[interface{}]interface{})
|
||||
studioConfig["Name"] = `$studioElem`
|
||||
studioConfig["URL"] = `$studioElem/@href`
|
||||
config["Studio"] = studioConfig
|
||||
|
||||
scraper := xpathScraper{
|
||||
Scene: config,
|
||||
Common: common,
|
||||
}
|
||||
|
||||
return scraper
|
||||
}
|
||||
|
||||
func verifyTags(t *testing.T, expectedTagNames []string, actualTags []*models.ScrapedSceneTag) {
|
||||
t.Helper()
|
||||
|
||||
i := 0
|
||||
for i < len(expectedTagNames) || i < len(actualTags) {
|
||||
expectedTag := ""
|
||||
actualTag := ""
|
||||
if i < len(expectedTagNames) {
|
||||
expectedTag = expectedTagNames[i]
|
||||
}
|
||||
if i < len(actualTags) {
|
||||
actualTag = actualTags[i].Name
|
||||
}
|
||||
|
||||
if expectedTag != actualTag {
|
||||
t.Errorf("Expected tag %s, got %s", expectedTag, actualTag)
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
func verifyPerformers(t *testing.T, expectedNames []string, expectedURLs []string, actualPerformers []*models.ScrapedScenePerformer) {
|
||||
t.Helper()
|
||||
|
||||
i := 0
|
||||
for i < len(expectedNames) || i < len(actualPerformers) {
|
||||
expectedName := ""
|
||||
actualName := ""
|
||||
expectedURL := ""
|
||||
actualURL := ""
|
||||
if i < len(expectedNames) {
|
||||
expectedName = expectedNames[i]
|
||||
}
|
||||
if i < len(expectedURLs) {
|
||||
expectedURL = expectedURLs[i]
|
||||
}
|
||||
if i < len(actualPerformers) {
|
||||
actualName = actualPerformers[i].Name
|
||||
if actualPerformers[i].URL != nil {
|
||||
actualURL = *actualPerformers[i].URL
|
||||
}
|
||||
}
|
||||
|
||||
if expectedName != actualName {
|
||||
t.Errorf("Expected performer name %s, got %s", expectedName, actualName)
|
||||
}
|
||||
if expectedURL != actualURL {
|
||||
t.Errorf("Expected perfromer URL %s, got %s", expectedName, actualName)
|
||||
}
|
||||
i++
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplySceneXPathConfig(t *testing.T) {
|
||||
reader := strings.NewReader(sceneHTML)
|
||||
doc, err := htmlquery.Parse(reader)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error loading document: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
scraper := makeSceneXPathConfig()
|
||||
|
||||
scene, err := scraper.scrapeScene(doc)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error scraping scene: %s", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
const title = "Test Video"
|
||||
|
||||
verifyField(t, title, scene.Title, "Title")
|
||||
|
||||
// verify tags
|
||||
expectedTags := []string{
|
||||
"Amateur",
|
||||
"Babe",
|
||||
"Blowjob",
|
||||
"Exclusive",
|
||||
"HD Porn",
|
||||
"Pornstar",
|
||||
"Public",
|
||||
"Pussy Licking",
|
||||
"Threesome",
|
||||
"Verified Models",
|
||||
}
|
||||
verifyTags(t, expectedTags, scene.Tags)
|
||||
|
||||
expectedPerformerNames := []string{
|
||||
"Alex D",
|
||||
"Mia Malkova",
|
||||
"Riley Reid",
|
||||
}
|
||||
|
||||
expectedPerformerURLs := []string{
|
||||
"/pornstar/alex-d",
|
||||
"/pornstar/mia-malkova",
|
||||
"/pornstar/riley-reid",
|
||||
}
|
||||
|
||||
verifyPerformers(t, expectedPerformerNames, expectedPerformerURLs, scene.Performers)
|
||||
|
||||
const expectedStudioName = "Sis Loves Me"
|
||||
const expectedStudioURL = "/channels/sis-loves-me"
|
||||
|
||||
verifyField(t, expectedStudioName, &scene.Studio.Name, "Studio.Name")
|
||||
verifyField(t, expectedStudioURL, scene.Studio.URL, "Studio.URL")
|
||||
}
|
||||
|
||||
func TestLoadXPathScraperFromYAML(t *testing.T) {
|
||||
const yamlStr = `name: Test
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- test.com
|
||||
scraper: performerScraper
|
||||
xPathScrapers:
|
||||
performerScraper:
|
||||
performer:
|
||||
name: //h1[@itemprop="name"]
|
||||
`
|
||||
|
||||
config := &scraperConfig{}
|
||||
err := yaml.Unmarshal([]byte(yamlStr), &config)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Error loading yaml: %s", err.Error())
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -232,6 +232,11 @@ export const Performer: FunctionComponent<IPerformerProps> = (props: IPerformerP
|
||||
try {
|
||||
const result = await StashService.queryScrapePerformerURL(url);
|
||||
if (!result.data || !result.data.scrapePerformerURL) { return; }
|
||||
|
||||
// leave URL as is if not set explicitly
|
||||
if (!result.data.scrapePerformerURL.url) {
|
||||
result.data.scrapePerformerURL.url = url;
|
||||
}
|
||||
updatePerformerEditState(result.data.scrapePerformerURL);
|
||||
} catch (e) {
|
||||
ErrorUtils.handle(e);
|
||||
|
||||
32
vendor/github.com/antchfx/htmlquery/.gitignore
generated
vendored
Normal file
32
vendor/github.com/antchfx/htmlquery/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
16
vendor/github.com/antchfx/htmlquery/.travis.yml
generated
vendored
Normal file
16
vendor/github.com/antchfx/htmlquery/.travis.yml
generated
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.6
|
||||
- 1.7
|
||||
- 1.8
|
||||
|
||||
install:
|
||||
- go get golang.org/x/net/html/charset
|
||||
- go get golang.org/x/net/html
|
||||
- go get github.com/antchfx/xpath
|
||||
- go get github.com/mattn/goveralls
|
||||
- go get github.com/golang/groupcache
|
||||
|
||||
script:
|
||||
- $HOME/gopath/bin/goveralls -service=travis-ci
|
||||
17
vendor/github.com/antchfx/htmlquery/LICENSE
generated
vendored
Normal file
17
vendor/github.com/antchfx/htmlquery/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
158
vendor/github.com/antchfx/htmlquery/README.md
generated
vendored
Normal file
158
vendor/github.com/antchfx/htmlquery/README.md
generated
vendored
Normal file
@@ -0,0 +1,158 @@
|
||||
htmlquery
|
||||
====
|
||||
[](https://travis-ci.org/antchfx/htmlquery)
|
||||
[](https://coveralls.io/github/antchfx/htmlquery?branch=master)
|
||||
[](https://godoc.org/github.com/antchfx/htmlquery)
|
||||
[](https://goreportcard.com/report/github.com/antchfx/htmlquery)
|
||||
|
||||
Overview
|
||||
====
|
||||
|
||||
`htmlquery` is an XPath query package for HTML, lets you extract data or evaluate from HTML documents by an XPath expression.
|
||||
|
||||
`htmlquery` build-in the query object caching feature based on [LRU](https://godoc.org/github.com/golang/groupcache/lru), this feature will caching the recently used XPATH query string. enable caching can avoid re-compile XPath expression each query.
|
||||
|
||||
Installation
|
||||
====
|
||||
|
||||
```
|
||||
go get github.com/antchfx/htmlquery
|
||||
```
|
||||
|
||||
Getting Started
|
||||
====
|
||||
|
||||
#### Query, returns matched elements or error.
|
||||
|
||||
```go
|
||||
nodes, err := htmlquery.QueryAll(doc, "//a")
|
||||
if err != nil {
|
||||
panic(`not a valid XPath expression.`)
|
||||
}
|
||||
```
|
||||
|
||||
#### Load HTML document from URL.
|
||||
|
||||
```go
|
||||
doc, err := htmlquery.LoadURL("http://example.com/")
|
||||
```
|
||||
|
||||
#### Load HTML from document.
|
||||
|
||||
```go
|
||||
filePath := "/home/user/sample.html"
|
||||
doc, err := htmlquery.LoadDoc(filePath)
|
||||
```
|
||||
|
||||
#### Load HTML document from string.
|
||||
|
||||
```go
|
||||
s := `<html>....</html>`
|
||||
doc, err := htmlquery.Parse(strings.NewReader(s))
|
||||
```
|
||||
|
||||
#### Find all A elements.
|
||||
|
||||
```go
|
||||
list := htmlquery.Find(doc, "//a")
|
||||
```
|
||||
|
||||
#### Find all A elements that have `href` attribute.
|
||||
|
||||
```go
|
||||
list := range htmlquery.Find(doc, "//a[@href]")
|
||||
```
|
||||
|
||||
#### Find all A elements with `href` attribute and only return `href` value.
|
||||
|
||||
```go
|
||||
list := range htmlquery.Find(doc, "//a/@href")
|
||||
for n := range list{
|
||||
fmt.Println(htmlquery.InnerText(n)) // output @href value without A element.
|
||||
}
|
||||
```
|
||||
|
||||
### Find the third A element.
|
||||
|
||||
```go
|
||||
a := htmlquery.FindOne(doc, "//a[3]")
|
||||
```
|
||||
|
||||
#### Evaluate the number of all IMG element.
|
||||
|
||||
```go
|
||||
expr, _ := xpath.Compile("count(//img)")
|
||||
v := expr.Evaluate(htmlquery.CreateXPathNavigator(doc)).(float64)
|
||||
fmt.Printf("total count is %f", v)
|
||||
```
|
||||
|
||||
|
||||
FAQ
|
||||
====
|
||||
|
||||
#### `Find()` vs `QueryAll()`, which is better?
|
||||
|
||||
`Find` and `QueryAll` both do the same things, searches all of matched html nodes.
|
||||
The `Find` will panics if you give an error XPath query, but `QueryAll` will return an error for you.
|
||||
|
||||
#### Can I save my query expression object for the next query?
|
||||
|
||||
Yes, you can. We offer the `QuerySelector` and `QuerySelectorAll` methods, It will accept your query expression object.
|
||||
|
||||
Cache a query expression object(or reused) will avoid re-compile XPath query expression, improve your query performance.
|
||||
|
||||
#### Disable caching feature
|
||||
|
||||
```
|
||||
htmlquery.DisableSelectorCache = true
|
||||
```
|
||||
|
||||
Changelogs
|
||||
===
|
||||
|
||||
2019-11-19
|
||||
- Add built-in query object cache feature, avoid re-compilation for the same query string. [#16](https://github.com/antchfx/htmlquery/issues/16)
|
||||
- Added LoadDoc [18](https://github.com/antchfx/htmlquery/pull/18)
|
||||
|
||||
2019-10-05
|
||||
- Add new methods that compatible with invalid XPath expression error: `QueryAll` and `Query`.
|
||||
- Add `QuerySelector` and `QuerySelectorAll` methods, supported reused your query object.
|
||||
|
||||
2019-02-04
|
||||
- [#7](https://github.com/antchfx/htmlquery/issues/7) Removed deprecated `FindEach()` and `FindEachWithBreak()` methods.
|
||||
|
||||
2018-12-28
|
||||
- Avoid adding duplicate elements to list for `Find()` method. [#6](https://github.com/antchfx/htmlquery/issues/6)
|
||||
|
||||
Tutorial
|
||||
===
|
||||
|
||||
```go
|
||||
func main() {
|
||||
doc, err := htmlquery.LoadURL("https://www.bing.com/search?q=golang")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// Find all news item.
|
||||
list, err := htmlquery.QueryAll(doc, "//ol/li")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
for i, n := range list {
|
||||
a := htmlquery.FindOne(n, "//a")
|
||||
fmt.Printf("%d %s(%s)\n", i, htmlquery.InnerText(a), htmlquery.SelectAttr(a, "href"))
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
List of supported XPath query packages
|
||||
===
|
||||
| Name | Description |
|
||||
| ------------------------------------------------- | ----------------------------------------- |
|
||||
| [htmlquery](https://github.com/antchfx/htmlquery) | XPath query package for the HTML document |
|
||||
| [xmlquery](https://github.com/antchfx/xmlquery) | XPath query package for the XML document |
|
||||
| [jsonquery](https://github.com/antchfx/jsonquery) | XPath query package for the JSON document |
|
||||
|
||||
Questions
|
||||
===
|
||||
Please let me know if you have any questions.
|
||||
40
vendor/github.com/antchfx/htmlquery/cache.go
generated
vendored
Normal file
40
vendor/github.com/antchfx/htmlquery/cache.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
package htmlquery
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/golang/groupcache/lru"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
)
|
||||
|
||||
// DisableSelectorCache will disable caching for the query selector if value is true.
|
||||
var DisableSelectorCache = false
|
||||
|
||||
// SelectorCacheMaxEntries allows how many selector object can be caching. Default is 50.
|
||||
// Will disable caching if SelectorCacheMaxEntries <= 0.
|
||||
var SelectorCacheMaxEntries = 50
|
||||
|
||||
var (
|
||||
cacheOnce sync.Once
|
||||
cache *lru.Cache
|
||||
)
|
||||
|
||||
func getQuery(expr string) (*xpath.Expr, error) {
|
||||
if DisableSelectorCache || SelectorCacheMaxEntries <= 0 {
|
||||
return xpath.Compile(expr)
|
||||
}
|
||||
cacheOnce.Do(func() {
|
||||
cache = lru.New(50)
|
||||
})
|
||||
if v, ok := cache.Get(expr); ok {
|
||||
return v.(*xpath.Expr), nil
|
||||
}
|
||||
v, err := xpath.Compile(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cache.Add(expr, v)
|
||||
return v, nil
|
||||
|
||||
}
|
||||
338
vendor/github.com/antchfx/htmlquery/query.go
generated
vendored
Normal file
338
vendor/github.com/antchfx/htmlquery/query.go
generated
vendored
Normal file
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
Package htmlquery provides extract data from HTML documents using XPath expression.
|
||||
*/
|
||||
package htmlquery
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
|
||||
"github.com/antchfx/xpath"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
var _ xpath.NodeNavigator = &NodeNavigator{}
|
||||
|
||||
// CreateXPathNavigator creates a new xpath.NodeNavigator for the specified html.Node.
|
||||
func CreateXPathNavigator(top *html.Node) *NodeNavigator {
|
||||
return &NodeNavigator{curr: top, root: top, attr: -1}
|
||||
}
|
||||
|
||||
// Find is like QueryAll but Will panics if the expression `expr` cannot be parsed.
|
||||
//
|
||||
// See `QueryAll()` function.
|
||||
func Find(top *html.Node, expr string) []*html.Node {
|
||||
nodes, err := QueryAll(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return nodes
|
||||
}
|
||||
|
||||
// FindOne is like Query but will panics if the expression `expr` cannot be parsed.
|
||||
// See `Query()` function.
|
||||
func FindOne(top *html.Node, expr string) *html.Node {
|
||||
node, err := Query(top, expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return node
|
||||
}
|
||||
|
||||
// QueryAll searches the html.Node that matches by the specified XPath expr.
|
||||
// Return an error if the expression `expr` cannot be parsed.
|
||||
func QueryAll(top *html.Node, expr string) ([]*html.Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodes := QuerySelectorAll(top, exp)
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
// Query searches the html.Node that matches by the specified XPath expr,
|
||||
// and return the first element of matched html.Node.
|
||||
//
|
||||
// Return an error if the expression `expr` cannot be parsed.
|
||||
func Query(top *html.Node, expr string) (*html.Node, error) {
|
||||
exp, err := getQuery(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return QuerySelector(top, exp), nil
|
||||
}
|
||||
|
||||
// QuerySelector returns the first matched html.Node by the specified XPath selector.
|
||||
func QuerySelector(top *html.Node, selector *xpath.Expr) *html.Node {
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
if t.MoveNext() {
|
||||
return getCurrentNode(t.Current().(*NodeNavigator))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// QuerySelectorAll searches all of the html.Node that matches the specified XPath selectors.
|
||||
func QuerySelectorAll(top *html.Node, selector *xpath.Expr) []*html.Node {
|
||||
var elems []*html.Node
|
||||
t := selector.Select(CreateXPathNavigator(top))
|
||||
for t.MoveNext() {
|
||||
nav := t.Current().(*NodeNavigator)
|
||||
n := getCurrentNode(nav)
|
||||
// avoid adding duplicate nodes.
|
||||
if len(elems) > 0 && (elems[0] == n || (nav.NodeType() == xpath.AttributeNode &&
|
||||
nav.LocalName() == elems[0].Data && nav.Value() == InnerText(elems[0]))) {
|
||||
continue
|
||||
}
|
||||
elems = append(elems, n)
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
||||
// LoadURL loads the HTML document from the specified URL.
|
||||
func LoadURL(url string) (*html.Node, error) {
|
||||
resp, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
r, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
// LoadDoc loads the HTML document from the specified file path.
|
||||
func LoadDoc(path string) (*html.Node, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return html.Parse(bufio.NewReader(f))
|
||||
}
|
||||
|
||||
func getCurrentNode(n *NodeNavigator) *html.Node {
|
||||
if n.NodeType() == xpath.AttributeNode {
|
||||
childNode := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: n.Value(),
|
||||
}
|
||||
return &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: n.LocalName(),
|
||||
FirstChild: childNode,
|
||||
LastChild: childNode,
|
||||
}
|
||||
|
||||
}
|
||||
return n.curr
|
||||
}
|
||||
|
||||
// Parse returns the parse tree for the HTML from the given Reader.
|
||||
func Parse(r io.Reader) (*html.Node, error) {
|
||||
return html.Parse(r)
|
||||
}
|
||||
|
||||
// InnerText returns the text between the start and end tags of the object.
|
||||
func InnerText(n *html.Node) string {
|
||||
var output func(*bytes.Buffer, *html.Node)
|
||||
output = func(buf *bytes.Buffer, n *html.Node) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
buf.WriteString(n.Data)
|
||||
return
|
||||
case html.CommentNode:
|
||||
return
|
||||
}
|
||||
for child := n.FirstChild; child != nil; child = child.NextSibling {
|
||||
output(buf, child)
|
||||
}
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
output(&buf, n)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// SelectAttr returns the attribute value with the specified name.
|
||||
func SelectAttr(n *html.Node, name string) (val string) {
|
||||
if n == nil {
|
||||
return
|
||||
}
|
||||
if n.Type == html.ElementNode && n.Parent == nil && name == n.Data {
|
||||
return InnerText(n)
|
||||
}
|
||||
for _, attr := range n.Attr {
|
||||
if attr.Key == name {
|
||||
val = attr.Val
|
||||
break
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// OutputHTML returns the text including tags name.
|
||||
func OutputHTML(n *html.Node, self bool) string {
|
||||
var buf bytes.Buffer
|
||||
if self {
|
||||
html.Render(&buf, n)
|
||||
} else {
|
||||
for n := n.FirstChild; n != nil; n = n.NextSibling {
|
||||
html.Render(&buf, n)
|
||||
}
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
type NodeNavigator struct {
|
||||
root, curr *html.Node
|
||||
attr int
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Current() *html.Node {
|
||||
return h.curr
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) NodeType() xpath.NodeType {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return xpath.CommentNode
|
||||
case html.TextNode:
|
||||
return xpath.TextNode
|
||||
case html.DocumentNode:
|
||||
return xpath.RootNode
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return xpath.AttributeNode
|
||||
}
|
||||
return xpath.ElementNode
|
||||
case html.DoctypeNode:
|
||||
// ignored <!DOCTYPE HTML> declare and as Root-Node type.
|
||||
return xpath.RootNode
|
||||
}
|
||||
panic(fmt.Sprintf("unknown HTML node type: %v", h.curr.Type))
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) LocalName() string {
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Key
|
||||
}
|
||||
return h.curr.Data
|
||||
}
|
||||
|
||||
func (*NodeNavigator) Prefix() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Value() string {
|
||||
switch h.curr.Type {
|
||||
case html.CommentNode:
|
||||
return h.curr.Data
|
||||
case html.ElementNode:
|
||||
if h.attr != -1 {
|
||||
return h.curr.Attr[h.attr].Val
|
||||
}
|
||||
return InnerText(h.curr)
|
||||
case html.TextNode:
|
||||
return h.curr.Data
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) Copy() xpath.NodeNavigator {
|
||||
n := *h
|
||||
return &n
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToRoot() {
|
||||
h.curr = h.root
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToParent() bool {
|
||||
if h.attr != -1 {
|
||||
h.attr = -1
|
||||
return true
|
||||
} else if node := h.curr.Parent; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNextAttribute() bool {
|
||||
if h.attr >= len(h.curr.Attr)-1 {
|
||||
return false
|
||||
}
|
||||
h.attr++
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToChild() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.FirstChild; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToFirst() bool {
|
||||
if h.attr != -1 || h.curr.PrevSibling == nil {
|
||||
return false
|
||||
}
|
||||
for {
|
||||
node := h.curr.PrevSibling
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
h.curr = node
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) String() string {
|
||||
return h.Value()
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToNext() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.NextSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveToPrevious() bool {
|
||||
if h.attr != -1 {
|
||||
return false
|
||||
}
|
||||
if node := h.curr.PrevSibling; node != nil {
|
||||
h.curr = node
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (h *NodeNavigator) MoveTo(other xpath.NodeNavigator) bool {
|
||||
node, ok := other.(*NodeNavigator)
|
||||
if !ok || node.root != h.root {
|
||||
return false
|
||||
}
|
||||
|
||||
h.curr = node.curr
|
||||
h.attr = node.attr
|
||||
return true
|
||||
}
|
||||
32
vendor/github.com/antchfx/xpath/.gitignore
generated
vendored
Normal file
32
vendor/github.com/antchfx/xpath/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
# vscode
|
||||
.vscode
|
||||
debug
|
||||
*.test
|
||||
|
||||
./build
|
||||
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
12
vendor/github.com/antchfx/xpath/.travis.yml
generated
vendored
Normal file
12
vendor/github.com/antchfx/xpath/.travis.yml
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
language: go
|
||||
|
||||
go:
|
||||
- 1.6
|
||||
- 1.9
|
||||
- '1.10'
|
||||
|
||||
install:
|
||||
- go get github.com/mattn/goveralls
|
||||
|
||||
script:
|
||||
- $HOME/gopath/bin/goveralls -service=travis-ci
|
||||
17
vendor/github.com/antchfx/xpath/LICENSE
generated
vendored
Normal file
17
vendor/github.com/antchfx/xpath/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
170
vendor/github.com/antchfx/xpath/README.md
generated
vendored
Normal file
170
vendor/github.com/antchfx/xpath/README.md
generated
vendored
Normal file
@@ -0,0 +1,170 @@
|
||||
XPath
|
||||
====
|
||||
[](https://godoc.org/github.com/antchfx/xpath)
|
||||
[](https://coveralls.io/github/antchfx/xpath?branch=master)
|
||||
[](https://travis-ci.org/antchfx/xpath)
|
||||
[](https://goreportcard.com/report/github.com/antchfx/xpath)
|
||||
|
||||
XPath is Go package provides selecting nodes from XML, HTML or other documents using XPath expression.
|
||||
|
||||
Implementation
|
||||
===
|
||||
|
||||
- [htmlquery](https://github.com/antchfx/htmlquery) - an XPath query package for HTML document
|
||||
|
||||
- [xmlquery](https://github.com/antchfx/xmlquery) - an XPath query package for XML document.
|
||||
|
||||
- [jsonquery](https://github.com/antchfx/jsonquery) - an XPath query package for JSON document
|
||||
|
||||
Supported Features
|
||||
===
|
||||
|
||||
#### The basic XPath patterns.
|
||||
|
||||
> The basic XPath patterns cover 90% of the cases that most stylesheets will need.
|
||||
|
||||
- `node` : Selects all child elements with nodeName of node.
|
||||
|
||||
- `*` : Selects all child elements.
|
||||
|
||||
- `@attr` : Selects the attribute attr.
|
||||
|
||||
- `@*` : Selects all attributes.
|
||||
|
||||
- `node()` : Matches an org.w3c.dom.Node.
|
||||
|
||||
- `text()` : Matches a org.w3c.dom.Text node.
|
||||
|
||||
- `comment()` : Matches a comment.
|
||||
|
||||
- `.` : Selects the current node.
|
||||
|
||||
- `..` : Selects the parent of current node.
|
||||
|
||||
- `/` : Selects the document node.
|
||||
|
||||
- `a[expr]` : Select only those nodes matching a which also satisfy the expression expr.
|
||||
|
||||
- `a[n]` : Selects the nth matching node matching a When a filter's expression is a number, XPath selects based on position.
|
||||
|
||||
- `a/b` : For each node matching a, add the nodes matching b to the result.
|
||||
|
||||
- `a//b` : For each node matching a, add the descendant nodes matching b to the result.
|
||||
|
||||
- `//b` : Returns elements in the entire document matching b.
|
||||
|
||||
- `a|b` : All nodes matching a or b, union operation(not boolean or).
|
||||
|
||||
- `(a, b, c)` : Evaluates each of its operands and concatenates the resulting sequences, in order, into a single result sequence
|
||||
|
||||
|
||||
#### Node Axes
|
||||
|
||||
- `child::*` : The child axis selects children of the current node.
|
||||
|
||||
- `descendant::*` : The descendant axis selects descendants of the current node. It is equivalent to '//'.
|
||||
|
||||
- `descendant-or-self::*` : Selects descendants including the current node.
|
||||
|
||||
- `attribute::*` : Selects attributes of the current element. It is equivalent to @*
|
||||
|
||||
- `following-sibling::*` : Selects nodes after the current node.
|
||||
|
||||
- `preceding-sibling::*` : Selects nodes before the current node.
|
||||
|
||||
- `following::*` : Selects the first matching node following in document order, excluding descendants.
|
||||
|
||||
- `preceding::*` : Selects the first matching node preceding in document order, excluding ancestors.
|
||||
|
||||
- `parent::*` : Selects the parent if it matches. The '..' pattern from the core is equivalent to 'parent::node()'.
|
||||
|
||||
- `ancestor::*` : Selects matching ancestors.
|
||||
|
||||
- `ancestor-or-self::*` : Selects ancestors including the current node.
|
||||
|
||||
- `self::*` : Selects the current node. '.' is equivalent to 'self::node()'.
|
||||
|
||||
#### Expressions
|
||||
|
||||
The gxpath supported three types: number, boolean, string.
|
||||
|
||||
- `path` : Selects nodes based on the path.
|
||||
|
||||
- `a = b` : Standard comparisons.
|
||||
|
||||
* a = b True if a equals b.
|
||||
* a != b True if a is not equal to b.
|
||||
* a < b True if a is less than b.
|
||||
* a <= b True if a is less than or equal to b.
|
||||
* a > b True if a is greater than b.
|
||||
* a >= b True if a is greater than or equal to b.
|
||||
|
||||
- `a + b` : Arithmetic expressions.
|
||||
|
||||
* `- a` Unary minus
|
||||
* a + b Add
|
||||
* a - b Substract
|
||||
* a * b Multiply
|
||||
* a div b Divide
|
||||
* a mod b Floating point mod, like Java.
|
||||
|
||||
- `a or b` : Boolean `or` operation.
|
||||
|
||||
- `a and b` : Boolean `and` operation.
|
||||
|
||||
- `(expr)` : Parenthesized expressions.
|
||||
|
||||
- `fun(arg1, ..., argn)` : Function calls:
|
||||
|
||||
| Function | Supported |
|
||||
| --- | --- |
|
||||
`boolean()`| ✓ |
|
||||
`ceiling()`| ✓ |
|
||||
`choose()`| ✗ |
|
||||
`concat()`| ✓ |
|
||||
`contains()`| ✓ |
|
||||
`count()`| ✓ |
|
||||
`current()`| ✗ |
|
||||
`document()`| ✗ |
|
||||
`element-available()`| ✗ |
|
||||
`ends-with()`| ✓ |
|
||||
`false()`| ✓ |
|
||||
`floor()`| ✓ |
|
||||
`format-number()`| ✗ |
|
||||
`function-available()`| ✗ |
|
||||
`generate-id()`| ✗ |
|
||||
`id()`| ✗ |
|
||||
`key()`| ✗ |
|
||||
`lang()`| ✗ |
|
||||
`last()`| ✓ |
|
||||
`local-name()`| ✓ |
|
||||
`name()`| ✓ |
|
||||
`namespace-uri()`| ✓ |
|
||||
`normalize-space()`| ✓ |
|
||||
`not()`| ✓ |
|
||||
`number()`| ✓ |
|
||||
`position()`| ✓ |
|
||||
`round()`| ✓ |
|
||||
`starts-with()`| ✓ |
|
||||
`string()`| ✓ |
|
||||
`string-length()`| ✓ |
|
||||
`substring()`| ✓ |
|
||||
`substring-after()`| ✓ |
|
||||
`substring-before()`| ✓ |
|
||||
`sum()`| ✓ |
|
||||
`system-property()`| ✗ |
|
||||
`translate()`| ✓ |
|
||||
`true()`| ✓ |
|
||||
`unparsed-entity-url()` | ✗ |
|
||||
|
||||
Changelogs
|
||||
===
|
||||
|
||||
2019-03-19
|
||||
- optimize XPath `|` operation performance. [#33](https://github.com/antchfx/xpath/issues/33). Tips: suggest split into multiple subquery if you have a lot of `|` operations.
|
||||
|
||||
2019-01-29
|
||||
- improvement `normalize-space` function. [#32](https://github.com/antchfx/xpath/issues/32)
|
||||
|
||||
2018-12-07
|
||||
- supports XPath 2.0 Sequence expressions. [#30](https://github.com/antchfx/xpath/pull/30) by [@minherz](https://github.com/minherz).
|
||||
483
vendor/github.com/antchfx/xpath/build.go
generated
vendored
Normal file
483
vendor/github.com/antchfx/xpath/build.go
generated
vendored
Normal file
@@ -0,0 +1,483 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
type flag int
|
||||
|
||||
const (
|
||||
noneFlag flag = iota
|
||||
filterFlag
|
||||
)
|
||||
|
||||
// builder provides building an XPath expressions.
|
||||
type builder struct {
|
||||
depth int
|
||||
flag flag
|
||||
firstInput query
|
||||
}
|
||||
|
||||
// axisPredicate creates a predicate to predicating for this axis node.
|
||||
func axisPredicate(root *axisNode) func(NodeNavigator) bool {
|
||||
// get current axix node type.
|
||||
typ := ElementNode
|
||||
switch root.AxeType {
|
||||
case "attribute":
|
||||
typ = AttributeNode
|
||||
case "self", "parent":
|
||||
typ = allNode
|
||||
default:
|
||||
switch root.Prop {
|
||||
case "comment":
|
||||
typ = CommentNode
|
||||
case "text":
|
||||
typ = TextNode
|
||||
// case "processing-instruction":
|
||||
// typ = ProcessingInstructionNode
|
||||
case "node":
|
||||
typ = allNode
|
||||
}
|
||||
}
|
||||
nametest := root.LocalName != "" || root.Prefix != ""
|
||||
predicate := func(n NodeNavigator) bool {
|
||||
if typ == n.NodeType() || typ == allNode || typ == TextNode {
|
||||
if nametest {
|
||||
if root.LocalName == n.LocalName() && root.Prefix == n.Prefix() {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
return predicate
|
||||
}
|
||||
|
||||
// processAxisNode processes a query for the XPath axis node.
|
||||
func (b *builder) processAxisNode(root *axisNode) (query, error) {
|
||||
var (
|
||||
err error
|
||||
qyInput query
|
||||
qyOutput query
|
||||
predicate = axisPredicate(root)
|
||||
)
|
||||
|
||||
if root.Input == nil {
|
||||
qyInput = &contextQuery{}
|
||||
} else {
|
||||
if root.AxeType == "child" && (root.Input.Type() == nodeAxis) {
|
||||
if input := root.Input.(*axisNode); input.AxeType == "descendant-or-self" {
|
||||
var qyGrandInput query
|
||||
if input.Input != nil {
|
||||
qyGrandInput, _ = b.processNode(input.Input)
|
||||
} else {
|
||||
qyGrandInput = &contextQuery{}
|
||||
}
|
||||
qyOutput = &descendantQuery{Input: qyGrandInput, Predicate: predicate, Self: true}
|
||||
return qyOutput, nil
|
||||
}
|
||||
}
|
||||
qyInput, err = b.processNode(root.Input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
switch root.AxeType {
|
||||
case "ancestor":
|
||||
qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate}
|
||||
case "ancestor-or-self":
|
||||
qyOutput = &ancestorQuery{Input: qyInput, Predicate: predicate, Self: true}
|
||||
case "attribute":
|
||||
qyOutput = &attributeQuery{Input: qyInput, Predicate: predicate}
|
||||
case "child":
|
||||
filter := func(n NodeNavigator) bool {
|
||||
v := predicate(n)
|
||||
switch root.Prop {
|
||||
case "text":
|
||||
v = v && n.NodeType() == TextNode
|
||||
case "node":
|
||||
v = v && (n.NodeType() == ElementNode || n.NodeType() == TextNode)
|
||||
case "comment":
|
||||
v = v && n.NodeType() == CommentNode
|
||||
}
|
||||
return v
|
||||
}
|
||||
qyOutput = &childQuery{Input: qyInput, Predicate: filter}
|
||||
case "descendant":
|
||||
qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate}
|
||||
case "descendant-or-self":
|
||||
qyOutput = &descendantQuery{Input: qyInput, Predicate: predicate, Self: true}
|
||||
case "following":
|
||||
qyOutput = &followingQuery{Input: qyInput, Predicate: predicate}
|
||||
case "following-sibling":
|
||||
qyOutput = &followingQuery{Input: qyInput, Predicate: predicate, Sibling: true}
|
||||
case "parent":
|
||||
qyOutput = &parentQuery{Input: qyInput, Predicate: predicate}
|
||||
case "preceding":
|
||||
qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate}
|
||||
case "preceding-sibling":
|
||||
qyOutput = &precedingQuery{Input: qyInput, Predicate: predicate, Sibling: true}
|
||||
case "self":
|
||||
qyOutput = &selfQuery{Input: qyInput, Predicate: predicate}
|
||||
case "namespace":
|
||||
// haha,what will you do someting??
|
||||
default:
|
||||
err = fmt.Errorf("unknown axe type: %s", root.AxeType)
|
||||
return nil, err
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
// processFilterNode builds query for the XPath filter predicate.
|
||||
func (b *builder) processFilterNode(root *filterNode) (query, error) {
|
||||
b.flag |= filterFlag
|
||||
|
||||
qyInput, err := b.processNode(root.Input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyCond, err := b.processNode(root.Condition)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput := &filterQuery{Input: qyInput, Predicate: qyCond}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
// processFunctionNode processes query for the XPath function node.
|
||||
func (b *builder) processFunctionNode(root *functionNode) (query, error) {
|
||||
var qyOutput query
|
||||
switch root.FuncName {
|
||||
case "starts-with":
|
||||
arg1, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: startwithFunc(arg1, arg2)}
|
||||
case "ends-with":
|
||||
arg1, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: endwithFunc(arg1, arg2)}
|
||||
case "contains":
|
||||
arg1, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
arg2, err := b.processNode(root.Args[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: containsFunc(arg1, arg2)}
|
||||
case "substring":
|
||||
//substring( string , start [, length] )
|
||||
if len(root.Args) < 2 {
|
||||
return nil, errors.New("xpath: substring function must have at least two parameter")
|
||||
}
|
||||
var (
|
||||
arg1, arg2, arg3 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(root.Args) == 3 {
|
||||
if arg3, err = b.processNode(root.Args[2]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: substringFunc(arg1, arg2, arg3)}
|
||||
case "substring-before", "substring-after":
|
||||
//substring-xxxx( haystack, needle )
|
||||
if len(root.Args) != 2 {
|
||||
return nil, errors.New("xpath: substring-before function must have two parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{
|
||||
Input: b.firstInput,
|
||||
Func: substringIndFunc(arg1, arg2, root.FuncName == "substring-after"),
|
||||
}
|
||||
case "string-length":
|
||||
// string-length( [string] )
|
||||
if len(root.Args) < 1 {
|
||||
return nil, errors.New("xpath: string-length function must have at least one parameter")
|
||||
}
|
||||
arg1, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: stringLengthFunc(arg1)}
|
||||
case "normalize-space":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, errors.New("xpath: normalize-space function must have at least one parameter")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: argQuery, Func: normalizespaceFunc}
|
||||
case "translate":
|
||||
//translate( string , string, string )
|
||||
if len(root.Args) != 3 {
|
||||
return nil, errors.New("xpath: translate function must have three parameters")
|
||||
}
|
||||
var (
|
||||
arg1, arg2, arg3 query
|
||||
err error
|
||||
)
|
||||
if arg1, err = b.processNode(root.Args[0]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg2, err = b.processNode(root.Args[1]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if arg3, err = b.processNode(root.Args[2]); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: translateFunc(arg1, arg2, arg3)}
|
||||
case "not":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, errors.New("xpath: not function must have at least one parameter")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: argQuery, Func: notFunc}
|
||||
case "name", "local-name", "namespace-uri":
|
||||
inp := b.firstInput
|
||||
if len(root.Args) > 1 {
|
||||
return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName)
|
||||
}
|
||||
if len(root.Args) == 1 {
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
inp = argQuery
|
||||
}
|
||||
f := &functionQuery{Input: inp}
|
||||
switch root.FuncName {
|
||||
case "name":
|
||||
f.Func = nameFunc
|
||||
case "local-name":
|
||||
f.Func = localNameFunc
|
||||
case "namespace-uri":
|
||||
f.Func = namespaceFunc
|
||||
}
|
||||
qyOutput = f
|
||||
case "true", "false":
|
||||
val := root.FuncName == "true"
|
||||
qyOutput = &functionQuery{
|
||||
Input: b.firstInput,
|
||||
Func: func(_ query, _ iterator) interface{} {
|
||||
return val
|
||||
},
|
||||
}
|
||||
case "last":
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: lastFunc}
|
||||
case "position":
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: positionFunc}
|
||||
case "boolean", "number", "string":
|
||||
inp := b.firstInput
|
||||
if len(root.Args) > 1 {
|
||||
return nil, fmt.Errorf("xpath: %s function must have at most one parameter", root.FuncName)
|
||||
}
|
||||
if len(root.Args) == 1 {
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
inp = argQuery
|
||||
}
|
||||
f := &functionQuery{Input: inp}
|
||||
switch root.FuncName {
|
||||
case "boolean":
|
||||
f.Func = booleanFunc
|
||||
case "string":
|
||||
f.Func = stringFunc
|
||||
case "number":
|
||||
f.Func = numberFunc
|
||||
}
|
||||
qyOutput = f
|
||||
case "count":
|
||||
//if b.firstInput == nil {
|
||||
// return nil, errors.New("xpath: expression must evaluate to node-set")
|
||||
//}
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: count(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: argQuery, Func: countFunc}
|
||||
case "sum":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: sum(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qyOutput = &functionQuery{Input: argQuery, Func: sumFunc}
|
||||
case "ceiling", "floor", "round":
|
||||
if len(root.Args) == 0 {
|
||||
return nil, fmt.Errorf("xpath: ceiling(node-sets) function must with have parameters node-sets")
|
||||
}
|
||||
argQuery, err := b.processNode(root.Args[0])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f := &functionQuery{Input: argQuery}
|
||||
switch root.FuncName {
|
||||
case "ceiling":
|
||||
f.Func = ceilingFunc
|
||||
case "floor":
|
||||
f.Func = floorFunc
|
||||
case "round":
|
||||
f.Func = roundFunc
|
||||
}
|
||||
qyOutput = f
|
||||
case "concat":
|
||||
if len(root.Args) < 2 {
|
||||
return nil, fmt.Errorf("xpath: concat() must have at least two arguments")
|
||||
}
|
||||
var args []query
|
||||
for _, v := range root.Args {
|
||||
q, err := b.processNode(v)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
args = append(args, q)
|
||||
}
|
||||
qyOutput = &functionQuery{Input: b.firstInput, Func: concatFunc(args...)}
|
||||
default:
|
||||
return nil, fmt.Errorf("not yet support this function %s()", root.FuncName)
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
func (b *builder) processOperatorNode(root *operatorNode) (query, error) {
|
||||
left, err := b.processNode(root.Left)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
right, err := b.processNode(root.Right)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var qyOutput query
|
||||
switch root.Op {
|
||||
case "+", "-", "div", "mod": // Numeric operator
|
||||
var exprFunc func(interface{}, interface{}) interface{}
|
||||
switch root.Op {
|
||||
case "+":
|
||||
exprFunc = plusFunc
|
||||
case "-":
|
||||
exprFunc = minusFunc
|
||||
case "div":
|
||||
exprFunc = divFunc
|
||||
case "mod":
|
||||
exprFunc = modFunc
|
||||
}
|
||||
qyOutput = &numericQuery{Left: left, Right: right, Do: exprFunc}
|
||||
case "=", ">", ">=", "<", "<=", "!=":
|
||||
var exprFunc func(iterator, interface{}, interface{}) interface{}
|
||||
switch root.Op {
|
||||
case "=":
|
||||
exprFunc = eqFunc
|
||||
case ">":
|
||||
exprFunc = gtFunc
|
||||
case ">=":
|
||||
exprFunc = geFunc
|
||||
case "<":
|
||||
exprFunc = ltFunc
|
||||
case "<=":
|
||||
exprFunc = leFunc
|
||||
case "!=":
|
||||
exprFunc = neFunc
|
||||
}
|
||||
qyOutput = &logicalQuery{Left: left, Right: right, Do: exprFunc}
|
||||
case "or", "and":
|
||||
isOr := false
|
||||
if root.Op == "or" {
|
||||
isOr = true
|
||||
}
|
||||
qyOutput = &booleanQuery{Left: left, Right: right, IsOr: isOr}
|
||||
case "|":
|
||||
qyOutput = &unionQuery{Left: left, Right: right}
|
||||
}
|
||||
return qyOutput, nil
|
||||
}
|
||||
|
||||
func (b *builder) processNode(root node) (q query, err error) {
|
||||
if b.depth = b.depth + 1; b.depth > 1024 {
|
||||
err = errors.New("the xpath expressions is too complex")
|
||||
return
|
||||
}
|
||||
|
||||
switch root.Type() {
|
||||
case nodeConstantOperand:
|
||||
n := root.(*operandNode)
|
||||
q = &constantQuery{Val: n.Val}
|
||||
case nodeRoot:
|
||||
q = &contextQuery{Root: true}
|
||||
case nodeAxis:
|
||||
q, err = b.processAxisNode(root.(*axisNode))
|
||||
b.firstInput = q
|
||||
case nodeFilter:
|
||||
q, err = b.processFilterNode(root.(*filterNode))
|
||||
case nodeFunction:
|
||||
q, err = b.processFunctionNode(root.(*functionNode))
|
||||
case nodeOperator:
|
||||
q, err = b.processOperatorNode(root.(*operatorNode))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// build builds a specified XPath expressions expr.
|
||||
func build(expr string) (q query, err error) {
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
switch x := e.(type) {
|
||||
case string:
|
||||
err = errors.New(x)
|
||||
case error:
|
||||
err = x
|
||||
default:
|
||||
err = errors.New("unknown panic")
|
||||
}
|
||||
}
|
||||
}()
|
||||
root := parse(expr)
|
||||
b := &builder{}
|
||||
return b.processNode(root)
|
||||
}
|
||||
492
vendor/github.com/antchfx/xpath/func.go
generated
vendored
Normal file
492
vendor/github.com/antchfx/xpath/func.go
generated
vendored
Normal file
@@ -0,0 +1,492 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// The XPath function list.
|
||||
|
||||
func predicate(q query) func(NodeNavigator) bool {
|
||||
type Predicater interface {
|
||||
Test(NodeNavigator) bool
|
||||
}
|
||||
if p, ok := q.(Predicater); ok {
|
||||
return p.Test
|
||||
}
|
||||
return func(NodeNavigator) bool { return true }
|
||||
}
|
||||
|
||||
// positionFunc is a XPath Node Set functions position().
|
||||
func positionFunc(q query, t iterator) interface{} {
|
||||
var (
|
||||
count = 1
|
||||
node = t.Current()
|
||||
)
|
||||
test := predicate(q)
|
||||
for node.MoveToPrevious() {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
|
||||
// lastFunc is a XPath Node Set functions last().
|
||||
func lastFunc(q query, t iterator) interface{} {
|
||||
var (
|
||||
count = 0
|
||||
node = t.Current()
|
||||
)
|
||||
node.MoveToFirst()
|
||||
test := predicate(q)
|
||||
for {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
if !node.MoveToNext() {
|
||||
break
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
|
||||
// countFunc is a XPath Node Set functions count(node-set).
|
||||
func countFunc(q query, t iterator) interface{} {
|
||||
var count = 0
|
||||
test := predicate(q)
|
||||
switch typ := q.Evaluate(t).(type) {
|
||||
case query:
|
||||
for node := typ.Select(t); node != nil; node = typ.Select(t) {
|
||||
if test(node) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
}
|
||||
return float64(count)
|
||||
}
|
||||
|
||||
// sumFunc is a XPath Node Set functions sum(node-set).
|
||||
func sumFunc(q query, t iterator) interface{} {
|
||||
var sum float64
|
||||
switch typ := q.Evaluate(t).(type) {
|
||||
case query:
|
||||
for node := typ.Select(t); node != nil; node = typ.Select(t) {
|
||||
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
|
||||
sum += v
|
||||
}
|
||||
}
|
||||
case float64:
|
||||
sum = typ
|
||||
case string:
|
||||
v, err := strconv.ParseFloat(typ, 64)
|
||||
if err != nil {
|
||||
panic(errors.New("sum() function argument type must be a node-set or number"))
|
||||
}
|
||||
sum = v
|
||||
}
|
||||
return sum
|
||||
}
|
||||
|
||||
func asNumber(t iterator, o interface{}) float64 {
|
||||
switch typ := o.(type) {
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return float64(0)
|
||||
}
|
||||
if v, err := strconv.ParseFloat(node.Value(), 64); err == nil {
|
||||
return v
|
||||
}
|
||||
case float64:
|
||||
return typ
|
||||
case string:
|
||||
v, err := strconv.ParseFloat(typ, 64)
|
||||
if err != nil {
|
||||
panic(errors.New("ceiling() function argument type must be a node-set or number"))
|
||||
}
|
||||
return v
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ceilingFunc is a XPath Node Set functions ceiling(node-set).
|
||||
func ceilingFunc(q query, t iterator) interface{} {
|
||||
val := asNumber(t, q.Evaluate(t))
|
||||
return math.Ceil(val)
|
||||
}
|
||||
|
||||
// floorFunc is a XPath Node Set functions floor(node-set).
|
||||
func floorFunc(q query, t iterator) interface{} {
|
||||
val := asNumber(t, q.Evaluate(t))
|
||||
return math.Floor(val)
|
||||
}
|
||||
|
||||
// roundFunc is a XPath Node Set functions round(node-set).
|
||||
func roundFunc(q query, t iterator) interface{} {
|
||||
val := asNumber(t, q.Evaluate(t))
|
||||
//return math.Round(val)
|
||||
return round(val)
|
||||
}
|
||||
|
||||
// nameFunc is a XPath functions name([node-set]).
|
||||
func nameFunc(q query, t iterator) interface{} {
|
||||
v := q.Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
ns := v.Prefix()
|
||||
if ns == "" {
|
||||
return v.LocalName()
|
||||
}
|
||||
return ns + ":" + v.LocalName()
|
||||
}
|
||||
|
||||
// localNameFunc is a XPath functions local-name([node-set]).
|
||||
func localNameFunc(q query, t iterator) interface{} {
|
||||
v := q.Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
return v.LocalName()
|
||||
}
|
||||
|
||||
// namespaceFunc is a XPath functions namespace-uri([node-set]).
|
||||
func namespaceFunc(q query, t iterator) interface{} {
|
||||
v := q.Select(t)
|
||||
if v == nil {
|
||||
return ""
|
||||
}
|
||||
// fix about namespace-uri() bug: https://github.com/antchfx/xmlquery/issues/22
|
||||
// TODO: In the next version, add NamespaceURL() to the NodeNavigator interface.
|
||||
type namespaceURL interface {
|
||||
NamespaceURL() string
|
||||
}
|
||||
if f, ok := v.(namespaceURL); ok {
|
||||
return f.NamespaceURL()
|
||||
}
|
||||
return v.Prefix()
|
||||
}
|
||||
|
||||
func asBool(t iterator, v interface{}) bool {
|
||||
switch v := v.(type) {
|
||||
case nil:
|
||||
return false
|
||||
case *NodeIterator:
|
||||
return v.MoveNext()
|
||||
case bool:
|
||||
return bool(v)
|
||||
case float64:
|
||||
return v != 0
|
||||
case string:
|
||||
return v != ""
|
||||
case query:
|
||||
return v.Select(t) != nil
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected type: %T", v))
|
||||
}
|
||||
}
|
||||
|
||||
func asString(t iterator, v interface{}) string {
|
||||
switch v := v.(type) {
|
||||
case nil:
|
||||
return ""
|
||||
case bool:
|
||||
if v {
|
||||
return "true"
|
||||
}
|
||||
return "false"
|
||||
case float64:
|
||||
return strconv.FormatFloat(v, 'g', -1, 64)
|
||||
case string:
|
||||
return v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
return node.Value()
|
||||
default:
|
||||
panic(fmt.Errorf("unexpected type: %T", v))
|
||||
}
|
||||
}
|
||||
|
||||
// booleanFunc is a XPath functions boolean([node-set]).
|
||||
func booleanFunc(q query, t iterator) interface{} {
|
||||
v := q.Evaluate(t)
|
||||
return asBool(t, v)
|
||||
}
|
||||
|
||||
// numberFunc is a XPath functions number([node-set]).
|
||||
func numberFunc(q query, t iterator) interface{} {
|
||||
v := q.Evaluate(t)
|
||||
return asNumber(t, v)
|
||||
}
|
||||
|
||||
// stringFunc is a XPath functions string([node-set]).
|
||||
func stringFunc(q query, t iterator) interface{} {
|
||||
v := q.Evaluate(t)
|
||||
return asString(t, v)
|
||||
}
|
||||
|
||||
// startwithFunc is a XPath functions starts-with(string, string).
|
||||
func startwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
switch typ := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("starts-with() function argument type must be string"))
|
||||
}
|
||||
n, ok = arg2.Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("starts-with() function argument type must be string"))
|
||||
}
|
||||
return strings.HasPrefix(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
// endwithFunc is a XPath functions ends-with(string, string).
|
||||
func endwithFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
switch typ := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("ends-with() function argument type must be string"))
|
||||
}
|
||||
n, ok = arg2.Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("ends-with() function argument type must be string"))
|
||||
}
|
||||
return strings.HasSuffix(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
// containsFunc is a XPath functions contains(string or @attr, string).
|
||||
func containsFunc(arg1, arg2 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var (
|
||||
m, n string
|
||||
ok bool
|
||||
)
|
||||
|
||||
switch typ := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return false
|
||||
}
|
||||
m = node.Value()
|
||||
default:
|
||||
panic(errors.New("contains() function argument type must be string"))
|
||||
}
|
||||
|
||||
n, ok = arg2.Evaluate(t).(string)
|
||||
if !ok {
|
||||
panic(errors.New("contains() function argument type must be string"))
|
||||
}
|
||||
|
||||
return strings.Contains(m, n)
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
regnewline = regexp.MustCompile(`[\r\n\t]`)
|
||||
regseqspace = regexp.MustCompile(`\s{2,}`)
|
||||
)
|
||||
|
||||
// normalizespaceFunc is XPath functions normalize-space(string?)
|
||||
func normalizespaceFunc(q query, t iterator) interface{} {
|
||||
var m string
|
||||
switch typ := q.Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
m = node.Value()
|
||||
}
|
||||
m = strings.TrimSpace(m)
|
||||
m = regnewline.ReplaceAllString(m, " ")
|
||||
m = regseqspace.ReplaceAllString(m, " ")
|
||||
return m
|
||||
}
|
||||
|
||||
// substringFunc is XPath functions substring function returns a part of a given string.
|
||||
func substringFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var m string
|
||||
switch typ := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
m = typ
|
||||
case query:
|
||||
node := typ.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
m = node.Value()
|
||||
}
|
||||
|
||||
var start, length float64
|
||||
var ok bool
|
||||
|
||||
if start, ok = arg2.Evaluate(t).(float64); !ok {
|
||||
panic(errors.New("substring() function first argument type must be int"))
|
||||
} else if start < 1 {
|
||||
panic(errors.New("substring() function first argument type must be >= 1"))
|
||||
}
|
||||
start--
|
||||
if arg3 != nil {
|
||||
if length, ok = arg3.Evaluate(t).(float64); !ok {
|
||||
panic(errors.New("substring() function second argument type must be int"))
|
||||
}
|
||||
}
|
||||
if (len(m) - int(start)) < int(length) {
|
||||
panic(errors.New("substring() function start and length argument out of range"))
|
||||
}
|
||||
if length > 0 {
|
||||
return m[int(start):int(length+start)]
|
||||
}
|
||||
return m[int(start):]
|
||||
}
|
||||
}
|
||||
|
||||
// substringIndFunc is XPath functions substring-before/substring-after function returns a part of a given string.
|
||||
func substringIndFunc(arg1, arg2 query, after bool) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var str string
|
||||
switch v := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
str = v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
str = node.Value()
|
||||
}
|
||||
var word string
|
||||
switch v := arg2.Evaluate(t).(type) {
|
||||
case string:
|
||||
word = v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
return ""
|
||||
}
|
||||
word = node.Value()
|
||||
}
|
||||
if word == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
i := strings.Index(str, word)
|
||||
if i < 0 {
|
||||
return ""
|
||||
}
|
||||
if after {
|
||||
return str[i+len(word):]
|
||||
}
|
||||
return str[:i]
|
||||
}
|
||||
}
|
||||
|
||||
// stringLengthFunc is XPATH string-length( [string] ) function that returns a number
|
||||
// equal to the number of characters in a given string.
|
||||
func stringLengthFunc(arg1 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
switch v := arg1.Evaluate(t).(type) {
|
||||
case string:
|
||||
return float64(len(v))
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
return float64(len(node.Value()))
|
||||
}
|
||||
return float64(0)
|
||||
}
|
||||
}
|
||||
|
||||
// translateFunc is XPath functions translate() function returns a replaced string.
|
||||
func translateFunc(arg1, arg2, arg3 query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
str := asString(t, arg1.Evaluate(t))
|
||||
src := asString(t, arg2.Evaluate(t))
|
||||
dst := asString(t, arg3.Evaluate(t))
|
||||
|
||||
var replace []string
|
||||
for i, s := range src {
|
||||
d := ""
|
||||
if i < len(dst) {
|
||||
d = string(dst[i])
|
||||
}
|
||||
replace = append(replace, string(s), d)
|
||||
}
|
||||
return strings.NewReplacer(replace...).Replace(str)
|
||||
}
|
||||
}
|
||||
|
||||
// notFunc is XPATH functions not(expression) function operation.
|
||||
func notFunc(q query, t iterator) interface{} {
|
||||
switch v := q.Evaluate(t).(type) {
|
||||
case bool:
|
||||
return !v
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
return node == nil
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// concatFunc is the concat function concatenates two or more
|
||||
// strings and returns the resulting string.
|
||||
// concat( string1 , string2 [, stringn]* )
|
||||
func concatFunc(args ...query) func(query, iterator) interface{} {
|
||||
return func(q query, t iterator) interface{} {
|
||||
var a []string
|
||||
for _, v := range args {
|
||||
switch v := v.Evaluate(t).(type) {
|
||||
case string:
|
||||
a = append(a, v)
|
||||
case query:
|
||||
node := v.Select(t)
|
||||
if node != nil {
|
||||
a = append(a, node.Value())
|
||||
}
|
||||
}
|
||||
}
|
||||
return strings.Join(a, "")
|
||||
}
|
||||
}
|
||||
9
vendor/github.com/antchfx/xpath/func_go110.go
generated
vendored
Normal file
9
vendor/github.com/antchfx/xpath/func_go110.go
generated
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
// +build go1.10
|
||||
|
||||
package xpath
|
||||
|
||||
import "math"
|
||||
|
||||
func round(f float64) int {
|
||||
return int(math.Round(f))
|
||||
}
|
||||
15
vendor/github.com/antchfx/xpath/func_pre_go110.go
generated
vendored
Normal file
15
vendor/github.com/antchfx/xpath/func_pre_go110.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
// +build !go1.10
|
||||
|
||||
package xpath
|
||||
|
||||
import "math"
|
||||
|
||||
// math.Round() is supported by Go 1.10+,
|
||||
// This method just compatible for version <1.10.
|
||||
// https://github.com/golang/go/issues/20100
|
||||
func round(f float64) int {
|
||||
if math.Abs(f) < 0.5 {
|
||||
return 0
|
||||
}
|
||||
return int(f + math.Copysign(0.5, f))
|
||||
}
|
||||
295
vendor/github.com/antchfx/xpath/operator.go
generated
vendored
Normal file
295
vendor/github.com/antchfx/xpath/operator.go
generated
vendored
Normal file
@@ -0,0 +1,295 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// The XPath number operator function list.
|
||||
|
||||
// valueType is a return value type.
|
||||
type valueType int
|
||||
|
||||
const (
|
||||
booleanType valueType = iota
|
||||
numberType
|
||||
stringType
|
||||
nodeSetType
|
||||
)
|
||||
|
||||
func getValueType(i interface{}) valueType {
|
||||
v := reflect.ValueOf(i)
|
||||
switch v.Kind() {
|
||||
case reflect.Float64:
|
||||
return numberType
|
||||
case reflect.String:
|
||||
return stringType
|
||||
case reflect.Bool:
|
||||
return booleanType
|
||||
default:
|
||||
if _, ok := i.(query); ok {
|
||||
return nodeSetType
|
||||
}
|
||||
}
|
||||
panic(fmt.Errorf("xpath unknown value type: %v", v.Kind()))
|
||||
}
|
||||
|
||||
type logical func(iterator, string, interface{}, interface{}) bool
|
||||
|
||||
var logicalFuncs = [][]logical{
|
||||
{cmpBooleanBoolean, nil, nil, nil},
|
||||
{nil, cmpNumericNumeric, cmpNumericString, cmpNumericNodeSet},
|
||||
{nil, cmpStringNumeric, cmpStringString, cmpStringNodeSet},
|
||||
{nil, cmpNodeSetNumeric, cmpNodeSetString, cmpNodeSetNodeSet},
|
||||
}
|
||||
|
||||
// number vs number
|
||||
func cmpNumberNumberF(op string, a, b float64) bool {
|
||||
switch op {
|
||||
case "=":
|
||||
return a == b
|
||||
case ">":
|
||||
return a > b
|
||||
case "<":
|
||||
return a < b
|
||||
case ">=":
|
||||
return a >= b
|
||||
case "<=":
|
||||
return a <= b
|
||||
case "!=":
|
||||
return a != b
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// string vs string
|
||||
func cmpStringStringF(op string, a, b string) bool {
|
||||
switch op {
|
||||
case "=":
|
||||
return a == b
|
||||
case ">":
|
||||
return a > b
|
||||
case "<":
|
||||
return a < b
|
||||
case ">=":
|
||||
return a >= b
|
||||
case "<=":
|
||||
return a <= b
|
||||
case "!=":
|
||||
return a != b
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpBooleanBooleanF(op string, a, b bool) bool {
|
||||
switch op {
|
||||
case "or":
|
||||
return a || b
|
||||
case "and":
|
||||
return a && b
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNumericNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(float64)
|
||||
return cmpNumberNumberF(op, a, b)
|
||||
}
|
||||
|
||||
func cmpNumericString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(string)
|
||||
num, err := strconv.ParseFloat(b, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return cmpNumberNumberF(op, a, num)
|
||||
}
|
||||
|
||||
func cmpNumericNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(float64)
|
||||
b := n.(query)
|
||||
|
||||
for {
|
||||
node := b.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
num, err := strconv.ParseFloat(node.Value(), 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if cmpNumberNumberF(op, a, num) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(query)
|
||||
b := n.(float64)
|
||||
for {
|
||||
node := a.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
num, err := strconv.ParseFloat(node.Value(), 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if cmpNumberNumberF(op, num, b) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(query)
|
||||
b := n.(string)
|
||||
for {
|
||||
node := a.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
if cmpStringStringF(op, b, node.Value()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpNodeSetNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpStringNumeric(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(float64)
|
||||
num, err := strconv.ParseFloat(a, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return cmpNumberNumberF(op, b, num)
|
||||
}
|
||||
|
||||
func cmpStringString(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(string)
|
||||
return cmpStringStringF(op, a, b)
|
||||
}
|
||||
|
||||
func cmpStringNodeSet(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(string)
|
||||
b := n.(query)
|
||||
for {
|
||||
node := b.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
if cmpStringStringF(op, a, node.Value()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func cmpBooleanBoolean(t iterator, op string, m, n interface{}) bool {
|
||||
a := m.(bool)
|
||||
b := n.(bool)
|
||||
return cmpBooleanBooleanF(op, a, b)
|
||||
}
|
||||
|
||||
// eqFunc is an `=` operator.
|
||||
func eqFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, "=", m, n)
|
||||
}
|
||||
|
||||
// gtFunc is an `>` operator.
|
||||
func gtFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, ">", m, n)
|
||||
}
|
||||
|
||||
// geFunc is an `>=` operator.
|
||||
func geFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, ">=", m, n)
|
||||
}
|
||||
|
||||
// ltFunc is an `<` operator.
|
||||
func ltFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, "<", m, n)
|
||||
}
|
||||
|
||||
// leFunc is an `<=` operator.
|
||||
func leFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, "<=", m, n)
|
||||
}
|
||||
|
||||
// neFunc is an `!=` operator.
|
||||
func neFunc(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, "!=", m, n)
|
||||
}
|
||||
|
||||
// orFunc is an `or` operator.
|
||||
var orFunc = func(t iterator, m, n interface{}) interface{} {
|
||||
t1 := getValueType(m)
|
||||
t2 := getValueType(n)
|
||||
return logicalFuncs[t1][t2](t, "or", m, n)
|
||||
}
|
||||
|
||||
func numericExpr(m, n interface{}, cb func(float64, float64) float64) float64 {
|
||||
typ := reflect.TypeOf(float64(0))
|
||||
a := reflect.ValueOf(m).Convert(typ)
|
||||
b := reflect.ValueOf(n).Convert(typ)
|
||||
return cb(a.Float(), b.Float())
|
||||
}
|
||||
|
||||
// plusFunc is an `+` operator.
|
||||
var plusFunc = func(m, n interface{}) interface{} {
|
||||
return numericExpr(m, n, func(a, b float64) float64 {
|
||||
return a + b
|
||||
})
|
||||
}
|
||||
|
||||
// minusFunc is an `-` operator.
|
||||
var minusFunc = func(m, n interface{}) interface{} {
|
||||
return numericExpr(m, n, func(a, b float64) float64 {
|
||||
return a - b
|
||||
})
|
||||
}
|
||||
|
||||
// mulFunc is an `*` operator.
|
||||
var mulFunc = func(m, n interface{}) interface{} {
|
||||
return numericExpr(m, n, func(a, b float64) float64 {
|
||||
return a * b
|
||||
})
|
||||
}
|
||||
|
||||
// divFunc is an `DIV` operator.
|
||||
var divFunc = func(m, n interface{}) interface{} {
|
||||
return numericExpr(m, n, func(a, b float64) float64 {
|
||||
return a / b
|
||||
})
|
||||
}
|
||||
|
||||
// modFunc is an 'MOD' operator.
|
||||
var modFunc = func(m, n interface{}) interface{} {
|
||||
return numericExpr(m, n, func(a, b float64) float64 {
|
||||
return float64(int(a) % int(b))
|
||||
})
|
||||
}
|
||||
1186
vendor/github.com/antchfx/xpath/parse.go
generated
vendored
Normal file
1186
vendor/github.com/antchfx/xpath/parse.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
862
vendor/github.com/antchfx/xpath/query.go
generated
vendored
Normal file
862
vendor/github.com/antchfx/xpath/query.go
generated
vendored
Normal file
@@ -0,0 +1,862 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
type iterator interface {
|
||||
Current() NodeNavigator
|
||||
}
|
||||
|
||||
// An XPath query interface.
|
||||
type query interface {
|
||||
// Select traversing iterator returns a query matched node NodeNavigator.
|
||||
Select(iterator) NodeNavigator
|
||||
|
||||
// Evaluate evaluates query and returns values of the current query.
|
||||
Evaluate(iterator) interface{}
|
||||
|
||||
Clone() query
|
||||
}
|
||||
|
||||
// nopQuery is an empty query that always return nil for any query.
|
||||
type nopQuery struct {
|
||||
query
|
||||
}
|
||||
|
||||
func (nopQuery) Select(iterator) NodeNavigator { return nil }
|
||||
|
||||
func (nopQuery) Evaluate(iterator) interface{} { return nil }
|
||||
|
||||
func (nopQuery) Clone() query { return nopQuery{} }
|
||||
|
||||
// contextQuery is returns current node on the iterator object query.
|
||||
type contextQuery struct {
|
||||
count int
|
||||
Root bool // Moving to root-level node in the current context iterator.
|
||||
}
|
||||
|
||||
func (c *contextQuery) Select(t iterator) (n NodeNavigator) {
|
||||
if c.count == 0 {
|
||||
c.count++
|
||||
n = t.Current().Copy()
|
||||
if c.Root {
|
||||
n.MoveToRoot()
|
||||
}
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (c *contextQuery) Evaluate(iterator) interface{} {
|
||||
c.count = 0
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *contextQuery) Clone() query {
|
||||
return &contextQuery{count: 0, Root: c.Root}
|
||||
}
|
||||
|
||||
// ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*)
|
||||
type ancestorQuery struct {
|
||||
iterator func() NodeNavigator
|
||||
|
||||
Self bool
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (a *ancestorQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if a.iterator == nil {
|
||||
node := a.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
first := true
|
||||
a.iterator = func() NodeNavigator {
|
||||
if first && a.Self {
|
||||
first = false
|
||||
if a.Predicate(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
for node.MoveToParent() {
|
||||
if !a.Predicate(node) {
|
||||
continue
|
||||
}
|
||||
return node
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
if node := a.iterator(); node != nil {
|
||||
return node
|
||||
}
|
||||
a.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (a *ancestorQuery) Evaluate(t iterator) interface{} {
|
||||
a.Input.Evaluate(t)
|
||||
a.iterator = nil
|
||||
return a
|
||||
}
|
||||
|
||||
func (a *ancestorQuery) Test(n NodeNavigator) bool {
|
||||
return a.Predicate(n)
|
||||
}
|
||||
|
||||
func (a *ancestorQuery) Clone() query {
|
||||
return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate}
|
||||
}
|
||||
|
||||
// attributeQuery is an XPath attribute node query.(@*)
|
||||
type attributeQuery struct {
|
||||
iterator func() NodeNavigator
|
||||
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (a *attributeQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if a.iterator == nil {
|
||||
node := a.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
a.iterator = func() NodeNavigator {
|
||||
for {
|
||||
onAttr := node.MoveToNextAttribute()
|
||||
if !onAttr {
|
||||
return nil
|
||||
}
|
||||
if a.Predicate(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if node := a.iterator(); node != nil {
|
||||
return node
|
||||
}
|
||||
a.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (a *attributeQuery) Evaluate(t iterator) interface{} {
|
||||
a.Input.Evaluate(t)
|
||||
a.iterator = nil
|
||||
return a
|
||||
}
|
||||
|
||||
func (a *attributeQuery) Test(n NodeNavigator) bool {
|
||||
return a.Predicate(n)
|
||||
}
|
||||
|
||||
func (a *attributeQuery) Clone() query {
|
||||
return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate}
|
||||
}
|
||||
|
||||
// childQuery is an XPath child node query.(child::*)
|
||||
type childQuery struct {
|
||||
posit int
|
||||
iterator func() NodeNavigator
|
||||
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (c *childQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if c.iterator == nil {
|
||||
c.posit = 0
|
||||
node := c.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
first := true
|
||||
c.iterator = func() NodeNavigator {
|
||||
for {
|
||||
if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) {
|
||||
return nil
|
||||
}
|
||||
first = false
|
||||
if c.Predicate(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if node := c.iterator(); node != nil {
|
||||
c.posit++
|
||||
return node
|
||||
}
|
||||
c.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (c *childQuery) Evaluate(t iterator) interface{} {
|
||||
c.Input.Evaluate(t)
|
||||
c.iterator = nil
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *childQuery) Test(n NodeNavigator) bool {
|
||||
return c.Predicate(n)
|
||||
}
|
||||
|
||||
func (c *childQuery) Clone() query {
|
||||
return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate}
|
||||
}
|
||||
|
||||
// position returns a position of current NodeNavigator.
|
||||
func (c *childQuery) position() int {
|
||||
return c.posit
|
||||
}
|
||||
|
||||
// descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*)
|
||||
type descendantQuery struct {
|
||||
iterator func() NodeNavigator
|
||||
posit int
|
||||
|
||||
Self bool
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (d *descendantQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if d.iterator == nil {
|
||||
d.posit = 0
|
||||
node := d.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
level := 0
|
||||
positmap := make(map[int]int)
|
||||
first := true
|
||||
d.iterator = func() NodeNavigator {
|
||||
if first && d.Self {
|
||||
first = false
|
||||
if d.Predicate(node) {
|
||||
d.posit = 1
|
||||
positmap[level] = 1
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
if node.MoveToChild() {
|
||||
level++
|
||||
positmap[level] = 0
|
||||
} else {
|
||||
for {
|
||||
if level == 0 {
|
||||
return nil
|
||||
}
|
||||
if node.MoveToNext() {
|
||||
break
|
||||
}
|
||||
node.MoveToParent()
|
||||
level--
|
||||
}
|
||||
}
|
||||
if d.Predicate(node) {
|
||||
positmap[level]++
|
||||
d.posit = positmap[level]
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if node := d.iterator(); node != nil {
|
||||
return node
|
||||
}
|
||||
d.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (d *descendantQuery) Evaluate(t iterator) interface{} {
|
||||
d.Input.Evaluate(t)
|
||||
d.iterator = nil
|
||||
return d
|
||||
}
|
||||
|
||||
func (d *descendantQuery) Test(n NodeNavigator) bool {
|
||||
return d.Predicate(n)
|
||||
}
|
||||
|
||||
// position returns a position of current NodeNavigator.
|
||||
func (d *descendantQuery) position() int {
|
||||
return d.posit
|
||||
}
|
||||
|
||||
func (d *descendantQuery) Clone() query {
|
||||
return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate}
|
||||
}
|
||||
|
||||
// followingQuery is an XPath following node query.(following::*|following-sibling::*)
|
||||
type followingQuery struct {
|
||||
posit int
|
||||
iterator func() NodeNavigator
|
||||
|
||||
Input query
|
||||
Sibling bool // The matching sibling node of current node.
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (f *followingQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if f.iterator == nil {
|
||||
f.posit = 0
|
||||
node := f.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
if f.Sibling {
|
||||
f.iterator = func() NodeNavigator {
|
||||
for {
|
||||
if !node.MoveToNext() {
|
||||
return nil
|
||||
}
|
||||
if f.Predicate(node) {
|
||||
f.posit++
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var q *descendantQuery // descendant query
|
||||
f.iterator = func() NodeNavigator {
|
||||
for {
|
||||
if q == nil {
|
||||
for !node.MoveToNext() {
|
||||
if !node.MoveToParent() {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
q = &descendantQuery{
|
||||
Self: true,
|
||||
Input: &contextQuery{},
|
||||
Predicate: f.Predicate,
|
||||
}
|
||||
t.Current().MoveTo(node)
|
||||
}
|
||||
if node := q.Select(t); node != nil {
|
||||
f.posit = q.posit
|
||||
return node
|
||||
}
|
||||
q = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if node := f.iterator(); node != nil {
|
||||
return node
|
||||
}
|
||||
f.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (f *followingQuery) Evaluate(t iterator) interface{} {
|
||||
f.Input.Evaluate(t)
|
||||
return f
|
||||
}
|
||||
|
||||
func (f *followingQuery) Test(n NodeNavigator) bool {
|
||||
return f.Predicate(n)
|
||||
}
|
||||
|
||||
func (f *followingQuery) Clone() query {
|
||||
return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate}
|
||||
}
|
||||
|
||||
func (f *followingQuery) position() int {
|
||||
return f.posit
|
||||
}
|
||||
|
||||
// precedingQuery is an XPath preceding node query.(preceding::*)
|
||||
type precedingQuery struct {
|
||||
iterator func() NodeNavigator
|
||||
posit int
|
||||
Input query
|
||||
Sibling bool // The matching sibling node of current node.
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (p *precedingQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
if p.iterator == nil {
|
||||
p.posit = 0
|
||||
node := p.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
if p.Sibling {
|
||||
p.iterator = func() NodeNavigator {
|
||||
for {
|
||||
for !node.MoveToPrevious() {
|
||||
return nil
|
||||
}
|
||||
if p.Predicate(node) {
|
||||
p.posit++
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
var q query
|
||||
p.iterator = func() NodeNavigator {
|
||||
for {
|
||||
if q == nil {
|
||||
for !node.MoveToPrevious() {
|
||||
if !node.MoveToParent() {
|
||||
return nil
|
||||
}
|
||||
p.posit = 0
|
||||
}
|
||||
q = &descendantQuery{
|
||||
Self: true,
|
||||
Input: &contextQuery{},
|
||||
Predicate: p.Predicate,
|
||||
}
|
||||
t.Current().MoveTo(node)
|
||||
}
|
||||
if node := q.Select(t); node != nil {
|
||||
p.posit++
|
||||
return node
|
||||
}
|
||||
q = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if node := p.iterator(); node != nil {
|
||||
return node
|
||||
}
|
||||
p.iterator = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (p *precedingQuery) Evaluate(t iterator) interface{} {
|
||||
p.Input.Evaluate(t)
|
||||
return p
|
||||
}
|
||||
|
||||
func (p *precedingQuery) Test(n NodeNavigator) bool {
|
||||
return p.Predicate(n)
|
||||
}
|
||||
|
||||
func (p *precedingQuery) Clone() query {
|
||||
return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate}
|
||||
}
|
||||
|
||||
func (p *precedingQuery) position() int {
|
||||
return p.posit
|
||||
}
|
||||
|
||||
// parentQuery is an XPath parent node query.(parent::*)
|
||||
type parentQuery struct {
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (p *parentQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
node := p.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
node = node.Copy()
|
||||
if node.MoveToParent() && p.Predicate(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *parentQuery) Evaluate(t iterator) interface{} {
|
||||
p.Input.Evaluate(t)
|
||||
return p
|
||||
}
|
||||
|
||||
func (p *parentQuery) Clone() query {
|
||||
return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate}
|
||||
}
|
||||
|
||||
func (p *parentQuery) Test(n NodeNavigator) bool {
|
||||
return p.Predicate(n)
|
||||
}
|
||||
|
||||
// selfQuery is an Self node query.(self::*)
|
||||
type selfQuery struct {
|
||||
Input query
|
||||
Predicate func(NodeNavigator) bool
|
||||
}
|
||||
|
||||
func (s *selfQuery) Select(t iterator) NodeNavigator {
|
||||
for {
|
||||
node := s.Input.Select(t)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if s.Predicate(node) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *selfQuery) Evaluate(t iterator) interface{} {
|
||||
s.Input.Evaluate(t)
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *selfQuery) Test(n NodeNavigator) bool {
|
||||
return s.Predicate(n)
|
||||
}
|
||||
|
||||
func (s *selfQuery) Clone() query {
|
||||
return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate}
|
||||
}
|
||||
|
||||
// filterQuery is an XPath query for predicate filter.
|
||||
type filterQuery struct {
|
||||
Input query
|
||||
Predicate query
|
||||
posit int
|
||||
}
|
||||
|
||||
func (f *filterQuery) do(t iterator) bool {
|
||||
val := reflect.ValueOf(f.Predicate.Evaluate(t))
|
||||
switch val.Kind() {
|
||||
case reflect.Bool:
|
||||
return val.Bool()
|
||||
case reflect.String:
|
||||
return len(val.String()) > 0
|
||||
case reflect.Float64:
|
||||
pt := getNodePosition(f.Input)
|
||||
return int(val.Float()) == pt
|
||||
default:
|
||||
if q, ok := f.Predicate.(query); ok {
|
||||
return q.Select(t) != nil
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (f *filterQuery) position() int {
|
||||
return f.posit
|
||||
}
|
||||
|
||||
func (f *filterQuery) Select(t iterator) NodeNavigator {
|
||||
|
||||
for {
|
||||
|
||||
node := f.Input.Select(t)
|
||||
if node == nil {
|
||||
return node
|
||||
}
|
||||
node = node.Copy()
|
||||
|
||||
t.Current().MoveTo(node)
|
||||
if f.do(t) {
|
||||
f.posit++
|
||||
return node
|
||||
}
|
||||
f.posit = 0
|
||||
}
|
||||
}
|
||||
|
||||
func (f *filterQuery) Evaluate(t iterator) interface{} {
|
||||
f.Input.Evaluate(t)
|
||||
return f
|
||||
}
|
||||
|
||||
func (f *filterQuery) Clone() query {
|
||||
return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()}
|
||||
}
|
||||
|
||||
// functionQuery is an XPath function that call a function to returns
|
||||
// value of current NodeNavigator node.
|
||||
type functionQuery struct {
|
||||
Input query // Node Set
|
||||
Func func(query, iterator) interface{} // The xpath function.
|
||||
}
|
||||
|
||||
func (f *functionQuery) Select(t iterator) NodeNavigator {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Evaluate call a specified function that will returns the
|
||||
// following value type: number,string,boolean.
|
||||
func (f *functionQuery) Evaluate(t iterator) interface{} {
|
||||
return f.Func(f.Input, t)
|
||||
}
|
||||
|
||||
func (f *functionQuery) Clone() query {
|
||||
return &functionQuery{Input: f.Input.Clone(), Func: f.Func}
|
||||
}
|
||||
|
||||
// constantQuery is an XPath constant operand.
|
||||
type constantQuery struct {
|
||||
Val interface{}
|
||||
}
|
||||
|
||||
func (c *constantQuery) Select(t iterator) NodeNavigator {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *constantQuery) Evaluate(t iterator) interface{} {
|
||||
return c.Val
|
||||
}
|
||||
|
||||
func (c *constantQuery) Clone() query {
|
||||
return c
|
||||
}
|
||||
|
||||
// logicalQuery is an XPath logical expression.
|
||||
type logicalQuery struct {
|
||||
Left, Right query
|
||||
|
||||
Do func(iterator, interface{}, interface{}) interface{}
|
||||
}
|
||||
|
||||
func (l *logicalQuery) Select(t iterator) NodeNavigator {
|
||||
// When a XPath expr is logical expression.
|
||||
node := t.Current().Copy()
|
||||
val := l.Evaluate(t)
|
||||
switch val.(type) {
|
||||
case bool:
|
||||
if val.(bool) == true {
|
||||
return node
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *logicalQuery) Evaluate(t iterator) interface{} {
|
||||
m := l.Left.Evaluate(t)
|
||||
n := l.Right.Evaluate(t)
|
||||
return l.Do(t, m, n)
|
||||
}
|
||||
|
||||
func (l *logicalQuery) Clone() query {
|
||||
return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do}
|
||||
}
|
||||
|
||||
// numericQuery is an XPath numeric operator expression.
|
||||
type numericQuery struct {
|
||||
Left, Right query
|
||||
|
||||
Do func(interface{}, interface{}) interface{}
|
||||
}
|
||||
|
||||
func (n *numericQuery) Select(t iterator) NodeNavigator {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *numericQuery) Evaluate(t iterator) interface{} {
|
||||
m := n.Left.Evaluate(t)
|
||||
k := n.Right.Evaluate(t)
|
||||
return n.Do(m, k)
|
||||
}
|
||||
|
||||
func (n *numericQuery) Clone() query {
|
||||
return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do}
|
||||
}
|
||||
|
||||
type booleanQuery struct {
|
||||
IsOr bool
|
||||
Left, Right query
|
||||
iterator func() NodeNavigator
|
||||
}
|
||||
|
||||
func (b *booleanQuery) Select(t iterator) NodeNavigator {
|
||||
if b.iterator == nil {
|
||||
var list []NodeNavigator
|
||||
i := 0
|
||||
root := t.Current().Copy()
|
||||
if b.IsOr {
|
||||
for {
|
||||
node := b.Left.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
node = node.Copy()
|
||||
list = append(list, node)
|
||||
}
|
||||
t.Current().MoveTo(root)
|
||||
for {
|
||||
node := b.Right.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
node = node.Copy()
|
||||
list = append(list, node)
|
||||
}
|
||||
} else {
|
||||
var m []NodeNavigator
|
||||
var n []NodeNavigator
|
||||
for {
|
||||
node := b.Left.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
node = node.Copy()
|
||||
list = append(m, node)
|
||||
}
|
||||
t.Current().MoveTo(root)
|
||||
for {
|
||||
node := b.Right.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
node = node.Copy()
|
||||
list = append(n, node)
|
||||
}
|
||||
for _, k := range m {
|
||||
for _, j := range n {
|
||||
if k == j {
|
||||
list = append(list, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
b.iterator = func() NodeNavigator {
|
||||
if i >= len(list) {
|
||||
return nil
|
||||
}
|
||||
node := list[i]
|
||||
i++
|
||||
return node
|
||||
}
|
||||
}
|
||||
return b.iterator()
|
||||
}
|
||||
|
||||
func (b *booleanQuery) Evaluate(t iterator) interface{} {
|
||||
m := b.Left.Evaluate(t)
|
||||
left := asBool(t, m)
|
||||
if b.IsOr && left {
|
||||
return true
|
||||
} else if !b.IsOr && !left {
|
||||
return false
|
||||
}
|
||||
m = b.Right.Evaluate(t)
|
||||
return asBool(t, m)
|
||||
}
|
||||
|
||||
func (b *booleanQuery) Clone() query {
|
||||
return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()}
|
||||
}
|
||||
|
||||
type unionQuery struct {
|
||||
Left, Right query
|
||||
iterator func() NodeNavigator
|
||||
}
|
||||
|
||||
func (u *unionQuery) Select(t iterator) NodeNavigator {
|
||||
if u.iterator == nil {
|
||||
var list []NodeNavigator
|
||||
var m = make(map[uint64]bool)
|
||||
root := t.Current().Copy()
|
||||
for {
|
||||
node := u.Left.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
code := getHashCode(node.Copy())
|
||||
if _, ok := m[code]; !ok {
|
||||
m[code] = true
|
||||
list = append(list, node.Copy())
|
||||
}
|
||||
}
|
||||
t.Current().MoveTo(root)
|
||||
for {
|
||||
node := u.Right.Select(t)
|
||||
if node == nil {
|
||||
break
|
||||
}
|
||||
code := getHashCode(node.Copy())
|
||||
if _, ok := m[code]; !ok {
|
||||
m[code] = true
|
||||
list = append(list, node.Copy())
|
||||
}
|
||||
}
|
||||
var i int
|
||||
u.iterator = func() NodeNavigator {
|
||||
if i >= len(list) {
|
||||
return nil
|
||||
}
|
||||
node := list[i]
|
||||
i++
|
||||
return node
|
||||
}
|
||||
}
|
||||
return u.iterator()
|
||||
}
|
||||
|
||||
func (u *unionQuery) Evaluate(t iterator) interface{} {
|
||||
u.iterator = nil
|
||||
u.Left.Evaluate(t)
|
||||
u.Right.Evaluate(t)
|
||||
return u
|
||||
}
|
||||
|
||||
func (u *unionQuery) Clone() query {
|
||||
return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()}
|
||||
}
|
||||
|
||||
func getHashCode(n NodeNavigator) uint64 {
|
||||
var sb bytes.Buffer
|
||||
switch n.NodeType() {
|
||||
case AttributeNode, TextNode, CommentNode:
|
||||
sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value()))
|
||||
if n.MoveToParent() {
|
||||
sb.WriteString(n.LocalName())
|
||||
}
|
||||
case ElementNode:
|
||||
sb.WriteString(n.Prefix() + n.LocalName())
|
||||
d := 1
|
||||
for n.MoveToPrevious() {
|
||||
d++
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("-%d", d))
|
||||
|
||||
for n.MoveToParent() {
|
||||
d = 1
|
||||
for n.MoveToPrevious() {
|
||||
d++
|
||||
}
|
||||
sb.WriteString(fmt.Sprintf("-%d", d))
|
||||
}
|
||||
}
|
||||
h := fnv.New64a()
|
||||
h.Write([]byte(sb.String()))
|
||||
return h.Sum64()
|
||||
}
|
||||
|
||||
func getNodePosition(q query) int {
|
||||
type Position interface {
|
||||
position() int
|
||||
}
|
||||
if count, ok := q.(Position); ok {
|
||||
return count.position()
|
||||
}
|
||||
return 1
|
||||
}
|
||||
157
vendor/github.com/antchfx/xpath/xpath.go
generated
vendored
Normal file
157
vendor/github.com/antchfx/xpath/xpath.go
generated
vendored
Normal file
@@ -0,0 +1,157 @@
|
||||
package xpath
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
// NodeType represents a type of XPath node.
|
||||
type NodeType int
|
||||
|
||||
const (
|
||||
// RootNode is a root node of the XML document or node tree.
|
||||
RootNode NodeType = iota
|
||||
|
||||
// ElementNode is an element, such as <element>.
|
||||
ElementNode
|
||||
|
||||
// AttributeNode is an attribute, such as id='123'.
|
||||
AttributeNode
|
||||
|
||||
// TextNode is the text content of a node.
|
||||
TextNode
|
||||
|
||||
// CommentNode is a comment node, such as <!-- my comment -->
|
||||
CommentNode
|
||||
|
||||
// allNode is any types of node, used by xpath package only to predicate match.
|
||||
allNode
|
||||
)
|
||||
|
||||
// NodeNavigator provides cursor model for navigating XML data.
|
||||
type NodeNavigator interface {
|
||||
// NodeType returns the XPathNodeType of the current node.
|
||||
NodeType() NodeType
|
||||
|
||||
// LocalName gets the Name of the current node.
|
||||
LocalName() string
|
||||
|
||||
// Prefix returns namespace prefix associated with the current node.
|
||||
Prefix() string
|
||||
|
||||
// Value gets the value of current node.
|
||||
Value() string
|
||||
|
||||
// Copy does a deep copy of the NodeNavigator and all its components.
|
||||
Copy() NodeNavigator
|
||||
|
||||
// MoveToRoot moves the NodeNavigator to the root node of the current node.
|
||||
MoveToRoot()
|
||||
|
||||
// MoveToParent moves the NodeNavigator to the parent node of the current node.
|
||||
MoveToParent() bool
|
||||
|
||||
// MoveToNextAttribute moves the NodeNavigator to the next attribute on current node.
|
||||
MoveToNextAttribute() bool
|
||||
|
||||
// MoveToChild moves the NodeNavigator to the first child node of the current node.
|
||||
MoveToChild() bool
|
||||
|
||||
// MoveToFirst moves the NodeNavigator to the first sibling node of the current node.
|
||||
MoveToFirst() bool
|
||||
|
||||
// MoveToNext moves the NodeNavigator to the next sibling node of the current node.
|
||||
MoveToNext() bool
|
||||
|
||||
// MoveToPrevious moves the NodeNavigator to the previous sibling node of the current node.
|
||||
MoveToPrevious() bool
|
||||
|
||||
// MoveTo moves the NodeNavigator to the same position as the specified NodeNavigator.
|
||||
MoveTo(NodeNavigator) bool
|
||||
}
|
||||
|
||||
// NodeIterator holds all matched Node object.
|
||||
type NodeIterator struct {
|
||||
node NodeNavigator
|
||||
query query
|
||||
}
|
||||
|
||||
// Current returns current node which matched.
|
||||
func (t *NodeIterator) Current() NodeNavigator {
|
||||
return t.node
|
||||
}
|
||||
|
||||
// MoveNext moves Navigator to the next match node.
|
||||
func (t *NodeIterator) MoveNext() bool {
|
||||
n := t.query.Select(t)
|
||||
if n != nil {
|
||||
if !t.node.MoveTo(n) {
|
||||
t.node = n.Copy()
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Select selects a node set using the specified XPath expression.
|
||||
// This method is deprecated, recommend using Expr.Select() method instead.
|
||||
func Select(root NodeNavigator, expr string) *NodeIterator {
|
||||
exp, err := Compile(expr)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return exp.Select(root)
|
||||
}
|
||||
|
||||
// Expr is an XPath expression for query.
|
||||
type Expr struct {
|
||||
s string
|
||||
q query
|
||||
}
|
||||
|
||||
type iteratorFunc func() NodeNavigator
|
||||
|
||||
func (f iteratorFunc) Current() NodeNavigator {
|
||||
return f()
|
||||
}
|
||||
|
||||
// Evaluate returns the result of the expression.
|
||||
// The result type of the expression is one of the follow: bool,float64,string,NodeIterator).
|
||||
func (expr *Expr) Evaluate(root NodeNavigator) interface{} {
|
||||
val := expr.q.Evaluate(iteratorFunc(func() NodeNavigator { return root }))
|
||||
switch val.(type) {
|
||||
case query:
|
||||
return &NodeIterator{query: expr.q.Clone(), node: root}
|
||||
}
|
||||
return val
|
||||
}
|
||||
|
||||
// Select selects a node set using the specified XPath expression.
|
||||
func (expr *Expr) Select(root NodeNavigator) *NodeIterator {
|
||||
return &NodeIterator{query: expr.q.Clone(), node: root}
|
||||
}
|
||||
|
||||
// String returns XPath expression string.
|
||||
func (expr *Expr) String() string {
|
||||
return expr.s
|
||||
}
|
||||
|
||||
// Compile compiles an XPath expression string.
|
||||
func Compile(expr string) (*Expr, error) {
|
||||
if expr == "" {
|
||||
return nil, errors.New("expr expression is nil")
|
||||
}
|
||||
qy, err := build(expr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Expr{s: expr, q: qy}, nil
|
||||
}
|
||||
|
||||
// MustCompile compiles an XPath expression string and ignored error.
|
||||
func MustCompile(expr string) *Expr {
|
||||
exp, err := Compile(expr)
|
||||
if err != nil {
|
||||
return &Expr{s: expr, q: nopQuery{}}
|
||||
}
|
||||
return exp
|
||||
}
|
||||
191
vendor/github.com/golang/groupcache/LICENSE
generated
vendored
Normal file
191
vendor/github.com/golang/groupcache/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, and
|
||||
distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by the copyright
|
||||
owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all other entities
|
||||
that control, are controlled by, or are under common control with that entity.
|
||||
For the purposes of this definition, "control" means (i) the power, direct or
|
||||
indirect, to cause the direction or management of such entity, whether by
|
||||
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity exercising
|
||||
permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications, including
|
||||
but not limited to software source code, documentation source, and configuration
|
||||
files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical transformation or
|
||||
translation of a Source form, including but not limited to compiled object code,
|
||||
generated documentation, and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or Object form, made
|
||||
available under the License, as indicated by a copyright notice that is included
|
||||
in or attached to the work (an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object form, that
|
||||
is based on (or derived from) the Work and for which the editorial revisions,
|
||||
annotations, elaborations, or other modifications represent, as a whole, an
|
||||
original work of authorship. For the purposes of this License, Derivative Works
|
||||
shall not include works that remain separable from, or merely link (or bind by
|
||||
name) to the interfaces of, the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including the original version
|
||||
of the Work and any modifications or additions to that Work or Derivative Works
|
||||
thereof, that is intentionally submitted to Licensor for inclusion in the Work
|
||||
by the copyright owner or by an individual or Legal Entity authorized to submit
|
||||
on behalf of the copyright owner. For the purposes of this definition,
|
||||
"submitted" means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems, and
|
||||
issue tracking systems that are managed by, or on behalf of, the Licensor for
|
||||
the purpose of discussing and improving the Work, but excluding communication
|
||||
that is conspicuously marked or otherwise designated in writing by the copyright
|
||||
owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
|
||||
of whom a Contribution has been received by Licensor and subsequently
|
||||
incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License.
|
||||
|
||||
Subject to the terms and conditions of this License, each Contributor hereby
|
||||
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
||||
irrevocable copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the Work and such
|
||||
Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License.
|
||||
|
||||
Subject to the terms and conditions of this License, each Contributor hereby
|
||||
grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
|
||||
irrevocable (except as stated in this section) patent license to make, have
|
||||
made, use, offer to sell, sell, import, and otherwise transfer the Work, where
|
||||
such license applies only to those patent claims licensable by such Contributor
|
||||
that are necessarily infringed by their Contribution(s) alone or by combination
|
||||
of their Contribution(s) with the Work to which such Contribution(s) was
|
||||
submitted. If You institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work or a
|
||||
Contribution incorporated within the Work constitutes direct or contributory
|
||||
patent infringement, then any patent licenses granted to You under this License
|
||||
for that Work shall terminate as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution.
|
||||
|
||||
You may reproduce and distribute copies of the Work or Derivative Works thereof
|
||||
in any medium, with or without modifications, and in Source or Object form,
|
||||
provided that You meet the following conditions:
|
||||
|
||||
You must give any other recipients of the Work or Derivative Works a copy of
|
||||
this License; and
|
||||
You must cause any modified files to carry prominent notices stating that You
|
||||
changed the files; and
|
||||
You must retain, in the Source form of any Derivative Works that You distribute,
|
||||
all copyright, patent, trademark, and attribution notices from the Source form
|
||||
of the Work, excluding those notices that do not pertain to any part of the
|
||||
Derivative Works; and
|
||||
If the Work includes a "NOTICE" text file as part of its distribution, then any
|
||||
Derivative Works that You distribute must include a readable copy of the
|
||||
attribution notices contained within such NOTICE file, excluding those notices
|
||||
that do not pertain to any part of the Derivative Works, in at least one of the
|
||||
following places: within a NOTICE text file distributed as part of the
|
||||
Derivative Works; within the Source form or documentation, if provided along
|
||||
with the Derivative Works; or, within a display generated by the Derivative
|
||||
Works, if and wherever such third-party notices normally appear. The contents of
|
||||
the NOTICE file are for informational purposes only and do not modify the
|
||||
License. You may add Your own attribution notices within Derivative Works that
|
||||
You distribute, alongside or as an addendum to the NOTICE text from the Work,
|
||||
provided that such additional attribution notices cannot be construed as
|
||||
modifying the License.
|
||||
You may add Your own copyright statement to Your modifications and may provide
|
||||
additional or different license terms and conditions for use, reproduction, or
|
||||
distribution of Your modifications, or for any such Derivative Works as a whole,
|
||||
provided Your use, reproduction, and distribution of the Work otherwise complies
|
||||
with the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions.
|
||||
|
||||
Unless You explicitly state otherwise, any Contribution intentionally submitted
|
||||
for inclusion in the Work by You to the Licensor shall be under the terms and
|
||||
conditions of this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify the terms of
|
||||
any separate license agreement you may have executed with Licensor regarding
|
||||
such Contributions.
|
||||
|
||||
6. Trademarks.
|
||||
|
||||
This License does not grant permission to use the trade names, trademarks,
|
||||
service marks, or product names of the Licensor, except as required for
|
||||
reasonable and customary use in describing the origin of the Work and
|
||||
reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty.
|
||||
|
||||
Unless required by applicable law or agreed to in writing, Licensor provides the
|
||||
Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
|
||||
including, without limitation, any warranties or conditions of TITLE,
|
||||
NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
|
||||
solely responsible for determining the appropriateness of using or
|
||||
redistributing the Work and assume any risks associated with Your exercise of
|
||||
permissions under this License.
|
||||
|
||||
8. Limitation of Liability.
|
||||
|
||||
In no event and under no legal theory, whether in tort (including negligence),
|
||||
contract, or otherwise, unless required by applicable law (such as deliberate
|
||||
and grossly negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special, incidental,
|
||||
or consequential damages of any character arising as a result of this License or
|
||||
out of the use or inability to use the Work (including but not limited to
|
||||
damages for loss of goodwill, work stoppage, computer failure or malfunction, or
|
||||
any and all other commercial damages or losses), even if such Contributor has
|
||||
been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability.
|
||||
|
||||
While redistributing the Work or Derivative Works thereof, You may choose to
|
||||
offer, and charge a fee for, acceptance of support, warranty, indemnity, or
|
||||
other liability obligations and/or rights consistent with this License. However,
|
||||
in accepting such obligations, You may act only on Your own behalf and on Your
|
||||
sole responsibility, not on behalf of any other Contributor, and only if You
|
||||
agree to indemnify, defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason of your
|
||||
accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work
|
||||
|
||||
To apply the Apache License to your work, attach the following boilerplate
|
||||
notice, with the fields enclosed by brackets "[]" replaced with your own
|
||||
identifying information. (Don't include the brackets!) The text should be
|
||||
enclosed in the appropriate comment syntax for the file format. We also
|
||||
recommend that a file or class name and description of purpose be included on
|
||||
the same "printed page" as the copyright notice for easier identification within
|
||||
third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
133
vendor/github.com/golang/groupcache/lru/lru.go
generated
vendored
Normal file
133
vendor/github.com/golang/groupcache/lru/lru.go
generated
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
Copyright 2013 Google Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// Package lru implements an LRU cache.
|
||||
package lru
|
||||
|
||||
import "container/list"
|
||||
|
||||
// Cache is an LRU cache. It is not safe for concurrent access.
|
||||
type Cache struct {
|
||||
// MaxEntries is the maximum number of cache entries before
|
||||
// an item is evicted. Zero means no limit.
|
||||
MaxEntries int
|
||||
|
||||
// OnEvicted optionally specifies a callback function to be
|
||||
// executed when an entry is purged from the cache.
|
||||
OnEvicted func(key Key, value interface{})
|
||||
|
||||
ll *list.List
|
||||
cache map[interface{}]*list.Element
|
||||
}
|
||||
|
||||
// A Key may be any value that is comparable. See http://golang.org/ref/spec#Comparison_operators
|
||||
type Key interface{}
|
||||
|
||||
type entry struct {
|
||||
key Key
|
||||
value interface{}
|
||||
}
|
||||
|
||||
// New creates a new Cache.
|
||||
// If maxEntries is zero, the cache has no limit and it's assumed
|
||||
// that eviction is done by the caller.
|
||||
func New(maxEntries int) *Cache {
|
||||
return &Cache{
|
||||
MaxEntries: maxEntries,
|
||||
ll: list.New(),
|
||||
cache: make(map[interface{}]*list.Element),
|
||||
}
|
||||
}
|
||||
|
||||
// Add adds a value to the cache.
|
||||
func (c *Cache) Add(key Key, value interface{}) {
|
||||
if c.cache == nil {
|
||||
c.cache = make(map[interface{}]*list.Element)
|
||||
c.ll = list.New()
|
||||
}
|
||||
if ee, ok := c.cache[key]; ok {
|
||||
c.ll.MoveToFront(ee)
|
||||
ee.Value.(*entry).value = value
|
||||
return
|
||||
}
|
||||
ele := c.ll.PushFront(&entry{key, value})
|
||||
c.cache[key] = ele
|
||||
if c.MaxEntries != 0 && c.ll.Len() > c.MaxEntries {
|
||||
c.RemoveOldest()
|
||||
}
|
||||
}
|
||||
|
||||
// Get looks up a key's value from the cache.
|
||||
func (c *Cache) Get(key Key) (value interface{}, ok bool) {
|
||||
if c.cache == nil {
|
||||
return
|
||||
}
|
||||
if ele, hit := c.cache[key]; hit {
|
||||
c.ll.MoveToFront(ele)
|
||||
return ele.Value.(*entry).value, true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Remove removes the provided key from the cache.
|
||||
func (c *Cache) Remove(key Key) {
|
||||
if c.cache == nil {
|
||||
return
|
||||
}
|
||||
if ele, hit := c.cache[key]; hit {
|
||||
c.removeElement(ele)
|
||||
}
|
||||
}
|
||||
|
||||
// RemoveOldest removes the oldest item from the cache.
|
||||
func (c *Cache) RemoveOldest() {
|
||||
if c.cache == nil {
|
||||
return
|
||||
}
|
||||
ele := c.ll.Back()
|
||||
if ele != nil {
|
||||
c.removeElement(ele)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Cache) removeElement(e *list.Element) {
|
||||
c.ll.Remove(e)
|
||||
kv := e.Value.(*entry)
|
||||
delete(c.cache, kv.key)
|
||||
if c.OnEvicted != nil {
|
||||
c.OnEvicted(kv.key, kv.value)
|
||||
}
|
||||
}
|
||||
|
||||
// Len returns the number of items in the cache.
|
||||
func (c *Cache) Len() int {
|
||||
if c.cache == nil {
|
||||
return 0
|
||||
}
|
||||
return c.ll.Len()
|
||||
}
|
||||
|
||||
// Clear purges all stored items from the cache.
|
||||
func (c *Cache) Clear() {
|
||||
if c.OnEvicted != nil {
|
||||
for _, e := range c.cache {
|
||||
kv := e.Value.(*entry)
|
||||
c.OnEvicted(kv.key, kv.value)
|
||||
}
|
||||
}
|
||||
c.ll = nil
|
||||
c.cache = nil
|
||||
}
|
||||
257
vendor/golang.org/x/net/html/charset/charset.go
generated
vendored
Normal file
257
vendor/golang.org/x/net/html/charset/charset.go
generated
vendored
Normal file
@@ -0,0 +1,257 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package charset provides common text encodings for HTML documents.
|
||||
//
|
||||
// The mapping from encoding labels to encodings is defined at
|
||||
// https://encoding.spec.whatwg.org/.
|
||||
package charset // import "golang.org/x/net/html/charset"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"mime"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/htmlindex"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Lookup returns the encoding with the specified label, and its canonical
|
||||
// name. It returns nil and the empty string if label is not one of the
|
||||
// standard encodings for HTML. Matching is case-insensitive and ignores
|
||||
// leading and trailing whitespace. Encoders will use HTML escape sequences for
|
||||
// runes that are not supported by the character set.
|
||||
func Lookup(label string) (e encoding.Encoding, name string) {
|
||||
e, err := htmlindex.Get(label)
|
||||
if err != nil {
|
||||
return nil, ""
|
||||
}
|
||||
name, _ = htmlindex.Name(e)
|
||||
return &htmlEncoding{e}, name
|
||||
}
|
||||
|
||||
type htmlEncoding struct{ encoding.Encoding }
|
||||
|
||||
func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
|
||||
// HTML requires a non-terminating legacy encoder. We use HTML escapes to
|
||||
// substitute unsupported code points.
|
||||
return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
|
||||
}
|
||||
|
||||
// DetermineEncoding determines the encoding of an HTML document by examining
|
||||
// up to the first 1024 bytes of content and the declared Content-Type.
|
||||
//
|
||||
// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
|
||||
func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
|
||||
if len(content) > 1024 {
|
||||
content = content[:1024]
|
||||
}
|
||||
|
||||
for _, b := range boms {
|
||||
if bytes.HasPrefix(content, b.bom) {
|
||||
e, name = Lookup(b.enc)
|
||||
return e, name, true
|
||||
}
|
||||
}
|
||||
|
||||
if _, params, err := mime.ParseMediaType(contentType); err == nil {
|
||||
if cs, ok := params["charset"]; ok {
|
||||
if e, name = Lookup(cs); e != nil {
|
||||
return e, name, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(content) > 0 {
|
||||
e, name = prescan(content)
|
||||
if e != nil {
|
||||
return e, name, false
|
||||
}
|
||||
}
|
||||
|
||||
// Try to detect UTF-8.
|
||||
// First eliminate any partial rune at the end.
|
||||
for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
|
||||
b := content[i]
|
||||
if b < 0x80 {
|
||||
break
|
||||
}
|
||||
if utf8.RuneStart(b) {
|
||||
content = content[:i]
|
||||
break
|
||||
}
|
||||
}
|
||||
hasHighBit := false
|
||||
for _, c := range content {
|
||||
if c >= 0x80 {
|
||||
hasHighBit = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if hasHighBit && utf8.Valid(content) {
|
||||
return encoding.Nop, "utf-8", false
|
||||
}
|
||||
|
||||
// TODO: change default depending on user's locale?
|
||||
return charmap.Windows1252, "windows-1252", false
|
||||
}
|
||||
|
||||
// NewReader returns an io.Reader that converts the content of r to UTF-8.
|
||||
// It calls DetermineEncoding to find out what r's encoding is.
|
||||
func NewReader(r io.Reader, contentType string) (io.Reader, error) {
|
||||
preview := make([]byte, 1024)
|
||||
n, err := io.ReadFull(r, preview)
|
||||
switch {
|
||||
case err == io.ErrUnexpectedEOF:
|
||||
preview = preview[:n]
|
||||
r = bytes.NewReader(preview)
|
||||
case err != nil:
|
||||
return nil, err
|
||||
default:
|
||||
r = io.MultiReader(bytes.NewReader(preview), r)
|
||||
}
|
||||
|
||||
if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
|
||||
r = transform.NewReader(r, e.NewDecoder())
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// NewReaderLabel returns a reader that converts from the specified charset to
|
||||
// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
|
||||
// returns an error if Lookup returns nil. It is suitable for use as
|
||||
// encoding/xml.Decoder's CharsetReader function.
|
||||
func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
|
||||
e, _ := Lookup(label)
|
||||
if e == nil {
|
||||
return nil, fmt.Errorf("unsupported charset: %q", label)
|
||||
}
|
||||
return transform.NewReader(input, e.NewDecoder()), nil
|
||||
}
|
||||
|
||||
func prescan(content []byte) (e encoding.Encoding, name string) {
|
||||
z := html.NewTokenizer(bytes.NewReader(content))
|
||||
for {
|
||||
switch z.Next() {
|
||||
case html.ErrorToken:
|
||||
return nil, ""
|
||||
|
||||
case html.StartTagToken, html.SelfClosingTagToken:
|
||||
tagName, hasAttr := z.TagName()
|
||||
if !bytes.Equal(tagName, []byte("meta")) {
|
||||
continue
|
||||
}
|
||||
attrList := make(map[string]bool)
|
||||
gotPragma := false
|
||||
|
||||
const (
|
||||
dontKnow = iota
|
||||
doNeedPragma
|
||||
doNotNeedPragma
|
||||
)
|
||||
needPragma := dontKnow
|
||||
|
||||
name = ""
|
||||
e = nil
|
||||
for hasAttr {
|
||||
var key, val []byte
|
||||
key, val, hasAttr = z.TagAttr()
|
||||
ks := string(key)
|
||||
if attrList[ks] {
|
||||
continue
|
||||
}
|
||||
attrList[ks] = true
|
||||
for i, c := range val {
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
val[i] = c + 0x20
|
||||
}
|
||||
}
|
||||
|
||||
switch ks {
|
||||
case "http-equiv":
|
||||
if bytes.Equal(val, []byte("content-type")) {
|
||||
gotPragma = true
|
||||
}
|
||||
|
||||
case "content":
|
||||
if e == nil {
|
||||
name = fromMetaElement(string(val))
|
||||
if name != "" {
|
||||
e, name = Lookup(name)
|
||||
if e != nil {
|
||||
needPragma = doNeedPragma
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
case "charset":
|
||||
e, name = Lookup(string(val))
|
||||
needPragma = doNotNeedPragma
|
||||
}
|
||||
}
|
||||
|
||||
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(name, "utf-16") {
|
||||
name = "utf-8"
|
||||
e = encoding.Nop
|
||||
}
|
||||
|
||||
if e != nil {
|
||||
return e, name
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func fromMetaElement(s string) string {
|
||||
for s != "" {
|
||||
csLoc := strings.Index(s, "charset")
|
||||
if csLoc == -1 {
|
||||
return ""
|
||||
}
|
||||
s = s[csLoc+len("charset"):]
|
||||
s = strings.TrimLeft(s, " \t\n\f\r")
|
||||
if !strings.HasPrefix(s, "=") {
|
||||
continue
|
||||
}
|
||||
s = s[1:]
|
||||
s = strings.TrimLeft(s, " \t\n\f\r")
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
if q := s[0]; q == '"' || q == '\'' {
|
||||
s = s[1:]
|
||||
closeQuote := strings.IndexRune(s, rune(q))
|
||||
if closeQuote == -1 {
|
||||
return ""
|
||||
}
|
||||
return s[:closeQuote]
|
||||
}
|
||||
|
||||
end := strings.IndexAny(s, "; \t\n\f\r")
|
||||
if end == -1 {
|
||||
end = len(s)
|
||||
}
|
||||
return s[:end]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var boms = []struct {
|
||||
bom []byte
|
||||
enc string
|
||||
}{
|
||||
{[]byte{0xfe, 0xff}, "utf-16be"},
|
||||
{[]byte{0xff, 0xfe}, "utf-16le"},
|
||||
{[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
|
||||
}
|
||||
249
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
Normal file
249
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
Normal file
@@ -0,0 +1,249 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run maketables.go
|
||||
|
||||
// Package charmap provides simple character encodings such as IBM Code Page 437
|
||||
// and Windows 1252.
|
||||
package charmap // import "golang.org/x/text/encoding/charmap"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// These encodings vary only in the way clients should interpret them. Their
|
||||
// coded character set is identical and a single implementation can be shared.
|
||||
var (
|
||||
// ISO8859_6E is the ISO 8859-6E encoding.
|
||||
ISO8859_6E encoding.Encoding = &iso8859_6E
|
||||
|
||||
// ISO8859_6I is the ISO 8859-6I encoding.
|
||||
ISO8859_6I encoding.Encoding = &iso8859_6I
|
||||
|
||||
// ISO8859_8E is the ISO 8859-8E encoding.
|
||||
ISO8859_8E encoding.Encoding = &iso8859_8E
|
||||
|
||||
// ISO8859_8I is the ISO 8859-8I encoding.
|
||||
ISO8859_8I encoding.Encoding = &iso8859_8I
|
||||
|
||||
iso8859_6E = internal.Encoding{
|
||||
Encoding: ISO8859_6,
|
||||
Name: "ISO-8859-6E",
|
||||
MIB: identifier.ISO88596E,
|
||||
}
|
||||
|
||||
iso8859_6I = internal.Encoding{
|
||||
Encoding: ISO8859_6,
|
||||
Name: "ISO-8859-6I",
|
||||
MIB: identifier.ISO88596I,
|
||||
}
|
||||
|
||||
iso8859_8E = internal.Encoding{
|
||||
Encoding: ISO8859_8,
|
||||
Name: "ISO-8859-8E",
|
||||
MIB: identifier.ISO88598E,
|
||||
}
|
||||
|
||||
iso8859_8I = internal.Encoding{
|
||||
Encoding: ISO8859_8,
|
||||
Name: "ISO-8859-8I",
|
||||
MIB: identifier.ISO88598I,
|
||||
}
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All []encoding.Encoding = listAll
|
||||
|
||||
// TODO: implement these encodings, in order of importance.
|
||||
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
|
||||
// ISO8859_9: Close to Windows 1254.
|
||||
|
||||
// utf8Enc holds a rune's UTF-8 encoding in data[:len].
|
||||
type utf8Enc struct {
|
||||
len uint8
|
||||
data [3]byte
|
||||
}
|
||||
|
||||
// Charmap is an 8-bit character set encoding.
|
||||
type Charmap struct {
|
||||
// name is the encoding's name.
|
||||
name string
|
||||
// mib is the encoding type of this encoder.
|
||||
mib identifier.MIB
|
||||
// asciiSuperset states whether the encoding is a superset of ASCII.
|
||||
asciiSuperset bool
|
||||
// low is the lower bound of the encoded byte for a non-ASCII rune. If
|
||||
// Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
|
||||
low uint8
|
||||
// replacement is the encoded replacement character.
|
||||
replacement byte
|
||||
// decode is the map from encoded byte to UTF-8.
|
||||
decode [256]utf8Enc
|
||||
// encoding is the map from runes to encoded bytes. Each entry is a
|
||||
// uint32: the high 8 bits are the encoded byte and the low 24 bits are
|
||||
// the rune. The table entries are sorted by ascending rune.
|
||||
encode [256]uint32
|
||||
}
|
||||
|
||||
// NewDecoder implements the encoding.Encoding interface.
|
||||
func (m *Charmap) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
|
||||
}
|
||||
|
||||
// NewEncoder implements the encoding.Encoding interface.
|
||||
func (m *Charmap) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
|
||||
}
|
||||
|
||||
// String returns the Charmap's name.
|
||||
func (m *Charmap) String() string {
|
||||
return m.name
|
||||
}
|
||||
|
||||
// ID implements an internal interface.
|
||||
func (m *Charmap) ID() (mib identifier.MIB, other string) {
|
||||
return m.mib, ""
|
||||
}
|
||||
|
||||
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
|
||||
type charmapDecoder struct {
|
||||
transform.NopResetter
|
||||
charmap *Charmap
|
||||
}
|
||||
|
||||
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for i, c := range src {
|
||||
if m.charmap.asciiSuperset && c < utf8.RuneSelf {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = c
|
||||
nDst++
|
||||
nSrc = i + 1
|
||||
continue
|
||||
}
|
||||
|
||||
decode := &m.charmap.decode[c]
|
||||
n := int(decode.len)
|
||||
if nDst+n > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
// It's 15% faster to avoid calling copy for these tiny slices.
|
||||
for j := 0; j < n; j++ {
|
||||
dst[nDst] = decode.data[j]
|
||||
nDst++
|
||||
}
|
||||
nSrc = i + 1
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// DecodeByte returns the Charmap's rune decoding of the byte b.
|
||||
func (m *Charmap) DecodeByte(b byte) rune {
|
||||
switch x := &m.decode[b]; x.len {
|
||||
case 1:
|
||||
return rune(x.data[0])
|
||||
case 2:
|
||||
return rune(x.data[0]&0x1f)<<6 | rune(x.data[1]&0x3f)
|
||||
default:
|
||||
return rune(x.data[0]&0x0f)<<12 | rune(x.data[1]&0x3f)<<6 | rune(x.data[2]&0x3f)
|
||||
}
|
||||
}
|
||||
|
||||
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
|
||||
type charmapEncoder struct {
|
||||
transform.NopResetter
|
||||
charmap *Charmap
|
||||
}
|
||||
|
||||
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for nSrc < len(src) {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
if m.charmap.asciiSuperset {
|
||||
nSrc++
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Binary search in [low, high) for that rune in the m.charmap.encode table.
|
||||
for low, high := int(m.charmap.low), 0x100; ; {
|
||||
if low >= high {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
break loop
|
||||
}
|
||||
mid := (low + high) / 2
|
||||
got := m.charmap.encode[mid]
|
||||
gotRune := rune(got & (1<<24 - 1))
|
||||
if gotRune < r {
|
||||
low = mid + 1
|
||||
} else if gotRune > r {
|
||||
high = mid
|
||||
} else {
|
||||
dst[nDst] = byte(got >> 24)
|
||||
nDst++
|
||||
break
|
||||
}
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// EncodeRune returns the Charmap's byte encoding of the rune r. ok is whether
|
||||
// r is in the Charmap's repertoire. If not, b is set to the Charmap's
|
||||
// replacement byte. This is often the ASCII substitute character '\x1a'.
|
||||
func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
|
||||
if r < utf8.RuneSelf && m.asciiSuperset {
|
||||
return byte(r), true
|
||||
}
|
||||
for low, high := int(m.low), 0x100; ; {
|
||||
if low >= high {
|
||||
return m.replacement, false
|
||||
}
|
||||
mid := (low + high) / 2
|
||||
got := m.encode[mid]
|
||||
gotRune := rune(got & (1<<24 - 1))
|
||||
if gotRune < r {
|
||||
low = mid + 1
|
||||
} else if gotRune > r {
|
||||
high = mid
|
||||
} else {
|
||||
return byte(got >> 24), true
|
||||
}
|
||||
}
|
||||
}
|
||||
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
Normal file
556
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
Normal file
@@ -0,0 +1,556 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
|
||||
` !"#$%&'()*+,-./0123456789:;<=>?` +
|
||||
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
|
||||
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
|
||||
|
||||
var encodings = []struct {
|
||||
name string
|
||||
mib string
|
||||
comment string
|
||||
varName string
|
||||
replacement byte
|
||||
mapping string
|
||||
}{
|
||||
{
|
||||
"IBM Code Page 037",
|
||||
"IBM037",
|
||||
"",
|
||||
"CodePage037",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 437",
|
||||
"PC8CodePage437",
|
||||
"",
|
||||
"CodePage437",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 850",
|
||||
"PC850Multilingual",
|
||||
"",
|
||||
"CodePage850",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 852",
|
||||
"PCp852",
|
||||
"",
|
||||
"CodePage852",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 855",
|
||||
"IBM855",
|
||||
"",
|
||||
"CodePage855",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"Windows Code Page 858", // PC latin1 with Euro
|
||||
"IBM00858",
|
||||
"",
|
||||
"CodePage858",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 860",
|
||||
"IBM860",
|
||||
"",
|
||||
"CodePage860",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 862",
|
||||
"PC862LatinHebrew",
|
||||
"",
|
||||
"CodePage862",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 863",
|
||||
"IBM863",
|
||||
"",
|
||||
"CodePage863",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 865",
|
||||
"IBM865",
|
||||
"",
|
||||
"CodePage865",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 866",
|
||||
"IBM866",
|
||||
"",
|
||||
"CodePage866",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-ibm866.txt",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1047",
|
||||
"IBM1047",
|
||||
"",
|
||||
"CodePage1047",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 1140",
|
||||
"IBM01140",
|
||||
"",
|
||||
"CodePage1140",
|
||||
0x3f,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-1",
|
||||
"ISOLatin1",
|
||||
"",
|
||||
"ISO8859_1",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-2",
|
||||
"ISOLatin2",
|
||||
"",
|
||||
"ISO8859_2",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-3",
|
||||
"ISOLatin3",
|
||||
"",
|
||||
"ISO8859_3",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-4",
|
||||
"ISOLatin4",
|
||||
"",
|
||||
"ISO8859_4",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-5",
|
||||
"ISOLatinCyrillic",
|
||||
"",
|
||||
"ISO8859_5",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-6",
|
||||
"ISOLatinArabic",
|
||||
"",
|
||||
"ISO8859_6,ISO8859_6E,ISO8859_6I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-7",
|
||||
"ISOLatinGreek",
|
||||
"",
|
||||
"ISO8859_7",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-8",
|
||||
"ISOLatinHebrew",
|
||||
"",
|
||||
"ISO8859_8,ISO8859_8E,ISO8859_8I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-9",
|
||||
"ISOLatin5",
|
||||
"",
|
||||
"ISO8859_9",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-10",
|
||||
"ISOLatin6",
|
||||
"",
|
||||
"ISO8859_10",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-13",
|
||||
"ISO885913",
|
||||
"",
|
||||
"ISO8859_13",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-14",
|
||||
"ISO885914",
|
||||
"",
|
||||
"ISO8859_14",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-15",
|
||||
"ISO885915",
|
||||
"",
|
||||
"ISO8859_15",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-16",
|
||||
"ISO885916",
|
||||
"",
|
||||
"ISO8859_16",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-R",
|
||||
"KOI8R",
|
||||
"",
|
||||
"KOI8R",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-U",
|
||||
"KOI8U",
|
||||
"",
|
||||
"KOI8U",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh",
|
||||
"Macintosh",
|
||||
"",
|
||||
"Macintosh",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-macintosh.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh Cyrillic",
|
||||
"MacintoshCyrillic",
|
||||
"",
|
||||
"MacintoshCyrillic",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
|
||||
},
|
||||
{
|
||||
"Windows 874",
|
||||
"Windows874",
|
||||
"",
|
||||
"Windows874",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-874.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1250",
|
||||
"Windows1250",
|
||||
"",
|
||||
"Windows1250",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1251",
|
||||
"Windows1251",
|
||||
"",
|
||||
"Windows1251",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1252",
|
||||
"Windows1252",
|
||||
"",
|
||||
"Windows1252",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1253",
|
||||
"Windows1253",
|
||||
"",
|
||||
"Windows1253",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1254",
|
||||
"Windows1254",
|
||||
"",
|
||||
"Windows1254",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1255",
|
||||
"Windows1255",
|
||||
"",
|
||||
"Windows1255",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1256",
|
||||
"Windows1256",
|
||||
"",
|
||||
"Windows1256",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1257",
|
||||
"Windows1257",
|
||||
"",
|
||||
"Windows1257",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1258",
|
||||
"Windows1258",
|
||||
"",
|
||||
"Windows1258",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
|
||||
},
|
||||
{
|
||||
"X-User-Defined",
|
||||
"XUserDefined",
|
||||
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
|
||||
"XUserDefined",
|
||||
encoding.ASCIISub,
|
||||
ascii +
|
||||
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
|
||||
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
|
||||
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
|
||||
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
|
||||
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
|
||||
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
|
||||
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
|
||||
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
|
||||
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
|
||||
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
|
||||
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
|
||||
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
|
||||
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
|
||||
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
|
||||
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
|
||||
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
|
||||
},
|
||||
}
|
||||
|
||||
func getWHATWG(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 128)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, 0
|
||||
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 128 <= x {
|
||||
log.Fatalf("code %d is out of range", x)
|
||||
}
|
||||
if 0x80 <= y && y < 0xa0 {
|
||||
// We diverge from the WHATWG spec by mapping control characters
|
||||
// in the range [0x80, 0xa0) to U+FFFD.
|
||||
continue
|
||||
}
|
||||
mapping[x] = rune(y)
|
||||
}
|
||||
return ascii + string(mapping)
|
||||
}
|
||||
|
||||
func getUCM(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 256)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
charsFound := 0
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
var c byte
|
||||
var r rune
|
||||
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
|
||||
continue
|
||||
}
|
||||
mapping[c] = r
|
||||
charsFound++
|
||||
}
|
||||
|
||||
if charsFound < 200 {
|
||||
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
|
||||
}
|
||||
|
||||
return string(mapping)
|
||||
}
|
||||
|
||||
func main() {
|
||||
mibs := map[string]bool{}
|
||||
all := []string{}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "charmap")
|
||||
|
||||
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
|
||||
|
||||
printf("import (\n")
|
||||
printf("\t\"golang.org/x/text/encoding\"\n")
|
||||
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
|
||||
printf(")\n\n")
|
||||
for _, e := range encodings {
|
||||
varNames := strings.Split(e.varName, ",")
|
||||
all = append(all, varNames...)
|
||||
varName := varNames[0]
|
||||
switch {
|
||||
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
|
||||
e.mapping = getWHATWG(e.mapping)
|
||||
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
|
||||
e.mapping = getUCM(e.mapping)
|
||||
}
|
||||
|
||||
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
|
||||
if asciiSuperset {
|
||||
low = 0x80
|
||||
}
|
||||
lvn := 1
|
||||
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
|
||||
lvn = 3
|
||||
}
|
||||
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
|
||||
printf("// %s is the %s encoding.\n", varName, e.name)
|
||||
if e.comment != "" {
|
||||
printf("//\n// %s\n", e.comment)
|
||||
}
|
||||
printf("var %s *Charmap = &%s\n\nvar %s = Charmap{\nname: %q,\n",
|
||||
varName, lowerVarName, lowerVarName, e.name)
|
||||
if mibs[e.mib] {
|
||||
log.Fatalf("MIB type %q declared multiple times.", e.mib)
|
||||
}
|
||||
printf("mib: identifier.%s,\n", e.mib)
|
||||
printf("asciiSuperset: %t,\n", asciiSuperset)
|
||||
printf("low: 0x%02x,\n", low)
|
||||
printf("replacement: 0x%02x,\n", e.replacement)
|
||||
|
||||
printf("decode: [256]utf8Enc{\n")
|
||||
i, backMapping := 0, map[rune]byte{}
|
||||
for _, c := range e.mapping {
|
||||
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
|
||||
backMapping[c] = byte(i)
|
||||
}
|
||||
var buf [8]byte
|
||||
n := utf8.EncodeRune(buf[:], c)
|
||||
if n > 3 {
|
||||
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
|
||||
}
|
||||
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
|
||||
if i%2 == 1 {
|
||||
printf("\n")
|
||||
}
|
||||
i++
|
||||
}
|
||||
printf("},\n")
|
||||
|
||||
printf("encode: [256]uint32{\n")
|
||||
encode := make([]uint32, 0, 256)
|
||||
for c, i := range backMapping {
|
||||
encode = append(encode, uint32(i)<<24|uint32(c))
|
||||
}
|
||||
sort.Sort(byRune(encode))
|
||||
for len(encode) < cap(encode) {
|
||||
encode = append(encode, encode[len(encode)-1])
|
||||
}
|
||||
for i, enc := range encode {
|
||||
printf("0x%08x,", enc)
|
||||
if i%8 == 7 {
|
||||
printf("\n")
|
||||
}
|
||||
}
|
||||
printf("},\n}\n")
|
||||
|
||||
// Add an estimate of the size of a single Charmap{} struct value, which
|
||||
// includes two 256 elem arrays of 4 bytes and some extra fields, which
|
||||
// align to 3 uint64s on 64-bit architectures.
|
||||
w.Size += 2*4*256 + 3*8
|
||||
}
|
||||
// TODO: add proper line breaking.
|
||||
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
|
||||
}
|
||||
|
||||
type byRune []uint32
|
||||
|
||||
func (b byRune) Len() int { return len(b) }
|
||||
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
|
||||
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
7410
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
Normal file
7410
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
335
vendor/golang.org/x/text/encoding/encoding.go
generated
vendored
Normal file
335
vendor/golang.org/x/text/encoding/encoding.go
generated
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package encoding defines an interface for character encodings, such as Shift
|
||||
// JIS and Windows 1252, that can convert to and from UTF-8.
|
||||
//
|
||||
// Encoding implementations are provided in other packages, such as
|
||||
// golang.org/x/text/encoding/charmap and
|
||||
// golang.org/x/text/encoding/japanese.
|
||||
package encoding // import "golang.org/x/text/encoding"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - There seems to be some inconsistency in when decoders return errors
|
||||
// and when not. Also documentation seems to suggest they shouldn't return
|
||||
// errors at all (except for UTF-16).
|
||||
// - Encoders seem to rely on or at least benefit from the input being in NFC
|
||||
// normal form. Perhaps add an example how users could prepare their output.
|
||||
|
||||
// Encoding is a character set encoding that can be transformed to and from
|
||||
// UTF-8.
|
||||
type Encoding interface {
|
||||
// NewDecoder returns a Decoder.
|
||||
NewDecoder() *Decoder
|
||||
|
||||
// NewEncoder returns an Encoder.
|
||||
NewEncoder() *Encoder
|
||||
}
|
||||
|
||||
// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
|
||||
//
|
||||
// Transforming source bytes that are not of that encoding will not result in an
|
||||
// error per se. Each byte that cannot be transcoded will be represented in the
|
||||
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
|
||||
type Decoder struct {
|
||||
transform.Transformer
|
||||
|
||||
// This forces external creators of Decoders to use names in struct
|
||||
// initializers, allowing for future extendibility without having to break
|
||||
// code.
|
||||
_ struct{}
|
||||
}
|
||||
|
||||
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
|
||||
// bytes or nil, err if any error occurred.
|
||||
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
|
||||
b, _, err := transform.Bytes(d, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// String converts the given encoded string to UTF-8. It returns the converted
|
||||
// string or "", err if any error occurred.
|
||||
func (d *Decoder) String(s string) (string, error) {
|
||||
s, _, err := transform.String(d, s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Reader wraps another Reader to decode its bytes.
|
||||
//
|
||||
// The Decoder may not be used for any other operation as long as the returned
|
||||
// Reader is in use.
|
||||
func (d *Decoder) Reader(r io.Reader) io.Reader {
|
||||
return transform.NewReader(r, d)
|
||||
}
|
||||
|
||||
// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
|
||||
//
|
||||
// Each rune that cannot be transcoded will result in an error. In this case,
|
||||
// the transform will consume all source byte up to, not including the offending
|
||||
// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
|
||||
// `\uFFFD`. To return early with an error instead, use transform.Chain to
|
||||
// preprocess the data with a UTF8Validator.
|
||||
type Encoder struct {
|
||||
transform.Transformer
|
||||
|
||||
// This forces external creators of Encoders to use names in struct
|
||||
// initializers, allowing for future extendibility without having to break
|
||||
// code.
|
||||
_ struct{}
|
||||
}
|
||||
|
||||
// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
|
||||
// any error occurred.
|
||||
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
|
||||
b, _, err := transform.Bytes(e, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// String converts a string from UTF-8. It returns the converted string or
|
||||
// "", err if any error occurred.
|
||||
func (e *Encoder) String(s string) (string, error) {
|
||||
s, _, err := transform.String(e, s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Writer wraps another Writer to encode its UTF-8 output.
|
||||
//
|
||||
// The Encoder may not be used for any other operation as long as the returned
|
||||
// Writer is in use.
|
||||
func (e *Encoder) Writer(w io.Writer) io.Writer {
|
||||
return transform.NewWriter(w, e)
|
||||
}
|
||||
|
||||
// ASCIISub is the ASCII substitute character, as recommended by
|
||||
// https://unicode.org/reports/tr36/#Text_Comparison
|
||||
const ASCIISub = '\x1a'
|
||||
|
||||
// Nop is the nop encoding. Its transformed bytes are the same as the source
|
||||
// bytes; it does not replace invalid UTF-8 sequences.
|
||||
var Nop Encoding = nop{}
|
||||
|
||||
type nop struct{}
|
||||
|
||||
func (nop) NewDecoder() *Decoder {
|
||||
return &Decoder{Transformer: transform.Nop}
|
||||
}
|
||||
func (nop) NewEncoder() *Encoder {
|
||||
return &Encoder{Transformer: transform.Nop}
|
||||
}
|
||||
|
||||
// Replacement is the replacement encoding. Decoding from the replacement
|
||||
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
|
||||
// the replacement encoding yields the same as the source bytes except that
|
||||
// invalid UTF-8 is converted to '\uFFFD'.
|
||||
//
|
||||
// It is defined at http://encoding.spec.whatwg.org/#replacement
|
||||
var Replacement Encoding = replacement{}
|
||||
|
||||
type replacement struct{}
|
||||
|
||||
func (replacement) NewDecoder() *Decoder {
|
||||
return &Decoder{Transformer: replacementDecoder{}}
|
||||
}
|
||||
|
||||
func (replacement) NewEncoder() *Encoder {
|
||||
return &Encoder{Transformer: replacementEncoder{}}
|
||||
}
|
||||
|
||||
func (replacement) ID() (mib identifier.MIB, other string) {
|
||||
return identifier.Replacement, ""
|
||||
}
|
||||
|
||||
type replacementDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(dst) < 3 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
if atEOF {
|
||||
const fffd = "\ufffd"
|
||||
dst[0] = fffd[0]
|
||||
dst[1] = fffd[1]
|
||||
dst[2] = fffd[2]
|
||||
nDst = 3
|
||||
}
|
||||
return nDst, len(src), nil
|
||||
}
|
||||
|
||||
type replacementEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
|
||||
// repertoire of the destination encoding with HTML escape sequences.
|
||||
//
|
||||
// This wrapper exists to comply to URL and HTML forms requiring a
|
||||
// non-terminating legacy encoder. The produced sequences may lead to data
|
||||
// loss as they are indistinguishable from legitimate input. To avoid this
|
||||
// issue, use UTF-8 encodings whenever possible.
|
||||
func HTMLEscapeUnsupported(e *Encoder) *Encoder {
|
||||
return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
|
||||
}
|
||||
|
||||
// ReplaceUnsupported wraps encoders to replace source runes outside the
|
||||
// repertoire of the destination encoding with an encoding-specific
|
||||
// replacement.
|
||||
//
|
||||
// This wrapper is only provided for backwards compatibility and legacy
|
||||
// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
|
||||
func ReplaceUnsupported(e *Encoder) *Encoder {
|
||||
return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
|
||||
}
|
||||
|
||||
type errorHandler struct {
|
||||
*Encoder
|
||||
handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
|
||||
}
|
||||
|
||||
// TODO: consider making this error public in some form.
|
||||
type repertoireError interface {
|
||||
Replacement() byte
|
||||
}
|
||||
|
||||
func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
|
||||
for err != nil {
|
||||
rerr, ok := err.(repertoireError)
|
||||
if !ok {
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
r, sz := utf8.DecodeRune(src[nSrc:])
|
||||
n, ok := h.handler(dst[nDst:], r, rerr)
|
||||
if !ok {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
err = nil
|
||||
nDst += n
|
||||
if nSrc += sz; nSrc < len(src) {
|
||||
var dn, sn int
|
||||
dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
|
||||
nDst += dn
|
||||
nSrc += sn
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
|
||||
buf := [8]byte{}
|
||||
b := strconv.AppendUint(buf[:0], uint64(r), 10)
|
||||
if n = len(b) + len("&#;"); n >= len(dst) {
|
||||
return 0, false
|
||||
}
|
||||
dst[0] = '&'
|
||||
dst[1] = '#'
|
||||
dst[copy(dst[2:], b)+2] = ';'
|
||||
return n, true
|
||||
}
|
||||
|
||||
func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
|
||||
if len(dst) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
dst[0] = err.Replacement()
|
||||
return 1, true
|
||||
}
|
||||
|
||||
// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
|
||||
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
|
||||
|
||||
// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
|
||||
// input byte that is not valid UTF-8.
|
||||
var UTF8Validator transform.Transformer = utf8Validator{}
|
||||
|
||||
type utf8Validator struct{ transform.NopResetter }
|
||||
|
||||
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
n := len(src)
|
||||
if n > len(dst) {
|
||||
n = len(dst)
|
||||
}
|
||||
for i := 0; i < n; {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
dst[i] = c
|
||||
i++
|
||||
continue
|
||||
}
|
||||
_, size := utf8.DecodeRune(src[i:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
err = ErrInvalidUTF8
|
||||
if !atEOF && !utf8.FullRune(src[i:]) {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return i, i, err
|
||||
}
|
||||
if i+size > len(dst) {
|
||||
return i, i, transform.ErrShortDst
|
||||
}
|
||||
for ; size > 0; size-- {
|
||||
dst[i] = src[i]
|
||||
i++
|
||||
}
|
||||
}
|
||||
if len(src) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
}
|
||||
return n, n, err
|
||||
}
|
||||
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
Normal file
173
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
Encodings []struct {
|
||||
Labels []string
|
||||
Name string
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
|
||||
var groups []group
|
||||
if err := json.NewDecoder(r).Decode(&groups); err != nil {
|
||||
log.Fatalf("Error reading encodings.json: %v", err)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintln(w, "type htmlEncoding byte")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
if name == "" {
|
||||
log.Fatalf("No const defined for %s.", key)
|
||||
}
|
||||
if i == 0 {
|
||||
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
|
||||
} else {
|
||||
fmt.Fprintf(w, "%s\n", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "numEncodings")
|
||||
fmt.Fprint(w, ")\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
for _, l := range e.Labels {
|
||||
key := strings.ToLower(e.Name)
|
||||
name := consts[key]
|
||||
fmt.Fprintf(w, "%q: %s,\n", l, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
var tags []string
|
||||
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
|
||||
for _, loc := range locales {
|
||||
tags = append(tags, loc.tag)
|
||||
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
|
||||
|
||||
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
|
||||
}
|
||||
|
||||
// consts maps canonical encoding name to internal constant.
|
||||
var consts = map[string]string{
|
||||
"utf-8": "utf8",
|
||||
"ibm866": "ibm866",
|
||||
"iso-8859-2": "iso8859_2",
|
||||
"iso-8859-3": "iso8859_3",
|
||||
"iso-8859-4": "iso8859_4",
|
||||
"iso-8859-5": "iso8859_5",
|
||||
"iso-8859-6": "iso8859_6",
|
||||
"iso-8859-7": "iso8859_7",
|
||||
"iso-8859-8": "iso8859_8",
|
||||
"iso-8859-8-i": "iso8859_8I",
|
||||
"iso-8859-10": "iso8859_10",
|
||||
"iso-8859-13": "iso8859_13",
|
||||
"iso-8859-14": "iso8859_14",
|
||||
"iso-8859-15": "iso8859_15",
|
||||
"iso-8859-16": "iso8859_16",
|
||||
"koi8-r": "koi8r",
|
||||
"koi8-u": "koi8u",
|
||||
"macintosh": "macintosh",
|
||||
"windows-874": "windows874",
|
||||
"windows-1250": "windows1250",
|
||||
"windows-1251": "windows1251",
|
||||
"windows-1252": "windows1252",
|
||||
"windows-1253": "windows1253",
|
||||
"windows-1254": "windows1254",
|
||||
"windows-1255": "windows1255",
|
||||
"windows-1256": "windows1256",
|
||||
"windows-1257": "windows1257",
|
||||
"windows-1258": "windows1258",
|
||||
"x-mac-cyrillic": "macintoshCyrillic",
|
||||
"gbk": "gbk",
|
||||
"gb18030": "gb18030",
|
||||
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
|
||||
"big5": "big5",
|
||||
"euc-jp": "eucjp",
|
||||
"iso-2022-jp": "iso2022jp",
|
||||
"shift_jis": "shiftJIS",
|
||||
"euc-kr": "euckr",
|
||||
"replacement": "replacement",
|
||||
"utf-16be": "utf16be",
|
||||
"utf-16le": "utf16le",
|
||||
"x-user-defined": "xUserDefined",
|
||||
}
|
||||
|
||||
// locales is taken from
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
|
||||
var locales = []struct{ tag, name string }{
|
||||
// The default value. Explicitly state latin to benefit from the exact
|
||||
// script option, while still making 1252 the default encoding for languages
|
||||
// written in Latin script.
|
||||
{"und_Latn", "windows-1252"},
|
||||
{"ar", "windows-1256"},
|
||||
{"ba", "windows-1251"},
|
||||
{"be", "windows-1251"},
|
||||
{"bg", "windows-1251"},
|
||||
{"cs", "windows-1250"},
|
||||
{"el", "iso-8859-7"},
|
||||
{"et", "windows-1257"},
|
||||
{"fa", "windows-1256"},
|
||||
{"he", "windows-1255"},
|
||||
{"hr", "windows-1250"},
|
||||
{"hu", "iso-8859-2"},
|
||||
{"ja", "shift_jis"},
|
||||
{"kk", "windows-1251"},
|
||||
{"ko", "euc-kr"},
|
||||
{"ku", "windows-1254"},
|
||||
{"ky", "windows-1251"},
|
||||
{"lt", "windows-1257"},
|
||||
{"lv", "windows-1257"},
|
||||
{"mk", "windows-1251"},
|
||||
{"pl", "iso-8859-2"},
|
||||
{"ru", "windows-1251"},
|
||||
{"sah", "windows-1251"},
|
||||
{"sk", "windows-1250"},
|
||||
{"sl", "iso-8859-2"},
|
||||
{"sr", "windows-1251"},
|
||||
{"tg", "windows-1251"},
|
||||
{"th", "windows-874"},
|
||||
{"tr", "windows-1254"},
|
||||
{"tt", "windows-1251"},
|
||||
{"uk", "windows-1251"},
|
||||
{"vi", "windows-1258"},
|
||||
{"zh-hans", "gb18030"},
|
||||
{"zh-hant", "big5"},
|
||||
}
|
||||
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
Normal file
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package htmlindex maps character set encoding names to Encodings as
|
||||
// recommended by the W3C for use in HTML 5. See http://www.w3.org/TR/encoding.
|
||||
package htmlindex
|
||||
|
||||
// TODO: perhaps have a "bare" version of the index (used by this package) that
|
||||
// is not pre-loaded with all encodings. Global variables in encodings prevent
|
||||
// the linker from being able to purge unneeded tables. This means that
|
||||
// referencing all encodings, as this package does for the default index, links
|
||||
// in all encodings unconditionally.
|
||||
//
|
||||
// This issue can be solved by either solving the linking issue (see
|
||||
// https://github.com/golang/go/issues/6330) or refactoring the encoding tables
|
||||
// (e.g. moving the tables to internal packages that do not use global
|
||||
// variables).
|
||||
|
||||
// TODO: allow canonicalizing names
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidName = errors.New("htmlindex: invalid encoding name")
|
||||
errUnknown = errors.New("htmlindex: unknown Encoding")
|
||||
errUnsupported = errors.New("htmlindex: this encoding is not supported")
|
||||
)
|
||||
|
||||
var (
|
||||
matcherOnce sync.Once
|
||||
matcher language.Matcher
|
||||
)
|
||||
|
||||
// LanguageDefault returns the canonical name of the default encoding for a
|
||||
// given language.
|
||||
func LanguageDefault(tag language.Tag) string {
|
||||
matcherOnce.Do(func() {
|
||||
tags := []language.Tag{}
|
||||
for _, t := range strings.Split(locales, " ") {
|
||||
tags = append(tags, language.MustParse(t))
|
||||
}
|
||||
matcher = language.NewMatcher(tags, language.PreferSameScript(true))
|
||||
})
|
||||
_, i, _ := matcher.Match(tag)
|
||||
return canonical[localeMap[i]] // Default is Windows-1252.
|
||||
}
|
||||
|
||||
// Get returns an Encoding for one of the names listed in
|
||||
// http://www.w3.org/TR/encoding using the Default Index. Matching is case-
|
||||
// insensitive.
|
||||
func Get(name string) (encoding.Encoding, error) {
|
||||
x, ok := nameMap[strings.ToLower(strings.TrimSpace(name))]
|
||||
if !ok {
|
||||
return nil, errInvalidName
|
||||
}
|
||||
return encodings[x], nil
|
||||
}
|
||||
|
||||
// Name reports the canonical name of the given Encoding. It will return
|
||||
// an error if e is not associated with a supported encoding scheme.
|
||||
func Name(e encoding.Encoding) (string, error) {
|
||||
id, ok := e.(identifier.Interface)
|
||||
if !ok {
|
||||
return "", errUnknown
|
||||
}
|
||||
mib, _ := id.ID()
|
||||
if mib == 0 {
|
||||
return "", errUnknown
|
||||
}
|
||||
v, ok := mibMap[mib]
|
||||
if !ok {
|
||||
return "", errUnsupported
|
||||
}
|
||||
return canonical[v], nil
|
||||
}
|
||||
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
Normal file
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package htmlindex
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// mibMap maps a MIB identifier to an htmlEncoding index.
|
||||
var mibMap = map[identifier.MIB]htmlEncoding{
|
||||
identifier.UTF8: utf8,
|
||||
identifier.UTF16BE: utf16be,
|
||||
identifier.UTF16LE: utf16le,
|
||||
identifier.IBM866: ibm866,
|
||||
identifier.ISOLatin2: iso8859_2,
|
||||
identifier.ISOLatin3: iso8859_3,
|
||||
identifier.ISOLatin4: iso8859_4,
|
||||
identifier.ISOLatinCyrillic: iso8859_5,
|
||||
identifier.ISOLatinArabic: iso8859_6,
|
||||
identifier.ISOLatinGreek: iso8859_7,
|
||||
identifier.ISOLatinHebrew: iso8859_8,
|
||||
identifier.ISO88598I: iso8859_8I,
|
||||
identifier.ISOLatin6: iso8859_10,
|
||||
identifier.ISO885913: iso8859_13,
|
||||
identifier.ISO885914: iso8859_14,
|
||||
identifier.ISO885915: iso8859_15,
|
||||
identifier.ISO885916: iso8859_16,
|
||||
identifier.KOI8R: koi8r,
|
||||
identifier.KOI8U: koi8u,
|
||||
identifier.Macintosh: macintosh,
|
||||
identifier.MacintoshCyrillic: macintoshCyrillic,
|
||||
identifier.Windows874: windows874,
|
||||
identifier.Windows1250: windows1250,
|
||||
identifier.Windows1251: windows1251,
|
||||
identifier.Windows1252: windows1252,
|
||||
identifier.Windows1253: windows1253,
|
||||
identifier.Windows1254: windows1254,
|
||||
identifier.Windows1255: windows1255,
|
||||
identifier.Windows1256: windows1256,
|
||||
identifier.Windows1257: windows1257,
|
||||
identifier.Windows1258: windows1258,
|
||||
identifier.XUserDefined: xUserDefined,
|
||||
identifier.GBK: gbk,
|
||||
identifier.GB18030: gb18030,
|
||||
identifier.Big5: big5,
|
||||
identifier.EUCPkdFmtJapanese: eucjp,
|
||||
identifier.ISO2022JP: iso2022jp,
|
||||
identifier.ShiftJIS: shiftJIS,
|
||||
identifier.EUCKR: euckr,
|
||||
identifier.Replacement: replacement,
|
||||
}
|
||||
|
||||
// encodings maps the internal htmlEncoding to an Encoding.
|
||||
// TODO: consider using a reusable index in encoding/internal.
|
||||
var encodings = [numEncodings]encoding.Encoding{
|
||||
utf8: unicode.UTF8,
|
||||
ibm866: charmap.CodePage866,
|
||||
iso8859_2: charmap.ISO8859_2,
|
||||
iso8859_3: charmap.ISO8859_3,
|
||||
iso8859_4: charmap.ISO8859_4,
|
||||
iso8859_5: charmap.ISO8859_5,
|
||||
iso8859_6: charmap.ISO8859_6,
|
||||
iso8859_7: charmap.ISO8859_7,
|
||||
iso8859_8: charmap.ISO8859_8,
|
||||
iso8859_8I: charmap.ISO8859_8I,
|
||||
iso8859_10: charmap.ISO8859_10,
|
||||
iso8859_13: charmap.ISO8859_13,
|
||||
iso8859_14: charmap.ISO8859_14,
|
||||
iso8859_15: charmap.ISO8859_15,
|
||||
iso8859_16: charmap.ISO8859_16,
|
||||
koi8r: charmap.KOI8R,
|
||||
koi8u: charmap.KOI8U,
|
||||
macintosh: charmap.Macintosh,
|
||||
windows874: charmap.Windows874,
|
||||
windows1250: charmap.Windows1250,
|
||||
windows1251: charmap.Windows1251,
|
||||
windows1252: charmap.Windows1252,
|
||||
windows1253: charmap.Windows1253,
|
||||
windows1254: charmap.Windows1254,
|
||||
windows1255: charmap.Windows1255,
|
||||
windows1256: charmap.Windows1256,
|
||||
windows1257: charmap.Windows1257,
|
||||
windows1258: charmap.Windows1258,
|
||||
macintoshCyrillic: charmap.MacintoshCyrillic,
|
||||
gbk: simplifiedchinese.GBK,
|
||||
gb18030: simplifiedchinese.GB18030,
|
||||
big5: traditionalchinese.Big5,
|
||||
eucjp: japanese.EUCJP,
|
||||
iso2022jp: japanese.ISO2022JP,
|
||||
shiftJIS: japanese.ShiftJIS,
|
||||
euckr: korean.EUCKR,
|
||||
replacement: encoding.Replacement,
|
||||
utf16be: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
utf16le: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
xUserDefined: charmap.XUserDefined,
|
||||
}
|
||||
353
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
Normal file
353
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
Normal file
@@ -0,0 +1,353 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package htmlindex
|
||||
|
||||
type htmlEncoding byte
|
||||
|
||||
const (
|
||||
utf8 htmlEncoding = iota
|
||||
ibm866
|
||||
iso8859_2
|
||||
iso8859_3
|
||||
iso8859_4
|
||||
iso8859_5
|
||||
iso8859_6
|
||||
iso8859_7
|
||||
iso8859_8
|
||||
iso8859_8I
|
||||
iso8859_10
|
||||
iso8859_13
|
||||
iso8859_14
|
||||
iso8859_15
|
||||
iso8859_16
|
||||
koi8r
|
||||
koi8u
|
||||
macintosh
|
||||
windows874
|
||||
windows1250
|
||||
windows1251
|
||||
windows1252
|
||||
windows1253
|
||||
windows1254
|
||||
windows1255
|
||||
windows1256
|
||||
windows1257
|
||||
windows1258
|
||||
macintoshCyrillic
|
||||
gbk
|
||||
gb18030
|
||||
big5
|
||||
eucjp
|
||||
iso2022jp
|
||||
shiftJIS
|
||||
euckr
|
||||
replacement
|
||||
utf16be
|
||||
utf16le
|
||||
xUserDefined
|
||||
numEncodings
|
||||
)
|
||||
|
||||
var canonical = [numEncodings]string{
|
||||
"utf-8",
|
||||
"ibm866",
|
||||
"iso-8859-2",
|
||||
"iso-8859-3",
|
||||
"iso-8859-4",
|
||||
"iso-8859-5",
|
||||
"iso-8859-6",
|
||||
"iso-8859-7",
|
||||
"iso-8859-8",
|
||||
"iso-8859-8-i",
|
||||
"iso-8859-10",
|
||||
"iso-8859-13",
|
||||
"iso-8859-14",
|
||||
"iso-8859-15",
|
||||
"iso-8859-16",
|
||||
"koi8-r",
|
||||
"koi8-u",
|
||||
"macintosh",
|
||||
"windows-874",
|
||||
"windows-1250",
|
||||
"windows-1251",
|
||||
"windows-1252",
|
||||
"windows-1253",
|
||||
"windows-1254",
|
||||
"windows-1255",
|
||||
"windows-1256",
|
||||
"windows-1257",
|
||||
"windows-1258",
|
||||
"x-mac-cyrillic",
|
||||
"gbk",
|
||||
"gb18030",
|
||||
"big5",
|
||||
"euc-jp",
|
||||
"iso-2022-jp",
|
||||
"shift_jis",
|
||||
"euc-kr",
|
||||
"replacement",
|
||||
"utf-16be",
|
||||
"utf-16le",
|
||||
"x-user-defined",
|
||||
}
|
||||
|
||||
var nameMap = map[string]htmlEncoding{
|
||||
"unicode-1-1-utf-8": utf8,
|
||||
"utf-8": utf8,
|
||||
"utf8": utf8,
|
||||
"866": ibm866,
|
||||
"cp866": ibm866,
|
||||
"csibm866": ibm866,
|
||||
"ibm866": ibm866,
|
||||
"csisolatin2": iso8859_2,
|
||||
"iso-8859-2": iso8859_2,
|
||||
"iso-ir-101": iso8859_2,
|
||||
"iso8859-2": iso8859_2,
|
||||
"iso88592": iso8859_2,
|
||||
"iso_8859-2": iso8859_2,
|
||||
"iso_8859-2:1987": iso8859_2,
|
||||
"l2": iso8859_2,
|
||||
"latin2": iso8859_2,
|
||||
"csisolatin3": iso8859_3,
|
||||
"iso-8859-3": iso8859_3,
|
||||
"iso-ir-109": iso8859_3,
|
||||
"iso8859-3": iso8859_3,
|
||||
"iso88593": iso8859_3,
|
||||
"iso_8859-3": iso8859_3,
|
||||
"iso_8859-3:1988": iso8859_3,
|
||||
"l3": iso8859_3,
|
||||
"latin3": iso8859_3,
|
||||
"csisolatin4": iso8859_4,
|
||||
"iso-8859-4": iso8859_4,
|
||||
"iso-ir-110": iso8859_4,
|
||||
"iso8859-4": iso8859_4,
|
||||
"iso88594": iso8859_4,
|
||||
"iso_8859-4": iso8859_4,
|
||||
"iso_8859-4:1988": iso8859_4,
|
||||
"l4": iso8859_4,
|
||||
"latin4": iso8859_4,
|
||||
"csisolatincyrillic": iso8859_5,
|
||||
"cyrillic": iso8859_5,
|
||||
"iso-8859-5": iso8859_5,
|
||||
"iso-ir-144": iso8859_5,
|
||||
"iso8859-5": iso8859_5,
|
||||
"iso88595": iso8859_5,
|
||||
"iso_8859-5": iso8859_5,
|
||||
"iso_8859-5:1988": iso8859_5,
|
||||
"arabic": iso8859_6,
|
||||
"asmo-708": iso8859_6,
|
||||
"csiso88596e": iso8859_6,
|
||||
"csiso88596i": iso8859_6,
|
||||
"csisolatinarabic": iso8859_6,
|
||||
"ecma-114": iso8859_6,
|
||||
"iso-8859-6": iso8859_6,
|
||||
"iso-8859-6-e": iso8859_6,
|
||||
"iso-8859-6-i": iso8859_6,
|
||||
"iso-ir-127": iso8859_6,
|
||||
"iso8859-6": iso8859_6,
|
||||
"iso88596": iso8859_6,
|
||||
"iso_8859-6": iso8859_6,
|
||||
"iso_8859-6:1987": iso8859_6,
|
||||
"csisolatingreek": iso8859_7,
|
||||
"ecma-118": iso8859_7,
|
||||
"elot_928": iso8859_7,
|
||||
"greek": iso8859_7,
|
||||
"greek8": iso8859_7,
|
||||
"iso-8859-7": iso8859_7,
|
||||
"iso-ir-126": iso8859_7,
|
||||
"iso8859-7": iso8859_7,
|
||||
"iso88597": iso8859_7,
|
||||
"iso_8859-7": iso8859_7,
|
||||
"iso_8859-7:1987": iso8859_7,
|
||||
"sun_eu_greek": iso8859_7,
|
||||
"csiso88598e": iso8859_8,
|
||||
"csisolatinhebrew": iso8859_8,
|
||||
"hebrew": iso8859_8,
|
||||
"iso-8859-8": iso8859_8,
|
||||
"iso-8859-8-e": iso8859_8,
|
||||
"iso-ir-138": iso8859_8,
|
||||
"iso8859-8": iso8859_8,
|
||||
"iso88598": iso8859_8,
|
||||
"iso_8859-8": iso8859_8,
|
||||
"iso_8859-8:1988": iso8859_8,
|
||||
"visual": iso8859_8,
|
||||
"csiso88598i": iso8859_8I,
|
||||
"iso-8859-8-i": iso8859_8I,
|
||||
"logical": iso8859_8I,
|
||||
"csisolatin6": iso8859_10,
|
||||
"iso-8859-10": iso8859_10,
|
||||
"iso-ir-157": iso8859_10,
|
||||
"iso8859-10": iso8859_10,
|
||||
"iso885910": iso8859_10,
|
||||
"l6": iso8859_10,
|
||||
"latin6": iso8859_10,
|
||||
"iso-8859-13": iso8859_13,
|
||||
"iso8859-13": iso8859_13,
|
||||
"iso885913": iso8859_13,
|
||||
"iso-8859-14": iso8859_14,
|
||||
"iso8859-14": iso8859_14,
|
||||
"iso885914": iso8859_14,
|
||||
"csisolatin9": iso8859_15,
|
||||
"iso-8859-15": iso8859_15,
|
||||
"iso8859-15": iso8859_15,
|
||||
"iso885915": iso8859_15,
|
||||
"iso_8859-15": iso8859_15,
|
||||
"l9": iso8859_15,
|
||||
"iso-8859-16": iso8859_16,
|
||||
"cskoi8r": koi8r,
|
||||
"koi": koi8r,
|
||||
"koi8": koi8r,
|
||||
"koi8-r": koi8r,
|
||||
"koi8_r": koi8r,
|
||||
"koi8-ru": koi8u,
|
||||
"koi8-u": koi8u,
|
||||
"csmacintosh": macintosh,
|
||||
"mac": macintosh,
|
||||
"macintosh": macintosh,
|
||||
"x-mac-roman": macintosh,
|
||||
"dos-874": windows874,
|
||||
"iso-8859-11": windows874,
|
||||
"iso8859-11": windows874,
|
||||
"iso885911": windows874,
|
||||
"tis-620": windows874,
|
||||
"windows-874": windows874,
|
||||
"cp1250": windows1250,
|
||||
"windows-1250": windows1250,
|
||||
"x-cp1250": windows1250,
|
||||
"cp1251": windows1251,
|
||||
"windows-1251": windows1251,
|
||||
"x-cp1251": windows1251,
|
||||
"ansi_x3.4-1968": windows1252,
|
||||
"ascii": windows1252,
|
||||
"cp1252": windows1252,
|
||||
"cp819": windows1252,
|
||||
"csisolatin1": windows1252,
|
||||
"ibm819": windows1252,
|
||||
"iso-8859-1": windows1252,
|
||||
"iso-ir-100": windows1252,
|
||||
"iso8859-1": windows1252,
|
||||
"iso88591": windows1252,
|
||||
"iso_8859-1": windows1252,
|
||||
"iso_8859-1:1987": windows1252,
|
||||
"l1": windows1252,
|
||||
"latin1": windows1252,
|
||||
"us-ascii": windows1252,
|
||||
"windows-1252": windows1252,
|
||||
"x-cp1252": windows1252,
|
||||
"cp1253": windows1253,
|
||||
"windows-1253": windows1253,
|
||||
"x-cp1253": windows1253,
|
||||
"cp1254": windows1254,
|
||||
"csisolatin5": windows1254,
|
||||
"iso-8859-9": windows1254,
|
||||
"iso-ir-148": windows1254,
|
||||
"iso8859-9": windows1254,
|
||||
"iso88599": windows1254,
|
||||
"iso_8859-9": windows1254,
|
||||
"iso_8859-9:1989": windows1254,
|
||||
"l5": windows1254,
|
||||
"latin5": windows1254,
|
||||
"windows-1254": windows1254,
|
||||
"x-cp1254": windows1254,
|
||||
"cp1255": windows1255,
|
||||
"windows-1255": windows1255,
|
||||
"x-cp1255": windows1255,
|
||||
"cp1256": windows1256,
|
||||
"windows-1256": windows1256,
|
||||
"x-cp1256": windows1256,
|
||||
"cp1257": windows1257,
|
||||
"windows-1257": windows1257,
|
||||
"x-cp1257": windows1257,
|
||||
"cp1258": windows1258,
|
||||
"windows-1258": windows1258,
|
||||
"x-cp1258": windows1258,
|
||||
"x-mac-cyrillic": macintoshCyrillic,
|
||||
"x-mac-ukrainian": macintoshCyrillic,
|
||||
"chinese": gbk,
|
||||
"csgb2312": gbk,
|
||||
"csiso58gb231280": gbk,
|
||||
"gb2312": gbk,
|
||||
"gb_2312": gbk,
|
||||
"gb_2312-80": gbk,
|
||||
"gbk": gbk,
|
||||
"iso-ir-58": gbk,
|
||||
"x-gbk": gbk,
|
||||
"gb18030": gb18030,
|
||||
"big5": big5,
|
||||
"big5-hkscs": big5,
|
||||
"cn-big5": big5,
|
||||
"csbig5": big5,
|
||||
"x-x-big5": big5,
|
||||
"cseucpkdfmtjapanese": eucjp,
|
||||
"euc-jp": eucjp,
|
||||
"x-euc-jp": eucjp,
|
||||
"csiso2022jp": iso2022jp,
|
||||
"iso-2022-jp": iso2022jp,
|
||||
"csshiftjis": shiftJIS,
|
||||
"ms932": shiftJIS,
|
||||
"ms_kanji": shiftJIS,
|
||||
"shift-jis": shiftJIS,
|
||||
"shift_jis": shiftJIS,
|
||||
"sjis": shiftJIS,
|
||||
"windows-31j": shiftJIS,
|
||||
"x-sjis": shiftJIS,
|
||||
"cseuckr": euckr,
|
||||
"csksc56011987": euckr,
|
||||
"euc-kr": euckr,
|
||||
"iso-ir-149": euckr,
|
||||
"korean": euckr,
|
||||
"ks_c_5601-1987": euckr,
|
||||
"ks_c_5601-1989": euckr,
|
||||
"ksc5601": euckr,
|
||||
"ksc_5601": euckr,
|
||||
"windows-949": euckr,
|
||||
"csiso2022kr": replacement,
|
||||
"hz-gb-2312": replacement,
|
||||
"iso-2022-cn": replacement,
|
||||
"iso-2022-cn-ext": replacement,
|
||||
"iso-2022-kr": replacement,
|
||||
"replacement": replacement,
|
||||
"utf-16be": utf16be,
|
||||
"utf-16": utf16le,
|
||||
"utf-16le": utf16le,
|
||||
"x-user-defined": xUserDefined,
|
||||
}
|
||||
|
||||
var localeMap = []htmlEncoding{
|
||||
windows1252, // und_Latn
|
||||
windows1256, // ar
|
||||
windows1251, // ba
|
||||
windows1251, // be
|
||||
windows1251, // bg
|
||||
windows1250, // cs
|
||||
iso8859_7, // el
|
||||
windows1257, // et
|
||||
windows1256, // fa
|
||||
windows1255, // he
|
||||
windows1250, // hr
|
||||
iso8859_2, // hu
|
||||
shiftJIS, // ja
|
||||
windows1251, // kk
|
||||
euckr, // ko
|
||||
windows1254, // ku
|
||||
windows1251, // ky
|
||||
windows1257, // lt
|
||||
windows1257, // lv
|
||||
windows1251, // mk
|
||||
iso8859_2, // pl
|
||||
windows1251, // ru
|
||||
windows1251, // sah
|
||||
windows1250, // sk
|
||||
iso8859_2, // sl
|
||||
windows1251, // sr
|
||||
windows1251, // tg
|
||||
windows874, // th
|
||||
windows1254, // tr
|
||||
windows1251, // tt
|
||||
windows1251, // uk
|
||||
windows1258, // vi
|
||||
gb18030, // zh-hans
|
||||
big5, // zh-hant
|
||||
}
|
||||
|
||||
const locales = "und_Latn ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
|
||||
142
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
Normal file
142
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
Normal file
@@ -0,0 +1,142 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type registry struct {
|
||||
XMLName xml.Name `xml:"registry"`
|
||||
Updated string `xml:"updated"`
|
||||
Registry []struct {
|
||||
ID string `xml:"id,attr"`
|
||||
Record []struct {
|
||||
Name string `xml:"name"`
|
||||
Xref []struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:"data,attr"`
|
||||
} `xml:"xref"`
|
||||
Desc struct {
|
||||
Data string `xml:",innerxml"`
|
||||
// Any []struct {
|
||||
// Data string `xml:",chardata"`
|
||||
// } `xml:",any"`
|
||||
// Data string `xml:",chardata"`
|
||||
} `xml:"description,"`
|
||||
MIB string `xml:"value"`
|
||||
Alias []string `xml:"alias"`
|
||||
MIME string `xml:"preferred_alias"`
|
||||
} `xml:"record"`
|
||||
} `xml:"registry"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
|
||||
reg := ®istry{}
|
||||
if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF {
|
||||
log.Fatalf("Error decoding charset registry: %v", err)
|
||||
}
|
||||
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
|
||||
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, "const (\n")
|
||||
for _, rec := range reg.Registry[0].Record {
|
||||
constName := ""
|
||||
for _, a := range rec.Alias {
|
||||
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
|
||||
// Some of the constant definitions have comments in them. Strip those.
|
||||
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
|
||||
}
|
||||
}
|
||||
if constName == "" {
|
||||
switch rec.MIB {
|
||||
case "2085":
|
||||
constName = "HZGB2312" // Not listed as alias for some reason.
|
||||
default:
|
||||
log.Fatalf("No cs alias defined for %s.", rec.MIB)
|
||||
}
|
||||
}
|
||||
if rec.MIME != "" {
|
||||
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
|
||||
}
|
||||
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
|
||||
if len(rec.Desc.Data) > 0 {
|
||||
fmt.Fprint(w, "// ")
|
||||
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
|
||||
inElem := true
|
||||
attr := ""
|
||||
for {
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
log.Fatal(err)
|
||||
}
|
||||
break
|
||||
}
|
||||
switch x := t.(type) {
|
||||
case xml.CharData:
|
||||
attr = "" // Don't need attribute info.
|
||||
a := bytes.Split([]byte(x), []byte("\n"))
|
||||
for i, b := range a {
|
||||
if b = bytes.TrimSpace(b); len(b) != 0 {
|
||||
if !inElem && i > 0 {
|
||||
fmt.Fprint(w, "\n// ")
|
||||
}
|
||||
inElem = false
|
||||
fmt.Fprintf(w, "%s ", string(b))
|
||||
}
|
||||
}
|
||||
case xml.StartElement:
|
||||
if x.Name.Local == "xref" {
|
||||
inElem = true
|
||||
use := false
|
||||
for _, a := range x.Attr {
|
||||
if a.Name.Local == "type" {
|
||||
use = use || a.Value != "person"
|
||||
}
|
||||
if a.Name.Local == "data" && use {
|
||||
// Patch up URLs to use https. From some links, the
|
||||
// https version is different from the http one.
|
||||
s := a.Value
|
||||
s = strings.Replace(s, "http://", "https://", -1)
|
||||
s = strings.Replace(s, "/unicode/", "/", -1)
|
||||
attr = s + " "
|
||||
}
|
||||
}
|
||||
}
|
||||
case xml.EndElement:
|
||||
inElem = false
|
||||
fmt.Fprint(w, attr)
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "\n")
|
||||
}
|
||||
for _, x := range rec.Xref {
|
||||
switch x.Type {
|
||||
case "rfc":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
|
||||
case "uri":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
|
||||
}
|
||||
81
vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
generated
vendored
Normal file
@@ -0,0 +1,81 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package identifier defines the contract between implementations of Encoding
|
||||
// and Index by defining identifiers that uniquely identify standardized coded
|
||||
// character sets (CCS) and character encoding schemes (CES), which we will
|
||||
// together refer to as encodings, for which Encoding implementations provide
|
||||
// converters to and from UTF-8. This package is typically only of concern to
|
||||
// implementers of Indexes and Encodings.
|
||||
//
|
||||
// One part of the identifier is the MIB code, which is defined by IANA and
|
||||
// uniquely identifies a CCS or CES. Each code is associated with data that
|
||||
// references authorities, official documentation as well as aliases and MIME
|
||||
// names.
|
||||
//
|
||||
// Not all CESs are covered by the IANA registry. The "other" string that is
|
||||
// returned by ID can be used to identify other character sets or versions of
|
||||
// existing ones.
|
||||
//
|
||||
// It is recommended that each package that provides a set of Encodings provide
|
||||
// the All and Common variables to reference all supported encodings and
|
||||
// commonly used subset. This allows Index implementations to include all
|
||||
// available encodings without explicitly referencing or knowing about them.
|
||||
package identifier
|
||||
|
||||
// Note: this package is internal, but could be made public if there is a need
|
||||
// for writing third-party Indexes and Encodings.
|
||||
|
||||
// References:
|
||||
// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
|
||||
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
|
||||
// - http://www.ietf.org/rfc/rfc2978.txt
|
||||
// - https://www.unicode.org/reports/tr22/
|
||||
// - http://www.w3.org/TR/encoding/
|
||||
// - https://encoding.spec.whatwg.org/
|
||||
// - https://encoding.spec.whatwg.org/encodings.json
|
||||
// - https://tools.ietf.org/html/rfc6657#section-5
|
||||
|
||||
// Interface can be implemented by Encodings to define the CCS or CES for which
|
||||
// it implements conversions.
|
||||
type Interface interface {
|
||||
// ID returns an encoding identifier. Exactly one of the mib and other
|
||||
// values should be non-zero.
|
||||
//
|
||||
// In the usual case it is only necessary to indicate the MIB code. The
|
||||
// other string can be used to specify encodings for which there is no MIB,
|
||||
// such as "x-mac-dingbat".
|
||||
//
|
||||
// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
|
||||
ID() (mib MIB, other string)
|
||||
|
||||
// NOTE: the restrictions on the encoding are to allow extending the syntax
|
||||
// with additional information such as versions, vendors and other variants.
|
||||
}
|
||||
|
||||
// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds
|
||||
// some identifiers for some encodings that are not covered by the IANA
|
||||
// standard.
|
||||
//
|
||||
// See http://www.iana.org/assignments/ianacharset-mib.
|
||||
type MIB uint16
|
||||
|
||||
// These additional MIB types are not defined in IANA. They are added because
|
||||
// they are common and defined within the text repo.
|
||||
const (
|
||||
// Unofficial marks the start of encodings not registered by IANA.
|
||||
Unofficial MIB = 10000 + iota
|
||||
|
||||
// Replacement is the WhatWG replacement encoding.
|
||||
Replacement
|
||||
|
||||
// XUserDefined is the code for x-user-defined.
|
||||
XUserDefined
|
||||
|
||||
// MacintoshCyrillic is the code for x-mac-cyrillic.
|
||||
MacintoshCyrillic
|
||||
)
|
||||
1619
vendor/golang.org/x/text/encoding/internal/identifier/mib.go
generated
vendored
Normal file
1619
vendor/golang.org/x/text/encoding/internal/identifier/mib.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
75
vendor/golang.org/x/text/encoding/internal/internal.go
generated
vendored
Normal file
75
vendor/golang.org/x/text/encoding/internal/internal.go
generated
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains code that is shared among encoding implementations.
|
||||
package internal
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Encoding is an implementation of the Encoding interface that adds the String
|
||||
// and ID methods to an existing encoding.
|
||||
type Encoding struct {
|
||||
encoding.Encoding
|
||||
Name string
|
||||
MIB identifier.MIB
|
||||
}
|
||||
|
||||
// _ verifies that Encoding implements identifier.Interface.
|
||||
var _ identifier.Interface = (*Encoding)(nil)
|
||||
|
||||
func (e *Encoding) String() string {
|
||||
return e.Name
|
||||
}
|
||||
|
||||
func (e *Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return e.MIB, ""
|
||||
}
|
||||
|
||||
// SimpleEncoding is an Encoding that combines two Transformers.
|
||||
type SimpleEncoding struct {
|
||||
Decoder transform.Transformer
|
||||
Encoder transform.Transformer
|
||||
}
|
||||
|
||||
func (e *SimpleEncoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: e.Decoder}
|
||||
}
|
||||
|
||||
func (e *SimpleEncoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: e.Encoder}
|
||||
}
|
||||
|
||||
// FuncEncoding is an Encoding that combines two functions returning a new
|
||||
// Transformer.
|
||||
type FuncEncoding struct {
|
||||
Decoder func() transform.Transformer
|
||||
Encoder func() transform.Transformer
|
||||
}
|
||||
|
||||
func (e FuncEncoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: e.Decoder()}
|
||||
}
|
||||
|
||||
func (e FuncEncoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: e.Encoder()}
|
||||
}
|
||||
|
||||
// A RepertoireError indicates a rune is not in the repertoire of a destination
|
||||
// encoding. It is associated with an encoding-specific suggested replacement
|
||||
// byte.
|
||||
type RepertoireError byte
|
||||
|
||||
// Error implements the error interrface.
|
||||
func (r RepertoireError) Error() string {
|
||||
return "encoding: rune not supported by encoding."
|
||||
}
|
||||
|
||||
// Replacement returns the replacement string associated with this error.
|
||||
func (r RepertoireError) Replacement() byte { return byte(r) }
|
||||
|
||||
var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub)
|
||||
12
vendor/golang.org/x/text/encoding/japanese/all.go
generated
vendored
Normal file
12
vendor/golang.org/x/text/encoding/japanese/all.go
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{EUCJP, ISO2022JP, ShiftJIS}
|
||||
225
vendor/golang.org/x/text/encoding/japanese/eucjp.go
generated
vendored
Normal file
225
vendor/golang.org/x/text/encoding/japanese/eucjp.go
generated
vendored
Normal file
@@ -0,0 +1,225 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// EUCJP is the EUC-JP encoding.
|
||||
var EUCJP encoding.Encoding = &eucJP
|
||||
|
||||
var eucJP = internal.Encoding{
|
||||
&internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}},
|
||||
"EUC-JP",
|
||||
identifier.EUCPkdFmtJapanese,
|
||||
}
|
||||
|
||||
type eucJPDecoder struct{ transform.NopResetter }
|
||||
|
||||
// See https://encoding.spec.whatwg.org/#euc-jp-decoder.
|
||||
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case c0 == 0x8e:
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case c1 < 0xa1:
|
||||
r, size = utf8.RuneError, 1
|
||||
case c1 > 0xdf:
|
||||
r, size = utf8.RuneError, 2
|
||||
if c1 == 0xff {
|
||||
size = 1
|
||||
}
|
||||
default:
|
||||
r, size = rune(c1)+(0xff61-0xa1), 2
|
||||
}
|
||||
case c0 == 0x8f:
|
||||
if nSrc+2 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
if p := nSrc + 1; p < len(src) && 0xa1 <= src[p] && src[p] < 0xfe {
|
||||
size = 2
|
||||
}
|
||||
break
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c1 < 0xa1 || 0xfe < c1 {
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
c2 := src[nSrc+2]
|
||||
if c2 < 0xa1 || 0xfe < c2 {
|
||||
r, size = utf8.RuneError, 2
|
||||
break
|
||||
}
|
||||
r, size = utf8.RuneError, 3
|
||||
if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
|
||||
r = rune(jis0212Decode[i])
|
||||
if r == 0 {
|
||||
r = utf8.RuneError
|
||||
}
|
||||
}
|
||||
|
||||
case 0xa1 <= c0 && c0 <= 0xfe:
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c1 < 0xa1 || 0xfe < c1 {
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
r, size = utf8.RuneError, 2
|
||||
if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
if r == 0 {
|
||||
r = utf8.RuneError
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
r, size = utf8.RuneError, 1
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type eucJPEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
goto write2
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2or3:
|
||||
if r>>tableShift == jis0208 {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = 0x8f
|
||||
nDst++
|
||||
}
|
||||
dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask
|
||||
dst[nDst+1] = 0xa1 + uint8(r)&codeMask
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = 0x8e
|
||||
dst[nDst+1] = uint8(r - (0xff61 - 0xa1))
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 6 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
||||
299
vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
generated
vendored
Normal file
299
vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
generated
vendored
Normal file
@@ -0,0 +1,299 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// ISO2022JP is the ISO-2022-JP encoding.
|
||||
var ISO2022JP encoding.Encoding = &iso2022JP
|
||||
|
||||
var iso2022JP = internal.Encoding{
|
||||
internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},
|
||||
"ISO-2022-JP",
|
||||
identifier.ISO2022JP,
|
||||
}
|
||||
|
||||
func iso2022JPNewDecoder() transform.Transformer {
|
||||
return new(iso2022JPDecoder)
|
||||
}
|
||||
|
||||
func iso2022JPNewEncoder() transform.Transformer {
|
||||
return new(iso2022JPEncoder)
|
||||
}
|
||||
|
||||
const (
|
||||
asciiState = iota
|
||||
katakanaState
|
||||
jis0208State
|
||||
jis0212State
|
||||
)
|
||||
|
||||
const asciiEsc = 0x1b
|
||||
|
||||
type iso2022JPDecoder int
|
||||
|
||||
func (d *iso2022JPDecoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
c0 := src[nSrc]
|
||||
if c0 >= utf8.RuneSelf {
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
|
||||
if c0 == asciiEsc {
|
||||
if nSrc+2 >= len(src) {
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
// TODO: is it correct to only skip 1??
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
size = 3
|
||||
c1 := src[nSrc+1]
|
||||
c2 := src[nSrc+2]
|
||||
switch {
|
||||
case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
|
||||
*d = jis0208State
|
||||
continue
|
||||
case c1 == '$' && c2 == '(': // 0x24 0x28
|
||||
if nSrc+3 >= len(src) {
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
size = 4
|
||||
if src[nSrc+3] == 'D' {
|
||||
*d = jis0212State
|
||||
continue
|
||||
}
|
||||
case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
|
||||
*d = asciiState
|
||||
continue
|
||||
case c1 == '(' && c2 == 'I': // 0x28 0x49
|
||||
*d = katakanaState
|
||||
continue
|
||||
}
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
|
||||
switch *d {
|
||||
case asciiState:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case katakanaState:
|
||||
if c0 < 0x21 || 0x60 <= c0 {
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
r, size = rune(c0)+(0xff61-0x21), 1
|
||||
|
||||
default:
|
||||
if c0 == 0x0a {
|
||||
*d = asciiState
|
||||
r, size = rune(c0), 1
|
||||
goto write
|
||||
}
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
return nDst, nSrc, transform.ErrShortSrc
|
||||
}
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
size = 2
|
||||
c1 := src[nSrc+1]
|
||||
i := int(c0-0x21)*94 + int(c1-0x21)
|
||||
if *d == jis0208State && i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
} else if *d == jis0212State && i < len(jis0212Decode) {
|
||||
r = rune(jis0212Decode[i])
|
||||
} else {
|
||||
r = '\ufffd'
|
||||
goto write
|
||||
}
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type iso2022JPEncoder int
|
||||
|
||||
func (e *iso2022JPEncoder) Reset() {
|
||||
*e = asciiState
|
||||
}
|
||||
|
||||
func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
//
|
||||
// http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212
|
||||
// is not used by the iso-2022-jp encoder due to lack of widespread support".
|
||||
//
|
||||
// TODO: do we have to special-case U+00A5 and U+203E, as per
|
||||
// http://encoding.spec.whatwg.org/#iso-2022-jp
|
||||
// Doing so would mean that "\u00a5" would not be preserved
|
||||
// after an encode-decode round trip.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
goto writeKatakana
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
}
|
||||
|
||||
// Switch back to ASCII state in case of error so that an ASCII
|
||||
// replacement character can be written in the correct state.
|
||||
if *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
if *e != asciiState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
writeJIS:
|
||||
if *e != jis0208State {
|
||||
if nDst+5 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = jis0208State
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '$'
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
} else if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
|
||||
dst[nDst+1] = 0x21 + uint8(r)&codeMask
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
writeKatakana:
|
||||
if *e != katakanaState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = katakanaState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'I'
|
||||
nDst += 3
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r - (0xff61 - 0x21))
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
if atEOF && err == nil && *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
} else {
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
Normal file
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
// TODO: Emoji extensions?
|
||||
// https://www.unicode.org/faq/emoji_dingbats.html
|
||||
// https://www.unicode.org/Public/UNIDATA/EmojiSources.txt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type entry struct {
|
||||
jisCode, table int
|
||||
}
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
|
||||
fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
|
||||
|
||||
reverse := [65536]entry{}
|
||||
for i := range reverse {
|
||||
reverse[i].table = -1
|
||||
}
|
||||
|
||||
tables := []struct {
|
||||
url string
|
||||
name string
|
||||
}{
|
||||
{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
|
||||
{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
|
||||
}
|
||||
for i, table := range tables {
|
||||
res, err := http.Get(table.url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", table.url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("%q: could not parse %q", table.url, s)
|
||||
}
|
||||
if x < 0 || 120*94 <= x {
|
||||
log.Fatalf("%q: JIS code %d is out of range", table.url, x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y].table == -1 {
|
||||
reverse[y] = entry{jisCode: x, table: i}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("%q: scanner error: %v", table.url, err)
|
||||
}
|
||||
|
||||
fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
|
||||
table.name, table.name, table.url)
|
||||
fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
|
||||
for i, m := range mapping {
|
||||
if m != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, m)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v.table == -1 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const (\n")
|
||||
fmt.Printf("\tjis0208 = 1\n")
|
||||
fmt.Printf("\tjis0212 = 2\n")
|
||||
fmt.Printf("\tcodeMask = 0x7f\n")
|
||||
fmt.Printf("\tcodeShift = 7\n")
|
||||
fmt.Printf("\ttableShift = 14\n")
|
||||
fmt.Printf(")\n\n")
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("//\n")
|
||||
fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
|
||||
fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
|
||||
fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
|
||||
fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x.table == -1 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
|
||||
j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
189
vendor/golang.org/x/text/encoding/japanese/shiftjis.go
generated
vendored
Normal file
189
vendor/golang.org/x/text/encoding/japanese/shiftjis.go
generated
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// ShiftJIS is the Shift JIS encoding, also known as Code Page 932 and
|
||||
// Windows-31J.
|
||||
var ShiftJIS encoding.Encoding = &shiftJIS
|
||||
|
||||
var shiftJIS = internal.Encoding{
|
||||
&internal.SimpleEncoding{shiftJISDecoder{}, shiftJISEncoder{}},
|
||||
"Shift JIS",
|
||||
identifier.ShiftJIS,
|
||||
}
|
||||
|
||||
type shiftJISDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0xa1 <= c0 && c0 < 0xe0:
|
||||
r, size = rune(c0)+(0xff61-0xa1), 1
|
||||
|
||||
case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xfd):
|
||||
if c0 <= 0x9f {
|
||||
c0 -= 0x70
|
||||
} else {
|
||||
c0 -= 0xb0
|
||||
}
|
||||
c0 = 2*c0 - 0x21
|
||||
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 1
|
||||
goto write
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case c1 < 0x40:
|
||||
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
|
||||
goto write
|
||||
case c1 < 0x7f:
|
||||
c0--
|
||||
c1 -= 0x40
|
||||
case c1 == 0x7f:
|
||||
r, size = '\ufffd', 1 // c1 is ASCII so output on next round
|
||||
goto write
|
||||
case c1 < 0x9f:
|
||||
c0--
|
||||
c1 -= 0x41
|
||||
case c1 < 0xfd:
|
||||
c1 -= 0x9f
|
||||
default:
|
||||
r, size = '\ufffd', 2
|
||||
goto write
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
case c0 == 0x80:
|
||||
r, size = 0x80, 1
|
||||
|
||||
default:
|
||||
r, size = '\ufffd', 1
|
||||
}
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type shiftJISEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (shiftJISEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
r -= 0xff61 - 0xa1
|
||||
goto write1
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write1:
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2:
|
||||
j1 := uint8(r>>codeShift) & codeMask
|
||||
j2 := uint8(r) & codeMask
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
if j1 <= 61 {
|
||||
dst[nDst+0] = 129 + j1/2
|
||||
} else {
|
||||
dst[nDst+0] = 193 + j1/2
|
||||
}
|
||||
if j1&1 == 0 {
|
||||
dst[nDst+1] = j2 + j2/63 + 64
|
||||
} else {
|
||||
dst[nDst+1] = j2 + 159
|
||||
}
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
26971
vendor/golang.org/x/text/encoding/japanese/tables.go
generated
vendored
Normal file
26971
vendor/golang.org/x/text/encoding/japanese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
177
vendor/golang.org/x/text/encoding/korean/euckr.go
generated
vendored
Normal file
177
vendor/golang.org/x/text/encoding/korean/euckr.go
generated
vendored
Normal file
@@ -0,0 +1,177 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package korean
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{EUCKR}
|
||||
|
||||
// EUCKR is the EUC-KR encoding, also known as Code Page 949.
|
||||
var EUCKR encoding.Encoding = &eucKR
|
||||
|
||||
var eucKR = internal.Encoding{
|
||||
&internal.SimpleEncoding{eucKRDecoder{}, eucKREncoder{}},
|
||||
"EUC-KR",
|
||||
identifier.EUCKR,
|
||||
}
|
||||
|
||||
type eucKRDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucKRDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0x81 <= c0 && c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
size = 2
|
||||
if c0 < 0xc7 {
|
||||
r = 178 * rune(c0-0x81)
|
||||
switch {
|
||||
case 0x41 <= c1 && c1 < 0x5b:
|
||||
r += rune(c1) - (0x41 - 0*26)
|
||||
case 0x61 <= c1 && c1 < 0x7b:
|
||||
r += rune(c1) - (0x61 - 1*26)
|
||||
case 0x81 <= c1 && c1 < 0xff:
|
||||
r += rune(c1) - (0x81 - 2*26)
|
||||
default:
|
||||
goto decError
|
||||
}
|
||||
} else if 0xa1 <= c1 && c1 < 0xff {
|
||||
r = 178*(0xc7-0x81) + rune(c0-0xc7)*94 + rune(c1-0xa1)
|
||||
} else {
|
||||
goto decError
|
||||
}
|
||||
if int(r) < len(decode) {
|
||||
r = rune(decode[r])
|
||||
if r != 0 {
|
||||
break
|
||||
}
|
||||
}
|
||||
decError:
|
||||
r = utf8.RuneError
|
||||
if c1 < utf8.RuneSelf {
|
||||
size = 1
|
||||
}
|
||||
|
||||
default:
|
||||
r, size = utf8.RuneError, 1
|
||||
break
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type eucKREncoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucKREncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode6Low <= r && r < encode6High:
|
||||
if r = rune(encode6[r-encode6Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 7 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
||||
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
Normal file
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
Normal file
@@ -0,0 +1,143 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
|
||||
fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
|
||||
log.Fatalf("EUC-KR code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := uint16(0), uint16(0)
|
||||
if x < 178*(0xc7-0x81) {
|
||||
c0 = uint16(x/178) + 0x81
|
||||
c1 = uint16(x % 178)
|
||||
switch {
|
||||
case c1 < 1*26:
|
||||
c1 += 0x41
|
||||
case c1 < 2*26:
|
||||
c1 += 0x47
|
||||
default:
|
||||
c1 += 0x4d
|
||||
}
|
||||
} else {
|
||||
x -= 178 * (0xc7 - 0x81)
|
||||
c0 = uint16(x/94) + 0xc7
|
||||
c1 = uint16(x%94) + 0xa1
|
||||
}
|
||||
reverse[y] = c0<<8 | c1
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
34152
vendor/golang.org/x/text/encoding/korean/tables.go
generated
vendored
Normal file
34152
vendor/golang.org/x/text/encoding/korean/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
12
vendor/golang.org/x/text/encoding/simplifiedchinese/all.go
generated
vendored
Normal file
12
vendor/golang.org/x/text/encoding/simplifiedchinese/all.go
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{GB18030, GBK, HZGB2312}
|
||||
269
vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
generated
vendored
Normal file
269
vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
generated
vendored
Normal file
@@ -0,0 +1,269 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
// GB18030 is the GB18030 encoding.
|
||||
GB18030 encoding.Encoding = &gbk18030
|
||||
// GBK is the GBK encoding. It encodes an extension of the GB2312 character set
|
||||
// and is also known as Code Page 936.
|
||||
GBK encoding.Encoding = &gbk
|
||||
)
|
||||
|
||||
var gbk = internal.Encoding{
|
||||
&internal.SimpleEncoding{
|
||||
gbkDecoder{gb18030: false},
|
||||
gbkEncoder{gb18030: false},
|
||||
},
|
||||
"GBK",
|
||||
identifier.GBK,
|
||||
}
|
||||
|
||||
var gbk18030 = internal.Encoding{
|
||||
&internal.SimpleEncoding{
|
||||
gbkDecoder{gb18030: true},
|
||||
gbkEncoder{gb18030: true},
|
||||
},
|
||||
"GB18030",
|
||||
identifier.GB18030,
|
||||
}
|
||||
|
||||
type gbkDecoder struct {
|
||||
transform.NopResetter
|
||||
gb18030 bool
|
||||
}
|
||||
|
||||
func (d gbkDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
|
||||
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
|
||||
// says to treat "gbk" as Code Page 936.
|
||||
case c0 == 0x80:
|
||||
r, size = '€', 1
|
||||
|
||||
case c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case 0x40 <= c1 && c1 < 0x7f:
|
||||
c1 -= 0x40
|
||||
case 0x80 <= c1 && c1 < 0xff:
|
||||
c1 -= 0x41
|
||||
case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
|
||||
if nSrc+3 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
// The second byte here is always ASCII, so we can set size
|
||||
// to 1 in all cases.
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
c2 := src[nSrc+2]
|
||||
if c2 < 0x81 || 0xff <= c2 {
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
c3 := src[nSrc+3]
|
||||
if c3 < 0x30 || 0x3a <= c3 {
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
size = 4
|
||||
r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
|
||||
if r < 39420 {
|
||||
i, j := 0, len(gb18030)
|
||||
for i < j {
|
||||
h := i + (j-i)/2
|
||||
if r >= rune(gb18030[h][0]) {
|
||||
i = h + 1
|
||||
} else {
|
||||
j = h
|
||||
}
|
||||
}
|
||||
dec := &gb18030[i-1]
|
||||
r += rune(dec[1]) - rune(dec[0])
|
||||
goto write
|
||||
}
|
||||
r -= 189000
|
||||
if 0 <= r && r < 0x100000 {
|
||||
r += 0x10000
|
||||
} else {
|
||||
r, size = utf8.RuneError, 1
|
||||
}
|
||||
goto write
|
||||
default:
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
|
||||
r = rune(decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
r, size = utf8.RuneError, 1
|
||||
}
|
||||
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type gbkEncoder struct {
|
||||
transform.NopResetter
|
||||
gb18030 bool
|
||||
}
|
||||
|
||||
func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, r2, size := rune(0), rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r2 = rune(encode0[r-encode0Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
|
||||
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
|
||||
// says to treat "gbk" as Code Page 936.
|
||||
if r == '€' {
|
||||
r = 0x80
|
||||
goto write1
|
||||
}
|
||||
if r2 = rune(encode1[r-encode1Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r2 = rune(encode2[r-encode2Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r2 = rune(encode3[r-encode3Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r2 = rune(encode4[r-encode4Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
|
||||
if e.gb18030 {
|
||||
if r < 0x10000 {
|
||||
i, j := 0, len(gb18030)
|
||||
for i < j {
|
||||
h := i + (j-i)/2
|
||||
if r >= rune(gb18030[h][1]) {
|
||||
i = h + 1
|
||||
} else {
|
||||
j = h
|
||||
}
|
||||
}
|
||||
dec := &gb18030[i-1]
|
||||
r += rune(dec[0]) - rune(dec[1])
|
||||
goto write4
|
||||
} else if r < 0x110000 {
|
||||
r += 189000 - 0x10000
|
||||
goto write4
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write1:
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r2 >> 8)
|
||||
dst[nDst+1] = uint8(r2)
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
write4:
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+3] = uint8(r%10 + 0x30)
|
||||
r /= 10
|
||||
dst[nDst+2] = uint8(r%126 + 0x81)
|
||||
r /= 126
|
||||
dst[nDst+1] = uint8(r%10 + 0x30)
|
||||
r /= 10
|
||||
dst[nDst+0] = uint8(r + 0x81)
|
||||
nDst += 4
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 5 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
||||
245
vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
generated
vendored
Normal file
245
vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
generated
vendored
Normal file
@@ -0,0 +1,245 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// HZGB2312 is the HZ-GB2312 encoding.
|
||||
var HZGB2312 encoding.Encoding = &hzGB2312
|
||||
|
||||
var hzGB2312 = internal.Encoding{
|
||||
internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},
|
||||
"HZ-GB2312",
|
||||
identifier.HZGB2312,
|
||||
}
|
||||
|
||||
func hzGB2312NewDecoder() transform.Transformer {
|
||||
return new(hzGB2312Decoder)
|
||||
}
|
||||
|
||||
func hzGB2312NewEncoder() transform.Transformer {
|
||||
return new(hzGB2312Encoder)
|
||||
}
|
||||
|
||||
const (
|
||||
asciiState = iota
|
||||
gbState
|
||||
)
|
||||
|
||||
type hzGB2312Decoder int
|
||||
|
||||
func (d *hzGB2312Decoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
c0 := src[nSrc]
|
||||
if c0 >= utf8.RuneSelf {
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
|
||||
if c0 == '~' {
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r = utf8.RuneError
|
||||
goto write
|
||||
}
|
||||
size = 2
|
||||
switch src[nSrc+1] {
|
||||
case '{':
|
||||
*d = gbState
|
||||
continue
|
||||
case '}':
|
||||
*d = asciiState
|
||||
continue
|
||||
case '~':
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
dst[nDst] = '~'
|
||||
nDst++
|
||||
continue
|
||||
case '\n':
|
||||
continue
|
||||
default:
|
||||
r = utf8.RuneError
|
||||
goto write
|
||||
}
|
||||
}
|
||||
|
||||
if *d == asciiState {
|
||||
r, size = rune(c0), 1
|
||||
} else {
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
size = 2
|
||||
c1 := src[nSrc+1]
|
||||
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
|
||||
// error
|
||||
} else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
|
||||
r = rune(decode[i])
|
||||
if r != 0 {
|
||||
goto write
|
||||
}
|
||||
}
|
||||
if c1 > utf8.RuneSelf {
|
||||
// Be consistent and always treat non-ASCII as a single error.
|
||||
size = 1
|
||||
}
|
||||
r = utf8.RuneError
|
||||
}
|
||||
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type hzGB2312Encoder int
|
||||
|
||||
func (d *hzGB2312Encoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
if r == '~' {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '~'
|
||||
nDst += 2
|
||||
continue
|
||||
} else if *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '}'
|
||||
nDst += 2
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst += 1
|
||||
continue
|
||||
|
||||
}
|
||||
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
}
|
||||
|
||||
terminateInASCIIState:
|
||||
// Switch back to ASCII state in case of error so that an ASCII
|
||||
// replacement character can be written in the correct state.
|
||||
if *e != asciiState {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '}'
|
||||
nDst += 2
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
|
||||
writeGB:
|
||||
c0 := uint8(r>>8) - 0x80
|
||||
c1 := uint8(r) - 0x80
|
||||
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
|
||||
goto terminateInASCIIState
|
||||
}
|
||||
if *e == asciiState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = gbState
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '{'
|
||||
nDst += 2
|
||||
} else if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = c0
|
||||
dst[nDst+1] = c1
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
// TODO: should one always terminate in ASCII state to make it safe to
|
||||
// concatenate two HZ-GB2312-encoded strings?
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
Normal file
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
|
||||
fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
|
||||
|
||||
printGB18030()
|
||||
printGBK()
|
||||
}
|
||||
|
||||
func printGB18030() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
|
||||
fmt.Printf("var gb18030 = [...][2]uint16{\n")
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint32(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0x10000 && y < 0x10000 {
|
||||
fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
func printGBK() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*190 <= x {
|
||||
log.Fatalf("GBK code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := x/190, x%190
|
||||
if c1 >= 0x3f {
|
||||
c1++
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
43999
vendor/golang.org/x/text/encoding/simplifiedchinese/tables.go
generated
vendored
Normal file
43999
vendor/golang.org/x/text/encoding/simplifiedchinese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
199
vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
generated
vendored
Normal file
199
vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
generated
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package traditionalchinese
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{Big5}
|
||||
|
||||
// Big5 is the Big5 encoding, also known as Code Page 950.
|
||||
var Big5 encoding.Encoding = &big5
|
||||
|
||||
var big5 = internal.Encoding{
|
||||
&internal.SimpleEncoding{big5Decoder{}, big5Encoder{}},
|
||||
"Big5",
|
||||
identifier.Big5,
|
||||
}
|
||||
|
||||
type big5Decoder struct{ transform.NopResetter }
|
||||
|
||||
func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size, s := rune(0), 0, ""
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0x81 <= c0 && c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case 0x40 <= c1 && c1 < 0x7f:
|
||||
c1 -= 0x40
|
||||
case 0xa1 <= c1 && c1 < 0xff:
|
||||
c1 -= 0x62
|
||||
case c1 < 0x40:
|
||||
r, size = utf8.RuneError, 1
|
||||
goto write
|
||||
default:
|
||||
r, size = utf8.RuneError, 2
|
||||
goto write
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
|
||||
if 1133 <= i && i < 1167 {
|
||||
// The two-rune special cases for LATIN CAPITAL / SMALL E WITH CIRCUMFLEX
|
||||
// AND MACRON / CARON are from http://encoding.spec.whatwg.org/#big5
|
||||
switch i {
|
||||
case 1133:
|
||||
s = "\u00CA\u0304"
|
||||
goto writeStr
|
||||
case 1135:
|
||||
s = "\u00CA\u030C"
|
||||
goto writeStr
|
||||
case 1164:
|
||||
s = "\u00EA\u0304"
|
||||
goto writeStr
|
||||
case 1166:
|
||||
s = "\u00EA\u030C"
|
||||
goto writeStr
|
||||
}
|
||||
}
|
||||
r = rune(decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
r, size = utf8.RuneError, 1
|
||||
}
|
||||
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
continue loop
|
||||
|
||||
writeStr:
|
||||
if nDst+len(s) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += copy(dst[nDst:], s)
|
||||
continue loop
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type big5Encoder struct{ transform.NopResetter }
|
||||
|
||||
func (big5Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r >= utf8.RuneSelf {
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode6Low <= r && r < encode6High:
|
||||
if r = rune(encode6[r-encode6Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode7Low <= r && r < encode7High:
|
||||
if r = rune(encode7[r-encode7Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 8 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
||||
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
Normal file
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
Normal file
@@ -0,0 +1,140 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
|
||||
fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint32{}
|
||||
reverse := [65536 * 4]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*157 <= x {
|
||||
log.Fatalf("Big5 code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
|
||||
// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
|
||||
// "The index pointer for code point in index is the first pointer
|
||||
// corresponding to code point in index", which would normally mean
|
||||
// that the code below should be guarded by "if reverse[y] == 0", but
|
||||
// last instead of first seems to match the behavior of
|
||||
// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
|
||||
// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
|
||||
// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
|
||||
// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
|
||||
c0, c1 := x/157, x%157
|
||||
if c1 < 0x3f {
|
||||
c1 += 0x40
|
||||
} else {
|
||||
c1 += 0x62
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | c1
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
|
||||
fmt.Printf("var decode = [...]uint32{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%08X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
37142
vendor/golang.org/x/text/encoding/traditionalchinese/tables.go
generated
vendored
Normal file
37142
vendor/golang.org/x/text/encoding/traditionalchinese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// BOMOverride returns a new decoder transformer that is identical to fallback,
|
||||
// except that the presence of a Byte Order Mark at the start of the input
|
||||
// causes it to switch to the corresponding Unicode decoding. It will only
|
||||
// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.
|
||||
//
|
||||
// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not
|
||||
// just UTF-16 variants, and allowing falling back to any encoding scheme.
|
||||
//
|
||||
// This technique is recommended by the W3C for use in HTML 5: "For
|
||||
// compatibility with deployed content, the byte order mark (also known as BOM)
|
||||
// is considered more authoritative than anything else."
|
||||
// http://www.w3.org/TR/encoding/#specification-hooks
|
||||
//
|
||||
// Using BOMOverride is mostly intended for use cases where the first characters
|
||||
// of a fallback encoding are known to not be a BOM, for example, for valid HTML
|
||||
// and most encodings.
|
||||
func BOMOverride(fallback transform.Transformer) transform.Transformer {
|
||||
// TODO: possibly allow a variadic argument of unicode encodings to allow
|
||||
// specifying details of which fallbacks are supported as well as
|
||||
// specifying the details of the implementations. This would also allow for
|
||||
// support for UTF-32, which should not be supported by default.
|
||||
return &bomOverride{fallback: fallback}
|
||||
}
|
||||
|
||||
type bomOverride struct {
|
||||
fallback transform.Transformer
|
||||
current transform.Transformer
|
||||
}
|
||||
|
||||
func (d *bomOverride) Reset() {
|
||||
d.current = nil
|
||||
d.fallback.Reset()
|
||||
}
|
||||
|
||||
var (
|
||||
// TODO: we could use decode functions here, instead of allocating a new
|
||||
// decoder on every NewDecoder as IgnoreBOM decoders can be stateless.
|
||||
utf16le = UTF16(LittleEndian, IgnoreBOM)
|
||||
utf16be = UTF16(BigEndian, IgnoreBOM)
|
||||
)
|
||||
|
||||
const utf8BOM = "\ufeff"
|
||||
|
||||
func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if d.current != nil {
|
||||
return d.current.Transform(dst, src, atEOF)
|
||||
}
|
||||
if len(src) < 3 && !atEOF {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
d.current = d.fallback
|
||||
bomSize := 0
|
||||
if len(src) >= 2 {
|
||||
if src[0] == 0xFF && src[1] == 0xFE {
|
||||
d.current = utf16le.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if src[0] == 0xFE && src[1] == 0xFF {
|
||||
d.current = utf16be.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if len(src) >= 3 &&
|
||||
src[0] == utf8BOM[0] &&
|
||||
src[1] == utf8BOM[1] &&
|
||||
src[2] == utf8BOM[2] {
|
||||
d.current = transform.Nop
|
||||
bomSize = 3
|
||||
}
|
||||
}
|
||||
if bomSize < len(src) {
|
||||
nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF)
|
||||
}
|
||||
return nDst, nSrc + bomSize, err
|
||||
}
|
||||
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
Normal file
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
Normal file
@@ -0,0 +1,434 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package unicode provides Unicode encodings such as UTF-16.
|
||||
package unicode // import "golang.org/x/text/encoding/unicode"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/internal/utf8internal"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO: I think the Transformers really should return errors on unmatched
|
||||
// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781,
|
||||
// which leaves it open, but is suggested by WhatWG. It will allow for all error
|
||||
// modes as defined by WhatWG: fatal, HTML and Replacement. This would require
|
||||
// the introduction of some kind of error type for conveying the erroneous code
|
||||
// point.
|
||||
|
||||
// UTF8 is the UTF-8 encoding.
|
||||
var UTF8 encoding.Encoding = utf8enc
|
||||
|
||||
var utf8enc = &internal.Encoding{
|
||||
&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
|
||||
"UTF-8",
|
||||
identifier.UTF8,
|
||||
}
|
||||
|
||||
type utf8Decoder struct{ transform.NopResetter }
|
||||
|
||||
func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var pSrc int // point from which to start copy in src
|
||||
var accept utf8internal.AcceptRange
|
||||
|
||||
// The decoder can only make the input larger, not smaller.
|
||||
n := len(src)
|
||||
if len(dst) < n {
|
||||
err = transform.ErrShortDst
|
||||
n = len(dst)
|
||||
atEOF = false
|
||||
}
|
||||
for nSrc < n {
|
||||
c := src[nSrc]
|
||||
if c < utf8.RuneSelf {
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
first := utf8internal.First[c]
|
||||
size := int(first & utf8internal.SizeMask)
|
||||
if first == utf8internal.FirstInvalid {
|
||||
goto handleInvalid // invalid starter byte
|
||||
}
|
||||
accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift]
|
||||
if nSrc+size > n {
|
||||
if !atEOF {
|
||||
// We may stop earlier than necessary here if the short sequence
|
||||
// has invalid bytes. Not checking for this simplifies the code
|
||||
// and may avoid duplicate computations in certain conditions.
|
||||
if err == nil {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
break
|
||||
}
|
||||
// Determine the maximal subpart of an ill-formed subsequence.
|
||||
switch {
|
||||
case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]:
|
||||
size = 1
|
||||
case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]:
|
||||
size = 2
|
||||
default:
|
||||
size = 3 // As we are short, the maximum is 3.
|
||||
}
|
||||
goto handleInvalid
|
||||
}
|
||||
if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c {
|
||||
size = 1
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 2 {
|
||||
} else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 2
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 3 {
|
||||
} else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 3
|
||||
goto handleInvalid // invalid continuation byte
|
||||
}
|
||||
nSrc += size
|
||||
continue
|
||||
|
||||
handleInvalid:
|
||||
// Copy the scanned input so far.
|
||||
nDst += copy(dst[nDst:], src[pSrc:nSrc])
|
||||
|
||||
// Append RuneError to the destination.
|
||||
const runeError = "\ufffd"
|
||||
if nDst+len(runeError) > len(dst) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += copy(dst[nDst:], runeError)
|
||||
|
||||
// Skip the maximal subpart of an ill-formed subsequence according to
|
||||
// the W3C standard way instead of the Go way. This Transform is
|
||||
// probably the only place in the text repo where it is warranted.
|
||||
nSrc += size
|
||||
pSrc = nSrc
|
||||
|
||||
// Recompute the maximum source length.
|
||||
if sz := len(dst) - nDst; sz < len(src)-nSrc {
|
||||
err = transform.ErrShortDst
|
||||
n = nSrc + sz
|
||||
atEOF = false
|
||||
}
|
||||
}
|
||||
return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err
|
||||
}
|
||||
|
||||
// UTF16 returns a UTF-16 Encoding for the given default endianness and byte
|
||||
// order mark (BOM) policy.
|
||||
//
|
||||
// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then
|
||||
// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect
|
||||
// the endianness used for decoding, and will instead be output as their
|
||||
// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy
|
||||
// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output.
|
||||
// Instead, it overrides the default endianness e for the remainder of the
|
||||
// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not
|
||||
// affect the endianness used, and will instead be output as their standard
|
||||
// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed
|
||||
// with the default Endianness. For ExpectBOM, in that case, the transformation
|
||||
// will return early with an ErrMissingBOM error.
|
||||
//
|
||||
// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of
|
||||
// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not
|
||||
// be inserted. The UTF-8 input does not need to contain a BOM.
|
||||
//
|
||||
// There is no concept of a 'native' endianness. If the UTF-16 data is produced
|
||||
// and consumed in a greater context that implies a certain endianness, use
|
||||
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
|
||||
//
|
||||
// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
|
||||
// corresponds to "Where the precise type of the data stream is known... the
|
||||
// BOM should not be used" and ExpectBOM corresponds to "A particular
|
||||
// protocol... may require use of the BOM".
|
||||
func UTF16(e Endianness, b BOMPolicy) encoding.Encoding {
|
||||
return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]}
|
||||
}
|
||||
|
||||
// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that
|
||||
// some configurations map to the same MIB identifier. RFC 2781 has requirements
|
||||
// and recommendations. Some of the "configurations" are merely recommendations,
|
||||
// so multiple configurations could match.
|
||||
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
|
||||
BigEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16BE,
|
||||
UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16BE as well.
|
||||
},
|
||||
LittleEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16LE,
|
||||
UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16LE as well.
|
||||
},
|
||||
// ExpectBOM is not widely used and has no valid MIB identifier.
|
||||
}
|
||||
|
||||
// All lists a configuration for each IANA-defined UTF-16 variant.
|
||||
var All = []encoding.Encoding{
|
||||
UTF8,
|
||||
UTF16(BigEndian, UseBOM),
|
||||
UTF16(BigEndian, IgnoreBOM),
|
||||
UTF16(LittleEndian, IgnoreBOM),
|
||||
}
|
||||
|
||||
// BOMPolicy is a UTF-16 encoding's byte order mark policy.
|
||||
type BOMPolicy uint8
|
||||
|
||||
const (
|
||||
writeBOM BOMPolicy = 0x01
|
||||
acceptBOM BOMPolicy = 0x02
|
||||
requireBOM BOMPolicy = 0x04
|
||||
bomMask BOMPolicy = 0x07
|
||||
|
||||
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
|
||||
// map of an array of length 8 of a type that is also used as a key or value
|
||||
// in another map). See golang.org/issue/11354.
|
||||
// TODO: consider changing this value back to 8 if the use of 1.4.* has
|
||||
// been minimized.
|
||||
numBOMValues = 8 + 1
|
||||
|
||||
// IgnoreBOM means to ignore any byte order marks.
|
||||
IgnoreBOM BOMPolicy = 0
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16BE/LE.
|
||||
|
||||
// UseBOM means that the UTF-16 form may start with a byte order mark, which
|
||||
// will be used to override the default encoding.
|
||||
UseBOM BOMPolicy = writeBOM | acceptBOM
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16.
|
||||
|
||||
// ExpectBOM means that the UTF-16 form must start with a byte order mark,
|
||||
// which will be used to override the default encoding.
|
||||
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
|
||||
// Used in Java as Unicode (not to be confused with Java's UTF-16) and
|
||||
// ICU's UTF-16,version=1. Not compliant with RFC 2781.
|
||||
|
||||
// TODO (maybe): strictBOM: BOM must match Endianness. This would allow:
|
||||
// - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM
|
||||
// (UnicodeBig and UnicodeLittle in Java)
|
||||
// - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E:
|
||||
// acceptBOM | strictBOM (e.g. assigned to CheckBOM).
|
||||
// This addition would be consistent with supporting ExpectBOM.
|
||||
)
|
||||
|
||||
// Endianness is a UTF-16 encoding's default endianness.
|
||||
type Endianness bool
|
||||
|
||||
const (
|
||||
// BigEndian is UTF-16BE.
|
||||
BigEndian Endianness = false
|
||||
// LittleEndian is UTF-16LE.
|
||||
LittleEndian Endianness = true
|
||||
)
|
||||
|
||||
// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a
|
||||
// starting byte order mark.
|
||||
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
|
||||
|
||||
type utf16Encoding struct {
|
||||
config
|
||||
mib identifier.MIB
|
||||
}
|
||||
|
||||
type config struct {
|
||||
endianness Endianness
|
||||
bomPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: &utf16Decoder{
|
||||
initial: u.config,
|
||||
current: u.config,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: &utf16Encoder{
|
||||
endianness: u.endianness,
|
||||
initialBOMPolicy: u.bomPolicy,
|
||||
currentBOMPolicy: u.bomPolicy,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return u.mib, ""
|
||||
}
|
||||
|
||||
func (u utf16Encoding) String() string {
|
||||
e, b := "B", ""
|
||||
if u.endianness == LittleEndian {
|
||||
e = "L"
|
||||
}
|
||||
switch u.bomPolicy {
|
||||
case ExpectBOM:
|
||||
b = "Expect"
|
||||
case UseBOM:
|
||||
b = "Use"
|
||||
case IgnoreBOM:
|
||||
b = "Ignore"
|
||||
}
|
||||
return "UTF-16" + e + "E (" + b + " BOM)"
|
||||
}
|
||||
|
||||
type utf16Decoder struct {
|
||||
initial config
|
||||
current config
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Reset() {
|
||||
u.current = u.initial
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(src) == 0 {
|
||||
if atEOF && u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
return 0, 0, nil
|
||||
}
|
||||
if u.current.bomPolicy&acceptBOM != 0 {
|
||||
if len(src) < 2 {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
switch {
|
||||
case src[0] == 0xfe && src[1] == 0xff:
|
||||
u.current.endianness = BigEndian
|
||||
nSrc = 2
|
||||
case src[0] == 0xff && src[1] == 0xfe:
|
||||
u.current.endianness = LittleEndian
|
||||
nSrc = 2
|
||||
default:
|
||||
if u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
}
|
||||
u.current.bomPolicy = IgnoreBOM
|
||||
}
|
||||
|
||||
var r rune
|
||||
var dSize, sSize int
|
||||
for nSrc < len(src) {
|
||||
if nSrc+1 < len(src) {
|
||||
x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
r, sSize = rune(x), 2
|
||||
if utf16.IsSurrogate(r) {
|
||||
if nSrc+3 < len(src) {
|
||||
x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
// Save for next iteration if it is not a high surrogate.
|
||||
if isHighSurrogate(rune(x)) {
|
||||
r, sSize = utf16.DecodeRune(r, rune(x)), 4
|
||||
}
|
||||
} else if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if dSize = utf8.RuneLen(r); dSize < 0 {
|
||||
r, dSize = utf8.RuneError, 3
|
||||
}
|
||||
} else if atEOF {
|
||||
// Single trailing byte.
|
||||
r, dSize, sSize = utf8.RuneError, 3, 1
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
if nDst+dSize > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
nSrc += sSize
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func isHighSurrogate(r rune) bool {
|
||||
return 0xDC00 <= r && r <= 0xDFFF
|
||||
}
|
||||
|
||||
type utf16Encoder struct {
|
||||
endianness Endianness
|
||||
initialBOMPolicy BOMPolicy
|
||||
currentBOMPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Reset() {
|
||||
u.currentBOMPolicy = u.initialBOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if u.currentBOMPolicy&writeBOM != 0 {
|
||||
if len(dst) < 2 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
dst[0], dst[1] = 0xfe, 0xff
|
||||
u.currentBOMPolicy = IgnoreBOM
|
||||
nDst = 2
|
||||
}
|
||||
|
||||
r, size := rune(0), 0
|
||||
for nSrc < len(src) {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r <= 0xffff {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
} else {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
dst[nDst+0] = uint8(r1 >> 8)
|
||||
dst[nDst+1] = uint8(r1)
|
||||
dst[nDst+2] = uint8(r2 >> 8)
|
||||
dst[nDst+3] = uint8(r2)
|
||||
nDst += 4
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
|
||||
if u.endianness == LittleEndian {
|
||||
for i := 0; i < nDst; i += 2 {
|
||||
dst[i], dst[i+1] = dst[i+1], dst[i]
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
16
vendor/golang.org/x/text/internal/language/common.go
generated
vendored
Normal file
16
vendor/golang.org/x/text/internal/language/common.go
generated
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
|
||||
type AliasType int8
|
||||
|
||||
const (
|
||||
Deprecated AliasType = iota
|
||||
Macro
|
||||
Legacy
|
||||
|
||||
AliasTypeUnknown AliasType = -1
|
||||
)
|
||||
29
vendor/golang.org/x/text/internal/language/compact.go
generated
vendored
Normal file
29
vendor/golang.org/x/text/internal/language/compact.go
generated
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// CompactCoreInfo is a compact integer with the three core tags encoded.
|
||||
type CompactCoreInfo uint32
|
||||
|
||||
// GetCompactCore generates a uint32 value that is guaranteed to be unique for
|
||||
// different language, region, and script values.
|
||||
func GetCompactCore(t Tag) (cci CompactCoreInfo, ok bool) {
|
||||
if t.LangID > langNoIndexOffset {
|
||||
return 0, false
|
||||
}
|
||||
cci |= CompactCoreInfo(t.LangID) << (8 + 12)
|
||||
cci |= CompactCoreInfo(t.ScriptID) << 12
|
||||
cci |= CompactCoreInfo(t.RegionID)
|
||||
return cci, true
|
||||
}
|
||||
|
||||
// Tag generates a tag from c.
|
||||
func (c CompactCoreInfo) Tag() Tag {
|
||||
return Tag{
|
||||
LangID: Language(c >> 20),
|
||||
RegionID: Region(c & 0x3ff),
|
||||
ScriptID: Script(c>>12) & 0xff,
|
||||
}
|
||||
}
|
||||
61
vendor/golang.org/x/text/internal/language/compact/compact.go
generated
vendored
Normal file
61
vendor/golang.org/x/text/internal/language/compact/compact.go
generated
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package compact defines a compact representation of language tags.
|
||||
//
|
||||
// Common language tags (at least all for which locale information is defined
|
||||
// in CLDR) are assigned a unique index. Each Tag is associated with such an
|
||||
// ID for selecting language-related resources (such as translations) as well
|
||||
// as one for selecting regional defaults (currency, number formatting, etc.)
|
||||
//
|
||||
// It may want to export this functionality at some point, but at this point
|
||||
// this is only available for use within x/text.
|
||||
package compact // import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ID is an integer identifying a single tag.
|
||||
type ID uint16
|
||||
|
||||
func getCoreIndex(t language.Tag) (id ID, ok bool) {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
i := sort.Search(len(coreTags), func(i int) bool {
|
||||
return cci <= coreTags[i]
|
||||
})
|
||||
if i == len(coreTags) || coreTags[i] != cci {
|
||||
return 0, false
|
||||
}
|
||||
return ID(i), true
|
||||
}
|
||||
|
||||
// Parent returns the ID of the parent or the root ID if id is already the root.
|
||||
func (id ID) Parent() ID {
|
||||
return parents[id]
|
||||
}
|
||||
|
||||
// Tag converts id to an internal language Tag.
|
||||
func (id ID) Tag() language.Tag {
|
||||
if int(id) >= len(coreTags) {
|
||||
return specialTags[int(id)-len(coreTags)]
|
||||
}
|
||||
return coreTags[id].Tag()
|
||||
}
|
||||
|
||||
var specialTags []language.Tag
|
||||
|
||||
func init() {
|
||||
tags := strings.Split(specialTagsStr, " ")
|
||||
specialTags = make([]language.Tag, len(tags))
|
||||
for i, t := range tags {
|
||||
specialTags[i] = language.MustParse(t)
|
||||
}
|
||||
}
|
||||
64
vendor/golang.org/x/text/internal/language/compact/gen.go
generated
vendored
Normal file
64
vendor/golang.org/x/text/internal/language/compact/gen.go
generated
vendored
Normal file
@@ -0,0 +1,64 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output",
|
||||
"tables.go",
|
||||
"output file for generated tables")
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "compact")
|
||||
|
||||
fmt.Fprintln(w, `import "golang.org/x/text/internal/language"`)
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeCompactIndex()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
113
vendor/golang.org/x/text/internal/language/compact/gen_index.go
generated
vendored
Normal file
113
vendor/golang.org/x/text/internal/language/compact/gen_index.go
generated
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file generates derivative tables based on the language package itself.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Compact indices:
|
||||
// Note -va-X variants only apply to localization variants.
|
||||
// BCP variants only ever apply to language.
|
||||
// The only ambiguity between tags is with regions.
|
||||
|
||||
func (b *builder) writeCompactIndex() {
|
||||
// Collect all language tags for which we have any data in CLDR.
|
||||
m := map[language.Tag]bool{}
|
||||
for _, lang := range b.data.Locales() {
|
||||
// We include all locales unconditionally to be consistent with en_US.
|
||||
// We want en_US, even though it has no data associated with it.
|
||||
|
||||
// TODO: put any of the languages for which no data exists at the end
|
||||
// of the index. This allows all components based on ICU to use that
|
||||
// as the cutoff point.
|
||||
// if x := data.RawLDML(lang); false ||
|
||||
// x.LocaleDisplayNames != nil ||
|
||||
// x.Characters != nil ||
|
||||
// x.Delimiters != nil ||
|
||||
// x.Measurement != nil ||
|
||||
// x.Dates != nil ||
|
||||
// x.Numbers != nil ||
|
||||
// x.Units != nil ||
|
||||
// x.ListPatterns != nil ||
|
||||
// x.Collations != nil ||
|
||||
// x.Segmentations != nil ||
|
||||
// x.Rbnf != nil ||
|
||||
// x.Annotations != nil ||
|
||||
// x.Metadata != nil {
|
||||
|
||||
// TODO: support POSIX natively, albeit non-standard.
|
||||
tag := language.Make(strings.Replace(lang, "_POSIX", "-u-va-posix", 1))
|
||||
m[tag] = true
|
||||
// }
|
||||
}
|
||||
|
||||
// TODO: plural rules are also defined for the deprecated tags:
|
||||
// iw mo sh tl
|
||||
// Consider removing these as compact tags.
|
||||
|
||||
// Include locales for plural rules, which uses a different structure.
|
||||
for _, plurals := range b.supp.Plurals {
|
||||
for _, rules := range plurals.PluralRules {
|
||||
for _, lang := range strings.Split(rules.Locales, " ") {
|
||||
m[language.Make(lang)] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var coreTags []language.CompactCoreInfo
|
||||
var special []string
|
||||
|
||||
for t := range m {
|
||||
if x := t.Extensions(); len(x) != 0 && fmt.Sprint(x) != "[u-va-posix]" {
|
||||
log.Fatalf("Unexpected extension %v in %v", x, t)
|
||||
}
|
||||
if len(t.Variants()) == 0 && len(t.Extensions()) == 0 {
|
||||
cci, ok := language.GetCompactCore(t)
|
||||
if !ok {
|
||||
log.Fatalf("Locale for non-basic language %q", t)
|
||||
}
|
||||
coreTags = append(coreTags, cci)
|
||||
} else {
|
||||
special = append(special, t.String())
|
||||
}
|
||||
}
|
||||
|
||||
w := b.w
|
||||
|
||||
sort.Slice(coreTags, func(i, j int) bool { return coreTags[i] < coreTags[j] })
|
||||
sort.Strings(special)
|
||||
|
||||
w.WriteComment(`
|
||||
NumCompactTags is the number of common tags. The maximum tag is
|
||||
NumCompactTags-1.`)
|
||||
w.WriteConst("NumCompactTags", len(m))
|
||||
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, t := range coreTags {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t.Tag().String()), i)
|
||||
}
|
||||
for i, t := range special {
|
||||
fmt.Fprintf(w, "%s ID = %d\n", ident(t), i+len(coreTags))
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
w.WriteVar("coreTags", coreTags)
|
||||
|
||||
w.WriteConst("specialTagsStr", strings.Join(special, " "))
|
||||
}
|
||||
|
||||
func ident(s string) string {
|
||||
return strings.Replace(s, "-", "", -1) + "Index"
|
||||
}
|
||||
54
vendor/golang.org/x/text/internal/language/compact/gen_parents.go
generated
vendored
Normal file
54
vendor/golang.org/x/text/internal/language/compact/gen_parents.go
generated
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
func main() {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatalf("DecodeZip: %v", err)
|
||||
}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("parents.go", "compact")
|
||||
|
||||
// Create parents table.
|
||||
type ID uint16
|
||||
parents := make([]ID, compact.NumCompactTags)
|
||||
for _, loc := range data.Locales() {
|
||||
tag := language.MustParse(loc)
|
||||
index, ok := compact.FromTag(tag)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
parentIndex := compact.ID(0) // und
|
||||
for p := tag.Parent(); p != language.Und; p = p.Parent() {
|
||||
if x, ok := compact.FromTag(p); ok {
|
||||
parentIndex = x
|
||||
break
|
||||
}
|
||||
}
|
||||
parents[index] = ID(parentIndex)
|
||||
}
|
||||
|
||||
w.WriteComment(`
|
||||
parents maps a compact index of a tag to the compact index of the parent of
|
||||
this tag.`)
|
||||
w.WriteVar("parents", parents)
|
||||
}
|
||||
260
vendor/golang.org/x/text/internal/language/compact/language.go
generated
vendored
Normal file
260
vendor/golang.org/x/text/internal/language/compact/language.go
generated
vendored
Normal file
@@ -0,0 +1,260 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_index.go -output tables.go
|
||||
//go:generate go run gen_parents.go
|
||||
|
||||
package compact
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag struct {
|
||||
// NOTE: exported tags will become part of the public API.
|
||||
language ID
|
||||
locale ID
|
||||
full fullTag // always a language.Tag for now.
|
||||
}
|
||||
|
||||
const _und = 0
|
||||
|
||||
type fullTag interface {
|
||||
IsRoot() bool
|
||||
Parent() language.Tag
|
||||
}
|
||||
|
||||
// Make a compact Tag from a fully specified internal language Tag.
|
||||
func Make(t language.Tag) (tag Tag) {
|
||||
if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
|
||||
if r, err := language.ParseRegion(region[:2]); err == nil {
|
||||
tFull := t
|
||||
t, _ = t.SetTypeForKey("rg", "")
|
||||
// TODO: should we not consider "va" for the language tag?
|
||||
var exact1, exact2 bool
|
||||
tag.language, exact1 = FromTag(t)
|
||||
t.RegionID = r
|
||||
tag.locale, exact2 = FromTag(t)
|
||||
if !exact1 || !exact2 {
|
||||
tag.full = tFull
|
||||
}
|
||||
return tag
|
||||
}
|
||||
}
|
||||
lang, ok := FromTag(t)
|
||||
tag.language = lang
|
||||
tag.locale = lang
|
||||
if !ok {
|
||||
tag.full = t
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// Tag returns an internal language Tag version of this tag.
|
||||
func (t Tag) Tag() language.Tag {
|
||||
if t.full != nil {
|
||||
return t.full.(language.Tag)
|
||||
}
|
||||
tag := t.language.Tag()
|
||||
if t.language != t.locale {
|
||||
loc := t.locale.Tag()
|
||||
tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
|
||||
}
|
||||
return tag
|
||||
}
|
||||
|
||||
// IsCompact reports whether this tag is fully defined in terms of ID.
|
||||
func (t *Tag) IsCompact() bool {
|
||||
return t.full == nil
|
||||
}
|
||||
|
||||
// MayHaveVariants reports whether a tag may have variants. If it returns false
|
||||
// it is guaranteed the tag does not have variants.
|
||||
func (t Tag) MayHaveVariants() bool {
|
||||
return t.full != nil || int(t.language) >= len(coreTags)
|
||||
}
|
||||
|
||||
// MayHaveExtensions reports whether a tag may have extensions. If it returns
|
||||
// false it is guaranteed the tag does not have them.
|
||||
func (t Tag) MayHaveExtensions() bool {
|
||||
return t.full != nil ||
|
||||
int(t.language) >= len(coreTags) ||
|
||||
t.language != t.locale
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if t.full != nil {
|
||||
return t.full.IsRoot()
|
||||
}
|
||||
return t.language == _und
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.full != nil {
|
||||
return Make(t.full.Parent())
|
||||
}
|
||||
if t.language != t.locale {
|
||||
// Simulate stripping -u-rg-xxxxxx
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
// TODO: use parent lookup table once cycle from internal package is
|
||||
// removed. Probably by internalizing the table and declaring this fast
|
||||
// enough.
|
||||
// lang := compactID(internal.Parent(uint16(t.language)))
|
||||
lang, _ := FromTag(t.language.Tag().Parent())
|
||||
return Tag{language: lang, locale: lang}
|
||||
}
|
||||
|
||||
// returns token t and the rest of the string.
|
||||
func nextToken(s string) (t, tail string) {
|
||||
p := strings.Index(s[1:], "-")
|
||||
if p == -1 {
|
||||
return s[1:], ""
|
||||
}
|
||||
p++
|
||||
return s[1:p], s[p:]
|
||||
}
|
||||
|
||||
// LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func LanguageID(t Tag) (id ID, exact bool) {
|
||||
return t.language, t.full == nil
|
||||
}
|
||||
|
||||
// RegionalID returns the ID for the regional variant of this tag. This index is
|
||||
// used to indicate region-specific overrides, such as default currency, default
|
||||
// calendar and week data, default time cycle, and default measurement system
|
||||
// and unit preferences.
|
||||
//
|
||||
// For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
|
||||
// settings for currency, number formatting, etc. The CompactIndex for this tag
|
||||
// will be that for en-GB, while the RegionalID will be the one corresponding to
|
||||
// en-US.
|
||||
func RegionalID(t Tag) (id ID, exact bool) {
|
||||
return t.locale, t.full == nil
|
||||
}
|
||||
|
||||
// LanguageTag returns t stripped of regional variant indicators.
|
||||
//
|
||||
// At the moment this means it is stripped of a regional and variant subtag "rg"
|
||||
// and "va" in the "u" extension.
|
||||
func (t Tag) LanguageTag() Tag {
|
||||
if t.full == nil {
|
||||
return Tag{language: t.language, locale: t.language}
|
||||
}
|
||||
tt := t.Tag()
|
||||
tt.SetTypeForKey("rg", "")
|
||||
tt.SetTypeForKey("va", "")
|
||||
return Make(tt)
|
||||
}
|
||||
|
||||
// RegionalTag returns the regional variant of the tag.
|
||||
//
|
||||
// At the moment this means that the region is set from the regional subtag
|
||||
// "rg" in the "u" extension.
|
||||
func (t Tag) RegionalTag() Tag {
|
||||
rt := Tag{language: t.locale, locale: t.locale}
|
||||
if t.full == nil {
|
||||
return rt
|
||||
}
|
||||
b := language.Builder{}
|
||||
tag := t.Tag()
|
||||
// tag, _ = tag.SetTypeForKey("rg", "")
|
||||
b.SetTag(t.locale.Tag())
|
||||
if v := tag.Variants(); v != "" {
|
||||
for _, v := range strings.Split(v, "-") {
|
||||
b.AddVariant(v)
|
||||
}
|
||||
}
|
||||
for _, e := range tag.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// FromTag reports closest matching ID for an internal language Tag.
|
||||
func FromTag(t language.Tag) (id ID, exact bool) {
|
||||
// TODO: perhaps give more frequent tags a lower index.
|
||||
// TODO: we could make the indexes stable. This will excluded some
|
||||
// possibilities for optimization, so don't do this quite yet.
|
||||
exact = true
|
||||
|
||||
b, s, r := t.Raw()
|
||||
if t.HasString() {
|
||||
if t.IsPrivateUse() {
|
||||
// We have no entries for user-defined tags.
|
||||
return 0, false
|
||||
}
|
||||
hasExtra := false
|
||||
if t.HasVariants() {
|
||||
if t.HasExtensions() {
|
||||
build := language.Builder{}
|
||||
build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
|
||||
build.AddVariant(t.Variants())
|
||||
exact = false
|
||||
t = build.Make()
|
||||
}
|
||||
hasExtra = true
|
||||
} else if _, ok := t.Extension('u'); ok {
|
||||
// TODO: va may mean something else. Consider not considering it.
|
||||
// Strip all but the 'va' entry.
|
||||
old := t
|
||||
variant := t.TypeForKey("va")
|
||||
t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||
if variant != "" {
|
||||
t, _ = t.SetTypeForKey("va", variant)
|
||||
hasExtra = true
|
||||
}
|
||||
exact = old == t
|
||||
} else {
|
||||
exact = false
|
||||
}
|
||||
if hasExtra {
|
||||
// We have some variants.
|
||||
for i, s := range specialTags {
|
||||
if s == t {
|
||||
return ID(i + len(coreTags)), exact
|
||||
}
|
||||
}
|
||||
exact = false
|
||||
}
|
||||
}
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
exact = false
|
||||
if r != 0 && s == 0 {
|
||||
// Deal with cases where an extra script is inserted for the region.
|
||||
t, _ := t.Maximize()
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
}
|
||||
for t = t.Parent(); t != root; t = t.Parent() {
|
||||
// No variants specified: just compare core components.
|
||||
// The key has the form lllssrrr, where l, s, and r are nibbles for
|
||||
// respectively the langID, scriptID, and regionID.
|
||||
if x, ok := getCoreIndex(t); ok {
|
||||
return x, exact
|
||||
}
|
||||
}
|
||||
return 0, exact
|
||||
}
|
||||
|
||||
var root = language.Tag{}
|
||||
120
vendor/golang.org/x/text/internal/language/compact/parents.go
generated
vendored
Normal file
120
vendor/golang.org/x/text/internal/language/compact/parents.go
generated
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package compact
|
||||
|
||||
// parents maps a compact index of a tag to the compact index of the parent of
|
||||
// this tag.
|
||||
var parents = []ID{ // 775 elements
|
||||
// Entry 0 - 3F
|
||||
0x0000, 0x0000, 0x0001, 0x0001, 0x0000, 0x0004, 0x0000, 0x0006,
|
||||
0x0000, 0x0008, 0x0000, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a,
|
||||
0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x000a, 0x0000,
|
||||
0x0000, 0x0028, 0x0000, 0x002a, 0x0000, 0x002c, 0x0000, 0x0000,
|
||||
0x002f, 0x002e, 0x002e, 0x0000, 0x0033, 0x0000, 0x0035, 0x0000,
|
||||
0x0037, 0x0000, 0x0039, 0x0000, 0x003b, 0x0000, 0x0000, 0x003e,
|
||||
// Entry 40 - 7F
|
||||
0x0000, 0x0040, 0x0040, 0x0000, 0x0043, 0x0043, 0x0000, 0x0046,
|
||||
0x0000, 0x0048, 0x0000, 0x0000, 0x004b, 0x004a, 0x004a, 0x0000,
|
||||
0x004f, 0x004f, 0x004f, 0x004f, 0x0000, 0x0054, 0x0054, 0x0000,
|
||||
0x0057, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x005d,
|
||||
0x0000, 0x0060, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
|
||||
0x0066, 0x0000, 0x0069, 0x0000, 0x006b, 0x006b, 0x006b, 0x006b,
|
||||
0x006b, 0x006b, 0x006b, 0x0000, 0x0073, 0x0000, 0x0075, 0x0000,
|
||||
0x0077, 0x0000, 0x0000, 0x007a, 0x0000, 0x007c, 0x0000, 0x007e,
|
||||
// Entry 80 - BF
|
||||
0x0000, 0x0080, 0x0080, 0x0000, 0x0083, 0x0083, 0x0000, 0x0086,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0088, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0087, 0x0086,
|
||||
// Entry C0 - FF
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0088, 0x0087,
|
||||
0x0087, 0x0088, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087,
|
||||
0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0086, 0x0086, 0x0087,
|
||||
0x0087, 0x0086, 0x0087, 0x0087, 0x0087, 0x0087, 0x0087, 0x0000,
|
||||
0x00ef, 0x0000, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f1, 0x00f1,
|
||||
// Entry 100 - 13F
|
||||
0x00f2, 0x00f2, 0x00f1, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f1,
|
||||
0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x00f2, 0x0000, 0x010e,
|
||||
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0114, 0x0000,
|
||||
0x0117, 0x0117, 0x0117, 0x0117, 0x0000, 0x011c, 0x0000, 0x011e,
|
||||
0x0000, 0x0120, 0x0120, 0x0000, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
// Entry 140 - 17F
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123, 0x0123,
|
||||
0x0123, 0x0123, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
|
||||
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x015c, 0x015c,
|
||||
0x0000, 0x0160, 0x0000, 0x0000, 0x0163, 0x0000, 0x0165, 0x0000,
|
||||
0x0167, 0x0167, 0x0167, 0x0000, 0x016b, 0x0000, 0x016d, 0x0000,
|
||||
0x016f, 0x0000, 0x0171, 0x0171, 0x0000, 0x0174, 0x0000, 0x0176,
|
||||
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
|
||||
// Entry 180 - 1BF
|
||||
0x0000, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0184, 0x0184,
|
||||
0x0184, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
|
||||
0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x0000, 0x0195, 0x0000,
|
||||
0x0197, 0x0000, 0x0000, 0x019a, 0x0000, 0x0000, 0x019d, 0x0000,
|
||||
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
|
||||
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
|
||||
0x01af, 0x0000, 0x01b1, 0x01b1, 0x0000, 0x01b4, 0x0000, 0x01b6,
|
||||
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x01bc, 0x0000, 0x0000,
|
||||
// Entry 1C0 - 1FF
|
||||
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
|
||||
0x01c7, 0x0000, 0x01c9, 0x0000, 0x01cb, 0x01cb, 0x01cb, 0x01cb,
|
||||
0x0000, 0x01d0, 0x0000, 0x01d2, 0x01d2, 0x0000, 0x01d5, 0x0000,
|
||||
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x0000,
|
||||
0x01df, 0x01df, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
|
||||
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
|
||||
0x0000, 0x01f0, 0x0000, 0x0000, 0x01f3, 0x0000, 0x01f5, 0x01f5,
|
||||
0x01f5, 0x0000, 0x01f9, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x0000,
|
||||
// Entry 200 - 23F
|
||||
0x01ff, 0x0000, 0x0000, 0x0202, 0x0000, 0x0204, 0x0204, 0x0000,
|
||||
0x0207, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x0000,
|
||||
0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x020f, 0x0000,
|
||||
0x0217, 0x0000, 0x0219, 0x0000, 0x021b, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0221, 0x0000, 0x0000, 0x0224, 0x0000, 0x0226,
|
||||
0x0226, 0x0000, 0x0229, 0x0000, 0x022b, 0x022b, 0x0000, 0x0000,
|
||||
0x022f, 0x022e, 0x022e, 0x0000, 0x0000, 0x0234, 0x0000, 0x0236,
|
||||
0x0000, 0x0238, 0x0000, 0x0244, 0x023a, 0x0244, 0x0244, 0x0244,
|
||||
// Entry 240 - 27F
|
||||
0x0244, 0x0244, 0x0244, 0x0244, 0x023a, 0x0244, 0x0244, 0x0000,
|
||||
0x0247, 0x0247, 0x0247, 0x0000, 0x024b, 0x0000, 0x024d, 0x0000,
|
||||
0x024f, 0x024f, 0x0000, 0x0252, 0x0000, 0x0254, 0x0254, 0x0254,
|
||||
0x0254, 0x0254, 0x0254, 0x0000, 0x025b, 0x0000, 0x025d, 0x0000,
|
||||
0x025f, 0x0000, 0x0261, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
|
||||
0x0000, 0x0268, 0x0268, 0x0268, 0x0000, 0x026c, 0x0000, 0x026e,
|
||||
0x0000, 0x0270, 0x0000, 0x0000, 0x0000, 0x0274, 0x0273, 0x0273,
|
||||
0x0000, 0x0278, 0x0000, 0x027a, 0x0000, 0x027c, 0x0000, 0x0000,
|
||||
// Entry 280 - 2BF
|
||||
0x0000, 0x0000, 0x0281, 0x0000, 0x0000, 0x0284, 0x0000, 0x0286,
|
||||
0x0286, 0x0286, 0x0286, 0x0000, 0x028b, 0x028b, 0x028b, 0x0000,
|
||||
0x028f, 0x028f, 0x028f, 0x028f, 0x028f, 0x0000, 0x0295, 0x0295,
|
||||
0x0295, 0x0295, 0x0000, 0x0000, 0x0000, 0x0000, 0x029d, 0x029d,
|
||||
0x029d, 0x0000, 0x02a1, 0x02a1, 0x02a1, 0x02a1, 0x0000, 0x0000,
|
||||
0x02a7, 0x02a7, 0x02a7, 0x02a7, 0x0000, 0x02ac, 0x0000, 0x02ae,
|
||||
0x02ae, 0x0000, 0x02b1, 0x0000, 0x02b3, 0x0000, 0x02b5, 0x02b5,
|
||||
0x0000, 0x0000, 0x02b9, 0x0000, 0x0000, 0x0000, 0x02bd, 0x0000,
|
||||
// Entry 2C0 - 2FF
|
||||
0x02bf, 0x02bf, 0x0000, 0x0000, 0x02c3, 0x0000, 0x02c5, 0x0000,
|
||||
0x02c7, 0x0000, 0x02c9, 0x0000, 0x02cb, 0x0000, 0x02cd, 0x02cd,
|
||||
0x0000, 0x0000, 0x02d1, 0x0000, 0x02d3, 0x02d0, 0x02d0, 0x0000,
|
||||
0x0000, 0x02d8, 0x02d7, 0x02d7, 0x0000, 0x0000, 0x02dd, 0x0000,
|
||||
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
|
||||
0x0000, 0x0000, 0x02e9, 0x0000, 0x02eb, 0x0000, 0x02ed, 0x0000,
|
||||
0x02ef, 0x02ef, 0x0000, 0x0000, 0x02f3, 0x02f2, 0x02f2, 0x0000,
|
||||
0x02f7, 0x0000, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x02f9, 0x0000,
|
||||
// Entry 300 - 33F
|
||||
0x02ff, 0x0300, 0x02ff, 0x0000, 0x0303, 0x0051, 0x00e6,
|
||||
} // Size: 1574 bytes
|
||||
|
||||
// Total table size 1574 bytes (1KiB); checksum: 895AAF0B
|
||||
1015
vendor/golang.org/x/text/internal/language/compact/tables.go
generated
vendored
Normal file
1015
vendor/golang.org/x/text/internal/language/compact/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
91
vendor/golang.org/x/text/internal/language/compact/tags.go
generated
vendored
Normal file
91
vendor/golang.org/x/text/internal/language/compact/tags.go
generated
vendored
Normal file
@@ -0,0 +1,91 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package compact
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag{language: afIndex, locale: afIndex}
|
||||
Amharic Tag = Tag{language: amIndex, locale: amIndex}
|
||||
Arabic Tag = Tag{language: arIndex, locale: arIndex}
|
||||
ModernStandardArabic Tag = Tag{language: ar001Index, locale: ar001Index}
|
||||
Azerbaijani Tag = Tag{language: azIndex, locale: azIndex}
|
||||
Bulgarian Tag = Tag{language: bgIndex, locale: bgIndex}
|
||||
Bengali Tag = Tag{language: bnIndex, locale: bnIndex}
|
||||
Catalan Tag = Tag{language: caIndex, locale: caIndex}
|
||||
Czech Tag = Tag{language: csIndex, locale: csIndex}
|
||||
Danish Tag = Tag{language: daIndex, locale: daIndex}
|
||||
German Tag = Tag{language: deIndex, locale: deIndex}
|
||||
Greek Tag = Tag{language: elIndex, locale: elIndex}
|
||||
English Tag = Tag{language: enIndex, locale: enIndex}
|
||||
AmericanEnglish Tag = Tag{language: enUSIndex, locale: enUSIndex}
|
||||
BritishEnglish Tag = Tag{language: enGBIndex, locale: enGBIndex}
|
||||
Spanish Tag = Tag{language: esIndex, locale: esIndex}
|
||||
EuropeanSpanish Tag = Tag{language: esESIndex, locale: esESIndex}
|
||||
LatinAmericanSpanish Tag = Tag{language: es419Index, locale: es419Index}
|
||||
Estonian Tag = Tag{language: etIndex, locale: etIndex}
|
||||
Persian Tag = Tag{language: faIndex, locale: faIndex}
|
||||
Finnish Tag = Tag{language: fiIndex, locale: fiIndex}
|
||||
Filipino Tag = Tag{language: filIndex, locale: filIndex}
|
||||
French Tag = Tag{language: frIndex, locale: frIndex}
|
||||
CanadianFrench Tag = Tag{language: frCAIndex, locale: frCAIndex}
|
||||
Gujarati Tag = Tag{language: guIndex, locale: guIndex}
|
||||
Hebrew Tag = Tag{language: heIndex, locale: heIndex}
|
||||
Hindi Tag = Tag{language: hiIndex, locale: hiIndex}
|
||||
Croatian Tag = Tag{language: hrIndex, locale: hrIndex}
|
||||
Hungarian Tag = Tag{language: huIndex, locale: huIndex}
|
||||
Armenian Tag = Tag{language: hyIndex, locale: hyIndex}
|
||||
Indonesian Tag = Tag{language: idIndex, locale: idIndex}
|
||||
Icelandic Tag = Tag{language: isIndex, locale: isIndex}
|
||||
Italian Tag = Tag{language: itIndex, locale: itIndex}
|
||||
Japanese Tag = Tag{language: jaIndex, locale: jaIndex}
|
||||
Georgian Tag = Tag{language: kaIndex, locale: kaIndex}
|
||||
Kazakh Tag = Tag{language: kkIndex, locale: kkIndex}
|
||||
Khmer Tag = Tag{language: kmIndex, locale: kmIndex}
|
||||
Kannada Tag = Tag{language: knIndex, locale: knIndex}
|
||||
Korean Tag = Tag{language: koIndex, locale: koIndex}
|
||||
Kirghiz Tag = Tag{language: kyIndex, locale: kyIndex}
|
||||
Lao Tag = Tag{language: loIndex, locale: loIndex}
|
||||
Lithuanian Tag = Tag{language: ltIndex, locale: ltIndex}
|
||||
Latvian Tag = Tag{language: lvIndex, locale: lvIndex}
|
||||
Macedonian Tag = Tag{language: mkIndex, locale: mkIndex}
|
||||
Malayalam Tag = Tag{language: mlIndex, locale: mlIndex}
|
||||
Mongolian Tag = Tag{language: mnIndex, locale: mnIndex}
|
||||
Marathi Tag = Tag{language: mrIndex, locale: mrIndex}
|
||||
Malay Tag = Tag{language: msIndex, locale: msIndex}
|
||||
Burmese Tag = Tag{language: myIndex, locale: myIndex}
|
||||
Nepali Tag = Tag{language: neIndex, locale: neIndex}
|
||||
Dutch Tag = Tag{language: nlIndex, locale: nlIndex}
|
||||
Norwegian Tag = Tag{language: noIndex, locale: noIndex}
|
||||
Punjabi Tag = Tag{language: paIndex, locale: paIndex}
|
||||
Polish Tag = Tag{language: plIndex, locale: plIndex}
|
||||
Portuguese Tag = Tag{language: ptIndex, locale: ptIndex}
|
||||
BrazilianPortuguese Tag = Tag{language: ptBRIndex, locale: ptBRIndex}
|
||||
EuropeanPortuguese Tag = Tag{language: ptPTIndex, locale: ptPTIndex}
|
||||
Romanian Tag = Tag{language: roIndex, locale: roIndex}
|
||||
Russian Tag = Tag{language: ruIndex, locale: ruIndex}
|
||||
Sinhala Tag = Tag{language: siIndex, locale: siIndex}
|
||||
Slovak Tag = Tag{language: skIndex, locale: skIndex}
|
||||
Slovenian Tag = Tag{language: slIndex, locale: slIndex}
|
||||
Albanian Tag = Tag{language: sqIndex, locale: sqIndex}
|
||||
Serbian Tag = Tag{language: srIndex, locale: srIndex}
|
||||
SerbianLatin Tag = Tag{language: srLatnIndex, locale: srLatnIndex}
|
||||
Swedish Tag = Tag{language: svIndex, locale: svIndex}
|
||||
Swahili Tag = Tag{language: swIndex, locale: swIndex}
|
||||
Tamil Tag = Tag{language: taIndex, locale: taIndex}
|
||||
Telugu Tag = Tag{language: teIndex, locale: teIndex}
|
||||
Thai Tag = Tag{language: thIndex, locale: thIndex}
|
||||
Turkish Tag = Tag{language: trIndex, locale: trIndex}
|
||||
Ukrainian Tag = Tag{language: ukIndex, locale: ukIndex}
|
||||
Urdu Tag = Tag{language: urIndex, locale: urIndex}
|
||||
Uzbek Tag = Tag{language: uzIndex, locale: uzIndex}
|
||||
Vietnamese Tag = Tag{language: viIndex, locale: viIndex}
|
||||
Chinese Tag = Tag{language: zhIndex, locale: zhIndex}
|
||||
SimplifiedChinese Tag = Tag{language: zhHansIndex, locale: zhHansIndex}
|
||||
TraditionalChinese Tag = Tag{language: zhHantIndex, locale: zhHantIndex}
|
||||
Zulu Tag = Tag{language: zuIndex, locale: zuIndex}
|
||||
)
|
||||
167
vendor/golang.org/x/text/internal/language/compose.go
generated
vendored
Normal file
167
vendor/golang.org/x/text/internal/language/compose.go
generated
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// A Builder allows constructing a Tag from individual components.
|
||||
// Its main user is Compose in the top-level language package.
|
||||
type Builder struct {
|
||||
Tag Tag
|
||||
|
||||
private string // the x extension
|
||||
variants []string
|
||||
extensions []string
|
||||
}
|
||||
|
||||
// Make returns a new Tag from the current settings.
|
||||
func (b *Builder) Make() Tag {
|
||||
t := b.Tag
|
||||
|
||||
if len(b.extensions) > 0 || len(b.variants) > 0 {
|
||||
sort.Sort(sortVariants(b.variants))
|
||||
sort.Strings(b.extensions)
|
||||
|
||||
if b.private != "" {
|
||||
b.extensions = append(b.extensions, b.private)
|
||||
}
|
||||
n := maxCoreSize + tokenLen(b.variants...) + tokenLen(b.extensions...)
|
||||
buf := make([]byte, n)
|
||||
p := t.genCoreBytes(buf)
|
||||
t.pVariant = byte(p)
|
||||
p += appendTokens(buf[p:], b.variants...)
|
||||
t.pExt = uint16(p)
|
||||
p += appendTokens(buf[p:], b.extensions...)
|
||||
t.str = string(buf[:p])
|
||||
// We may not always need to remake the string, but when or when not
|
||||
// to do so is rather tricky.
|
||||
scan := makeScanner(buf[:p])
|
||||
t, _ = parse(&scan, "")
|
||||
return t
|
||||
|
||||
} else if b.private != "" {
|
||||
t.str = b.private
|
||||
t.RemakeString()
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// SetTag copies all the settings from a given Tag. Any previously set values
|
||||
// are discarded.
|
||||
func (b *Builder) SetTag(t Tag) {
|
||||
b.Tag.LangID = t.LangID
|
||||
b.Tag.RegionID = t.RegionID
|
||||
b.Tag.ScriptID = t.ScriptID
|
||||
// TODO: optimize
|
||||
b.variants = b.variants[:0]
|
||||
if variants := t.Variants(); variants != "" {
|
||||
for _, vr := range strings.Split(variants[1:], "-") {
|
||||
b.variants = append(b.variants, vr)
|
||||
}
|
||||
}
|
||||
b.extensions, b.private = b.extensions[:0], ""
|
||||
for _, e := range t.Extensions() {
|
||||
b.AddExt(e)
|
||||
}
|
||||
}
|
||||
|
||||
// AddExt adds extension e to the tag. e must be a valid extension as returned
|
||||
// by Tag.Extension. If the extension already exists, it will be discarded,
|
||||
// except for a -u extension, where non-existing key-type pairs will added.
|
||||
func (b *Builder) AddExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
if b.private == "" {
|
||||
b.private = e
|
||||
}
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] += e[1:]
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// SetExt sets the extension e to the tag. e must be a valid extension as
|
||||
// returned by Tag.Extension. If the extension already exists, it will be
|
||||
// overwritten, except for a -u extension, where the individual key-type pairs
|
||||
// will be set.
|
||||
func (b *Builder) SetExt(e string) {
|
||||
if e[0] == 'x' {
|
||||
b.private = e
|
||||
return
|
||||
}
|
||||
for i, s := range b.extensions {
|
||||
if s[0] == e[0] {
|
||||
if e[0] == 'u' {
|
||||
b.extensions[i] = e + s[1:]
|
||||
} else {
|
||||
b.extensions[i] = e
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
b.extensions = append(b.extensions, e)
|
||||
}
|
||||
|
||||
// AddVariant adds any number of variants.
|
||||
func (b *Builder) AddVariant(v ...string) {
|
||||
for _, v := range v {
|
||||
if v != "" {
|
||||
b.variants = append(b.variants, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ClearVariants removes any variants previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearVariants() {
|
||||
b.variants = b.variants[:0]
|
||||
}
|
||||
|
||||
// ClearExtensions removes any extensions previously added, including those
|
||||
// copied from a Tag in SetTag.
|
||||
func (b *Builder) ClearExtensions() {
|
||||
b.private = ""
|
||||
b.extensions = b.extensions[:0]
|
||||
}
|
||||
|
||||
func tokenLen(token ...string) (n int) {
|
||||
for _, t := range token {
|
||||
n += len(t) + 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func appendTokens(b []byte, token ...string) int {
|
||||
p := 0
|
||||
for _, t := range token {
|
||||
b[p] = '-'
|
||||
copy(b[p+1:], t)
|
||||
p += 1 + len(t)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
type sortVariants []string
|
||||
|
||||
func (s sortVariants) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sortVariants) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sortVariants) Less(i, j int) bool {
|
||||
return variantIndex[s[i]] < variantIndex[s[j]]
|
||||
}
|
||||
28
vendor/golang.org/x/text/internal/language/coverage.go
generated
vendored
Normal file
28
vendor/golang.org/x/text/internal/language/coverage.go
generated
vendored
Normal file
@@ -0,0 +1,28 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func BaseLanguages() []Language {
|
||||
base := make([]Language, 0, NumLanguages)
|
||||
for i := 0; i < langNoIndexOffset; i++ {
|
||||
// We included "und" already for the value 0.
|
||||
if i != nonCanonicalUnd {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
}
|
||||
i := langNoIndexOffset
|
||||
for _, v := range langNoIndex {
|
||||
for k := 0; k < 8; k++ {
|
||||
if v&1 == 1 {
|
||||
base = append(base, Language(i))
|
||||
}
|
||||
v >>= 1
|
||||
i++
|
||||
}
|
||||
}
|
||||
return base
|
||||
}
|
||||
1520
vendor/golang.org/x/text/internal/language/gen.go
generated
vendored
Normal file
1520
vendor/golang.org/x/text/internal/language/gen.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
20
vendor/golang.org/x/text/internal/language/gen_common.go
generated
vendored
Normal file
20
vendor/golang.org/x/text/internal/language/gen_common.go
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This file contains code common to the maketables.go and the package code.
|
||||
|
||||
// AliasType is the type of an alias in AliasMap.
|
||||
type AliasType int8
|
||||
|
||||
const (
|
||||
Deprecated AliasType = iota
|
||||
Macro
|
||||
Legacy
|
||||
|
||||
AliasTypeUnknown AliasType = -1
|
||||
)
|
||||
596
vendor/golang.org/x/text/internal/language/language.go
generated
vendored
Normal file
596
vendor/golang.org/x/text/internal/language/language.go
generated
vendored
Normal file
@@ -0,0 +1,596 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go gen_common.go -output tables.go
|
||||
|
||||
package language // import "golang.org/x/text/internal/language"
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
// maxCoreSize is the maximum size of a BCP 47 tag without variants and
|
||||
// extensions. Equals max lang (3) + script (4) + max reg (3) + 2 dashes.
|
||||
maxCoreSize = 12
|
||||
|
||||
// max99thPercentileSize is a somewhat arbitrary buffer size that presumably
|
||||
// is large enough to hold at least 99% of the BCP 47 tags.
|
||||
max99thPercentileSize = 32
|
||||
|
||||
// maxSimpleUExtensionSize is the maximum size of a -u extension with one
|
||||
// key-type pair. Equals len("-u-") + key (2) + dash + max value (8).
|
||||
maxSimpleUExtensionSize = 14
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed. The zero value of Tag is Und.
|
||||
type Tag struct {
|
||||
// TODO: the following fields have the form TagTypeID. This name is chosen
|
||||
// to allow refactoring the public package without conflicting with its
|
||||
// Base, Script, and Region methods. Once the transition is fully completed
|
||||
// the ID can be stripped from the name.
|
||||
|
||||
LangID Language
|
||||
RegionID Region
|
||||
// TODO: we will soon run out of positions for ScriptID. Idea: instead of
|
||||
// storing lang, region, and ScriptID codes, store only the compact index and
|
||||
// have a lookup table from this code to its expansion. This greatly speeds
|
||||
// up table lookup, speed up common variant cases.
|
||||
// This will also immediately free up 3 extra bytes. Also, the pVariant
|
||||
// field can now be moved to the lookup table, as the compact index uniquely
|
||||
// determines the offset of a possible variant.
|
||||
ScriptID Script
|
||||
pVariant byte // offset in str, includes preceding '-'
|
||||
pExt uint16 // offset of first extension, includes preceding '-'
|
||||
|
||||
// str is the string representation of the Tag. It will only be used if the
|
||||
// tag has variants or extensions.
|
||||
str string
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
t, _ := Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
// TODO: consider removing
|
||||
func (t Tag) Raw() (b Language, s Script, r Region) {
|
||||
return t.LangID, t.ScriptID, t.RegionID
|
||||
}
|
||||
|
||||
// equalTags compares language, script and region subtags only.
|
||||
func (t Tag) equalTags(a Tag) bool {
|
||||
return t.LangID == a.LangID && t.ScriptID == a.ScriptID && t.RegionID == a.RegionID
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
if int(t.pVariant) < len(t.str) {
|
||||
return false
|
||||
}
|
||||
return t.equalTags(Und)
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether the Tag consists solely of an IsPrivateUse use
|
||||
// tag.
|
||||
func (t Tag) IsPrivateUse() bool {
|
||||
return t.str != "" && t.pVariant == 0
|
||||
}
|
||||
|
||||
// RemakeString is used to update t.str in case lang, script or region changed.
|
||||
// It is assumed that pExt and pVariant still point to the start of the
|
||||
// respective parts.
|
||||
func (t *Tag) RemakeString() {
|
||||
if t.str == "" {
|
||||
return
|
||||
}
|
||||
extra := t.str[t.pVariant:]
|
||||
if t.pVariant > 0 {
|
||||
extra = extra[1:]
|
||||
}
|
||||
if t.equalTags(Und) && strings.HasPrefix(extra, "x-") {
|
||||
t.str = extra
|
||||
t.pVariant = 0
|
||||
t.pExt = 0
|
||||
return
|
||||
}
|
||||
var buf [max99thPercentileSize]byte // avoid extra memory allocation in most cases.
|
||||
b := buf[:t.genCoreBytes(buf[:])]
|
||||
if extra != "" {
|
||||
diff := len(b) - int(t.pVariant)
|
||||
b = append(b, '-')
|
||||
b = append(b, extra...)
|
||||
t.pVariant = uint8(int(t.pVariant) + diff)
|
||||
t.pExt = uint16(int(t.pExt) + diff)
|
||||
} else {
|
||||
t.pVariant = uint8(len(b))
|
||||
t.pExt = uint16(len(b))
|
||||
}
|
||||
t.str = string(b)
|
||||
}
|
||||
|
||||
// genCoreBytes writes a string for the base languages, script and region tags
|
||||
// to the given buffer and returns the number of bytes written. It will never
|
||||
// write more than maxCoreSize bytes.
|
||||
func (t *Tag) genCoreBytes(buf []byte) int {
|
||||
n := t.LangID.StringToBuf(buf[:])
|
||||
if t.ScriptID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.ScriptID.String())
|
||||
}
|
||||
if t.RegionID != 0 {
|
||||
n += copy(buf[n:], "-")
|
||||
n += copy(buf[n:], t.RegionID.String())
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
if t.str != "" {
|
||||
return t.str
|
||||
}
|
||||
if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
return t.LangID.String()
|
||||
}
|
||||
buf := [maxCoreSize]byte{}
|
||||
return string(buf[:t.genCoreBytes(buf[:])])
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
if t.str != "" {
|
||||
text = append(text, t.str...)
|
||||
} else if t.ScriptID == 0 && t.RegionID == 0 {
|
||||
text = append(text, t.LangID.String()...)
|
||||
} else {
|
||||
buf := [maxCoreSize]byte{}
|
||||
text = buf[:t.genCoreBytes(buf[:])]
|
||||
}
|
||||
return text, nil
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
tag, err := Parse(string(text))
|
||||
*t = tag
|
||||
return err
|
||||
}
|
||||
|
||||
// Variants returns the part of the tag holding all variants or the empty string
|
||||
// if there are no variants defined.
|
||||
func (t Tag) Variants() string {
|
||||
if t.pVariant == 0 {
|
||||
return ""
|
||||
}
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
|
||||
// VariantOrPrivateUseTags returns variants or private use tags.
|
||||
func (t Tag) VariantOrPrivateUseTags() string {
|
||||
if t.pExt > 0 {
|
||||
return t.str[t.pVariant:t.pExt]
|
||||
}
|
||||
return t.str[t.pVariant:]
|
||||
}
|
||||
|
||||
// HasString reports whether this tag defines more than just the raw
|
||||
// components.
|
||||
func (t Tag) HasString() bool {
|
||||
return t.str != ""
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
func (t Tag) Parent() Tag {
|
||||
if t.str != "" {
|
||||
// Strip the variants and extensions.
|
||||
b, s, r := t.Raw()
|
||||
t = Tag{LangID: b, ScriptID: s, RegionID: r}
|
||||
if t.RegionID == 0 && t.ScriptID != 0 && t.LangID != 0 {
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID == t.ScriptID {
|
||||
return Tag{LangID: t.LangID}
|
||||
}
|
||||
}
|
||||
return t
|
||||
}
|
||||
if t.LangID != 0 {
|
||||
if t.RegionID != 0 {
|
||||
maxScript := t.ScriptID
|
||||
if maxScript == 0 {
|
||||
max, _ := addTags(t)
|
||||
maxScript = max.ScriptID
|
||||
}
|
||||
|
||||
for i := range parents {
|
||||
if Language(parents[i].lang) == t.LangID && Script(parents[i].maxScript) == maxScript {
|
||||
for _, r := range parents[i].fromRegion {
|
||||
if Region(r) == t.RegionID {
|
||||
return Tag{
|
||||
LangID: t.LangID,
|
||||
ScriptID: Script(parents[i].script),
|
||||
RegionID: Region(parents[i].toRegion),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Strip the script if it is the default one.
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID != maxScript {
|
||||
return Tag{LangID: t.LangID, ScriptID: maxScript}
|
||||
}
|
||||
return Tag{LangID: t.LangID}
|
||||
} else if t.ScriptID != 0 {
|
||||
// The parent for an base-script pair with a non-default script is
|
||||
// "und" instead of the base language.
|
||||
base, _ := addTags(Tag{LangID: t.LangID})
|
||||
if base.ScriptID != t.ScriptID {
|
||||
return Und
|
||||
}
|
||||
return Tag{LangID: t.LangID}
|
||||
}
|
||||
}
|
||||
return Und
|
||||
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (ext string, err error) {
|
||||
scan := makeScannerString(s)
|
||||
var end int
|
||||
if n := len(scan.token); n != 1 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
scan.toLower(0, len(scan.b))
|
||||
end = parseExtension(&scan)
|
||||
if end != len(s) {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
return string(scan.b), nil
|
||||
}
|
||||
|
||||
// HasVariants reports whether t has variants.
|
||||
func (t Tag) HasVariants() bool {
|
||||
return uint16(t.pVariant) < t.pExt
|
||||
}
|
||||
|
||||
// HasExtensions reports whether t has extensions.
|
||||
func (t Tag) HasExtensions() bool {
|
||||
return int(t.pExt) < len(t.str)
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext string, ok bool) {
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
if ext[0] == x {
|
||||
return ext, true
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []string {
|
||||
e := []string{}
|
||||
for i := int(t.pExt); i < len(t.str)-1; {
|
||||
var ext string
|
||||
i, ext = getExtension(t.str, i)
|
||||
e = append(e, ext)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if start, end, _ := t.findTypeForKey(key); end != start {
|
||||
return t.str[start:end]
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
var (
|
||||
errPrivateUse = errors.New("cannot set a key on a private use tag")
|
||||
errInvalidArguments = errors.New("invalid key or type")
|
||||
)
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// An empty value removes an existing pair with the same key.
|
||||
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||
if t.IsPrivateUse() {
|
||||
return t, errPrivateUse
|
||||
}
|
||||
if len(key) != 2 {
|
||||
return t, errInvalidArguments
|
||||
}
|
||||
|
||||
// Remove the setting if value is "".
|
||||
if value == "" {
|
||||
start, end, _ := t.findTypeForKey(key)
|
||||
if start != end {
|
||||
// Remove key tag and leading '-'.
|
||||
start -= 4
|
||||
|
||||
// Remove a possible empty extension.
|
||||
if (end == len(t.str) || t.str[end+2] == '-') && t.str[start-2] == '-' {
|
||||
start -= 2
|
||||
}
|
||||
if start == int(t.pVariant) && end == len(t.str) {
|
||||
t.str = ""
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
} else {
|
||||
t.str = fmt.Sprintf("%s%s", t.str[:start], t.str[end:])
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
if len(value) < 3 || len(value) > 8 {
|
||||
return t, errInvalidArguments
|
||||
}
|
||||
|
||||
var (
|
||||
buf [maxCoreSize + maxSimpleUExtensionSize]byte
|
||||
uStart int // start of the -u extension.
|
||||
)
|
||||
|
||||
// Generate the tag string if needed.
|
||||
if t.str == "" {
|
||||
uStart = t.genCoreBytes(buf[:])
|
||||
buf[uStart] = '-'
|
||||
uStart++
|
||||
}
|
||||
|
||||
// Create new key-type pair and parse it to verify.
|
||||
b := buf[uStart:]
|
||||
copy(b, "u-")
|
||||
copy(b[2:], key)
|
||||
b[4] = '-'
|
||||
b = b[:5+copy(b[5:], value)]
|
||||
scan := makeScanner(b)
|
||||
if parseExtensions(&scan); scan.err != nil {
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// Assemble the replacement string.
|
||||
if t.str == "" {
|
||||
t.pVariant, t.pExt = byte(uStart-1), uint16(uStart-1)
|
||||
t.str = string(buf[:uStart+len(b)])
|
||||
} else {
|
||||
s := t.str
|
||||
start, end, hasExt := t.findTypeForKey(key)
|
||||
if start == end {
|
||||
if hasExt {
|
||||
b = b[2:]
|
||||
}
|
||||
t.str = fmt.Sprintf("%s-%s%s", s[:start], b, s[end:])
|
||||
} else {
|
||||
t.str = fmt.Sprintf("%s%s%s", s[:start], value, s[end:])
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// findKeyAndType returns the start and end position for the type corresponding
|
||||
// to key or the point at which to insert the key-value pair if the type
|
||||
// wasn't found. The hasExt return value reports whether an -u extension was present.
|
||||
// Note: the extensions are typically very small and are likely to contain
|
||||
// only one key-type pair.
|
||||
func (t Tag) findTypeForKey(key string) (start, end int, hasExt bool) {
|
||||
p := int(t.pExt)
|
||||
if len(key) != 2 || p == len(t.str) || p == 0 {
|
||||
return p, p, false
|
||||
}
|
||||
s := t.str
|
||||
|
||||
// Find the correct extension.
|
||||
for p++; s[p] != 'u'; p++ {
|
||||
if s[p] > 'u' {
|
||||
p--
|
||||
return p, p, false
|
||||
}
|
||||
if p = nextExtension(s, p); p == len(s) {
|
||||
return len(s), len(s), false
|
||||
}
|
||||
}
|
||||
// Proceed to the hyphen following the extension name.
|
||||
p++
|
||||
|
||||
// curKey is the key currently being processed.
|
||||
curKey := ""
|
||||
|
||||
// Iterate over keys until we get the end of a section.
|
||||
for {
|
||||
// p points to the hyphen preceding the current token.
|
||||
if p3 := p + 3; s[p3] == '-' {
|
||||
// Found a key.
|
||||
// Check whether we just processed the key that was requested.
|
||||
if curKey == key {
|
||||
return start, p, true
|
||||
}
|
||||
// Set to the next key and continue scanning type tokens.
|
||||
curKey = s[p+1 : p3]
|
||||
if curKey > key {
|
||||
return p, p, true
|
||||
}
|
||||
// Start of the type token sequence.
|
||||
start = p + 4
|
||||
// A type is at least 3 characters long.
|
||||
p += 7 // 4 + 3
|
||||
} else {
|
||||
// Attribute or type, which is at least 3 characters long.
|
||||
p += 4
|
||||
}
|
||||
// p points past the third character of a type or attribute.
|
||||
max := p + 5 // maximum length of token plus hyphen.
|
||||
if len(s) < max {
|
||||
max = len(s)
|
||||
}
|
||||
for ; p < max && s[p] != '-'; p++ {
|
||||
}
|
||||
// Bail if we have exhausted all tokens or if the next token starts
|
||||
// a new extension.
|
||||
if p == len(s) || s[p+2] == '-' {
|
||||
if curKey == key {
|
||||
return start, p, true
|
||||
}
|
||||
return p, p, true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Language, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getLangID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
if len(s) != 4 {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [4]byte
|
||||
return getScriptID(script, buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
return getRegionM49(r)
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
if n := len(s); n < 2 || 3 < n {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
var buf [3]byte
|
||||
return getRegionID(buf[:copy(buf[:], s)])
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
if r == 0 || r.IsGroup() || r.IsPrivateUse() && r != _XK {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
if r == 0 {
|
||||
return false
|
||||
}
|
||||
return int(regionInclusion[r]) < len(regionContainment)
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
if r == c {
|
||||
return true
|
||||
}
|
||||
g := regionInclusion[r]
|
||||
if g >= nRegionGroups {
|
||||
return false
|
||||
}
|
||||
m := regionContainment[g]
|
||||
|
||||
d := regionInclusion[c]
|
||||
b := regionInclusionBits[d]
|
||||
|
||||
// A contained country may belong to multiple disjoint groups. Matching any
|
||||
// of these indicates containment. If the contained region is a group, it
|
||||
// must strictly be a subset.
|
||||
if d >= nRegionGroups {
|
||||
return b&m != 0
|
||||
}
|
||||
return b&^m == 0
|
||||
}
|
||||
|
||||
var errNoTLD = errors.New("language: region is not a valid ccTLD")
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
// See http://en.wikipedia.org/wiki/Country_code_top-level_domain for the
|
||||
// difference between ISO 3166-1 and IANA ccTLD.
|
||||
if r == _GB {
|
||||
r = _UK
|
||||
}
|
||||
if (r.typ() & ccTLD) == 0 {
|
||||
return 0, errNoTLD
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
if cr := normRegion(r); cr != 0 {
|
||||
return cr
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
ID uint8
|
||||
str string
|
||||
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
s = strings.ToLower(s)
|
||||
if id, ok := variantIndex[s]; ok {
|
||||
return Variant{id, s}, nil
|
||||
}
|
||||
return Variant{}, NewValueError([]byte(s))
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.str
|
||||
}
|
||||
412
vendor/golang.org/x/text/internal/language/lookup.go
generated
vendored
Normal file
412
vendor/golang.org/x/text/internal/language/lookup.go
generated
vendored
Normal file
@@ -0,0 +1,412 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// findIndex tries to find the given tag in idx and returns a standardized error
|
||||
// if it could not be found.
|
||||
func findIndex(idx tag.Index, key []byte, form string) (index int, err error) {
|
||||
if !tag.FixCase(form, key) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
i := idx.Index(key)
|
||||
if i == -1 {
|
||||
return 0, NewValueError(key)
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
|
||||
func searchUint(imap []uint16, key uint16) int {
|
||||
return sort.Search(len(imap), func(i int) bool {
|
||||
return imap[i] >= key
|
||||
})
|
||||
}
|
||||
|
||||
type Language uint16
|
||||
|
||||
// getLangID returns the langID of s if s is a canonical subtag
|
||||
// or langUnknown if s is not a canonical subtag.
|
||||
func getLangID(s []byte) (Language, error) {
|
||||
if len(s) == 2 {
|
||||
return getLangISO2(s)
|
||||
}
|
||||
return getLangISO3(s)
|
||||
}
|
||||
|
||||
// TODO language normalization as well as the AliasMaps could be moved to the
|
||||
// higher level package, but it is a bit tricky to separate the generation.
|
||||
|
||||
func (id Language) Canonicalize() (Language, AliasType) {
|
||||
return normLang(id)
|
||||
}
|
||||
|
||||
// mapLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(id Language) (Language, AliasType) {
|
||||
k := sort.Search(len(AliasMap), func(i int) bool {
|
||||
return AliasMap[i].From >= uint16(id)
|
||||
})
|
||||
if k < len(AliasMap) && AliasMap[k].From == uint16(id) {
|
||||
return Language(AliasMap[k].To), AliasTypes[k]
|
||||
}
|
||||
return id, AliasTypeUnknown
|
||||
}
|
||||
|
||||
// getLangISO2 returns the langID for the given 2-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO2(s []byte) (Language, error) {
|
||||
if !tag.FixCase("zz", s) {
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
if i := lang.Index(s); i != -1 && lang.Elem(i)[3] != 0 {
|
||||
return Language(i), nil
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
|
||||
const base = 'z' - 'a' + 1
|
||||
|
||||
func strToInt(s []byte) uint {
|
||||
v := uint(0)
|
||||
for i := 0; i < len(s); i++ {
|
||||
v *= base
|
||||
v += uint(s[i] - 'a')
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// converts the given integer to the original ASCII string passed to strToInt.
|
||||
// len(s) must match the number of characters obtained.
|
||||
func intToStr(v uint, s []byte) {
|
||||
for i := len(s) - 1; i >= 0; i-- {
|
||||
s[i] = byte(v%base) + 'a'
|
||||
v /= base
|
||||
}
|
||||
}
|
||||
|
||||
// getLangISO3 returns the langID for the given 3-letter ISO language code
|
||||
// or unknownLang if this does not exist.
|
||||
func getLangISO3(s []byte) (Language, error) {
|
||||
if tag.FixCase("und", s) {
|
||||
// first try to match canonical 3-letter entries
|
||||
for i := lang.Index(s[:2]); i != -1; i = lang.Next(s[:2], i) {
|
||||
if e := lang.Elem(i); e[3] == 0 && e[2] == s[2] {
|
||||
// We treat "und" as special and always translate it to "unspecified".
|
||||
// Note that ZZ and Zzzz are private use and are not treated as
|
||||
// unspecified by default.
|
||||
id := Language(i)
|
||||
if id == nonCanonicalUnd {
|
||||
return 0, nil
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
if i := altLangISO3.Index(s); i != -1 {
|
||||
return Language(altLangIndex[altLangISO3.Elem(i)[3]]), nil
|
||||
}
|
||||
n := strToInt(s)
|
||||
if langNoIndex[n/8]&(1<<(n%8)) != 0 {
|
||||
return Language(n) + langNoIndexOffset, nil
|
||||
}
|
||||
// Check for non-canonical uses of ISO3.
|
||||
for i := lang.Index(s[:1]); i != -1; i = lang.Next(s[:1], i) {
|
||||
if e := lang.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Language(i), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
// StringToBuf writes the string to b and returns the number of bytes
|
||||
// written. cap(b) must be >= 3.
|
||||
func (id Language) StringToBuf(b []byte) int {
|
||||
if id >= langNoIndexOffset {
|
||||
intToStr(uint(id)-langNoIndexOffset, b[:3])
|
||||
return 3
|
||||
} else if id == 0 {
|
||||
return copy(b, "und")
|
||||
}
|
||||
l := lang[id<<2:]
|
||||
if l[3] == 0 {
|
||||
return copy(b, l[:3])
|
||||
}
|
||||
return copy(b, l[:2])
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the langID.
|
||||
// Use b as variable name, instead of id, to ensure the variable
|
||||
// used is consistent with that of Base in which this type is embedded.
|
||||
func (b Language) String() string {
|
||||
if b == 0 {
|
||||
return "und"
|
||||
} else if b >= langNoIndexOffset {
|
||||
b -= langNoIndexOffset
|
||||
buf := [3]byte{}
|
||||
intToStr(uint(b), buf[:])
|
||||
return string(buf[:])
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
}
|
||||
return l[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Language) ISO3() string {
|
||||
if b == 0 || b >= langNoIndexOffset {
|
||||
return b.String()
|
||||
}
|
||||
l := lang.Elem(int(b))
|
||||
if l[3] == 0 {
|
||||
return l[:3]
|
||||
} else if l[2] == 0 {
|
||||
return altLangISO3.Elem(int(l[3]))[:3]
|
||||
}
|
||||
// This allocation will only happen for 3-letter ISO codes
|
||||
// that are non-canonical BCP 47 language identifiers.
|
||||
return l[0:1] + l[2:4]
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Language) IsPrivateUse() bool {
|
||||
return langPrivateStart <= b && b <= langPrivateEnd
|
||||
}
|
||||
|
||||
// SuppressScript returns the script marked as SuppressScript in the IANA
|
||||
// language tag repository, or 0 if there is no such script.
|
||||
func (b Language) SuppressScript() Script {
|
||||
if b < langNoIndexOffset {
|
||||
return Script(suppressScript[b])
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type Region uint16
|
||||
|
||||
// getRegionID returns the region id for s if s is a valid 2-letter region code
|
||||
// or unknownRegion.
|
||||
func getRegionID(s []byte) (Region, error) {
|
||||
if len(s) == 3 {
|
||||
if isAlpha(s[0]) {
|
||||
return getRegionISO3(s)
|
||||
}
|
||||
if i, err := strconv.ParseUint(string(s), 10, 10); err == nil {
|
||||
return getRegionM49(int(i))
|
||||
}
|
||||
}
|
||||
return getRegionISO2(s)
|
||||
}
|
||||
|
||||
// getRegionISO2 returns the regionID for the given 2-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO2(s []byte) (Region, error) {
|
||||
i, err := findIndex(regionISO, s, "ZZ")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
|
||||
// getRegionISO3 returns the regionID for the given 3-letter ISO country code
|
||||
// or unknownRegion if this does not exist.
|
||||
func getRegionISO3(s []byte) (Region, error) {
|
||||
if tag.FixCase("ZZZ", s) {
|
||||
for i := regionISO.Index(s[:1]); i != -1; i = regionISO.Next(s[:1], i) {
|
||||
if e := regionISO.Elem(i); e[2] == s[1] && e[3] == s[2] {
|
||||
return Region(i) + isoRegionOffset, nil
|
||||
}
|
||||
}
|
||||
for i := 0; i < len(altRegionISO3); i += 3 {
|
||||
if tag.Compare(altRegionISO3[i:i+3], s) == 0 {
|
||||
return Region(altRegionIDs[i/3]), nil
|
||||
}
|
||||
}
|
||||
return 0, NewValueError(s)
|
||||
}
|
||||
return 0, ErrSyntax
|
||||
}
|
||||
|
||||
func getRegionM49(n int) (Region, error) {
|
||||
if 0 < n && n <= 999 {
|
||||
const (
|
||||
searchBits = 7
|
||||
regionBits = 9
|
||||
regionMask = 1<<regionBits - 1
|
||||
)
|
||||
idx := n >> searchBits
|
||||
buf := fromM49[m49Index[idx]:m49Index[idx+1]]
|
||||
val := uint16(n) << regionBits // we rely on bits shifting out
|
||||
i := sort.Search(len(buf), func(i int) bool {
|
||||
return buf[i] >= val
|
||||
})
|
||||
if r := fromM49[int(m49Index[idx])+i]; r&^regionMask == val {
|
||||
return Region(r & regionMask), nil
|
||||
}
|
||||
}
|
||||
var e ValueError
|
||||
fmt.Fprint(bytes.NewBuffer([]byte(e.v[:])), n)
|
||||
return 0, e
|
||||
}
|
||||
|
||||
// normRegion returns a region if r is deprecated or 0 otherwise.
|
||||
// TODO: consider supporting BYS (-> BLR), CSK (-> 200 or CZ), PHI (-> PHL) and AFI (-> DJ).
|
||||
// TODO: consider mapping split up regions to new most populous one (like CLDR).
|
||||
func normRegion(r Region) Region {
|
||||
m := regionOldMap
|
||||
k := sort.Search(len(m), func(i int) bool {
|
||||
return m[i].From >= uint16(r)
|
||||
})
|
||||
if k < len(m) && m[k].From == uint16(r) {
|
||||
return Region(m[k].To)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
const (
|
||||
iso3166UserAssigned = 1 << iota
|
||||
ccTLD
|
||||
bcp47Region
|
||||
)
|
||||
|
||||
func (r Region) typ() byte {
|
||||
return regionTypes[r]
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
if r < isoRegionOffset {
|
||||
if r == 0 {
|
||||
return "ZZ"
|
||||
}
|
||||
return fmt.Sprintf("%03d", r.M49())
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
return regionISO.Elem(int(r))[:2]
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
if r < isoRegionOffset {
|
||||
return "ZZZ"
|
||||
}
|
||||
r -= isoRegionOffset
|
||||
reg := regionISO.Elem(int(r))
|
||||
switch reg[2] {
|
||||
case 0:
|
||||
return altRegionISO3[reg[3]:][:3]
|
||||
case ' ':
|
||||
return "ZZZ"
|
||||
}
|
||||
return reg[0:1] + reg[2:4]
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||
// is not defined for r.
|
||||
func (r Region) M49() int {
|
||||
return int(m49[r])
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r Region) IsPrivateUse() bool {
|
||||
return r.typ()&iso3166UserAssigned != 0
|
||||
}
|
||||
|
||||
type Script uint8
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
|
||||
// is of the format [A-Z][a-z]{3}.
|
||||
func getScriptID(idx tag.Index, s []byte) (Script, error) {
|
||||
i, err := findIndex(idx, s, "Zzzz")
|
||||
return Script(i), err
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
if s == 0 {
|
||||
return "Zzzz"
|
||||
}
|
||||
return script.Elem(int(s))
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return _Qaaa <= s && s <= _Qabx
|
||||
}
|
||||
|
||||
const (
|
||||
maxAltTaglen = len("en-US-POSIX")
|
||||
maxLen = maxAltTaglen
|
||||
)
|
||||
|
||||
var (
|
||||
// grandfatheredMap holds a mapping from legacy and grandfathered tags to
|
||||
// their base language or index to more elaborate tag.
|
||||
grandfatheredMap = map[[maxLen]byte]int16{
|
||||
[maxLen]byte{'a', 'r', 't', '-', 'l', 'o', 'j', 'b', 'a', 'n'}: _jbo, // art-lojban
|
||||
[maxLen]byte{'i', '-', 'a', 'm', 'i'}: _ami, // i-ami
|
||||
[maxLen]byte{'i', '-', 'b', 'n', 'n'}: _bnn, // i-bnn
|
||||
[maxLen]byte{'i', '-', 'h', 'a', 'k'}: _hak, // i-hak
|
||||
[maxLen]byte{'i', '-', 'k', 'l', 'i', 'n', 'g', 'o', 'n'}: _tlh, // i-klingon
|
||||
[maxLen]byte{'i', '-', 'l', 'u', 'x'}: _lb, // i-lux
|
||||
[maxLen]byte{'i', '-', 'n', 'a', 'v', 'a', 'j', 'o'}: _nv, // i-navajo
|
||||
[maxLen]byte{'i', '-', 'p', 'w', 'n'}: _pwn, // i-pwn
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'o'}: _tao, // i-tao
|
||||
[maxLen]byte{'i', '-', 't', 'a', 'y'}: _tay, // i-tay
|
||||
[maxLen]byte{'i', '-', 't', 's', 'u'}: _tsu, // i-tsu
|
||||
[maxLen]byte{'n', 'o', '-', 'b', 'o', 'k'}: _nb, // no-bok
|
||||
[maxLen]byte{'n', 'o', '-', 'n', 'y', 'n'}: _nn, // no-nyn
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'f', 'r'}: _sfb, // sgn-BE-FR
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'b', 'e', '-', 'n', 'l'}: _vgt, // sgn-BE-NL
|
||||
[maxLen]byte{'s', 'g', 'n', '-', 'c', 'h', '-', 'd', 'e'}: _sgg, // sgn-CH-DE
|
||||
[maxLen]byte{'z', 'h', '-', 'g', 'u', 'o', 'y', 'u'}: _cmn, // zh-guoyu
|
||||
[maxLen]byte{'z', 'h', '-', 'h', 'a', 'k', 'k', 'a'}: _hak, // zh-hakka
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n', '-', 'n', 'a', 'n'}: _nan, // zh-min-nan
|
||||
[maxLen]byte{'z', 'h', '-', 'x', 'i', 'a', 'n', 'g'}: _hsn, // zh-xiang
|
||||
|
||||
// Grandfathered tags with no modern replacement will be converted as
|
||||
// follows:
|
||||
[maxLen]byte{'c', 'e', 'l', '-', 'g', 'a', 'u', 'l', 'i', 's', 'h'}: -1, // cel-gaulish
|
||||
[maxLen]byte{'e', 'n', '-', 'g', 'b', '-', 'o', 'e', 'd'}: -2, // en-GB-oed
|
||||
[maxLen]byte{'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'}: -3, // i-default
|
||||
[maxLen]byte{'i', '-', 'e', 'n', 'o', 'c', 'h', 'i', 'a', 'n'}: -4, // i-enochian
|
||||
[maxLen]byte{'i', '-', 'm', 'i', 'n', 'g', 'o'}: -5, // i-mingo
|
||||
[maxLen]byte{'z', 'h', '-', 'm', 'i', 'n'}: -6, // zh-min
|
||||
|
||||
// CLDR-specific tag.
|
||||
[maxLen]byte{'r', 'o', 'o', 't'}: 0, // root
|
||||
[maxLen]byte{'e', 'n', '-', 'u', 's', '-', 'p', 'o', 's', 'i', 'x'}: -7, // en_US_POSIX"
|
||||
}
|
||||
|
||||
altTagIndex = [...]uint8{0, 17, 31, 45, 61, 74, 86, 102}
|
||||
|
||||
altTags = "xtg-x-cel-gaulishen-GB-oxendicten-x-i-defaultund-x-i-enochiansee-x-i-mingonan-x-zh-minen-US-u-va-posix"
|
||||
)
|
||||
|
||||
func grandfathered(s [maxAltTaglen]byte) (t Tag, ok bool) {
|
||||
if v, ok := grandfatheredMap[s]; ok {
|
||||
if v < 0 {
|
||||
return Make(altTags[altTagIndex[-v-1]:altTagIndex[-v]]), true
|
||||
}
|
||||
t.LangID = Language(v)
|
||||
return t, true
|
||||
}
|
||||
return t, false
|
||||
}
|
||||
226
vendor/golang.org/x/text/internal/language/match.go
generated
vendored
Normal file
226
vendor/golang.org/x/text/internal/language/match.go
generated
vendored
Normal file
@@ -0,0 +1,226 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "errors"
|
||||
|
||||
type scriptRegionFlags uint8
|
||||
|
||||
const (
|
||||
isList = 1 << iota
|
||||
scriptInFrom
|
||||
regionInFrom
|
||||
)
|
||||
|
||||
func (t *Tag) setUndefinedLang(id Language) {
|
||||
if t.LangID == 0 {
|
||||
t.LangID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedScript(id Script) {
|
||||
if t.ScriptID == 0 {
|
||||
t.ScriptID = id
|
||||
}
|
||||
}
|
||||
|
||||
func (t *Tag) setUndefinedRegion(id Region) {
|
||||
if t.RegionID == 0 || t.RegionID.Contains(id) {
|
||||
t.RegionID = id
|
||||
}
|
||||
}
|
||||
|
||||
// ErrMissingLikelyTagsData indicates no information was available
|
||||
// to compute likely values of missing tags.
|
||||
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// addLikelySubtags sets subtags to their most likely value, given the locale.
|
||||
// In most cases this means setting fields for unknown values, but in some
|
||||
// cases it may alter a value. It returns an ErrMissingLikelyTagsData error
|
||||
// if the given locale cannot be expanded.
|
||||
func (t Tag) addLikelySubtags() (Tag, error) {
|
||||
id, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
} else if id.equalTags(t) {
|
||||
return t, nil
|
||||
}
|
||||
id.RemakeString()
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// specializeRegion attempts to specialize a group region.
|
||||
func specializeRegion(t *Tag) bool {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if Language(x.lang) == t.LangID && Script(x.script) == t.ScriptID {
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Maximize returns a new tag with missing tags filled in.
|
||||
func (t Tag) Maximize() (Tag, error) {
|
||||
return addTags(t)
|
||||
}
|
||||
|
||||
func addTags(t Tag) (Tag, error) {
|
||||
// We leave private use identifiers alone.
|
||||
if t.IsPrivateUse() {
|
||||
return t, nil
|
||||
}
|
||||
if t.ScriptID != 0 && t.RegionID != 0 {
|
||||
if t.LangID != 0 {
|
||||
// already fully specified
|
||||
specializeRegion(&t)
|
||||
return t, nil
|
||||
}
|
||||
// Search matches for und-script-region. Note that for these cases
|
||||
// region will never be a group so there is no need to check for this.
|
||||
list := likelyRegion[t.RegionID : t.RegionID+1]
|
||||
if x := list[0]; x.flags&isList != 0 {
|
||||
list = likelyRegionList[x.lang : x.lang+uint16(x.script)]
|
||||
}
|
||||
for _, x := range list {
|
||||
// Deviating from the spec. See match_test.go for details.
|
||||
if Script(x.script) == t.ScriptID {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if t.LangID != 0 {
|
||||
// Search matches for lang-script and lang-region, where lang != und.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
list := likelyLangList[x.region : x.region+uint16(x.script)]
|
||||
if t.ScriptID != 0 {
|
||||
for _, x := range list {
|
||||
if Script(x.script) == t.ScriptID && x.flags&scriptInFrom != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
} else if t.RegionID != 0 {
|
||||
count := 0
|
||||
goodScript := true
|
||||
tt := t
|
||||
for _, x := range list {
|
||||
// We visit all entries for which the script was not
|
||||
// defined, including the ones where the region was not
|
||||
// defined. This allows for proper disambiguation within
|
||||
// regions.
|
||||
if x.flags&scriptInFrom == 0 && t.RegionID.Contains(Region(x.region)) {
|
||||
tt.RegionID = Region(x.region)
|
||||
tt.setUndefinedScript(Script(x.script))
|
||||
goodScript = goodScript && tt.ScriptID == Script(x.script)
|
||||
count++
|
||||
}
|
||||
}
|
||||
if count == 1 {
|
||||
return tt, nil
|
||||
}
|
||||
// Even if we fail to find a unique Region, we might have
|
||||
// an unambiguous script.
|
||||
if goodScript {
|
||||
t.ScriptID = tt.ScriptID
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Search matches for und-script.
|
||||
if t.ScriptID != 0 {
|
||||
x := likelyScript[t.ScriptID]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// Search matches for und-region. If und-script-region exists, it would
|
||||
// have been found earlier.
|
||||
if t.RegionID != 0 {
|
||||
if i := regionInclusion[t.RegionID]; i < nRegionGroups {
|
||||
x := likelyRegionGroup[i]
|
||||
if x.region != 0 {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.RegionID = Region(x.region)
|
||||
}
|
||||
} else {
|
||||
x := likelyRegion[t.RegionID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyRegionList[x.lang]
|
||||
}
|
||||
if x.script != 0 && x.flags != scriptInFrom {
|
||||
t.setUndefinedLang(Language(x.lang))
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Search matches for lang.
|
||||
if t.LangID < langNoIndexOffset {
|
||||
x := likelyLang[t.LangID]
|
||||
if x.flags&isList != 0 {
|
||||
x = likelyLangList[x.region]
|
||||
}
|
||||
if x.region != 0 {
|
||||
t.setUndefinedScript(Script(x.script))
|
||||
t.setUndefinedRegion(Region(x.region))
|
||||
}
|
||||
specializeRegion(&t)
|
||||
if t.LangID == 0 {
|
||||
t.LangID = _en // default language
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
return t, ErrMissingLikelyTagsData
|
||||
}
|
||||
|
||||
func (t *Tag) setTagsFrom(id Tag) {
|
||||
t.LangID = id.LangID
|
||||
t.ScriptID = id.ScriptID
|
||||
t.RegionID = id.RegionID
|
||||
}
|
||||
|
||||
// minimize removes the region or script subtags from t such that
|
||||
// t.addLikelySubtags() == t.minimize().addLikelySubtags().
|
||||
func (t Tag) minimize() (Tag, error) {
|
||||
t, err := minimizeTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
t.RemakeString()
|
||||
return t, nil
|
||||
}
|
||||
|
||||
// minimizeTags mimics the behavior of the ICU 51 C implementation.
|
||||
func minimizeTags(t Tag) (Tag, error) {
|
||||
if t.equalTags(Und) {
|
||||
return t, nil
|
||||
}
|
||||
max, err := addTags(t)
|
||||
if err != nil {
|
||||
return t, err
|
||||
}
|
||||
for _, id := range [...]Tag{
|
||||
{LangID: t.LangID},
|
||||
{LangID: t.LangID, RegionID: t.RegionID},
|
||||
{LangID: t.LangID, ScriptID: t.ScriptID},
|
||||
} {
|
||||
if x, err := addTags(id); err == nil && max.equalTags(x) {
|
||||
t.setTagsFrom(id)
|
||||
break
|
||||
}
|
||||
}
|
||||
return t, nil
|
||||
}
|
||||
594
vendor/golang.org/x/text/internal/language/parse.go
generated
vendored
Normal file
594
vendor/golang.org/x/text/internal/language/parse.go
generated
vendored
Normal file
@@ -0,0 +1,594 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/tag"
|
||||
)
|
||||
|
||||
// isAlpha returns true if the byte is not a digit.
|
||||
// b must be an ASCII letter or digit.
|
||||
func isAlpha(b byte) bool {
|
||||
return b > '9'
|
||||
}
|
||||
|
||||
// isAlphaNum returns true if the string contains only ASCII letters or digits.
|
||||
func isAlphaNum(s []byte) bool {
|
||||
for _, c := range s {
|
||||
if !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9') {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ErrSyntax is returned by any of the parsing functions when the
|
||||
// input is not well-formed, according to BCP 47.
|
||||
// TODO: return the position at which the syntax error occurred?
|
||||
var ErrSyntax = errors.New("language: tag is not well-formed")
|
||||
|
||||
// ErrDuplicateKey is returned when a tag contains the same key twice with
|
||||
// different values in the -u section.
|
||||
var ErrDuplicateKey = errors.New("language: different values for same key in -u extension")
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the
|
||||
// input is well-formed but the respective subtag is not recognized
|
||||
// as a valid value.
|
||||
type ValueError struct {
|
||||
v [8]byte
|
||||
}
|
||||
|
||||
// NewValueError creates a new ValueError.
|
||||
func NewValueError(tag []byte) ValueError {
|
||||
var e ValueError
|
||||
copy(e.v[:], tag)
|
||||
return e
|
||||
}
|
||||
|
||||
func (e ValueError) tag() []byte {
|
||||
n := bytes.IndexByte(e.v[:], 0)
|
||||
if n == -1 {
|
||||
n = 8
|
||||
}
|
||||
return e.v[:n]
|
||||
}
|
||||
|
||||
// Error implements the error interface.
|
||||
func (e ValueError) Error() string {
|
||||
return fmt.Sprintf("language: subtag %q is well-formed but unknown", e.tag())
|
||||
}
|
||||
|
||||
// Subtag returns the subtag for which the error occurred.
|
||||
func (e ValueError) Subtag() string {
|
||||
return string(e.tag())
|
||||
}
|
||||
|
||||
// scanner is used to scan BCP 47 tokens, which are separated by _ or -.
|
||||
type scanner struct {
|
||||
b []byte
|
||||
bytes [max99thPercentileSize]byte
|
||||
token []byte
|
||||
start int // start position of the current token
|
||||
end int // end position of the current token
|
||||
next int // next point for scan
|
||||
err error
|
||||
done bool
|
||||
}
|
||||
|
||||
func makeScannerString(s string) scanner {
|
||||
scan := scanner{}
|
||||
if len(s) <= len(scan.bytes) {
|
||||
scan.b = scan.bytes[:copy(scan.bytes[:], s)]
|
||||
} else {
|
||||
scan.b = []byte(s)
|
||||
}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
// makeScanner returns a scanner using b as the input buffer.
|
||||
// b is not copied and may be modified by the scanner routines.
|
||||
func makeScanner(b []byte) scanner {
|
||||
scan := scanner{b: b}
|
||||
scan.init()
|
||||
return scan
|
||||
}
|
||||
|
||||
func (s *scanner) init() {
|
||||
for i, c := range s.b {
|
||||
if c == '_' {
|
||||
s.b[i] = '-'
|
||||
}
|
||||
}
|
||||
s.scan()
|
||||
}
|
||||
|
||||
// restToLower converts the string between start and end to lower case.
|
||||
func (s *scanner) toLower(start, end int) {
|
||||
for i := start; i < end; i++ {
|
||||
c := s.b[i]
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
s.b[i] += 'a' - 'A'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) setError(e error) {
|
||||
if s.err == nil || (e == ErrSyntax && s.err != ErrSyntax) {
|
||||
s.err = e
|
||||
}
|
||||
}
|
||||
|
||||
// resizeRange shrinks or grows the array at position oldStart such that
|
||||
// a new string of size newSize can fit between oldStart and oldEnd.
|
||||
// Sets the scan point to after the resized range.
|
||||
func (s *scanner) resizeRange(oldStart, oldEnd, newSize int) {
|
||||
s.start = oldStart
|
||||
if end := oldStart + newSize; end != oldEnd {
|
||||
diff := end - oldEnd
|
||||
if end < cap(s.b) {
|
||||
b := make([]byte, len(s.b)+diff)
|
||||
copy(b, s.b[:oldStart])
|
||||
copy(b[end:], s.b[oldEnd:])
|
||||
s.b = b
|
||||
} else {
|
||||
s.b = append(s.b[end:], s.b[oldEnd:]...)
|
||||
}
|
||||
s.next = end + (s.next - s.end)
|
||||
s.end = end
|
||||
}
|
||||
}
|
||||
|
||||
// replace replaces the current token with repl.
|
||||
func (s *scanner) replace(repl string) {
|
||||
s.resizeRange(s.start, s.end, len(repl))
|
||||
copy(s.b[s.start:], repl)
|
||||
}
|
||||
|
||||
// gobble removes the current token from the input.
|
||||
// Caller must call scan after calling gobble.
|
||||
func (s *scanner) gobble(e error) {
|
||||
s.setError(e)
|
||||
if s.start == 0 {
|
||||
s.b = s.b[:+copy(s.b, s.b[s.next:])]
|
||||
s.end = 0
|
||||
} else {
|
||||
s.b = s.b[:s.start-1+copy(s.b[s.start-1:], s.b[s.end:])]
|
||||
s.end = s.start - 1
|
||||
}
|
||||
s.next = s.start
|
||||
}
|
||||
|
||||
// deleteRange removes the given range from s.b before the current token.
|
||||
func (s *scanner) deleteRange(start, end int) {
|
||||
s.b = s.b[:start+copy(s.b[start:], s.b[end:])]
|
||||
diff := end - start
|
||||
s.next -= diff
|
||||
s.start -= diff
|
||||
s.end -= diff
|
||||
}
|
||||
|
||||
// scan parses the next token of a BCP 47 string. Tokens that are larger
|
||||
// than 8 characters or include non-alphanumeric characters result in an error
|
||||
// and are gobbled and removed from the output.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) scan() (end int) {
|
||||
end = s.end
|
||||
s.token = nil
|
||||
for s.start = s.next; s.next < len(s.b); {
|
||||
i := bytes.IndexByte(s.b[s.next:], '-')
|
||||
if i == -1 {
|
||||
s.end = len(s.b)
|
||||
s.next = len(s.b)
|
||||
i = s.end - s.start
|
||||
} else {
|
||||
s.end = s.next + i
|
||||
s.next = s.end + 1
|
||||
}
|
||||
token := s.b[s.start:s.end]
|
||||
if i < 1 || i > 8 || !isAlphaNum(token) {
|
||||
s.gobble(ErrSyntax)
|
||||
continue
|
||||
}
|
||||
s.token = token
|
||||
return end
|
||||
}
|
||||
if n := len(s.b); n > 0 && s.b[n-1] == '-' {
|
||||
s.setError(ErrSyntax)
|
||||
s.b = s.b[:len(s.b)-1]
|
||||
}
|
||||
s.done = true
|
||||
return end
|
||||
}
|
||||
|
||||
// acceptMinSize parses multiple tokens of the given size or greater.
|
||||
// It returns the end position of the last token consumed.
|
||||
func (s *scanner) acceptMinSize(min int) (end int) {
|
||||
end = s.end
|
||||
s.scan()
|
||||
for ; len(s.token) >= min; s.scan() {
|
||||
end = s.end
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
// TODO: consider supporting old-style locale key-value pairs.
|
||||
if s == "" {
|
||||
return Und, ErrSyntax
|
||||
}
|
||||
if len(s) <= maxAltTaglen {
|
||||
b := [maxAltTaglen]byte{}
|
||||
for i, c := range s {
|
||||
// Generating invalid UTF-8 is okay as it won't match.
|
||||
if 'A' <= c && c <= 'Z' {
|
||||
c += 'a' - 'A'
|
||||
} else if c == '_' {
|
||||
c = '-'
|
||||
}
|
||||
b[i] = byte(c)
|
||||
}
|
||||
if t, ok := grandfathered(b); ok {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
scan := makeScannerString(s)
|
||||
return parse(&scan, s)
|
||||
}
|
||||
|
||||
func parse(scan *scanner, s string) (t Tag, err error) {
|
||||
t = Und
|
||||
var end int
|
||||
if n := len(scan.token); n <= 1 {
|
||||
scan.toLower(0, len(scan.b))
|
||||
if n == 0 || scan.token[0] != 'x' {
|
||||
return t, ErrSyntax
|
||||
}
|
||||
end = parseExtensions(scan)
|
||||
} else if n >= 4 {
|
||||
return Und, ErrSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
} else if end < len(scan.b) {
|
||||
scan.setError(ErrSyntax)
|
||||
scan.b = scan.b[:end]
|
||||
}
|
||||
}
|
||||
if int(t.pVariant) < len(scan.b) {
|
||||
if end < len(s) {
|
||||
s = s[:end]
|
||||
}
|
||||
if len(s) > 0 && tag.Compare(s, scan.b) == 0 {
|
||||
t.str = s
|
||||
} else {
|
||||
t.str = string(scan.b)
|
||||
}
|
||||
} else {
|
||||
t.pVariant, t.pExt = 0, 0
|
||||
}
|
||||
return t, scan.err
|
||||
}
|
||||
|
||||
// parseTag parses language, script, region and variants.
|
||||
// It returns a Tag and the end position in the input that was parsed.
|
||||
func parseTag(scan *scanner) (t Tag, end int) {
|
||||
var e error
|
||||
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||
t.LangID, e = getLangID(scan.token)
|
||||
scan.setError(e)
|
||||
scan.replace(t.LangID.String())
|
||||
langStart := scan.start
|
||||
end = scan.scan()
|
||||
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||
// to a tag of the form <extlang>.
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.LangID = lang
|
||||
copy(scan.b[langStart:], lang.String())
|
||||
scan.b[langStart+3] = '-'
|
||||
scan.start = langStart + 4
|
||||
}
|
||||
scan.gobble(e)
|
||||
end = scan.scan()
|
||||
}
|
||||
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||
t.ScriptID, e = getScriptID(script, scan.token)
|
||||
if t.ScriptID == 0 {
|
||||
scan.gobble(e)
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
if n := len(scan.token); n >= 2 && n <= 3 {
|
||||
t.RegionID, e = getRegionID(scan.token)
|
||||
if t.RegionID == 0 {
|
||||
scan.gobble(e)
|
||||
} else {
|
||||
scan.replace(t.RegionID.String())
|
||||
}
|
||||
end = scan.scan()
|
||||
}
|
||||
scan.toLower(scan.start, len(scan.b))
|
||||
t.pVariant = byte(end)
|
||||
end = parseVariants(scan, end, t)
|
||||
t.pExt = uint16(end)
|
||||
return t, end
|
||||
}
|
||||
|
||||
var separator = []byte{'-'}
|
||||
|
||||
// parseVariants scans tokens as long as each token is a valid variant string.
|
||||
// Duplicate variants are removed.
|
||||
func parseVariants(scan *scanner, end int, t Tag) int {
|
||||
start := scan.start
|
||||
varIDBuf := [4]uint8{}
|
||||
variantBuf := [4][]byte{}
|
||||
varID := varIDBuf[:0]
|
||||
variant := variantBuf[:0]
|
||||
last := -1
|
||||
needSort := false
|
||||
for ; len(scan.token) >= 4; scan.scan() {
|
||||
// TODO: measure the impact of needing this conversion and redesign
|
||||
// the data structure if there is an issue.
|
||||
v, ok := variantIndex[string(scan.token)]
|
||||
if !ok {
|
||||
// unknown variant
|
||||
// TODO: allow user-defined variants?
|
||||
scan.gobble(NewValueError(scan.token))
|
||||
continue
|
||||
}
|
||||
varID = append(varID, v)
|
||||
variant = append(variant, scan.token)
|
||||
if !needSort {
|
||||
if last < int(v) {
|
||||
last = int(v)
|
||||
} else {
|
||||
needSort = true
|
||||
// There is no legal combinations of more than 7 variants
|
||||
// (and this is by no means a useful sequence).
|
||||
const maxVariants = 8
|
||||
if len(varID) > maxVariants {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
end = scan.end
|
||||
}
|
||||
if needSort {
|
||||
sort.Sort(variantsSort{varID, variant})
|
||||
k, l := 0, -1
|
||||
for i, v := range varID {
|
||||
w := int(v)
|
||||
if l == w {
|
||||
// Remove duplicates.
|
||||
continue
|
||||
}
|
||||
varID[k] = varID[i]
|
||||
variant[k] = variant[i]
|
||||
k++
|
||||
l = w
|
||||
}
|
||||
if str := bytes.Join(variant[:k], separator); len(str) == 0 {
|
||||
end = start - 1
|
||||
} else {
|
||||
scan.resizeRange(start, end, len(str))
|
||||
copy(scan.b[scan.start:], str)
|
||||
end = scan.end
|
||||
}
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
type variantsSort struct {
|
||||
i []uint8
|
||||
v [][]byte
|
||||
}
|
||||
|
||||
func (s variantsSort) Len() int {
|
||||
return len(s.i)
|
||||
}
|
||||
|
||||
func (s variantsSort) Swap(i, j int) {
|
||||
s.i[i], s.i[j] = s.i[j], s.i[i]
|
||||
s.v[i], s.v[j] = s.v[j], s.v[i]
|
||||
}
|
||||
|
||||
func (s variantsSort) Less(i, j int) bool {
|
||||
return s.i[i] < s.i[j]
|
||||
}
|
||||
|
||||
type bytesSort struct {
|
||||
b [][]byte
|
||||
n int // first n bytes to compare
|
||||
}
|
||||
|
||||
func (b bytesSort) Len() int {
|
||||
return len(b.b)
|
||||
}
|
||||
|
||||
func (b bytesSort) Swap(i, j int) {
|
||||
b.b[i], b.b[j] = b.b[j], b.b[i]
|
||||
}
|
||||
|
||||
func (b bytesSort) Less(i, j int) bool {
|
||||
for k := 0; k < b.n; k++ {
|
||||
if b.b[i][k] == b.b[j][k] {
|
||||
continue
|
||||
}
|
||||
return b.b[i][k] < b.b[j][k]
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// parseExtensions parses and normalizes the extensions in the buffer.
|
||||
// It returns the last position of scan.b that is part of any extension.
|
||||
// It also trims scan.b to remove excess parts accordingly.
|
||||
func parseExtensions(scan *scanner) int {
|
||||
start := scan.start
|
||||
exts := [][]byte{}
|
||||
private := []byte{}
|
||||
end := scan.end
|
||||
for len(scan.token) == 1 {
|
||||
extStart := scan.start
|
||||
ext := scan.token[0]
|
||||
end = parseExtension(scan)
|
||||
extension := scan.b[extStart:end]
|
||||
if len(extension) < 3 || (ext != 'x' && len(extension) < 4) {
|
||||
scan.setError(ErrSyntax)
|
||||
end = extStart
|
||||
continue
|
||||
} else if start == extStart && (ext == 'x' || scan.start == len(scan.b)) {
|
||||
scan.b = scan.b[:end]
|
||||
return end
|
||||
} else if ext == 'x' {
|
||||
private = extension
|
||||
break
|
||||
}
|
||||
exts = append(exts, extension)
|
||||
}
|
||||
sort.Sort(bytesSort{exts, 1})
|
||||
if len(private) > 0 {
|
||||
exts = append(exts, private)
|
||||
}
|
||||
scan.b = scan.b[:start]
|
||||
if len(exts) > 0 {
|
||||
scan.b = append(scan.b, bytes.Join(exts, separator)...)
|
||||
} else if start > 0 {
|
||||
// Strip trailing '-'.
|
||||
scan.b = scan.b[:start-1]
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// parseExtension parses a single extension and returns the position of
|
||||
// the extension end.
|
||||
func parseExtension(scan *scanner) int {
|
||||
start, end := scan.start, scan.end
|
||||
switch scan.token[0] {
|
||||
case 'u':
|
||||
attrStart := end
|
||||
scan.scan()
|
||||
for last := []byte{}; len(scan.token) > 2; scan.scan() {
|
||||
if bytes.Compare(scan.token, last) != -1 {
|
||||
// Attributes are unsorted. Start over from scratch.
|
||||
p := attrStart + 1
|
||||
scan.next = p
|
||||
attrs := [][]byte{}
|
||||
for scan.scan(); len(scan.token) > 2; scan.scan() {
|
||||
attrs = append(attrs, scan.token)
|
||||
end = scan.end
|
||||
}
|
||||
sort.Sort(bytesSort{attrs, 3})
|
||||
copy(scan.b[p:], bytes.Join(attrs, separator))
|
||||
break
|
||||
}
|
||||
last = scan.token
|
||||
end = scan.end
|
||||
}
|
||||
var last, key []byte
|
||||
for attrEnd := end; len(scan.token) == 2; last = key {
|
||||
key = scan.token
|
||||
keyEnd := scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
// TODO: check key value validity
|
||||
if keyEnd == end || bytes.Compare(key, last) != 1 {
|
||||
// We have an invalid key or the keys are not sorted.
|
||||
// Start scanning keys from scratch and reorder.
|
||||
p := attrEnd + 1
|
||||
scan.next = p
|
||||
keys := [][]byte{}
|
||||
for scan.scan(); len(scan.token) == 2; {
|
||||
keyStart, keyEnd := scan.start, scan.end
|
||||
end = scan.acceptMinSize(3)
|
||||
if keyEnd != end {
|
||||
keys = append(keys, scan.b[keyStart:end])
|
||||
} else {
|
||||
scan.setError(ErrSyntax)
|
||||
end = keyStart
|
||||
}
|
||||
}
|
||||
sort.Stable(bytesSort{keys, 2})
|
||||
if n := len(keys); n > 0 {
|
||||
k := 0
|
||||
for i := 1; i < n; i++ {
|
||||
if !bytes.Equal(keys[k][:2], keys[i][:2]) {
|
||||
k++
|
||||
keys[k] = keys[i]
|
||||
} else if !bytes.Equal(keys[k], keys[i]) {
|
||||
scan.setError(ErrDuplicateKey)
|
||||
}
|
||||
}
|
||||
keys = keys[:k+1]
|
||||
}
|
||||
reordered := bytes.Join(keys, separator)
|
||||
if e := p + len(reordered); e < end {
|
||||
scan.deleteRange(e, end)
|
||||
end = e
|
||||
}
|
||||
copy(scan.b[p:], reordered)
|
||||
break
|
||||
}
|
||||
}
|
||||
case 't':
|
||||
scan.scan()
|
||||
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||
_, end = parseTag(scan)
|
||||
scan.toLower(start, end)
|
||||
}
|
||||
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||
end = scan.acceptMinSize(3)
|
||||
}
|
||||
case 'x':
|
||||
end = scan.acceptMinSize(1)
|
||||
default:
|
||||
end = scan.acceptMinSize(2)
|
||||
}
|
||||
return end
|
||||
}
|
||||
|
||||
// getExtension returns the name, body and end position of the extension.
|
||||
func getExtension(s string, p int) (end int, ext string) {
|
||||
if s[p] == '-' {
|
||||
p++
|
||||
}
|
||||
if s[p] == 'x' {
|
||||
return len(s), s[p:]
|
||||
}
|
||||
end = nextExtension(s, p)
|
||||
return end, s[p:end]
|
||||
}
|
||||
|
||||
// nextExtension finds the next extension within the string, searching
|
||||
// for the -<char>- pattern from position p.
|
||||
// In the fast majority of cases, language tags will have at most
|
||||
// one extension and extensions tend to be small.
|
||||
func nextExtension(s string, p int) int {
|
||||
for n := len(s) - 3; p < n; {
|
||||
if s[p] == '-' {
|
||||
if s[p+2] == '-' {
|
||||
return p
|
||||
}
|
||||
p += 3
|
||||
} else {
|
||||
p++
|
||||
}
|
||||
}
|
||||
return len(s)
|
||||
}
|
||||
3431
vendor/golang.org/x/text/internal/language/tables.go
generated
vendored
Normal file
3431
vendor/golang.org/x/text/internal/language/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
48
vendor/golang.org/x/text/internal/language/tags.go
generated
vendored
Normal file
48
vendor/golang.org/x/text/internal/language/tags.go
generated
vendored
Normal file
@@ -0,0 +1,48 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Language {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
// Und is the root language.
|
||||
var Und Tag
|
||||
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
Normal file
100
vendor/golang.org/x/text/internal/tag/tag.go
generated
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package tag contains functionality handling tags and related data.
|
||||
package tag // import "golang.org/x/text/internal/tag"
|
||||
|
||||
import "sort"
|
||||
|
||||
// An Index converts tags to a compact numeric value.
|
||||
//
|
||||
// All elements are of size 4. Tags may be up to 4 bytes long. Excess bytes can
|
||||
// be used to store additional information about the tag.
|
||||
type Index string
|
||||
|
||||
// Elem returns the element data at the given index.
|
||||
func (s Index) Elem(x int) string {
|
||||
return string(s[x*4 : x*4+4])
|
||||
}
|
||||
|
||||
// Index reports the index of the given key or -1 if it could not be found.
|
||||
// Only the first len(key) bytes from the start of the 4-byte entries will be
|
||||
// considered for the search and the first match in Index will be returned.
|
||||
func (s Index) Index(key []byte) int {
|
||||
n := len(key)
|
||||
// search the index of the first entry with an equal or higher value than
|
||||
// key in s.
|
||||
index := sort.Search(len(s)/4, func(i int) bool {
|
||||
return cmp(s[i*4:i*4+n], key) != -1
|
||||
})
|
||||
i := index * 4
|
||||
if cmp(s[i:i+len(key)], key) != 0 {
|
||||
return -1
|
||||
}
|
||||
return index
|
||||
}
|
||||
|
||||
// Next finds the next occurrence of key after index x, which must have been
|
||||
// obtained from a call to Index using the same key. It returns x+1 or -1.
|
||||
func (s Index) Next(key []byte, x int) int {
|
||||
if x++; x*4 < len(s) && cmp(s[x*4:x*4+len(key)], key) == 0 {
|
||||
return x
|
||||
}
|
||||
return -1
|
||||
}
|
||||
|
||||
// cmp returns an integer comparing a and b lexicographically.
|
||||
func cmp(a Index, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
for i, c := range b[:n] {
|
||||
switch {
|
||||
case a[i] > c:
|
||||
return 1
|
||||
case a[i] < c:
|
||||
return -1
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case len(a) < len(b):
|
||||
return -1
|
||||
case len(a) > len(b):
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Compare returns an integer comparing a and b lexicographically.
|
||||
func Compare(a string, b []byte) int {
|
||||
return cmp(Index(a), b)
|
||||
}
|
||||
|
||||
// FixCase reformats b to the same pattern of cases as form.
|
||||
// If returns false if string b is malformed.
|
||||
func FixCase(form string, b []byte) bool {
|
||||
if len(form) != len(b) {
|
||||
return false
|
||||
}
|
||||
for i, c := range b {
|
||||
if form[i] <= 'Z' {
|
||||
if c >= 'a' {
|
||||
c -= 'z' - 'Z'
|
||||
}
|
||||
if c < 'A' || 'Z' < c {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
if c <= 'Z' {
|
||||
c += 'z' - 'Z'
|
||||
}
|
||||
if c < 'a' || 'z' < c {
|
||||
return false
|
||||
}
|
||||
}
|
||||
b[i] = c
|
||||
}
|
||||
return true
|
||||
}
|
||||
87
vendor/golang.org/x/text/internal/utf8internal/utf8internal.go
generated
vendored
Normal file
87
vendor/golang.org/x/text/internal/utf8internal/utf8internal.go
generated
vendored
Normal file
@@ -0,0 +1,87 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package utf8internal contains low-level utf8-related constants, tables, etc.
|
||||
// that are used internally by the text package.
|
||||
package utf8internal
|
||||
|
||||
// The default lowest and highest continuation byte.
|
||||
const (
|
||||
LoCB = 0x80 // 1000 0000
|
||||
HiCB = 0xBF // 1011 1111
|
||||
)
|
||||
|
||||
// Constants related to getting information of first bytes of UTF-8 sequences.
|
||||
const (
|
||||
// ASCII identifies a UTF-8 byte as ASCII.
|
||||
ASCII = as
|
||||
|
||||
// FirstInvalid indicates a byte is invalid as a first byte of a UTF-8
|
||||
// sequence.
|
||||
FirstInvalid = xx
|
||||
|
||||
// SizeMask is a mask for the size bits. Use use x&SizeMask to get the size.
|
||||
SizeMask = 7
|
||||
|
||||
// AcceptShift is the right-shift count for the first byte info byte to get
|
||||
// the index into the AcceptRanges table. See AcceptRanges.
|
||||
AcceptShift = 4
|
||||
|
||||
// The names of these constants are chosen to give nice alignment in the
|
||||
// table below. The first nibble is an index into acceptRanges or F for
|
||||
// special one-byte cases. The second nibble is the Rune length or the
|
||||
// Status for the special one-byte case.
|
||||
xx = 0xF1 // invalid: size 1
|
||||
as = 0xF0 // ASCII: size 1
|
||||
s1 = 0x02 // accept 0, size 2
|
||||
s2 = 0x13 // accept 1, size 3
|
||||
s3 = 0x03 // accept 0, size 3
|
||||
s4 = 0x23 // accept 2, size 3
|
||||
s5 = 0x34 // accept 3, size 4
|
||||
s6 = 0x04 // accept 0, size 4
|
||||
s7 = 0x44 // accept 4, size 4
|
||||
)
|
||||
|
||||
// First is information about the first byte in a UTF-8 sequence.
|
||||
var First = [256]uint8{
|
||||
// 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
|
||||
as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
|
||||
// 1 2 3 4 5 6 7 8 9 A B C D E F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
|
||||
xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
|
||||
xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
|
||||
s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
|
||||
s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
|
||||
s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
|
||||
}
|
||||
|
||||
// AcceptRange gives the range of valid values for the second byte in a UTF-8
|
||||
// sequence for any value for First that is not ASCII or FirstInvalid.
|
||||
type AcceptRange struct {
|
||||
Lo uint8 // lowest value for second byte.
|
||||
Hi uint8 // highest value for second byte.
|
||||
}
|
||||
|
||||
// AcceptRanges is a slice of AcceptRange values. For a given byte sequence b
|
||||
//
|
||||
// AcceptRanges[First[b[0]]>>AcceptShift]
|
||||
//
|
||||
// will give the value of AcceptRange for the multi-byte UTF-8 sequence starting
|
||||
// at b[0].
|
||||
var AcceptRanges = [...]AcceptRange{
|
||||
0: {LoCB, HiCB},
|
||||
1: {0xA0, HiCB},
|
||||
2: {LoCB, 0x9F},
|
||||
3: {0x90, HiCB},
|
||||
4: {LoCB, 0x8F},
|
||||
}
|
||||
187
vendor/golang.org/x/text/language/coverage.go
generated
vendored
Normal file
187
vendor/golang.org/x/text/language/coverage.go
generated
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// The Coverage interface is used to define the level of coverage of an
|
||||
// internationalization service. Note that not all types are supported by all
|
||||
// services. As lists may be generated on the fly, it is recommended that users
|
||||
// of a Coverage cache the results.
|
||||
type Coverage interface {
|
||||
// Tags returns the list of supported tags.
|
||||
Tags() []Tag
|
||||
|
||||
// BaseLanguages returns the list of supported base languages.
|
||||
BaseLanguages() []Base
|
||||
|
||||
// Scripts returns the list of supported scripts.
|
||||
Scripts() []Script
|
||||
|
||||
// Regions returns the list of supported regions.
|
||||
Regions() []Region
|
||||
}
|
||||
|
||||
var (
|
||||
// Supported defines a Coverage that lists all supported subtags. Tags
|
||||
// always returns nil.
|
||||
Supported Coverage = allSubtags{}
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - Support Variants, numbering systems.
|
||||
// - CLDR coverage levels.
|
||||
// - Set of common tags defined in this package.
|
||||
|
||||
type allSubtags struct{}
|
||||
|
||||
// Regions returns the list of supported regions. As all regions are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" region is not returned.
|
||||
func (s allSubtags) Regions() []Region {
|
||||
reg := make([]Region, language.NumRegions)
|
||||
for i := range reg {
|
||||
reg[i] = Region{language.Region(i + 1)}
|
||||
}
|
||||
return reg
|
||||
}
|
||||
|
||||
// Scripts returns the list of supported scripts. As all scripts are in a
|
||||
// consecutive range, it simply returns a slice of numbers in increasing order.
|
||||
// The "undefined" script is not returned.
|
||||
func (s allSubtags) Scripts() []Script {
|
||||
scr := make([]Script, language.NumScripts)
|
||||
for i := range scr {
|
||||
scr[i] = Script{language.Script(i + 1)}
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// BaseLanguages returns the list of all supported base languages. It generates
|
||||
// the list by traversing the internal structures.
|
||||
func (s allSubtags) BaseLanguages() []Base {
|
||||
bs := language.BaseLanguages()
|
||||
base := make([]Base, len(bs))
|
||||
for i, b := range bs {
|
||||
base[i] = Base{b}
|
||||
}
|
||||
return base
|
||||
}
|
||||
|
||||
// Tags always returns nil.
|
||||
func (s allSubtags) Tags() []Tag {
|
||||
return nil
|
||||
}
|
||||
|
||||
// coverage is used by NewCoverage which is used as a convenient way for
|
||||
// creating Coverage implementations for partially defined data. Very often a
|
||||
// package will only need to define a subset of slices. coverage provides a
|
||||
// convenient way to do this. Moreover, packages using NewCoverage, instead of
|
||||
// their own implementation, will not break if later new slice types are added.
|
||||
type coverage struct {
|
||||
tags func() []Tag
|
||||
bases func() []Base
|
||||
scripts func() []Script
|
||||
regions func() []Region
|
||||
}
|
||||
|
||||
func (s *coverage) Tags() []Tag {
|
||||
if s.tags == nil {
|
||||
return nil
|
||||
}
|
||||
return s.tags()
|
||||
}
|
||||
|
||||
// bases implements sort.Interface and is used to sort base languages.
|
||||
type bases []Base
|
||||
|
||||
func (b bases) Len() int {
|
||||
return len(b)
|
||||
}
|
||||
|
||||
func (b bases) Swap(i, j int) {
|
||||
b[i], b[j] = b[j], b[i]
|
||||
}
|
||||
|
||||
func (b bases) Less(i, j int) bool {
|
||||
return b[i].langID < b[j].langID
|
||||
}
|
||||
|
||||
// BaseLanguages returns the result from calling s.bases if it is specified or
|
||||
// otherwise derives the set of supported base languages from tags.
|
||||
func (s *coverage) BaseLanguages() []Base {
|
||||
if s.bases == nil {
|
||||
tags := s.Tags()
|
||||
if len(tags) == 0 {
|
||||
return nil
|
||||
}
|
||||
a := make([]Base, len(tags))
|
||||
for i, t := range tags {
|
||||
a[i] = Base{language.Language(t.lang())}
|
||||
}
|
||||
sort.Sort(bases(a))
|
||||
k := 0
|
||||
for i := 1; i < len(a); i++ {
|
||||
if a[k] != a[i] {
|
||||
k++
|
||||
a[k] = a[i]
|
||||
}
|
||||
}
|
||||
return a[:k+1]
|
||||
}
|
||||
return s.bases()
|
||||
}
|
||||
|
||||
func (s *coverage) Scripts() []Script {
|
||||
if s.scripts == nil {
|
||||
return nil
|
||||
}
|
||||
return s.scripts()
|
||||
}
|
||||
|
||||
func (s *coverage) Regions() []Region {
|
||||
if s.regions == nil {
|
||||
return nil
|
||||
}
|
||||
return s.regions()
|
||||
}
|
||||
|
||||
// NewCoverage returns a Coverage for the given lists. It is typically used by
|
||||
// packages providing internationalization services to define their level of
|
||||
// coverage. A list may be of type []T or func() []T, where T is either Tag,
|
||||
// Base, Script or Region. The returned Coverage derives the value for Bases
|
||||
// from Tags if no func or slice for []Base is specified. For other unspecified
|
||||
// types the returned Coverage will return nil for the respective methods.
|
||||
func NewCoverage(list ...interface{}) Coverage {
|
||||
s := &coverage{}
|
||||
for _, x := range list {
|
||||
switch v := x.(type) {
|
||||
case func() []Base:
|
||||
s.bases = v
|
||||
case func() []Script:
|
||||
s.scripts = v
|
||||
case func() []Region:
|
||||
s.regions = v
|
||||
case func() []Tag:
|
||||
s.tags = v
|
||||
case []Base:
|
||||
s.bases = func() []Base { return v }
|
||||
case []Script:
|
||||
s.scripts = func() []Script { return v }
|
||||
case []Region:
|
||||
s.regions = func() []Region { return v }
|
||||
case []Tag:
|
||||
s.tags = func() []Tag { return v }
|
||||
default:
|
||||
panic(fmt.Sprintf("language: unsupported set type %T", v))
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
102
vendor/golang.org/x/text/language/doc.go
generated
vendored
Normal file
102
vendor/golang.org/x/text/language/doc.go
generated
vendored
Normal file
@@ -0,0 +1,102 @@
|
||||
// Copyright 2017 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package language implements BCP 47 language tags and related functionality.
|
||||
//
|
||||
// The most important function of package language is to match a list of
|
||||
// user-preferred languages to a list of supported languages.
|
||||
// It alleviates the developer of dealing with the complexity of this process
|
||||
// and provides the user with the best experience
|
||||
// (see https://blog.golang.org/matchlang).
|
||||
//
|
||||
//
|
||||
// Matching preferred against supported languages
|
||||
//
|
||||
// A Matcher for an application that supports English, Australian English,
|
||||
// Danish, and standard Mandarin can be created as follows:
|
||||
//
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
//
|
||||
// This list of supported languages is typically implied by the languages for
|
||||
// which there exists translations of the user interface.
|
||||
//
|
||||
// User-preferred languages usually come as a comma-separated list of BCP 47
|
||||
// language tags.
|
||||
// The MatchString finds best matches for such strings:
|
||||
//
|
||||
// handler(w http.ResponseWriter, r *http.Request) {
|
||||
// lang, _ := r.Cookie("lang")
|
||||
// accept := r.Header.Get("Accept-Language")
|
||||
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||
//
|
||||
// // tag should now be used for the initialization of any
|
||||
// // locale-specific service.
|
||||
// }
|
||||
//
|
||||
// The Matcher's Match method can be used to match Tags directly.
|
||||
//
|
||||
// Matchers are aware of the intricacies of equivalence between languages, such
|
||||
// as deprecated subtags, legacy tags, macro languages, mutual
|
||||
// intelligibility between scripts and languages, and transparently passing
|
||||
// BCP 47 user configuration.
|
||||
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
|
||||
// and will know that Cantonese ("yue") is a good match for "zh-HK".
|
||||
//
|
||||
//
|
||||
// Using match results
|
||||
//
|
||||
// To guarantee a consistent user experience to the user it is important to
|
||||
// use the same language tag for the selection of any locale-specific services.
|
||||
// For example, it is utterly confusing to substitute spelled-out numbers
|
||||
// or dates in one language in text of another language.
|
||||
// More subtly confusing is using the wrong sorting order or casing
|
||||
// algorithm for a certain language.
|
||||
//
|
||||
// All the packages in x/text that provide locale-specific services
|
||||
// (e.g. collate, cases) should be initialized with the tag that was
|
||||
// obtained at the start of an interaction with the user.
|
||||
//
|
||||
// Note that Tag that is returned by Match and MatchString may differ from any
|
||||
// of the supported languages, as it may contain carried over settings from
|
||||
// the user tags.
|
||||
// This may be inconvenient when your application has some additional
|
||||
// locale-specific data for your supported languages.
|
||||
// Match and MatchString both return the index of the matched supported tag
|
||||
// to simplify associating such data with the matched tag.
|
||||
//
|
||||
//
|
||||
// Canonicalization
|
||||
//
|
||||
// If one uses the Matcher to compare languages one does not need to
|
||||
// worry about canonicalization.
|
||||
//
|
||||
// The meaning of a Tag varies per application. The language package
|
||||
// therefore delays canonicalization and preserves information as much
|
||||
// as possible. The Matcher, however, will always take into account that
|
||||
// two different tags may represent the same language.
|
||||
//
|
||||
// By default, only legacy and deprecated tags are converted into their
|
||||
// canonical equivalent. All other information is preserved. This approach makes
|
||||
// the confidence scores more accurate and allows matchers to distinguish
|
||||
// between variants that are otherwise lost.
|
||||
//
|
||||
// As a consequence, two tags that should be treated as identical according to
|
||||
// BCP 47 or CLDR, like "en-Latn" and "en", will be represented differently. The
|
||||
// Matcher handles such distinctions, though, and is aware of the
|
||||
// equivalence relations. The CanonType type can be used to alter the
|
||||
// canonicalization form.
|
||||
//
|
||||
// References
|
||||
//
|
||||
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
|
||||
//
|
||||
package language // import "golang.org/x/text/language"
|
||||
|
||||
// TODO: explanation on how to match languages for your own locale-specific
|
||||
// service.
|
||||
305
vendor/golang.org/x/text/language/gen.go
generated
vendored
Normal file
305
vendor/golang.org/x/text/language/gen.go
generated
vendored
Normal file
@@ -0,0 +1,305 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// Language tag table generator.
|
||||
// Data read from the web.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/unicode/cldr"
|
||||
)
|
||||
|
||||
var (
|
||||
test = flag.Bool("test",
|
||||
false,
|
||||
"test existing tables; can be used to compare web data with package data.")
|
||||
outputFile = flag.String("output",
|
||||
"tables.go",
|
||||
"output file for generated tables")
|
||||
)
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "language")
|
||||
|
||||
b := newBuilder(w)
|
||||
gen.WriteCLDRVersion(w)
|
||||
|
||||
b.writeConstants()
|
||||
b.writeMatchData()
|
||||
}
|
||||
|
||||
type builder struct {
|
||||
w *gen.CodeWriter
|
||||
hw io.Writer // MultiWriter for w and w.Hash
|
||||
data *cldr.CLDR
|
||||
supp *cldr.SupplementalData
|
||||
}
|
||||
|
||||
func (b *builder) langIndex(s string) uint16 {
|
||||
return uint16(language.MustParseBase(s))
|
||||
}
|
||||
|
||||
func (b *builder) regionIndex(s string) int {
|
||||
return int(language.MustParseRegion(s))
|
||||
}
|
||||
|
||||
func (b *builder) scriptIndex(s string) int {
|
||||
return int(language.MustParseScript(s))
|
||||
}
|
||||
|
||||
func newBuilder(w *gen.CodeWriter) *builder {
|
||||
r := gen.OpenCLDRCoreZip()
|
||||
defer r.Close()
|
||||
d := &cldr.Decoder{}
|
||||
data, err := d.DecodeZip(r)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
b := builder{
|
||||
w: w,
|
||||
hw: io.MultiWriter(w, w.Hash),
|
||||
data: data,
|
||||
supp: data.Supplemental(),
|
||||
}
|
||||
return &b
|
||||
}
|
||||
|
||||
// writeConsts computes f(v) for all v in values and writes the results
|
||||
// as constants named _v to a single constant block.
|
||||
func (b *builder) writeConsts(f func(string) int, values ...string) {
|
||||
fmt.Fprintln(b.w, "const (")
|
||||
for _, v := range values {
|
||||
fmt.Fprintf(b.w, "\t_%s = %v\n", v, f(v))
|
||||
}
|
||||
fmt.Fprintln(b.w, ")")
|
||||
}
|
||||
|
||||
// TODO: region inclusion data will probably not be use used in future matchers.
|
||||
|
||||
var langConsts = []string{
|
||||
"de", "en", "fr", "it", "mo", "no", "nb", "pt", "sh", "mul", "und",
|
||||
}
|
||||
|
||||
var scriptConsts = []string{
|
||||
"Latn", "Hani", "Hans", "Hant", "Qaaa", "Qaai", "Qabx", "Zinh", "Zyyy",
|
||||
"Zzzz",
|
||||
}
|
||||
|
||||
var regionConsts = []string{
|
||||
"001", "419", "BR", "CA", "ES", "GB", "MD", "PT", "UK", "US",
|
||||
"ZZ", "XA", "XC", "XK", // Unofficial tag for Kosovo.
|
||||
}
|
||||
|
||||
func (b *builder) writeConstants() {
|
||||
b.writeConsts(func(s string) int { return int(b.langIndex(s)) }, langConsts...)
|
||||
b.writeConsts(b.regionIndex, regionConsts...)
|
||||
b.writeConsts(b.scriptIndex, scriptConsts...)
|
||||
}
|
||||
|
||||
type mutualIntelligibility struct {
|
||||
want, have uint16
|
||||
distance uint8
|
||||
oneway bool
|
||||
}
|
||||
|
||||
type scriptIntelligibility struct {
|
||||
wantLang, haveLang uint16
|
||||
wantScript, haveScript uint8
|
||||
distance uint8
|
||||
// Always oneway
|
||||
}
|
||||
|
||||
type regionIntelligibility struct {
|
||||
lang uint16 // compact language id
|
||||
script uint8 // 0 means any
|
||||
group uint8 // 0 means any; if bit 7 is set it means inverse
|
||||
distance uint8
|
||||
// Always twoway.
|
||||
}
|
||||
|
||||
// writeMatchData writes tables with languages and scripts for which there is
|
||||
// mutual intelligibility. The data is based on CLDR's languageMatching data.
|
||||
// Note that we use a different algorithm than the one defined by CLDR and that
|
||||
// we slightly modify the data. For example, we convert scores to confidence levels.
|
||||
// We also drop all region-related data as we use a different algorithm to
|
||||
// determine region equivalence.
|
||||
func (b *builder) writeMatchData() {
|
||||
lm := b.supp.LanguageMatching.LanguageMatches
|
||||
cldr.MakeSlice(&lm).SelectAnyOf("type", "written_new")
|
||||
|
||||
regionHierarchy := map[string][]string{}
|
||||
for _, g := range b.supp.TerritoryContainment.Group {
|
||||
regions := strings.Split(g.Contains, " ")
|
||||
regionHierarchy[g.Type] = append(regionHierarchy[g.Type], regions...)
|
||||
}
|
||||
regionToGroups := make([]uint8, language.NumRegions)
|
||||
|
||||
idToIndex := map[string]uint8{}
|
||||
for i, mv := range lm[0].MatchVariable {
|
||||
if i > 6 {
|
||||
log.Fatalf("Too many groups: %d", i)
|
||||
}
|
||||
idToIndex[mv.Id] = uint8(i + 1)
|
||||
// TODO: also handle '-'
|
||||
for _, r := range strings.Split(mv.Value, "+") {
|
||||
todo := []string{r}
|
||||
for k := 0; k < len(todo); k++ {
|
||||
r := todo[k]
|
||||
regionToGroups[b.regionIndex(r)] |= 1 << uint8(i)
|
||||
todo = append(todo, regionHierarchy[r]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
b.w.WriteVar("regionToGroups", regionToGroups)
|
||||
|
||||
// maps language id to in- and out-of-group region.
|
||||
paradigmLocales := [][3]uint16{}
|
||||
locales := strings.Split(lm[0].ParadigmLocales[0].Locales, " ")
|
||||
for i := 0; i < len(locales); i += 2 {
|
||||
x := [3]uint16{}
|
||||
for j := 0; j < 2; j++ {
|
||||
pc := strings.SplitN(locales[i+j], "-", 2)
|
||||
x[0] = b.langIndex(pc[0])
|
||||
if len(pc) == 2 {
|
||||
x[1+j] = uint16(b.regionIndex(pc[1]))
|
||||
}
|
||||
}
|
||||
paradigmLocales = append(paradigmLocales, x)
|
||||
}
|
||||
b.w.WriteVar("paradigmLocales", paradigmLocales)
|
||||
|
||||
b.w.WriteType(mutualIntelligibility{})
|
||||
b.w.WriteType(scriptIntelligibility{})
|
||||
b.w.WriteType(regionIntelligibility{})
|
||||
|
||||
matchLang := []mutualIntelligibility{}
|
||||
matchScript := []scriptIntelligibility{}
|
||||
matchRegion := []regionIntelligibility{}
|
||||
// Convert the languageMatch entries in lists keyed by desired language.
|
||||
for _, m := range lm[0].LanguageMatch {
|
||||
// Different versions of CLDR use different separators.
|
||||
desired := strings.Replace(m.Desired, "-", "_", -1)
|
||||
supported := strings.Replace(m.Supported, "-", "_", -1)
|
||||
d := strings.Split(desired, "_")
|
||||
s := strings.Split(supported, "_")
|
||||
if len(d) != len(s) {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
continue
|
||||
}
|
||||
distance, _ := strconv.ParseInt(m.Distance, 10, 8)
|
||||
switch len(d) {
|
||||
case 2:
|
||||
if desired == supported && desired == "*_*" {
|
||||
continue
|
||||
}
|
||||
// language-script pair.
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(d[0])),
|
||||
haveLang: uint16(b.langIndex(s[0])),
|
||||
wantScript: uint8(b.scriptIndex(d[1])),
|
||||
haveScript: uint8(b.scriptIndex(s[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
if m.Oneway != "true" {
|
||||
matchScript = append(matchScript, scriptIntelligibility{
|
||||
wantLang: uint16(b.langIndex(s[0])),
|
||||
haveLang: uint16(b.langIndex(d[0])),
|
||||
wantScript: uint8(b.scriptIndex(s[1])),
|
||||
haveScript: uint8(b.scriptIndex(d[1])),
|
||||
distance: uint8(distance),
|
||||
})
|
||||
}
|
||||
case 1:
|
||||
if desired == supported && desired == "*" {
|
||||
continue
|
||||
}
|
||||
if distance == 1 {
|
||||
// nb == no is already handled by macro mapping. Check there
|
||||
// really is only this case.
|
||||
if d[0] != "no" || s[0] != "nb" {
|
||||
log.Fatalf("unhandled equivalence %s == %s", s[0], d[0])
|
||||
}
|
||||
continue
|
||||
}
|
||||
// TODO: consider dropping oneway field and just doubling the entry.
|
||||
matchLang = append(matchLang, mutualIntelligibility{
|
||||
want: uint16(b.langIndex(d[0])),
|
||||
have: uint16(b.langIndex(s[0])),
|
||||
distance: uint8(distance),
|
||||
oneway: m.Oneway == "true",
|
||||
})
|
||||
case 3:
|
||||
if desired == supported && desired == "*_*_*" {
|
||||
continue
|
||||
}
|
||||
if desired != supported {
|
||||
// This is now supported by CLDR, but only one case, which
|
||||
// should already be covered by paradigm locales. For instance,
|
||||
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
|
||||
// testdata/CLDRLocaleMatcherTest.txt tests this.
|
||||
if supported != "en_*_GB" {
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
continue
|
||||
}
|
||||
ri := regionIntelligibility{
|
||||
lang: b.langIndex(d[0]),
|
||||
distance: uint8(distance),
|
||||
}
|
||||
if d[1] != "*" {
|
||||
ri.script = uint8(b.scriptIndex(d[1]))
|
||||
}
|
||||
switch {
|
||||
case d[2] == "*":
|
||||
ri.group = 0x80 // not contained in anything
|
||||
case strings.HasPrefix(d[2], "$!"):
|
||||
ri.group = 0x80
|
||||
d[2] = "$" + d[2][len("$!"):]
|
||||
fallthrough
|
||||
case strings.HasPrefix(d[2], "$"):
|
||||
ri.group |= idToIndex[d[2]]
|
||||
}
|
||||
matchRegion = append(matchRegion, ri)
|
||||
default:
|
||||
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
|
||||
}
|
||||
}
|
||||
sort.SliceStable(matchLang, func(i, j int) bool {
|
||||
return matchLang[i].distance < matchLang[j].distance
|
||||
})
|
||||
b.w.WriteComment(`
|
||||
matchLang holds pairs of langIDs of base languages that are typically
|
||||
mutually intelligible. Each pair is associated with a confidence and
|
||||
whether the intelligibility goes one or both ways.`)
|
||||
b.w.WriteVar("matchLang", matchLang)
|
||||
|
||||
b.w.WriteComment(`
|
||||
matchScript holds pairs of scriptIDs where readers of one script
|
||||
can typically also read the other. Each is associated with a confidence.`)
|
||||
sort.SliceStable(matchScript, func(i, j int) bool {
|
||||
return matchScript[i].distance < matchScript[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchScript", matchScript)
|
||||
|
||||
sort.SliceStable(matchRegion, func(i, j int) bool {
|
||||
return matchRegion[i].distance < matchRegion[j].distance
|
||||
})
|
||||
b.w.WriteVar("matchRegion", matchRegion)
|
||||
}
|
||||
38
vendor/golang.org/x/text/language/go1_1.go
generated
vendored
Normal file
38
vendor/golang.org/x/text/language/go1_1.go
generated
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
func sortStable(s sort.Interface) {
|
||||
ss := stableSort{
|
||||
s: s,
|
||||
pos: make([]int, s.Len()),
|
||||
}
|
||||
for i := range ss.pos {
|
||||
ss.pos[i] = i
|
||||
}
|
||||
sort.Sort(&ss)
|
||||
}
|
||||
|
||||
type stableSort struct {
|
||||
s sort.Interface
|
||||
pos []int
|
||||
}
|
||||
|
||||
func (s *stableSort) Len() int {
|
||||
return len(s.pos)
|
||||
}
|
||||
|
||||
func (s *stableSort) Less(i, j int) bool {
|
||||
return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
|
||||
}
|
||||
|
||||
func (s *stableSort) Swap(i, j int) {
|
||||
s.s.Swap(i, j)
|
||||
s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
|
||||
}
|
||||
11
vendor/golang.org/x/text/language/go1_2.go
generated
vendored
Normal file
11
vendor/golang.org/x/text/language/go1_2.go
generated
vendored
Normal file
@@ -0,0 +1,11 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
var sortStable = sort.Stable
|
||||
601
vendor/golang.org/x/text/language/language.go
generated
vendored
Normal file
601
vendor/golang.org/x/text/language/language.go
generated
vendored
Normal file
@@ -0,0 +1,601 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go -output tables.go
|
||||
|
||||
package language
|
||||
|
||||
// TODO: Remove above NOTE after:
|
||||
// - verifying that tables are dropped correctly (most notably matcher tables).
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
"golang.org/x/text/internal/language/compact"
|
||||
)
|
||||
|
||||
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
|
||||
// specific language or locale. All language tag values are guaranteed to be
|
||||
// well-formed.
|
||||
type Tag compact.Tag
|
||||
|
||||
func makeTag(t language.Tag) (tag Tag) {
|
||||
return Tag(compact.Make(t))
|
||||
}
|
||||
|
||||
func (t *Tag) tag() language.Tag {
|
||||
return (*compact.Tag)(t).Tag()
|
||||
}
|
||||
|
||||
func (t *Tag) isCompact() bool {
|
||||
return (*compact.Tag)(t).IsCompact()
|
||||
}
|
||||
|
||||
// TODO: improve performance.
|
||||
func (t *Tag) lang() language.Language { return t.tag().LangID }
|
||||
func (t *Tag) region() language.Region { return t.tag().RegionID }
|
||||
func (t *Tag) script() language.Script { return t.tag().ScriptID }
|
||||
|
||||
// Make is a convenience wrapper for Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func Make(s string) Tag {
|
||||
return Default.Make(s)
|
||||
}
|
||||
|
||||
// Make is a convenience wrapper for c.Parse that omits the error.
|
||||
// In case of an error, a sensible default is returned.
|
||||
func (c CanonType) Make(s string) Tag {
|
||||
t, _ := c.Parse(s)
|
||||
return t
|
||||
}
|
||||
|
||||
// Raw returns the raw base language, script and region, without making an
|
||||
// attempt to infer their values.
|
||||
func (t Tag) Raw() (b Base, s Script, r Region) {
|
||||
tt := t.tag()
|
||||
return Base{tt.LangID}, Script{tt.ScriptID}, Region{tt.RegionID}
|
||||
}
|
||||
|
||||
// IsRoot returns true if t is equal to language "und".
|
||||
func (t Tag) IsRoot() bool {
|
||||
return compact.Tag(t).IsRoot()
|
||||
}
|
||||
|
||||
// CanonType can be used to enable or disable various types of canonicalization.
|
||||
type CanonType int
|
||||
|
||||
const (
|
||||
// Replace deprecated base languages with their preferred replacements.
|
||||
DeprecatedBase CanonType = 1 << iota
|
||||
// Replace deprecated scripts with their preferred replacements.
|
||||
DeprecatedScript
|
||||
// Replace deprecated regions with their preferred replacements.
|
||||
DeprecatedRegion
|
||||
// Remove redundant scripts.
|
||||
SuppressScript
|
||||
// Normalize legacy encodings. This includes legacy languages defined in
|
||||
// CLDR as well as bibliographic codes defined in ISO-639.
|
||||
Legacy
|
||||
// Map the dominant language of a macro language group to the macro language
|
||||
// subtag. For example cmn -> zh.
|
||||
Macro
|
||||
// The CLDR flag should be used if full compatibility with CLDR is required.
|
||||
// There are a few cases where language.Tag may differ from CLDR. To follow all
|
||||
// of CLDR's suggestions, use All|CLDR.
|
||||
CLDR
|
||||
|
||||
// Raw can be used to Compose or Parse without Canonicalization.
|
||||
Raw CanonType = 0
|
||||
|
||||
// Replace all deprecated tags with their preferred replacements.
|
||||
Deprecated = DeprecatedBase | DeprecatedScript | DeprecatedRegion
|
||||
|
||||
// All canonicalizations recommended by BCP 47.
|
||||
BCP47 = Deprecated | SuppressScript
|
||||
|
||||
// All canonicalizations.
|
||||
All = BCP47 | Legacy | Macro
|
||||
|
||||
// Default is the canonicalization used by Parse, Make and Compose. To
|
||||
// preserve as much information as possible, canonicalizations that remove
|
||||
// potentially valuable information are not included. The Matcher is
|
||||
// designed to recognize similar tags that would be the same if
|
||||
// they were canonicalized using All.
|
||||
Default = Deprecated | Legacy
|
||||
|
||||
canonLang = DeprecatedBase | Legacy | Macro
|
||||
|
||||
// TODO: LikelyScript, LikelyRegion: suppress similar to ICU.
|
||||
)
|
||||
|
||||
// canonicalize returns the canonicalized equivalent of the tag and
|
||||
// whether there was any change.
|
||||
func canonicalize(c CanonType, t language.Tag) (language.Tag, bool) {
|
||||
if c == Raw {
|
||||
return t, false
|
||||
}
|
||||
changed := false
|
||||
if c&SuppressScript != 0 {
|
||||
if t.LangID.SuppressScript() == t.ScriptID {
|
||||
t.ScriptID = 0
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
if c&canonLang != 0 {
|
||||
for {
|
||||
if l, aliasType := t.LangID.Canonicalize(); l != t.LangID {
|
||||
switch aliasType {
|
||||
case language.Legacy:
|
||||
if c&Legacy != 0 {
|
||||
if t.LangID == _sh && t.ScriptID == 0 {
|
||||
t.ScriptID = _Latn
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
}
|
||||
case language.Macro:
|
||||
if c&Macro != 0 {
|
||||
// We deviate here from CLDR. The mapping "nb" -> "no"
|
||||
// qualifies as a typical Macro language mapping. However,
|
||||
// for legacy reasons, CLDR maps "no", the macro language
|
||||
// code for Norwegian, to the dominant variant "nb". This
|
||||
// change is currently under consideration for CLDR as well.
|
||||
// See https://unicode.org/cldr/trac/ticket/2698 and also
|
||||
// https://unicode.org/cldr/trac/ticket/1790 for some of the
|
||||
// practical implications. TODO: this check could be removed
|
||||
// if CLDR adopts this change.
|
||||
if c&CLDR == 0 || t.LangID != _nb {
|
||||
changed = true
|
||||
t.LangID = l
|
||||
}
|
||||
}
|
||||
case language.Deprecated:
|
||||
if c&DeprecatedBase != 0 {
|
||||
if t.LangID == _mo && t.RegionID == 0 {
|
||||
t.RegionID = _MD
|
||||
}
|
||||
t.LangID = l
|
||||
changed = true
|
||||
// Other canonicalization types may still apply.
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else if c&Legacy != 0 && t.LangID == _no && c&CLDR != 0 {
|
||||
t.LangID = _nb
|
||||
changed = true
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if c&DeprecatedScript != 0 {
|
||||
if t.ScriptID == _Qaai {
|
||||
changed = true
|
||||
t.ScriptID = _Zinh
|
||||
}
|
||||
}
|
||||
if c&DeprecatedRegion != 0 {
|
||||
if r := t.RegionID.Canonicalize(); r != t.RegionID {
|
||||
changed = true
|
||||
t.RegionID = r
|
||||
}
|
||||
}
|
||||
return t, changed
|
||||
}
|
||||
|
||||
// Canonicalize returns the canonicalized equivalent of the tag.
|
||||
func (c CanonType) Canonicalize(t Tag) (Tag, error) {
|
||||
// First try fast path.
|
||||
if t.isCompact() {
|
||||
if _, changed := canonicalize(c, compact.Tag(t).Tag()); !changed {
|
||||
return t, nil
|
||||
}
|
||||
}
|
||||
// It is unlikely that one will canonicalize a tag after matching. So do
|
||||
// a slow but simple approach here.
|
||||
if tag, changed := canonicalize(c, t.tag()); changed {
|
||||
tag.RemakeString()
|
||||
return makeTag(tag), nil
|
||||
}
|
||||
return t, nil
|
||||
|
||||
}
|
||||
|
||||
// Confidence indicates the level of certainty for a given return value.
|
||||
// For example, Serbian may be written in Cyrillic or Latin script.
|
||||
// The confidence level indicates whether a value was explicitly specified,
|
||||
// whether it is typically the only possible value, or whether there is
|
||||
// an ambiguity.
|
||||
type Confidence int
|
||||
|
||||
const (
|
||||
No Confidence = iota // full confidence that there was no match
|
||||
Low // most likely value picked out of a set of alternatives
|
||||
High // value is generally assumed to be the correct match
|
||||
Exact // exact match or explicitly specified value
|
||||
)
|
||||
|
||||
var confName = []string{"No", "Low", "High", "Exact"}
|
||||
|
||||
func (c Confidence) String() string {
|
||||
return confName[c]
|
||||
}
|
||||
|
||||
// String returns the canonical string representation of the language tag.
|
||||
func (t Tag) String() string {
|
||||
return t.tag().String()
|
||||
}
|
||||
|
||||
// MarshalText implements encoding.TextMarshaler.
|
||||
func (t Tag) MarshalText() (text []byte, err error) {
|
||||
return t.tag().MarshalText()
|
||||
}
|
||||
|
||||
// UnmarshalText implements encoding.TextUnmarshaler.
|
||||
func (t *Tag) UnmarshalText(text []byte) error {
|
||||
var tag language.Tag
|
||||
err := tag.UnmarshalText(text)
|
||||
*t = makeTag(tag)
|
||||
return err
|
||||
}
|
||||
|
||||
// Base returns the base language of the language tag. If the base language is
|
||||
// unspecified, an attempt will be made to infer it from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Base() (Base, Confidence) {
|
||||
if b := t.lang(); b != 0 {
|
||||
return Base{b}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
c := High
|
||||
if tt.ScriptID == 0 && !tt.RegionID.IsCountry() {
|
||||
c = Low
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil && tag.LangID != 0 {
|
||||
return Base{tag.LangID}, c
|
||||
}
|
||||
return Base{0}, No
|
||||
}
|
||||
|
||||
// Script infers the script for the language tag. If it was not explicitly given, it will infer
|
||||
// a most likely candidate.
|
||||
// If more than one script is commonly used for a language, the most likely one
|
||||
// is returned with a low confidence indication. For example, it returns (Cyrl, Low)
|
||||
// for Serbian.
|
||||
// If a script cannot be inferred (Zzzz, No) is returned. We do not use Zyyy (undetermined)
|
||||
// as one would suspect from the IANA registry for BCP 47. In a Unicode context Zyyy marks
|
||||
// common characters (like 1, 2, 3, '.', etc.) and is therefore more like multiple scripts.
|
||||
// See https://www.unicode.org/reports/tr24/#Values for more details. Zzzz is also used for
|
||||
// unknown value in CLDR. (Zzzz, Exact) is returned if Zzzz was explicitly specified.
|
||||
// Note that an inferred script is never guaranteed to be the correct one. Latin is
|
||||
// almost exclusively used for Afrikaans, but Arabic has been used for some texts
|
||||
// in the past. Also, the script that is commonly used may change over time.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Script() (Script, Confidence) {
|
||||
if scr := t.script(); scr != 0 {
|
||||
return Script{scr}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
sc, c := language.Script(_Zzzz), No
|
||||
if scr := tt.LangID.SuppressScript(); scr != 0 {
|
||||
// Note: it is not always the case that a language with a suppress
|
||||
// script value is only written in one script (e.g. kk, ms, pa).
|
||||
if tt.RegionID == 0 {
|
||||
return Script{scr}, High
|
||||
}
|
||||
sc, c = scr, High
|
||||
}
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
if tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
} else {
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil && tag.ScriptID != sc {
|
||||
sc, c = tag.ScriptID, Low
|
||||
}
|
||||
}
|
||||
return Script{sc}, c
|
||||
}
|
||||
|
||||
// Region returns the region for the language tag. If it was not explicitly given, it will
|
||||
// infer a most likely candidate from the context.
|
||||
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.
|
||||
func (t Tag) Region() (Region, Confidence) {
|
||||
if r := t.region(); r != 0 {
|
||||
return Region{r}, Exact
|
||||
}
|
||||
tt := t.tag()
|
||||
if tt, err := tt.Maximize(); err == nil {
|
||||
return Region{tt.RegionID}, Low // TODO: differentiate between high and low.
|
||||
}
|
||||
tt, _ = canonicalize(Deprecated|Macro, tt)
|
||||
if tag, err := tt.Maximize(); err == nil {
|
||||
return Region{tag.RegionID}, Low
|
||||
}
|
||||
return Region{_ZZ}, No // TODO: return world instead of undetermined?
|
||||
}
|
||||
|
||||
// Variants returns the variants specified explicitly for this language tag.
|
||||
// or nil if no variant was specified.
|
||||
func (t Tag) Variants() []Variant {
|
||||
if !compact.Tag(t).MayHaveVariants() {
|
||||
return nil
|
||||
}
|
||||
v := []Variant{}
|
||||
x, str := "", t.tag().Variants()
|
||||
for str != "" {
|
||||
x, str = nextToken(str)
|
||||
v = append(v, Variant{x})
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
|
||||
// specific language are substituted with fields from the parent language.
|
||||
// The parent for a language may change for newer versions of CLDR.
|
||||
//
|
||||
// Parent returns a tag for a less specific language that is mutually
|
||||
// intelligible or Und if there is no such language. This may not be the same as
|
||||
// simply stripping the last BCP 47 subtag. For instance, the parent of "zh-TW"
|
||||
// is "zh-Hant", and the parent of "zh-Hant" is "und".
|
||||
func (t Tag) Parent() Tag {
|
||||
return Tag(compact.Tag(t).Parent())
|
||||
}
|
||||
|
||||
// returns token t and the rest of the string.
|
||||
func nextToken(s string) (t, tail string) {
|
||||
p := strings.Index(s[1:], "-")
|
||||
if p == -1 {
|
||||
return s[1:], ""
|
||||
}
|
||||
p++
|
||||
return s[1:p], s[p:]
|
||||
}
|
||||
|
||||
// Extension is a single BCP 47 extension.
|
||||
type Extension struct {
|
||||
s string
|
||||
}
|
||||
|
||||
// String returns the string representation of the extension, including the
|
||||
// type tag.
|
||||
func (e Extension) String() string {
|
||||
return e.s
|
||||
}
|
||||
|
||||
// ParseExtension parses s as an extension and returns it on success.
|
||||
func ParseExtension(s string) (e Extension, err error) {
|
||||
ext, err := language.ParseExtension(s)
|
||||
return Extension{ext}, err
|
||||
}
|
||||
|
||||
// Type returns the one-byte extension type of e. It returns 0 for the zero
|
||||
// exception.
|
||||
func (e Extension) Type() byte {
|
||||
if e.s == "" {
|
||||
return 0
|
||||
}
|
||||
return e.s[0]
|
||||
}
|
||||
|
||||
// Tokens returns the list of tokens of e.
|
||||
func (e Extension) Tokens() []string {
|
||||
return strings.Split(e.s, "-")
|
||||
}
|
||||
|
||||
// Extension returns the extension of type x for tag t. It will return
|
||||
// false for ok if t does not have the requested extension. The returned
|
||||
// extension will be invalid in this case.
|
||||
func (t Tag) Extension(x byte) (ext Extension, ok bool) {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return Extension{}, false
|
||||
}
|
||||
e, ok := t.tag().Extension(x)
|
||||
return Extension{e}, ok
|
||||
}
|
||||
|
||||
// Extensions returns all extensions of t.
|
||||
func (t Tag) Extensions() []Extension {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
return nil
|
||||
}
|
||||
e := []Extension{}
|
||||
for _, ext := range t.tag().Extensions() {
|
||||
e = append(e, Extension{ext})
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// TypeForKey returns the type associated with the given key, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// TypeForKey will traverse the inheritance chain to get the correct value.
|
||||
func (t Tag) TypeForKey(key string) string {
|
||||
if !compact.Tag(t).MayHaveExtensions() {
|
||||
if key != "rg" && key != "va" {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return t.tag().TypeForKey(key)
|
||||
}
|
||||
|
||||
// SetTypeForKey returns a new Tag with the key set to type, where key and type
|
||||
// are of the allowed values defined for the Unicode locale extension ('u') in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// An empty value removes an existing pair with the same key.
|
||||
func (t Tag) SetTypeForKey(key, value string) (Tag, error) {
|
||||
tt, err := t.tag().SetTypeForKey(key, value)
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// NumCompactTags is the number of compact tags. The maximum tag is
|
||||
// NumCompactTags-1.
|
||||
const NumCompactTags = compact.NumCompactTags
|
||||
|
||||
// CompactIndex returns an index, where 0 <= index < NumCompactTags, for tags
|
||||
// for which data exists in the text repository.The index will change over time
|
||||
// and should not be stored in persistent storage. If t does not match a compact
|
||||
// index, exact will be false and the compact index will be returned for the
|
||||
// first match after repeatedly taking the Parent of t.
|
||||
func CompactIndex(t Tag) (index int, exact bool) {
|
||||
id, exact := compact.LanguageID(compact.Tag(t))
|
||||
return int(id), exact
|
||||
}
|
||||
|
||||
var root = language.Tag{}
|
||||
|
||||
// Base is an ISO 639 language code, used for encoding the base language
|
||||
// of a language tag.
|
||||
type Base struct {
|
||||
langID language.Language
|
||||
}
|
||||
|
||||
// ParseBase parses a 2- or 3-letter ISO 639 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown language identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseBase(s string) (Base, error) {
|
||||
l, err := language.ParseBase(s)
|
||||
return Base{l}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation of the base language.
|
||||
func (b Base) String() string {
|
||||
return b.langID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the ISO 639-3 language code.
|
||||
func (b Base) ISO3() string {
|
||||
return b.langID.ISO3()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this language code is reserved for private use.
|
||||
func (b Base) IsPrivateUse() bool {
|
||||
return b.langID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Script is a 4-letter ISO 15924 code for representing scripts.
|
||||
// It is idiomatically represented in title case.
|
||||
type Script struct {
|
||||
scriptID language.Script
|
||||
}
|
||||
|
||||
// ParseScript parses a 4-letter ISO 15924 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown script identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseScript(s string) (Script, error) {
|
||||
sc, err := language.ParseScript(s)
|
||||
return Script{sc}, err
|
||||
}
|
||||
|
||||
// String returns the script code in title case.
|
||||
// It returns "Zzzz" for an unspecified script.
|
||||
func (s Script) String() string {
|
||||
return s.scriptID.String()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether this script code is reserved for private use.
|
||||
func (s Script) IsPrivateUse() bool {
|
||||
return s.scriptID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// Region is an ISO 3166-1 or UN M.49 code for representing countries and regions.
|
||||
type Region struct {
|
||||
regionID language.Region
|
||||
}
|
||||
|
||||
// EncodeM49 returns the Region for the given UN M.49 code.
|
||||
// It returns an error if r is not a valid code.
|
||||
func EncodeM49(r int) (Region, error) {
|
||||
rid, err := language.EncodeM49(r)
|
||||
return Region{rid}, err
|
||||
}
|
||||
|
||||
// ParseRegion parses a 2- or 3-letter ISO 3166-1 or a UN M.49 code.
|
||||
// It returns a ValueError if s is a well-formed but unknown region identifier
|
||||
// or another error if another error occurred.
|
||||
func ParseRegion(s string) (Region, error) {
|
||||
r, err := language.ParseRegion(s)
|
||||
return Region{r}, err
|
||||
}
|
||||
|
||||
// String returns the BCP 47 representation for the region.
|
||||
// It returns "ZZ" for an unspecified region.
|
||||
func (r Region) String() string {
|
||||
return r.regionID.String()
|
||||
}
|
||||
|
||||
// ISO3 returns the 3-letter ISO code of r.
|
||||
// Note that not all regions have a 3-letter ISO code.
|
||||
// In such cases this method returns "ZZZ".
|
||||
func (r Region) ISO3() string {
|
||||
return r.regionID.ISO3()
|
||||
}
|
||||
|
||||
// M49 returns the UN M.49 encoding of r, or 0 if this encoding
|
||||
// is not defined for r.
|
||||
func (r Region) M49() int {
|
||||
return r.regionID.M49()
|
||||
}
|
||||
|
||||
// IsPrivateUse reports whether r has the ISO 3166 User-assigned status. This
|
||||
// may include private-use tags that are assigned by CLDR and used in this
|
||||
// implementation. So IsPrivateUse and IsCountry can be simultaneously true.
|
||||
func (r Region) IsPrivateUse() bool {
|
||||
return r.regionID.IsPrivateUse()
|
||||
}
|
||||
|
||||
// IsCountry returns whether this region is a country or autonomous area. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsCountry() bool {
|
||||
return r.regionID.IsCountry()
|
||||
}
|
||||
|
||||
// IsGroup returns whether this region defines a collection of regions. This
|
||||
// includes non-standard definitions from CLDR.
|
||||
func (r Region) IsGroup() bool {
|
||||
return r.regionID.IsGroup()
|
||||
}
|
||||
|
||||
// Contains returns whether Region c is contained by Region r. It returns true
|
||||
// if c == r.
|
||||
func (r Region) Contains(c Region) bool {
|
||||
return r.regionID.Contains(c.regionID)
|
||||
}
|
||||
|
||||
// TLD returns the country code top-level domain (ccTLD). UK is returned for GB.
|
||||
// In all other cases it returns either the region itself or an error.
|
||||
//
|
||||
// This method may return an error for a region for which there exists a
|
||||
// canonical form with a ccTLD. To get that ccTLD canonicalize r first. The
|
||||
// region will already be canonicalized it was obtained from a Tag that was
|
||||
// obtained using any of the default methods.
|
||||
func (r Region) TLD() (Region, error) {
|
||||
tld, err := r.regionID.TLD()
|
||||
return Region{tld}, err
|
||||
}
|
||||
|
||||
// Canonicalize returns the region or a possible replacement if the region is
|
||||
// deprecated. It will not return a replacement for deprecated regions that
|
||||
// are split into multiple regions.
|
||||
func (r Region) Canonicalize() Region {
|
||||
return Region{r.regionID.Canonicalize()}
|
||||
}
|
||||
|
||||
// Variant represents a registered variant of a language as defined by BCP 47.
|
||||
type Variant struct {
|
||||
variant string
|
||||
}
|
||||
|
||||
// ParseVariant parses and returns a Variant. An error is returned if s is not
|
||||
// a valid variant.
|
||||
func ParseVariant(s string) (Variant, error) {
|
||||
v, err := language.ParseVariant(s)
|
||||
return Variant{v.String()}, err
|
||||
}
|
||||
|
||||
// String returns the string representation of the variant.
|
||||
func (v Variant) String() string {
|
||||
return v.variant
|
||||
}
|
||||
735
vendor/golang.org/x/text/language/match.go
generated
vendored
Normal file
735
vendor/golang.org/x/text/language/match.go
generated
vendored
Normal file
@@ -0,0 +1,735 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// A MatchOption configures a Matcher.
|
||||
type MatchOption func(*matcher)
|
||||
|
||||
// PreferSameScript will, in the absence of a match, result in the first
|
||||
// preferred tag with the same script as a supported tag to match this supported
|
||||
// tag. The default is currently true, but this may change in the future.
|
||||
func PreferSameScript(preferSame bool) MatchOption {
|
||||
return func(m *matcher) { m.preferSameScript = preferSame }
|
||||
}
|
||||
|
||||
// TODO(v1.0.0): consider making Matcher a concrete type, instead of interface.
|
||||
// There doesn't seem to be too much need for multiple types.
|
||||
// Making it a concrete type allows MatchStrings to be a method, which will
|
||||
// improve its discoverability.
|
||||
|
||||
// MatchStrings parses and matches the given strings until one of them matches
|
||||
// the language in the Matcher. A string may be an Accept-Language header as
|
||||
// handled by ParseAcceptLanguage. The default language is returned if no
|
||||
// other language matched.
|
||||
func MatchStrings(m Matcher, lang ...string) (tag Tag, index int) {
|
||||
for _, accept := range lang {
|
||||
desired, _, err := ParseAcceptLanguage(accept)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if tag, index, conf := m.Match(desired...); conf != No {
|
||||
return tag, index
|
||||
}
|
||||
}
|
||||
tag, index, _ = m.Match()
|
||||
return
|
||||
}
|
||||
|
||||
// Matcher is the interface that wraps the Match method.
|
||||
//
|
||||
// Match returns the best match for any of the given tags, along with
|
||||
// a unique index associated with the returned tag and a confidence
|
||||
// score.
|
||||
type Matcher interface {
|
||||
Match(t ...Tag) (tag Tag, index int, c Confidence)
|
||||
}
|
||||
|
||||
// Comprehends reports the confidence score for a speaker of a given language
|
||||
// to being able to comprehend the written form of an alternative language.
|
||||
func Comprehends(speaker, alternative Tag) Confidence {
|
||||
_, _, c := NewMatcher([]Tag{alternative}).Match(speaker)
|
||||
return c
|
||||
}
|
||||
|
||||
// NewMatcher returns a Matcher that matches an ordered list of preferred tags
|
||||
// against a list of supported tags based on written intelligibility, closeness
|
||||
// of dialect, equivalence of subtags and various other rules. It is initialized
|
||||
// with the list of supported tags. The first element is used as the default
|
||||
// value in case no match is found.
|
||||
//
|
||||
// Its Match method matches the first of the given Tags to reach a certain
|
||||
// confidence threshold. The tags passed to Match should therefore be specified
|
||||
// in order of preference. Extensions are ignored for matching.
|
||||
//
|
||||
// The index returned by the Match method corresponds to the index of the
|
||||
// matched tag in t, but is augmented with the Unicode extension ('u')of the
|
||||
// corresponding preferred tag. This allows user locale options to be passed
|
||||
// transparently.
|
||||
func NewMatcher(t []Tag, options ...MatchOption) Matcher {
|
||||
return newMatcher(t, options)
|
||||
}
|
||||
|
||||
func (m *matcher) Match(want ...Tag) (t Tag, index int, c Confidence) {
|
||||
var tt language.Tag
|
||||
match, w, c := m.getBest(want...)
|
||||
if match != nil {
|
||||
tt, index = match.tag, match.index
|
||||
} else {
|
||||
// TODO: this should be an option
|
||||
tt = m.default_.tag
|
||||
if m.preferSameScript {
|
||||
outer:
|
||||
for _, w := range want {
|
||||
script, _ := w.Script()
|
||||
if script.scriptID == 0 {
|
||||
// Don't do anything if there is no script, such as with
|
||||
// private subtags.
|
||||
continue
|
||||
}
|
||||
for i, h := range m.supported {
|
||||
if script.scriptID == h.maxScript {
|
||||
tt, index = h.tag, i
|
||||
break outer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// TODO: select first language tag based on script.
|
||||
}
|
||||
if w.RegionID != tt.RegionID && w.RegionID != 0 {
|
||||
if w.RegionID != 0 && tt.RegionID != 0 && tt.RegionID.Contains(w.RegionID) {
|
||||
tt.RegionID = w.RegionID
|
||||
tt.RemakeString()
|
||||
} else if r := w.RegionID.String(); len(r) == 2 {
|
||||
// TODO: also filter macro and deprecated.
|
||||
tt, _ = tt.SetTypeForKey("rg", strings.ToLower(r)+"zzzz")
|
||||
}
|
||||
}
|
||||
// Copy options from the user-provided tag into the result tag. This is hard
|
||||
// to do after the fact, so we do it here.
|
||||
// TODO: add in alternative variants to -u-va-.
|
||||
// TODO: add preferred region to -u-rg-.
|
||||
if e := w.Extensions(); len(e) > 0 {
|
||||
b := language.Builder{}
|
||||
b.SetTag(tt)
|
||||
for _, e := range e {
|
||||
b.AddExt(e)
|
||||
}
|
||||
tt = b.Make()
|
||||
}
|
||||
return makeTag(tt), index, c
|
||||
}
|
||||
|
||||
// ErrMissingLikelyTagsData indicates no information was available
|
||||
// to compute likely values of missing tags.
|
||||
var ErrMissingLikelyTagsData = errors.New("missing likely tags data")
|
||||
|
||||
// func (t *Tag) setTagsFrom(id Tag) {
|
||||
// t.LangID = id.LangID
|
||||
// t.ScriptID = id.ScriptID
|
||||
// t.RegionID = id.RegionID
|
||||
// }
|
||||
|
||||
// Tag Matching
|
||||
// CLDR defines an algorithm for finding the best match between two sets of language
|
||||
// tags. The basic algorithm defines how to score a possible match and then find
|
||||
// the match with the best score
|
||||
// (see https://www.unicode.org/reports/tr35/#LanguageMatching).
|
||||
// Using scoring has several disadvantages. The scoring obfuscates the importance of
|
||||
// the various factors considered, making the algorithm harder to understand. Using
|
||||
// scoring also requires the full score to be computed for each pair of tags.
|
||||
//
|
||||
// We will use a different algorithm which aims to have the following properties:
|
||||
// - clarity on the precedence of the various selection factors, and
|
||||
// - improved performance by allowing early termination of a comparison.
|
||||
//
|
||||
// Matching algorithm (overview)
|
||||
// Input:
|
||||
// - supported: a set of supported tags
|
||||
// - default: the default tag to return in case there is no match
|
||||
// - desired: list of desired tags, ordered by preference, starting with
|
||||
// the most-preferred.
|
||||
//
|
||||
// Algorithm:
|
||||
// 1) Set the best match to the lowest confidence level
|
||||
// 2) For each tag in "desired":
|
||||
// a) For each tag in "supported":
|
||||
// 1) compute the match between the two tags.
|
||||
// 2) if the match is better than the previous best match, replace it
|
||||
// with the new match. (see next section)
|
||||
// b) if the current best match is Exact and pin is true the result will be
|
||||
// frozen to the language found thusfar, although better matches may
|
||||
// still be found for the same language.
|
||||
// 3) If the best match so far is below a certain threshold, return "default".
|
||||
//
|
||||
// Ranking:
|
||||
// We use two phases to determine whether one pair of tags are a better match
|
||||
// than another pair of tags. First, we determine a rough confidence level. If the
|
||||
// levels are different, the one with the highest confidence wins.
|
||||
// Second, if the rough confidence levels are identical, we use a set of tie-breaker
|
||||
// rules.
|
||||
//
|
||||
// The confidence level of matching a pair of tags is determined by finding the
|
||||
// lowest confidence level of any matches of the corresponding subtags (the
|
||||
// result is deemed as good as its weakest link).
|
||||
// We define the following levels:
|
||||
// Exact - An exact match of a subtag, before adding likely subtags.
|
||||
// MaxExact - An exact match of a subtag, after adding likely subtags.
|
||||
// [See Note 2].
|
||||
// High - High level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// Low - Low level of mutual intelligibility between different subtag
|
||||
// variants.
|
||||
// No - No mutual intelligibility.
|
||||
//
|
||||
// The following levels can occur for each type of subtag:
|
||||
// Base: Exact, MaxExact, High, Low, No
|
||||
// Script: Exact, MaxExact [see Note 3], Low, No
|
||||
// Region: Exact, MaxExact, High
|
||||
// Variant: Exact, High
|
||||
// Private: Exact, No
|
||||
//
|
||||
// Any result with a confidence level of Low or higher is deemed a possible match.
|
||||
// Once a desired tag matches any of the supported tags with a level of MaxExact
|
||||
// or higher, the next desired tag is not considered (see Step 2.b).
|
||||
// Note that CLDR provides languageMatching data that defines close equivalence
|
||||
// classes for base languages, scripts and regions.
|
||||
//
|
||||
// Tie-breaking
|
||||
// If we get the same confidence level for two matches, we apply a sequence of
|
||||
// tie-breaking rules. The first that succeeds defines the result. The rules are
|
||||
// applied in the following order.
|
||||
// 1) Original language was defined and was identical.
|
||||
// 2) Original region was defined and was identical.
|
||||
// 3) Distance between two maximized regions was the smallest.
|
||||
// 4) Original script was defined and was identical.
|
||||
// 5) Distance from want tag to have tag using the parent relation [see Note 5.]
|
||||
// If there is still no winner after these rules are applied, the first match
|
||||
// found wins.
|
||||
//
|
||||
// Notes:
|
||||
// [2] In practice, as matching of Exact is done in a separate phase from
|
||||
// matching the other levels, we reuse the Exact level to mean MaxExact in
|
||||
// the second phase. As a consequence, we only need the levels defined by
|
||||
// the Confidence type. The MaxExact confidence level is mapped to High in
|
||||
// the public API.
|
||||
// [3] We do not differentiate between maximized script values that were derived
|
||||
// from suppressScript versus most likely tag data. We determined that in
|
||||
// ranking the two, one ranks just after the other. Moreover, the two cannot
|
||||
// occur concurrently. As a consequence, they are identical for practical
|
||||
// purposes.
|
||||
// [4] In case of deprecated, macro-equivalents and legacy mappings, we assign
|
||||
// the MaxExact level to allow iw vs he to still be a closer match than
|
||||
// en-AU vs en-US, for example.
|
||||
// [5] In CLDR a locale inherits fields that are unspecified for this locale
|
||||
// from its parent. Therefore, if a locale is a parent of another locale,
|
||||
// it is a strong measure for closeness, especially when no other tie
|
||||
// breaker rule applies. One could also argue it is inconsistent, for
|
||||
// example, when pt-AO matches pt (which CLDR equates with pt-BR), even
|
||||
// though its parent is pt-PT according to the inheritance rules.
|
||||
//
|
||||
// Implementation Details:
|
||||
// There are several performance considerations worth pointing out. Most notably,
|
||||
// we preprocess as much as possible (within reason) at the time of creation of a
|
||||
// matcher. This includes:
|
||||
// - creating a per-language map, which includes data for the raw base language
|
||||
// and its canonicalized variant (if applicable),
|
||||
// - expanding entries for the equivalence classes defined in CLDR's
|
||||
// languageMatch data.
|
||||
// The per-language map ensures that typically only a very small number of tags
|
||||
// need to be considered. The pre-expansion of canonicalized subtags and
|
||||
// equivalence classes reduces the amount of map lookups that need to be done at
|
||||
// runtime.
|
||||
|
||||
// matcher keeps a set of supported language tags, indexed by language.
|
||||
type matcher struct {
|
||||
default_ *haveTag
|
||||
supported []*haveTag
|
||||
index map[language.Language]*matchHeader
|
||||
passSettings bool
|
||||
preferSameScript bool
|
||||
}
|
||||
|
||||
// matchHeader has the lists of tags for exact matches and matches based on
|
||||
// maximized and canonicalized tags for a given language.
|
||||
type matchHeader struct {
|
||||
haveTags []*haveTag
|
||||
original bool
|
||||
}
|
||||
|
||||
// haveTag holds a supported Tag and its maximized script and region. The maximized
|
||||
// or canonicalized language is not stored as it is not needed during matching.
|
||||
type haveTag struct {
|
||||
tag language.Tag
|
||||
|
||||
// index of this tag in the original list of supported tags.
|
||||
index int
|
||||
|
||||
// conf is the maximum confidence that can result from matching this haveTag.
|
||||
// When conf < Exact this means it was inserted after applying a CLDR equivalence rule.
|
||||
conf Confidence
|
||||
|
||||
// Maximized region and script.
|
||||
maxRegion language.Region
|
||||
maxScript language.Script
|
||||
|
||||
// altScript may be checked as an alternative match to maxScript. If altScript
|
||||
// matches, the confidence level for this match is Low. Theoretically there
|
||||
// could be multiple alternative scripts. This does not occur in practice.
|
||||
altScript language.Script
|
||||
|
||||
// nextMax is the index of the next haveTag with the same maximized tags.
|
||||
nextMax uint16
|
||||
}
|
||||
|
||||
func makeHaveTag(tag language.Tag, index int) (haveTag, language.Language) {
|
||||
max := tag
|
||||
if tag.LangID != 0 || tag.RegionID != 0 || tag.ScriptID != 0 {
|
||||
max, _ = canonicalize(All, max)
|
||||
max, _ = max.Maximize()
|
||||
max.RemakeString()
|
||||
}
|
||||
return haveTag{tag, index, Exact, max.RegionID, max.ScriptID, altScript(max.LangID, max.ScriptID), 0}, max.LangID
|
||||
}
|
||||
|
||||
// altScript returns an alternative script that may match the given script with
|
||||
// a low confidence. At the moment, the langMatch data allows for at most one
|
||||
// script to map to another and we rely on this to keep the code simple.
|
||||
func altScript(l language.Language, s language.Script) language.Script {
|
||||
for _, alt := range matchScript {
|
||||
// TODO: also match cases where language is not the same.
|
||||
if (language.Language(alt.wantLang) == l || language.Language(alt.haveLang) == l) &&
|
||||
language.Script(alt.haveScript) == s {
|
||||
return language.Script(alt.wantScript)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// addIfNew adds a haveTag to the list of tags only if it is a unique tag.
|
||||
// Tags that have the same maximized values are linked by index.
|
||||
func (h *matchHeader) addIfNew(n haveTag, exact bool) {
|
||||
h.original = h.original || exact
|
||||
// Don't add new exact matches.
|
||||
for _, v := range h.haveTags {
|
||||
if equalsRest(v.tag, n.tag) {
|
||||
return
|
||||
}
|
||||
}
|
||||
// Allow duplicate maximized tags, but create a linked list to allow quickly
|
||||
// comparing the equivalents and bail out.
|
||||
for i, v := range h.haveTags {
|
||||
if v.maxScript == n.maxScript &&
|
||||
v.maxRegion == n.maxRegion &&
|
||||
v.tag.VariantOrPrivateUseTags() == n.tag.VariantOrPrivateUseTags() {
|
||||
for h.haveTags[i].nextMax != 0 {
|
||||
i = int(h.haveTags[i].nextMax)
|
||||
}
|
||||
h.haveTags[i].nextMax = uint16(len(h.haveTags))
|
||||
break
|
||||
}
|
||||
}
|
||||
h.haveTags = append(h.haveTags, &n)
|
||||
}
|
||||
|
||||
// header returns the matchHeader for the given language. It creates one if
|
||||
// it doesn't already exist.
|
||||
func (m *matcher) header(l language.Language) *matchHeader {
|
||||
if h := m.index[l]; h != nil {
|
||||
return h
|
||||
}
|
||||
h := &matchHeader{}
|
||||
m.index[l] = h
|
||||
return h
|
||||
}
|
||||
|
||||
func toConf(d uint8) Confidence {
|
||||
if d <= 10 {
|
||||
return High
|
||||
}
|
||||
if d < 30 {
|
||||
return Low
|
||||
}
|
||||
return No
|
||||
}
|
||||
|
||||
// newMatcher builds an index for the given supported tags and returns it as
|
||||
// a matcher. It also expands the index by considering various equivalence classes
|
||||
// for a given tag.
|
||||
func newMatcher(supported []Tag, options []MatchOption) *matcher {
|
||||
m := &matcher{
|
||||
index: make(map[language.Language]*matchHeader),
|
||||
preferSameScript: true,
|
||||
}
|
||||
for _, o := range options {
|
||||
o(m)
|
||||
}
|
||||
if len(supported) == 0 {
|
||||
m.default_ = &haveTag{}
|
||||
return m
|
||||
}
|
||||
// Add supported languages to the index. Add exact matches first to give
|
||||
// them precedence.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, _ := makeHaveTag(tt, i)
|
||||
m.header(tt.LangID).addIfNew(pair, true)
|
||||
m.supported = append(m.supported, &pair)
|
||||
}
|
||||
m.default_ = m.header(supported[0].lang()).haveTags[0]
|
||||
// Keep these in two different loops to support the case that two equivalent
|
||||
// languages are distinguished, such as iw and he.
|
||||
for i, tag := range supported {
|
||||
tt := tag.tag()
|
||||
pair, max := makeHaveTag(tt, i)
|
||||
if max != tt.LangID {
|
||||
m.header(max).addIfNew(pair, true)
|
||||
}
|
||||
}
|
||||
|
||||
// update is used to add indexes in the map for equivalent languages.
|
||||
// update will only add entries to original indexes, thus not computing any
|
||||
// transitive relations.
|
||||
update := func(want, have uint16, conf Confidence) {
|
||||
if hh := m.index[language.Language(have)]; hh != nil {
|
||||
if !hh.original {
|
||||
return
|
||||
}
|
||||
hw := m.header(language.Language(want))
|
||||
for _, ht := range hh.haveTags {
|
||||
v := *ht
|
||||
if conf < v.conf {
|
||||
v.conf = conf
|
||||
}
|
||||
v.nextMax = 0 // this value needs to be recomputed
|
||||
if v.altScript != 0 {
|
||||
v.altScript = altScript(language.Language(want), v.maxScript)
|
||||
}
|
||||
hw.addIfNew(v, conf == Exact && hh.original)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for languages with mutual intelligibility as defined by CLDR's
|
||||
// languageMatch data.
|
||||
for _, ml := range matchLang {
|
||||
update(ml.want, ml.have, toConf(ml.distance))
|
||||
if !ml.oneway {
|
||||
update(ml.have, ml.want, toConf(ml.distance))
|
||||
}
|
||||
}
|
||||
|
||||
// Add entries for possible canonicalizations. This is an optimization to
|
||||
// ensure that only one map lookup needs to be done at runtime per desired tag.
|
||||
// First we match deprecated equivalents. If they are perfect equivalents
|
||||
// (their canonicalization simply substitutes a different language code, but
|
||||
// nothing else), the match confidence is Exact, otherwise it is High.
|
||||
for i, lm := range language.AliasMap {
|
||||
// If deprecated codes match and there is no fiddling with the script or
|
||||
// or region, we consider it an exact match.
|
||||
conf := Exact
|
||||
if language.AliasTypes[i] != language.Macro {
|
||||
if !isExactEquivalent(language.Language(lm.From)) {
|
||||
conf = High
|
||||
}
|
||||
update(lm.To, lm.From, conf)
|
||||
}
|
||||
update(lm.From, lm.To, conf)
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// getBest gets the best matching tag in m for any of the given tags, taking into
|
||||
// account the order of preference of the given tags.
|
||||
func (m *matcher) getBest(want ...Tag) (got *haveTag, orig language.Tag, c Confidence) {
|
||||
best := bestMatch{}
|
||||
for i, ww := range want {
|
||||
w := ww.tag()
|
||||
var max language.Tag
|
||||
// Check for exact match first.
|
||||
h := m.index[w.LangID]
|
||||
if w.LangID != 0 {
|
||||
if h == nil {
|
||||
continue
|
||||
}
|
||||
// Base language is defined.
|
||||
max, _ = canonicalize(Legacy|Deprecated|Macro, w)
|
||||
// A region that is added through canonicalization is stronger than
|
||||
// a maximized region: set it in the original (e.g. mo -> ro-MD).
|
||||
if w.RegionID != max.RegionID {
|
||||
w.RegionID = max.RegionID
|
||||
}
|
||||
// TODO: should we do the same for scripts?
|
||||
// See test case: en, sr, nl ; sh ; sr
|
||||
max, _ = max.Maximize()
|
||||
} else {
|
||||
// Base language is not defined.
|
||||
if h != nil {
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
if equalsRest(have.tag, w) {
|
||||
return have, w, Exact
|
||||
}
|
||||
}
|
||||
}
|
||||
if w.ScriptID == 0 && w.RegionID == 0 {
|
||||
// We skip all tags matching und for approximate matching, including
|
||||
// private tags.
|
||||
continue
|
||||
}
|
||||
max, _ = w.Maximize()
|
||||
if h = m.index[max.LangID]; h == nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
pin := true
|
||||
for _, t := range want[i+1:] {
|
||||
if w.LangID == t.lang() {
|
||||
pin = false
|
||||
break
|
||||
}
|
||||
}
|
||||
// Check for match based on maximized tag.
|
||||
for i := range h.haveTags {
|
||||
have := h.haveTags[i]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
if best.conf == Exact {
|
||||
for have.nextMax != 0 {
|
||||
have = h.haveTags[have.nextMax]
|
||||
best.update(have, w, max.ScriptID, max.RegionID, pin)
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
}
|
||||
}
|
||||
if best.conf <= No {
|
||||
if len(want) != 0 {
|
||||
return nil, want[0].tag(), No
|
||||
}
|
||||
return nil, language.Tag{}, No
|
||||
}
|
||||
return best.have, best.want, best.conf
|
||||
}
|
||||
|
||||
// bestMatch accumulates the best match so far.
|
||||
type bestMatch struct {
|
||||
have *haveTag
|
||||
want language.Tag
|
||||
conf Confidence
|
||||
pinnedRegion language.Region
|
||||
pinLanguage bool
|
||||
sameRegionGroup bool
|
||||
// Cached results from applying tie-breaking rules.
|
||||
origLang bool
|
||||
origReg bool
|
||||
paradigmReg bool
|
||||
regGroupDist uint8
|
||||
origScript bool
|
||||
}
|
||||
|
||||
// update updates the existing best match if the new pair is considered to be a
|
||||
// better match. To determine if the given pair is a better match, it first
|
||||
// computes the rough confidence level. If this surpasses the current match, it
|
||||
// will replace it and update the tie-breaker rule cache. If there is a tie, it
|
||||
// proceeds with applying a series of tie-breaker rules. If there is no
|
||||
// conclusive winner after applying the tie-breaker rules, it leaves the current
|
||||
// match as the preferred match.
|
||||
//
|
||||
// If pin is true and have and tag are a strong match, it will henceforth only
|
||||
// consider matches for this language. This corresponds to the nothing that most
|
||||
// users have a strong preference for the first defined language. A user can
|
||||
// still prefer a second language over a dialect of the preferred language by
|
||||
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
|
||||
// be false.
|
||||
func (m *bestMatch) update(have *haveTag, tag language.Tag, maxScript language.Script, maxRegion language.Region, pin bool) {
|
||||
// Bail if the maximum attainable confidence is below that of the current best match.
|
||||
c := have.conf
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
// Don't change the language once we already have found an exact match.
|
||||
if m.pinLanguage && tag.LangID != m.want.LangID {
|
||||
return
|
||||
}
|
||||
// Pin the region group if we are comparing tags for the same language.
|
||||
if tag.LangID == m.want.LangID && m.sameRegionGroup {
|
||||
_, sameGroup := regionGroupDist(m.pinnedRegion, have.maxRegion, have.maxScript, m.want.LangID)
|
||||
if !sameGroup {
|
||||
return
|
||||
}
|
||||
}
|
||||
if c == Exact && have.maxScript == maxScript {
|
||||
// If there is another language and then another entry of this language,
|
||||
// don't pin anything, otherwise pin the language.
|
||||
m.pinLanguage = pin
|
||||
}
|
||||
if equalsRest(have.tag, tag) {
|
||||
} else if have.maxScript != maxScript {
|
||||
// There is usually very little comprehension between different scripts.
|
||||
// In a few cases there may still be Low comprehension. This possibility
|
||||
// is pre-computed and stored in have.altScript.
|
||||
if Low < m.conf || have.altScript != maxScript {
|
||||
return
|
||||
}
|
||||
c = Low
|
||||
} else if have.maxRegion != maxRegion {
|
||||
if High < c {
|
||||
// There is usually a small difference between languages across regions.
|
||||
c = High
|
||||
}
|
||||
}
|
||||
|
||||
// We store the results of the computations of the tie-breaker rules along
|
||||
// with the best match. There is no need to do the checks once we determine
|
||||
// we have a winner, but we do still need to do the tie-breaker computations.
|
||||
// We use "beaten" to keep track if we still need to do the checks.
|
||||
beaten := false // true if the new pair defeats the current one.
|
||||
if c != m.conf {
|
||||
if c < m.conf {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Tie-breaker rules:
|
||||
// We prefer if the pre-maximized language was specified and identical.
|
||||
origLang := have.tag.LangID == tag.LangID && tag.LangID != 0
|
||||
if !beaten && m.origLang != origLang {
|
||||
if m.origLang {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// We prefer if the pre-maximized region was specified and identical.
|
||||
origReg := have.tag.RegionID == tag.RegionID && tag.RegionID != 0
|
||||
if !beaten && m.origReg != origReg {
|
||||
if m.origReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
regGroupDist, sameGroup := regionGroupDist(have.maxRegion, maxRegion, maxScript, tag.LangID)
|
||||
if !beaten && m.regGroupDist != regGroupDist {
|
||||
if regGroupDist > m.regGroupDist {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
paradigmReg := isParadigmLocale(tag.LangID, have.maxRegion)
|
||||
if !beaten && m.paradigmReg != paradigmReg {
|
||||
if !paradigmReg {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Next we prefer if the pre-maximized script was specified and identical.
|
||||
origScript := have.tag.ScriptID == tag.ScriptID && tag.ScriptID != 0
|
||||
if !beaten && m.origScript != origScript {
|
||||
if m.origScript {
|
||||
return
|
||||
}
|
||||
beaten = true
|
||||
}
|
||||
|
||||
// Update m to the newly found best match.
|
||||
if beaten {
|
||||
m.have = have
|
||||
m.want = tag
|
||||
m.conf = c
|
||||
m.pinnedRegion = maxRegion
|
||||
m.sameRegionGroup = sameGroup
|
||||
m.origLang = origLang
|
||||
m.origReg = origReg
|
||||
m.paradigmReg = paradigmReg
|
||||
m.origScript = origScript
|
||||
m.regGroupDist = regGroupDist
|
||||
}
|
||||
}
|
||||
|
||||
func isParadigmLocale(lang language.Language, r language.Region) bool {
|
||||
for _, e := range paradigmLocales {
|
||||
if language.Language(e[0]) == lang && (r == language.Region(e[1]) || r == language.Region(e[2])) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// regionGroupDist computes the distance between two regions based on their
|
||||
// CLDR grouping.
|
||||
func regionGroupDist(a, b language.Region, script language.Script, lang language.Language) (dist uint8, same bool) {
|
||||
const defaultDistance = 4
|
||||
|
||||
aGroup := uint(regionToGroups[a]) << 1
|
||||
bGroup := uint(regionToGroups[b]) << 1
|
||||
for _, ri := range matchRegion {
|
||||
if language.Language(ri.lang) == lang && (ri.script == 0 || language.Script(ri.script) == script) {
|
||||
group := uint(1 << (ri.group &^ 0x80))
|
||||
if 0x80&ri.group == 0 {
|
||||
if aGroup&bGroup&group != 0 { // Both regions are in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
} else {
|
||||
if (aGroup|bGroup)&group == 0 { // Both regions are not in the group.
|
||||
return ri.distance, ri.distance == defaultDistance
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return defaultDistance, true
|
||||
}
|
||||
|
||||
// equalsRest compares everything except the language.
|
||||
func equalsRest(a, b language.Tag) bool {
|
||||
// TODO: don't include extensions in this comparison. To do this efficiently,
|
||||
// though, we should handle private tags separately.
|
||||
return a.ScriptID == b.ScriptID && a.RegionID == b.RegionID && a.VariantOrPrivateUseTags() == b.VariantOrPrivateUseTags()
|
||||
}
|
||||
|
||||
// isExactEquivalent returns true if canonicalizing the language will not alter
|
||||
// the script or region of a tag.
|
||||
func isExactEquivalent(l language.Language) bool {
|
||||
for _, o := range notEquivalent {
|
||||
if o == l {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
var notEquivalent []language.Language
|
||||
|
||||
func init() {
|
||||
// Create a list of all languages for which canonicalization may alter the
|
||||
// script or region.
|
||||
for _, lm := range language.AliasMap {
|
||||
tag := language.Tag{LangID: language.Language(lm.From)}
|
||||
if tag, _ = canonicalize(All, tag); tag.ScriptID != 0 || tag.RegionID != 0 {
|
||||
notEquivalent = append(notEquivalent, language.Language(lm.From))
|
||||
}
|
||||
}
|
||||
// Maximize undefined regions of paradigm locales.
|
||||
for i, v := range paradigmLocales {
|
||||
t := language.Tag{LangID: language.Language(v[0])}
|
||||
max, _ := t.Maximize()
|
||||
if v[1] == 0 {
|
||||
paradigmLocales[i][1] = uint16(max.RegionID)
|
||||
}
|
||||
if v[2] == 0 {
|
||||
paradigmLocales[i][2] = uint16(max.RegionID)
|
||||
}
|
||||
}
|
||||
}
|
||||
228
vendor/golang.org/x/text/language/parse.go
generated
vendored
Normal file
228
vendor/golang.org/x/text/language/parse.go
generated
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/language"
|
||||
)
|
||||
|
||||
// ValueError is returned by any of the parsing functions when the
|
||||
// input is well-formed but the respective subtag is not recognized
|
||||
// as a valid value.
|
||||
type ValueError interface {
|
||||
error
|
||||
|
||||
// Subtag returns the subtag for which the error occurred.
|
||||
Subtag() string
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the default canonicalization type.
|
||||
func Parse(s string) (t Tag, err error) {
|
||||
return Default.Parse(s)
|
||||
}
|
||||
|
||||
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
|
||||
// failed it returns an error and any part of the tag that could be parsed.
|
||||
// If parsing succeeded but an unknown value was found, it returns
|
||||
// ValueError. The Tag returned in this case is just stripped of the unknown
|
||||
// value. All other values are preserved. It accepts tags in the BCP 47 format
|
||||
// and extensions to this standard defined in
|
||||
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
|
||||
// The resulting tag is canonicalized using the canonicalization type c.
|
||||
func (c CanonType) Parse(s string) (t Tag, err error) {
|
||||
tt, err := language.Parse(s)
|
||||
if err != nil {
|
||||
return makeTag(tt), err
|
||||
}
|
||||
tt, changed := canonicalize(c, tt)
|
||||
if changed {
|
||||
tt.RemakeString()
|
||||
}
|
||||
return makeTag(tt), err
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using the Default CanonType. If one or
|
||||
// more errors are encountered, one of the errors is returned.
|
||||
func Compose(part ...interface{}) (t Tag, err error) {
|
||||
return Default.Compose(part...)
|
||||
}
|
||||
|
||||
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
|
||||
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
|
||||
// Base, Script or Region or slice of type Variant or Extension is passed more
|
||||
// than once, the latter will overwrite the former. Variants and Extensions are
|
||||
// accumulated, but if two extensions of the same type are passed, the latter
|
||||
// will replace the former. For -u extensions, though, the key-type pairs are
|
||||
// added, where later values overwrite older ones. A Tag overwrites all former
|
||||
// values and typically only makes sense as the first argument. The resulting
|
||||
// tag is returned after canonicalizing using CanonType c. If one or more errors
|
||||
// are encountered, one of the errors is returned.
|
||||
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
|
||||
var b language.Builder
|
||||
if err = update(&b, part...); err != nil {
|
||||
return und, err
|
||||
}
|
||||
b.Tag, _ = canonicalize(c, b.Tag)
|
||||
return makeTag(b.Make()), err
|
||||
}
|
||||
|
||||
var errInvalidArgument = errors.New("invalid Extension or Variant")
|
||||
|
||||
func update(b *language.Builder, part ...interface{}) (err error) {
|
||||
for _, x := range part {
|
||||
switch v := x.(type) {
|
||||
case Tag:
|
||||
b.SetTag(v.tag())
|
||||
case Base:
|
||||
b.Tag.LangID = v.langID
|
||||
case Script:
|
||||
b.Tag.ScriptID = v.scriptID
|
||||
case Region:
|
||||
b.Tag.RegionID = v.regionID
|
||||
case Variant:
|
||||
if v.variant == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.AddVariant(v.variant)
|
||||
case Extension:
|
||||
if v.s == "" {
|
||||
err = errInvalidArgument
|
||||
break
|
||||
}
|
||||
b.SetExt(v.s)
|
||||
case []Variant:
|
||||
b.ClearVariants()
|
||||
for _, v := range v {
|
||||
b.AddVariant(v.variant)
|
||||
}
|
||||
case []Extension:
|
||||
b.ClearExtensions()
|
||||
for _, e := range v {
|
||||
b.SetExt(e.s)
|
||||
}
|
||||
// TODO: support parsing of raw strings based on morphology or just extensions?
|
||||
case error:
|
||||
if v != nil {
|
||||
err = v
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
||||
|
||||
// ParseAcceptLanguage parses the contents of an Accept-Language header as
|
||||
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||
// a list of corresponding quality weights. It is more permissive than RFC 2616
|
||||
// and may return non-nil slices even if the input is not valid.
|
||||
// The Tags will be sorted by highest weight first and then by first occurrence.
|
||||
// Tags with a weight of zero will be dropped. An error will be returned if the
|
||||
// input could not be parsed.
|
||||
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
||||
var entry string
|
||||
for s != "" {
|
||||
if entry, s = split(s, ','); entry == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
entry, weight := split(entry, ';')
|
||||
|
||||
// Scan the language.
|
||||
t, err := Parse(entry)
|
||||
if err != nil {
|
||||
id, ok := acceptFallback[entry]
|
||||
if !ok {
|
||||
return nil, nil, err
|
||||
}
|
||||
t = makeTag(language.Tag{LangID: id})
|
||||
}
|
||||
|
||||
// Scan the optional weight.
|
||||
w := 1.0
|
||||
if weight != "" {
|
||||
weight = consume(weight, 'q')
|
||||
weight = consume(weight, '=')
|
||||
// consume returns the empty string when a token could not be
|
||||
// consumed, resulting in an error for ParseFloat.
|
||||
if w, err = strconv.ParseFloat(weight, 32); err != nil {
|
||||
return nil, nil, errInvalidWeight
|
||||
}
|
||||
// Drop tags with a quality weight of 0.
|
||||
if w <= 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
tag = append(tag, t)
|
||||
q = append(q, float32(w))
|
||||
}
|
||||
sortStable(&tagSort{tag, q})
|
||||
return tag, q, nil
|
||||
}
|
||||
|
||||
// consume removes a leading token c from s and returns the result or the empty
|
||||
// string if there is no such token.
|
||||
func consume(s string, c byte) string {
|
||||
if s == "" || s[0] != c {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(s[1:])
|
||||
}
|
||||
|
||||
func split(s string, c byte) (head, tail string) {
|
||||
if i := strings.IndexByte(s, c); i >= 0 {
|
||||
return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
|
||||
}
|
||||
return strings.TrimSpace(s), ""
|
||||
}
|
||||
|
||||
// Add hack mapping to deal with a small number of cases that occur
|
||||
// in Accept-Language (with reasonable frequency).
|
||||
var acceptFallback = map[string]language.Language{
|
||||
"english": _en,
|
||||
"deutsch": _de,
|
||||
"italian": _it,
|
||||
"french": _fr,
|
||||
"*": _mul, // defined in the spec to match all languages.
|
||||
}
|
||||
|
||||
type tagSort struct {
|
||||
tag []Tag
|
||||
q []float32
|
||||
}
|
||||
|
||||
func (s *tagSort) Len() int {
|
||||
return len(s.q)
|
||||
}
|
||||
|
||||
func (s *tagSort) Less(i, j int) bool {
|
||||
return s.q[i] > s.q[j]
|
||||
}
|
||||
|
||||
func (s *tagSort) Swap(i, j int) {
|
||||
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
|
||||
s.q[i], s.q[j] = s.q[j], s.q[i]
|
||||
}
|
||||
298
vendor/golang.org/x/text/language/tables.go
generated
vendored
Normal file
298
vendor/golang.org/x/text/language/tables.go
generated
vendored
Normal file
@@ -0,0 +1,298 @@
|
||||
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
|
||||
|
||||
package language
|
||||
|
||||
// CLDRVersion is the CLDR version from which the tables in this package are derived.
|
||||
const CLDRVersion = "32"
|
||||
|
||||
const (
|
||||
_de = 269
|
||||
_en = 313
|
||||
_fr = 350
|
||||
_it = 505
|
||||
_mo = 784
|
||||
_no = 879
|
||||
_nb = 839
|
||||
_pt = 960
|
||||
_sh = 1031
|
||||
_mul = 806
|
||||
_und = 0
|
||||
)
|
||||
const (
|
||||
_001 = 1
|
||||
_419 = 31
|
||||
_BR = 65
|
||||
_CA = 73
|
||||
_ES = 110
|
||||
_GB = 123
|
||||
_MD = 188
|
||||
_PT = 238
|
||||
_UK = 306
|
||||
_US = 309
|
||||
_ZZ = 357
|
||||
_XA = 323
|
||||
_XC = 325
|
||||
_XK = 333
|
||||
)
|
||||
const (
|
||||
_Latn = 87
|
||||
_Hani = 54
|
||||
_Hans = 56
|
||||
_Hant = 57
|
||||
_Qaaa = 139
|
||||
_Qaai = 147
|
||||
_Qabx = 188
|
||||
_Zinh = 236
|
||||
_Zyyy = 241
|
||||
_Zzzz = 242
|
||||
)
|
||||
|
||||
var regionToGroups = []uint8{ // 357 elements
|
||||
// Entry 0 - 3F
|
||||
0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x04,
|
||||
// Entry 40 - 7F
|
||||
0x04, 0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x00, 0x08,
|
||||
0x00, 0x04, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00,
|
||||
// Entry 80 - BF
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x04, 0x01, 0x00, 0x04, 0x02, 0x00, 0x04,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x08, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||
// Entry C0 - FF
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x01,
|
||||
0x04, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
// Entry 100 - 13F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00,
|
||||
0x00, 0x00, 0x04, 0x04, 0x00, 0x00, 0x00, 0x04,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x01, 0x00, 0x05, 0x04, 0x00,
|
||||
0x00, 0x04, 0x00, 0x04, 0x04, 0x05, 0x00, 0x00,
|
||||
// Entry 140 - 17F
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
} // Size: 381 bytes
|
||||
|
||||
var paradigmLocales = [][3]uint16{ // 3 elements
|
||||
0: [3]uint16{0x139, 0x0, 0x7b},
|
||||
1: [3]uint16{0x13e, 0x0, 0x1f},
|
||||
2: [3]uint16{0x3c0, 0x41, 0xee},
|
||||
} // Size: 42 bytes
|
||||
|
||||
type mutualIntelligibility struct {
|
||||
want uint16
|
||||
have uint16
|
||||
distance uint8
|
||||
oneway bool
|
||||
}
|
||||
type scriptIntelligibility struct {
|
||||
wantLang uint16
|
||||
haveLang uint16
|
||||
wantScript uint8
|
||||
haveScript uint8
|
||||
distance uint8
|
||||
}
|
||||
type regionIntelligibility struct {
|
||||
lang uint16
|
||||
script uint8
|
||||
group uint8
|
||||
distance uint8
|
||||
}
|
||||
|
||||
// matchLang holds pairs of langIDs of base languages that are typically
|
||||
// mutually intelligible. Each pair is associated with a confidence and
|
||||
// whether the intelligibility goes one or both ways.
|
||||
var matchLang = []mutualIntelligibility{ // 113 elements
|
||||
0: {want: 0x1d1, have: 0xb7, distance: 0x4, oneway: false},
|
||||
1: {want: 0x407, have: 0xb7, distance: 0x4, oneway: false},
|
||||
2: {want: 0x407, have: 0x1d1, distance: 0x4, oneway: false},
|
||||
3: {want: 0x407, have: 0x432, distance: 0x4, oneway: false},
|
||||
4: {want: 0x43a, have: 0x1, distance: 0x4, oneway: false},
|
||||
5: {want: 0x1a3, have: 0x10d, distance: 0x4, oneway: true},
|
||||
6: {want: 0x295, have: 0x10d, distance: 0x4, oneway: true},
|
||||
7: {want: 0x101, have: 0x36f, distance: 0x8, oneway: false},
|
||||
8: {want: 0x101, have: 0x347, distance: 0x8, oneway: false},
|
||||
9: {want: 0x5, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
10: {want: 0xd, have: 0x139, distance: 0xa, oneway: true},
|
||||
11: {want: 0x16, have: 0x367, distance: 0xa, oneway: true},
|
||||
12: {want: 0x21, have: 0x139, distance: 0xa, oneway: true},
|
||||
13: {want: 0x56, have: 0x13e, distance: 0xa, oneway: true},
|
||||
14: {want: 0x58, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
15: {want: 0x71, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
16: {want: 0x75, have: 0x139, distance: 0xa, oneway: true},
|
||||
17: {want: 0x82, have: 0x1be, distance: 0xa, oneway: true},
|
||||
18: {want: 0xa5, have: 0x139, distance: 0xa, oneway: true},
|
||||
19: {want: 0xb2, have: 0x15e, distance: 0xa, oneway: true},
|
||||
20: {want: 0xdd, have: 0x153, distance: 0xa, oneway: true},
|
||||
21: {want: 0xe5, have: 0x139, distance: 0xa, oneway: true},
|
||||
22: {want: 0xe9, have: 0x3a, distance: 0xa, oneway: true},
|
||||
23: {want: 0xf0, have: 0x15e, distance: 0xa, oneway: true},
|
||||
24: {want: 0xf9, have: 0x15e, distance: 0xa, oneway: true},
|
||||
25: {want: 0x100, have: 0x139, distance: 0xa, oneway: true},
|
||||
26: {want: 0x130, have: 0x139, distance: 0xa, oneway: true},
|
||||
27: {want: 0x13c, have: 0x139, distance: 0xa, oneway: true},
|
||||
28: {want: 0x140, have: 0x151, distance: 0xa, oneway: true},
|
||||
29: {want: 0x145, have: 0x13e, distance: 0xa, oneway: true},
|
||||
30: {want: 0x158, have: 0x101, distance: 0xa, oneway: true},
|
||||
31: {want: 0x16d, have: 0x367, distance: 0xa, oneway: true},
|
||||
32: {want: 0x16e, have: 0x139, distance: 0xa, oneway: true},
|
||||
33: {want: 0x16f, have: 0x139, distance: 0xa, oneway: true},
|
||||
34: {want: 0x17e, have: 0x139, distance: 0xa, oneway: true},
|
||||
35: {want: 0x190, have: 0x13e, distance: 0xa, oneway: true},
|
||||
36: {want: 0x194, have: 0x13e, distance: 0xa, oneway: true},
|
||||
37: {want: 0x1a4, have: 0x1be, distance: 0xa, oneway: true},
|
||||
38: {want: 0x1b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
39: {want: 0x1b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
40: {want: 0x1d4, have: 0x15e, distance: 0xa, oneway: true},
|
||||
41: {want: 0x1d7, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
42: {want: 0x1d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
43: {want: 0x1e7, have: 0x139, distance: 0xa, oneway: true},
|
||||
44: {want: 0x1f8, have: 0x139, distance: 0xa, oneway: true},
|
||||
45: {want: 0x20e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
46: {want: 0x210, have: 0x139, distance: 0xa, oneway: true},
|
||||
47: {want: 0x22d, have: 0x15e, distance: 0xa, oneway: true},
|
||||
48: {want: 0x242, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
49: {want: 0x24a, have: 0x139, distance: 0xa, oneway: true},
|
||||
50: {want: 0x251, have: 0x139, distance: 0xa, oneway: true},
|
||||
51: {want: 0x265, have: 0x139, distance: 0xa, oneway: true},
|
||||
52: {want: 0x274, have: 0x48a, distance: 0xa, oneway: true},
|
||||
53: {want: 0x28a, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
54: {want: 0x28e, have: 0x1f9, distance: 0xa, oneway: true},
|
||||
55: {want: 0x2a3, have: 0x139, distance: 0xa, oneway: true},
|
||||
56: {want: 0x2b5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
57: {want: 0x2b8, have: 0x139, distance: 0xa, oneway: true},
|
||||
58: {want: 0x2be, have: 0x139, distance: 0xa, oneway: true},
|
||||
59: {want: 0x2c3, have: 0x15e, distance: 0xa, oneway: true},
|
||||
60: {want: 0x2ed, have: 0x139, distance: 0xa, oneway: true},
|
||||
61: {want: 0x2f1, have: 0x15e, distance: 0xa, oneway: true},
|
||||
62: {want: 0x2fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
63: {want: 0x2ff, have: 0x7e, distance: 0xa, oneway: true},
|
||||
64: {want: 0x304, have: 0x139, distance: 0xa, oneway: true},
|
||||
65: {want: 0x30b, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
66: {want: 0x31b, have: 0x1be, distance: 0xa, oneway: true},
|
||||
67: {want: 0x31f, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
68: {want: 0x320, have: 0x139, distance: 0xa, oneway: true},
|
||||
69: {want: 0x331, have: 0x139, distance: 0xa, oneway: true},
|
||||
70: {want: 0x351, have: 0x139, distance: 0xa, oneway: true},
|
||||
71: {want: 0x36a, have: 0x347, distance: 0xa, oneway: false},
|
||||
72: {want: 0x36a, have: 0x36f, distance: 0xa, oneway: true},
|
||||
73: {want: 0x37a, have: 0x139, distance: 0xa, oneway: true},
|
||||
74: {want: 0x387, have: 0x139, distance: 0xa, oneway: true},
|
||||
75: {want: 0x389, have: 0x139, distance: 0xa, oneway: true},
|
||||
76: {want: 0x38b, have: 0x15e, distance: 0xa, oneway: true},
|
||||
77: {want: 0x390, have: 0x139, distance: 0xa, oneway: true},
|
||||
78: {want: 0x395, have: 0x139, distance: 0xa, oneway: true},
|
||||
79: {want: 0x39d, have: 0x139, distance: 0xa, oneway: true},
|
||||
80: {want: 0x3a5, have: 0x139, distance: 0xa, oneway: true},
|
||||
81: {want: 0x3be, have: 0x139, distance: 0xa, oneway: true},
|
||||
82: {want: 0x3c4, have: 0x13e, distance: 0xa, oneway: true},
|
||||
83: {want: 0x3d4, have: 0x10d, distance: 0xa, oneway: true},
|
||||
84: {want: 0x3d9, have: 0x139, distance: 0xa, oneway: true},
|
||||
85: {want: 0x3e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
86: {want: 0x3e9, have: 0x1be, distance: 0xa, oneway: true},
|
||||
87: {want: 0x3fa, have: 0x139, distance: 0xa, oneway: true},
|
||||
88: {want: 0x40c, have: 0x139, distance: 0xa, oneway: true},
|
||||
89: {want: 0x423, have: 0x139, distance: 0xa, oneway: true},
|
||||
90: {want: 0x429, have: 0x139, distance: 0xa, oneway: true},
|
||||
91: {want: 0x431, have: 0x139, distance: 0xa, oneway: true},
|
||||
92: {want: 0x43b, have: 0x139, distance: 0xa, oneway: true},
|
||||
93: {want: 0x43e, have: 0x1e1, distance: 0xa, oneway: true},
|
||||
94: {want: 0x445, have: 0x139, distance: 0xa, oneway: true},
|
||||
95: {want: 0x450, have: 0x139, distance: 0xa, oneway: true},
|
||||
96: {want: 0x461, have: 0x139, distance: 0xa, oneway: true},
|
||||
97: {want: 0x467, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
98: {want: 0x46f, have: 0x139, distance: 0xa, oneway: true},
|
||||
99: {want: 0x476, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
100: {want: 0x3883, have: 0x139, distance: 0xa, oneway: true},
|
||||
101: {want: 0x480, have: 0x139, distance: 0xa, oneway: true},
|
||||
102: {want: 0x482, have: 0x139, distance: 0xa, oneway: true},
|
||||
103: {want: 0x494, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
104: {want: 0x49d, have: 0x139, distance: 0xa, oneway: true},
|
||||
105: {want: 0x4ac, have: 0x529, distance: 0xa, oneway: true},
|
||||
106: {want: 0x4b4, have: 0x139, distance: 0xa, oneway: true},
|
||||
107: {want: 0x4bc, have: 0x3e2, distance: 0xa, oneway: true},
|
||||
108: {want: 0x4e5, have: 0x15e, distance: 0xa, oneway: true},
|
||||
109: {want: 0x4f2, have: 0x139, distance: 0xa, oneway: true},
|
||||
110: {want: 0x512, have: 0x139, distance: 0xa, oneway: true},
|
||||
111: {want: 0x518, have: 0x139, distance: 0xa, oneway: true},
|
||||
112: {want: 0x52f, have: 0x139, distance: 0xa, oneway: true},
|
||||
} // Size: 702 bytes
|
||||
|
||||
// matchScript holds pairs of scriptIDs where readers of one script
|
||||
// can typically also read the other. Each is associated with a confidence.
|
||||
var matchScript = []scriptIntelligibility{ // 26 elements
|
||||
0: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x57, haveScript: 0x1f, distance: 0x5},
|
||||
1: {wantLang: 0x432, haveLang: 0x432, wantScript: 0x1f, haveScript: 0x57, distance: 0x5},
|
||||
2: {wantLang: 0x58, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
3: {wantLang: 0xa5, haveLang: 0x139, wantScript: 0xe, haveScript: 0x57, distance: 0xa},
|
||||
4: {wantLang: 0x1d7, haveLang: 0x3e2, wantScript: 0x8, haveScript: 0x1f, distance: 0xa},
|
||||
5: {wantLang: 0x210, haveLang: 0x139, wantScript: 0x2b, haveScript: 0x57, distance: 0xa},
|
||||
6: {wantLang: 0x24a, haveLang: 0x139, wantScript: 0x4b, haveScript: 0x57, distance: 0xa},
|
||||
7: {wantLang: 0x251, haveLang: 0x139, wantScript: 0x4f, haveScript: 0x57, distance: 0xa},
|
||||
8: {wantLang: 0x2b8, haveLang: 0x139, wantScript: 0x54, haveScript: 0x57, distance: 0xa},
|
||||
9: {wantLang: 0x304, haveLang: 0x139, wantScript: 0x6b, haveScript: 0x57, distance: 0xa},
|
||||
10: {wantLang: 0x331, haveLang: 0x139, wantScript: 0x72, haveScript: 0x57, distance: 0xa},
|
||||
11: {wantLang: 0x351, haveLang: 0x139, wantScript: 0x21, haveScript: 0x57, distance: 0xa},
|
||||
12: {wantLang: 0x395, haveLang: 0x139, wantScript: 0x7d, haveScript: 0x57, distance: 0xa},
|
||||
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x33, haveScript: 0x57, distance: 0xa},
|
||||
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xca, haveScript: 0x57, distance: 0xa},
|
||||
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xd7, haveScript: 0x57, distance: 0xa},
|
||||
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xda, haveScript: 0x57, distance: 0xa},
|
||||
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x29, haveScript: 0x57, distance: 0xa},
|
||||
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x57, distance: 0xa},
|
||||
22: {wantLang: 0x4bc, haveLang: 0x3e2, wantScript: 0x57, haveScript: 0x1f, distance: 0xa},
|
||||
23: {wantLang: 0x512, haveLang: 0x139, wantScript: 0x3b, haveScript: 0x57, distance: 0xa},
|
||||
24: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x38, haveScript: 0x39, distance: 0xf},
|
||||
25: {wantLang: 0x529, haveLang: 0x529, wantScript: 0x39, haveScript: 0x38, distance: 0x13},
|
||||
} // Size: 232 bytes
|
||||
|
||||
var matchRegion = []regionIntelligibility{ // 15 elements
|
||||
0: {lang: 0x3a, script: 0x0, group: 0x4, distance: 0x4},
|
||||
1: {lang: 0x3a, script: 0x0, group: 0x84, distance: 0x4},
|
||||
2: {lang: 0x139, script: 0x0, group: 0x1, distance: 0x4},
|
||||
3: {lang: 0x139, script: 0x0, group: 0x81, distance: 0x4},
|
||||
4: {lang: 0x13e, script: 0x0, group: 0x3, distance: 0x4},
|
||||
5: {lang: 0x13e, script: 0x0, group: 0x83, distance: 0x4},
|
||||
6: {lang: 0x3c0, script: 0x0, group: 0x3, distance: 0x4},
|
||||
7: {lang: 0x3c0, script: 0x0, group: 0x83, distance: 0x4},
|
||||
8: {lang: 0x529, script: 0x39, group: 0x2, distance: 0x4},
|
||||
9: {lang: 0x529, script: 0x39, group: 0x82, distance: 0x4},
|
||||
10: {lang: 0x3a, script: 0x0, group: 0x80, distance: 0x5},
|
||||
11: {lang: 0x139, script: 0x0, group: 0x80, distance: 0x5},
|
||||
12: {lang: 0x13e, script: 0x0, group: 0x80, distance: 0x5},
|
||||
13: {lang: 0x3c0, script: 0x0, group: 0x80, distance: 0x5},
|
||||
14: {lang: 0x529, script: 0x39, group: 0x80, distance: 0x5},
|
||||
} // Size: 114 bytes
|
||||
|
||||
// Total table size 1471 bytes (1KiB); checksum: 4CB1CD46
|
||||
145
vendor/golang.org/x/text/language/tags.go
generated
vendored
Normal file
145
vendor/golang.org/x/text/language/tags.go
generated
vendored
Normal file
@@ -0,0 +1,145 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package language
|
||||
|
||||
import "golang.org/x/text/internal/language/compact"
|
||||
|
||||
// TODO: Various sets of commonly use tags and regions.
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func MustParse(s string) Tag {
|
||||
t, err := Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParse is like Parse, but panics if the given BCP 47 tag cannot be parsed.
|
||||
// It simplifies safe initialization of Tag values.
|
||||
func (c CanonType) MustParse(s string) Tag {
|
||||
t, err := c.Parse(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return t
|
||||
}
|
||||
|
||||
// MustParseBase is like ParseBase, but panics if the given base cannot be parsed.
|
||||
// It simplifies safe initialization of Base values.
|
||||
func MustParseBase(s string) Base {
|
||||
b, err := ParseBase(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// MustParseScript is like ParseScript, but panics if the given script cannot be
|
||||
// parsed. It simplifies safe initialization of Script values.
|
||||
func MustParseScript(s string) Script {
|
||||
scr, err := ParseScript(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return scr
|
||||
}
|
||||
|
||||
// MustParseRegion is like ParseRegion, but panics if the given region cannot be
|
||||
// parsed. It simplifies safe initialization of Region values.
|
||||
func MustParseRegion(s string) Region {
|
||||
r, err := ParseRegion(s)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return r
|
||||
}
|
||||
|
||||
var (
|
||||
und = Tag{}
|
||||
|
||||
Und Tag = Tag{}
|
||||
|
||||
Afrikaans Tag = Tag(compact.Afrikaans)
|
||||
Amharic Tag = Tag(compact.Amharic)
|
||||
Arabic Tag = Tag(compact.Arabic)
|
||||
ModernStandardArabic Tag = Tag(compact.ModernStandardArabic)
|
||||
Azerbaijani Tag = Tag(compact.Azerbaijani)
|
||||
Bulgarian Tag = Tag(compact.Bulgarian)
|
||||
Bengali Tag = Tag(compact.Bengali)
|
||||
Catalan Tag = Tag(compact.Catalan)
|
||||
Czech Tag = Tag(compact.Czech)
|
||||
Danish Tag = Tag(compact.Danish)
|
||||
German Tag = Tag(compact.German)
|
||||
Greek Tag = Tag(compact.Greek)
|
||||
English Tag = Tag(compact.English)
|
||||
AmericanEnglish Tag = Tag(compact.AmericanEnglish)
|
||||
BritishEnglish Tag = Tag(compact.BritishEnglish)
|
||||
Spanish Tag = Tag(compact.Spanish)
|
||||
EuropeanSpanish Tag = Tag(compact.EuropeanSpanish)
|
||||
LatinAmericanSpanish Tag = Tag(compact.LatinAmericanSpanish)
|
||||
Estonian Tag = Tag(compact.Estonian)
|
||||
Persian Tag = Tag(compact.Persian)
|
||||
Finnish Tag = Tag(compact.Finnish)
|
||||
Filipino Tag = Tag(compact.Filipino)
|
||||
French Tag = Tag(compact.French)
|
||||
CanadianFrench Tag = Tag(compact.CanadianFrench)
|
||||
Gujarati Tag = Tag(compact.Gujarati)
|
||||
Hebrew Tag = Tag(compact.Hebrew)
|
||||
Hindi Tag = Tag(compact.Hindi)
|
||||
Croatian Tag = Tag(compact.Croatian)
|
||||
Hungarian Tag = Tag(compact.Hungarian)
|
||||
Armenian Tag = Tag(compact.Armenian)
|
||||
Indonesian Tag = Tag(compact.Indonesian)
|
||||
Icelandic Tag = Tag(compact.Icelandic)
|
||||
Italian Tag = Tag(compact.Italian)
|
||||
Japanese Tag = Tag(compact.Japanese)
|
||||
Georgian Tag = Tag(compact.Georgian)
|
||||
Kazakh Tag = Tag(compact.Kazakh)
|
||||
Khmer Tag = Tag(compact.Khmer)
|
||||
Kannada Tag = Tag(compact.Kannada)
|
||||
Korean Tag = Tag(compact.Korean)
|
||||
Kirghiz Tag = Tag(compact.Kirghiz)
|
||||
Lao Tag = Tag(compact.Lao)
|
||||
Lithuanian Tag = Tag(compact.Lithuanian)
|
||||
Latvian Tag = Tag(compact.Latvian)
|
||||
Macedonian Tag = Tag(compact.Macedonian)
|
||||
Malayalam Tag = Tag(compact.Malayalam)
|
||||
Mongolian Tag = Tag(compact.Mongolian)
|
||||
Marathi Tag = Tag(compact.Marathi)
|
||||
Malay Tag = Tag(compact.Malay)
|
||||
Burmese Tag = Tag(compact.Burmese)
|
||||
Nepali Tag = Tag(compact.Nepali)
|
||||
Dutch Tag = Tag(compact.Dutch)
|
||||
Norwegian Tag = Tag(compact.Norwegian)
|
||||
Punjabi Tag = Tag(compact.Punjabi)
|
||||
Polish Tag = Tag(compact.Polish)
|
||||
Portuguese Tag = Tag(compact.Portuguese)
|
||||
BrazilianPortuguese Tag = Tag(compact.BrazilianPortuguese)
|
||||
EuropeanPortuguese Tag = Tag(compact.EuropeanPortuguese)
|
||||
Romanian Tag = Tag(compact.Romanian)
|
||||
Russian Tag = Tag(compact.Russian)
|
||||
Sinhala Tag = Tag(compact.Sinhala)
|
||||
Slovak Tag = Tag(compact.Slovak)
|
||||
Slovenian Tag = Tag(compact.Slovenian)
|
||||
Albanian Tag = Tag(compact.Albanian)
|
||||
Serbian Tag = Tag(compact.Serbian)
|
||||
SerbianLatin Tag = Tag(compact.SerbianLatin)
|
||||
Swedish Tag = Tag(compact.Swedish)
|
||||
Swahili Tag = Tag(compact.Swahili)
|
||||
Tamil Tag = Tag(compact.Tamil)
|
||||
Telugu Tag = Tag(compact.Telugu)
|
||||
Thai Tag = Tag(compact.Thai)
|
||||
Turkish Tag = Tag(compact.Turkish)
|
||||
Ukrainian Tag = Tag(compact.Ukrainian)
|
||||
Urdu Tag = Tag(compact.Urdu)
|
||||
Uzbek Tag = Tag(compact.Uzbek)
|
||||
Vietnamese Tag = Tag(compact.Vietnamese)
|
||||
Chinese Tag = Tag(compact.Chinese)
|
||||
SimplifiedChinese Tag = Tag(compact.SimplifiedChinese)
|
||||
TraditionalChinese Tag = Tag(compact.TraditionalChinese)
|
||||
Zulu Tag = Tag(compact.Zulu)
|
||||
)
|
||||
187
vendor/golang.org/x/text/runes/cond.go
generated
vendored
Normal file
187
vendor/golang.org/x/text/runes/cond.go
generated
vendored
Normal file
@@ -0,0 +1,187 @@
|
||||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package runes
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is.
|
||||
// This is done for various reasons:
|
||||
// - To retain the semantics of the Nop transformer: if input is passed to a Nop
|
||||
// one would expect it to be unchanged.
|
||||
// - It would be very expensive to pass a converted RuneError to a transformer:
|
||||
// a transformer might need more source bytes after RuneError, meaning that
|
||||
// the only way to pass it safely is to create a new buffer and manage the
|
||||
// intermingling of RuneErrors and normal input.
|
||||
// - Many transformers leave ill-formed UTF-8 as is, so this is not
|
||||
// inconsistent. Generally ill-formed UTF-8 is only replaced if it is a
|
||||
// logical consequence of the operation (as for Map) or if it otherwise would
|
||||
// pose security concerns (as for Remove).
|
||||
// - An alternative would be to return an error on ill-formed UTF-8, but this
|
||||
// would be inconsistent with other operations.
|
||||
|
||||
// If returns a transformer that applies tIn to consecutive runes for which
|
||||
// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset
|
||||
// is called on tIn and tNotIn at the start of each run. A Nop transformer will
|
||||
// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated
|
||||
// to RuneError to determine which transformer to apply, but is passed as is to
|
||||
// the respective transformer.
|
||||
func If(s Set, tIn, tNotIn transform.Transformer) Transformer {
|
||||
if tIn == nil && tNotIn == nil {
|
||||
return Transformer{transform.Nop}
|
||||
}
|
||||
if tIn == nil {
|
||||
tIn = transform.Nop
|
||||
}
|
||||
if tNotIn == nil {
|
||||
tNotIn = transform.Nop
|
||||
}
|
||||
sIn, ok := tIn.(transform.SpanningTransformer)
|
||||
if !ok {
|
||||
sIn = dummySpan{tIn}
|
||||
}
|
||||
sNotIn, ok := tNotIn.(transform.SpanningTransformer)
|
||||
if !ok {
|
||||
sNotIn = dummySpan{tNotIn}
|
||||
}
|
||||
|
||||
a := &cond{
|
||||
tIn: sIn,
|
||||
tNotIn: sNotIn,
|
||||
f: s.Contains,
|
||||
}
|
||||
a.Reset()
|
||||
return Transformer{a}
|
||||
}
|
||||
|
||||
type dummySpan struct{ transform.Transformer }
|
||||
|
||||
func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
return 0, transform.ErrEndOfSpan
|
||||
}
|
||||
|
||||
type cond struct {
|
||||
tIn, tNotIn transform.SpanningTransformer
|
||||
f func(rune) bool
|
||||
check func(rune) bool // current check to perform
|
||||
t transform.SpanningTransformer // current transformer to use
|
||||
}
|
||||
|
||||
// Reset implements transform.Transformer.
|
||||
func (t *cond) Reset() {
|
||||
t.check = t.is
|
||||
t.t = t.tIn
|
||||
t.t.Reset() // notIn will be reset on first usage.
|
||||
}
|
||||
|
||||
func (t *cond) is(r rune) bool {
|
||||
if t.f(r) {
|
||||
return true
|
||||
}
|
||||
t.check = t.isNot
|
||||
t.t = t.tNotIn
|
||||
t.tNotIn.Reset()
|
||||
return false
|
||||
}
|
||||
|
||||
func (t *cond) isNot(r rune) bool {
|
||||
if !t.f(r) {
|
||||
return true
|
||||
}
|
||||
t.check = t.is
|
||||
t.t = t.tIn
|
||||
t.tIn.Reset()
|
||||
return false
|
||||
}
|
||||
|
||||
// This implementation of Span doesn't help all too much, but it needs to be
|
||||
// there to satisfy this package's Transformer interface.
|
||||
// TODO: there are certainly room for improvements, though. For example, if
|
||||
// t.t == transform.Nop (which will a common occurrence) it will save a bundle
|
||||
// to special-case that loop.
|
||||
func (t *cond) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
p := 0
|
||||
for n < len(src) && err == nil {
|
||||
// Don't process too much at a time as the Spanner that will be
|
||||
// called on this block may terminate early.
|
||||
const maxChunk = 4096
|
||||
max := len(src)
|
||||
if v := n + maxChunk; v < max {
|
||||
max = v
|
||||
}
|
||||
atEnd := false
|
||||
size := 0
|
||||
current := t.t
|
||||
for ; p < max; p += size {
|
||||
r := rune(src[p])
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
|
||||
if !atEOF && !utf8.FullRune(src[p:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if !t.check(r) {
|
||||
// The next rune will be the start of a new run.
|
||||
atEnd = true
|
||||
break
|
||||
}
|
||||
}
|
||||
n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src)))
|
||||
n += n2
|
||||
if err2 != nil {
|
||||
return n, err2
|
||||
}
|
||||
// At this point either err != nil or t.check will pass for the rune at p.
|
||||
p = n + size
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
p := 0
|
||||
for nSrc < len(src) && err == nil {
|
||||
// Don't process too much at a time, as the work might be wasted if the
|
||||
// destination buffer isn't large enough to hold the result or a
|
||||
// transform returns an error early.
|
||||
const maxChunk = 4096
|
||||
max := len(src)
|
||||
if n := nSrc + maxChunk; n < len(src) {
|
||||
max = n
|
||||
}
|
||||
atEnd := false
|
||||
size := 0
|
||||
current := t.t
|
||||
for ; p < max; p += size {
|
||||
r := rune(src[p])
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[p:]); size == 1 {
|
||||
if !atEOF && !utf8.FullRune(src[p:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if !t.check(r) {
|
||||
// The next rune will be the start of a new run.
|
||||
atEnd = true
|
||||
break
|
||||
}
|
||||
}
|
||||
nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src)))
|
||||
nDst += nDst2
|
||||
nSrc += nSrc2
|
||||
if err2 != nil {
|
||||
return nDst, nSrc, err2
|
||||
}
|
||||
// At this point either err != nil or t.check will pass for the rune at p.
|
||||
p = nSrc + size
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
355
vendor/golang.org/x/text/runes/runes.go
generated
vendored
Normal file
355
vendor/golang.org/x/text/runes/runes.go
generated
vendored
Normal file
@@ -0,0 +1,355 @@
|
||||
// Copyright 2014 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package runes provide transforms for UTF-8 encoded text.
|
||||
package runes // import "golang.org/x/text/runes"
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// A Set is a collection of runes.
|
||||
type Set interface {
|
||||
// Contains returns true if r is contained in the set.
|
||||
Contains(r rune) bool
|
||||
}
|
||||
|
||||
type setFunc func(rune) bool
|
||||
|
||||
func (s setFunc) Contains(r rune) bool {
|
||||
return s(r)
|
||||
}
|
||||
|
||||
// Note: using funcs here instead of wrapping types result in cleaner
|
||||
// documentation and a smaller API.
|
||||
|
||||
// In creates a Set with a Contains method that returns true for all runes in
|
||||
// the given RangeTable.
|
||||
func In(rt *unicode.RangeTable) Set {
|
||||
return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
|
||||
}
|
||||
|
||||
// In creates a Set with a Contains method that returns true for all runes not
|
||||
// in the given RangeTable.
|
||||
func NotIn(rt *unicode.RangeTable) Set {
|
||||
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
|
||||
}
|
||||
|
||||
// Predicate creates a Set with a Contains method that returns f(r).
|
||||
func Predicate(f func(rune) bool) Set {
|
||||
return setFunc(f)
|
||||
}
|
||||
|
||||
// Transformer implements the transform.Transformer interface.
|
||||
type Transformer struct {
|
||||
t transform.SpanningTransformer
|
||||
}
|
||||
|
||||
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
return t.t.Transform(dst, src, atEOF)
|
||||
}
|
||||
|
||||
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) {
|
||||
return t.t.Span(b, atEOF)
|
||||
}
|
||||
|
||||
func (t Transformer) Reset() { t.t.Reset() }
|
||||
|
||||
// Bytes returns a new byte slice with the result of converting b using t. It
|
||||
// calls Reset on t. It returns nil if any error was found. This can only happen
|
||||
// if an error-producing Transformer is passed to If.
|
||||
func (t Transformer) Bytes(b []byte) []byte {
|
||||
b, _, err := transform.Bytes(t, b)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// String returns a string with the result of converting s using t. It calls
|
||||
// Reset on t. It returns the empty string if any error was found. This can only
|
||||
// happen if an error-producing Transformer is passed to If.
|
||||
func (t Transformer) String(s string) string {
|
||||
s, _, err := transform.String(t, s)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// - Copy: copying strings and bytes in whole-rune units.
|
||||
// - Validation (maybe)
|
||||
// - Well-formed-ness (maybe)
|
||||
|
||||
const runeErrorString = string(utf8.RuneError)
|
||||
|
||||
// Remove returns a Transformer that removes runes r for which s.Contains(r).
|
||||
// Illegal input bytes are replaced by RuneError before being passed to f.
|
||||
func Remove(s Set) Transformer {
|
||||
if f, ok := s.(setFunc); ok {
|
||||
// This little trick cuts the running time of BenchmarkRemove for sets
|
||||
// created by Predicate roughly in half.
|
||||
// TODO: special-case RangeTables as well.
|
||||
return Transformer{remove(f)}
|
||||
}
|
||||
return Transformer{remove(s.Contains)}
|
||||
}
|
||||
|
||||
// TODO: remove transform.RemoveFunc.
|
||||
|
||||
type remove func(r rune) bool
|
||||
|
||||
func (remove) Reset() {}
|
||||
|
||||
// Span implements transform.Spanner.
|
||||
func (t remove) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for r, size := rune(0), 0; n < len(src); {
|
||||
if r = rune(src[n]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
}
|
||||
break
|
||||
}
|
||||
if t(r) {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
n += size
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Transform implements transform.Transformer.
|
||||
func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for r, size := rune(0), 0; nSrc < len(src); {
|
||||
if r = rune(src[nSrc]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
// We replace illegal bytes with RuneError. Not doing so might
|
||||
// otherwise turn a sequence of invalid UTF-8 into valid UTF-8.
|
||||
// The resulting byte sequence may subsequently contain runes
|
||||
// for which t(r) is true that were passed unnoticed.
|
||||
if !t(utf8.RuneError) {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
}
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
if t(r) {
|
||||
nSrc += size
|
||||
continue
|
||||
}
|
||||
if nDst+size > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < size; i++ {
|
||||
dst[nDst] = src[nSrc]
|
||||
nDst++
|
||||
nSrc++
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Map returns a Transformer that maps the runes in the input using the given
|
||||
// mapping. Illegal bytes in the input are converted to utf8.RuneError before
|
||||
// being passed to the mapping func.
|
||||
func Map(mapping func(rune) rune) Transformer {
|
||||
return Transformer{mapper(mapping)}
|
||||
}
|
||||
|
||||
type mapper func(rune) rune
|
||||
|
||||
func (mapper) Reset() {}
|
||||
|
||||
// Span implements transform.Spanner.
|
||||
func (t mapper) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for r, size := rune(0), 0; n < len(src); n += size {
|
||||
if r = rune(src[n]); r < utf8.RuneSelf {
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[n:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = transform.ErrEndOfSpan
|
||||
}
|
||||
break
|
||||
}
|
||||
if t(r) != r {
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Transform implements transform.Transformer.
|
||||
func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var replacement rune
|
||||
var b [utf8.UTFMax]byte
|
||||
|
||||
for r, size := rune(0), 0; nSrc < len(src); {
|
||||
if r = rune(src[nSrc]); r < utf8.RuneSelf {
|
||||
if replacement = t(r); replacement < utf8.RuneSelf {
|
||||
if nDst == len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = byte(replacement)
|
||||
nDst++
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
size = 1
|
||||
} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 {
|
||||
// Invalid rune.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
if replacement = t(utf8.RuneError); replacement == utf8.RuneError {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
} else if replacement = t(r); replacement == r {
|
||||
if nDst+size > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < size; i++ {
|
||||
dst[nDst] = src[nSrc]
|
||||
nDst++
|
||||
nSrc++
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
n := utf8.EncodeRune(b[:], replacement)
|
||||
|
||||
if nDst+n > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
dst[nDst] = b[i]
|
||||
nDst++
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ReplaceIllFormed returns a transformer that replaces all input bytes that are
|
||||
// not part of a well-formed UTF-8 code sequence with utf8.RuneError.
|
||||
func ReplaceIllFormed() Transformer {
|
||||
return Transformer{&replaceIllFormed{}}
|
||||
}
|
||||
|
||||
type replaceIllFormed struct{ transform.NopResetter }
|
||||
|
||||
func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) {
|
||||
for n < len(src) {
|
||||
// ASCII fast path.
|
||||
if src[n] < utf8.RuneSelf {
|
||||
n++
|
||||
continue
|
||||
}
|
||||
|
||||
r, size := utf8.DecodeRune(src[n:])
|
||||
|
||||
// Look for a valid non-ASCII rune.
|
||||
if r != utf8.RuneError || size != 1 {
|
||||
n += size
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for short source data.
|
||||
if !atEOF && !utf8.FullRune(src[n:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
// We have an invalid rune.
|
||||
err = transform.ErrEndOfSpan
|
||||
break
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for nSrc < len(src) {
|
||||
// ASCII fast path.
|
||||
if r := src[nSrc]; r < utf8.RuneSelf {
|
||||
if nDst == len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = r
|
||||
nDst++
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for a valid non-ASCII rune.
|
||||
if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 {
|
||||
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += size
|
||||
nSrc += size
|
||||
continue
|
||||
}
|
||||
|
||||
// Look for short source data.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
|
||||
// We have an invalid rune.
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = runeErrorString[0]
|
||||
dst[nDst+1] = runeErrorString[1]
|
||||
dst[nDst+2] = runeErrorString[2]
|
||||
nDst += 3
|
||||
nSrc++
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
23
vendor/modules.txt
vendored
23
vendor/modules.txt
vendored
@@ -23,6 +23,10 @@ github.com/PuerkitoBio/goquery
|
||||
github.com/agnivade/levenshtein
|
||||
# github.com/andybalholm/cascadia v1.0.0
|
||||
github.com/andybalholm/cascadia
|
||||
# github.com/antchfx/htmlquery v1.2.0
|
||||
github.com/antchfx/htmlquery
|
||||
# github.com/antchfx/xpath v1.1.2
|
||||
github.com/antchfx/xpath
|
||||
# github.com/bmatcuk/doublestar v1.1.5
|
||||
github.com/bmatcuk/doublestar
|
||||
# github.com/disintegration/imaging v1.6.0
|
||||
@@ -71,6 +75,8 @@ github.com/golang-migrate/migrate/v4/source/file
|
||||
github.com/golang-migrate/migrate/v4
|
||||
github.com/golang-migrate/migrate/v4/source
|
||||
github.com/golang-migrate/migrate/v4/database
|
||||
# github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef
|
||||
github.com/golang/groupcache/lru
|
||||
# github.com/gorilla/websocket v1.4.0
|
||||
github.com/gorilla/websocket
|
||||
# github.com/h2non/filetype v1.0.8
|
||||
@@ -169,6 +175,7 @@ golang.org/x/image/tiff
|
||||
golang.org/x/image/tiff/lzw
|
||||
# golang.org/x/net v0.0.0-20190522155817-f3200d17e092
|
||||
golang.org/x/net/html
|
||||
golang.org/x/net/html/charset
|
||||
golang.org/x/net/context/ctxhttp
|
||||
golang.org/x/net/html/atom
|
||||
# golang.org/x/sys v0.0.0-20190426135247-a129542de9ae
|
||||
@@ -177,6 +184,22 @@ golang.org/x/sys/windows
|
||||
# golang.org/x/text v0.3.2
|
||||
golang.org/x/text/transform
|
||||
golang.org/x/text/unicode/norm
|
||||
golang.org/x/text/encoding
|
||||
golang.org/x/text/encoding/charmap
|
||||
golang.org/x/text/encoding/htmlindex
|
||||
golang.org/x/text/encoding/internal/identifier
|
||||
golang.org/x/text/encoding/internal
|
||||
golang.org/x/text/encoding/japanese
|
||||
golang.org/x/text/encoding/korean
|
||||
golang.org/x/text/encoding/simplifiedchinese
|
||||
golang.org/x/text/encoding/traditionalchinese
|
||||
golang.org/x/text/encoding/unicode
|
||||
golang.org/x/text/language
|
||||
golang.org/x/text/internal/utf8internal
|
||||
golang.org/x/text/runes
|
||||
golang.org/x/text/internal/language
|
||||
golang.org/x/text/internal/language/compact
|
||||
golang.org/x/text/internal/tag
|
||||
# golang.org/x/tools v0.0.0-20190515012406-7d7faa4812bd
|
||||
golang.org/x/tools/go/packages
|
||||
golang.org/x/tools/imports
|
||||
|
||||
Reference in New Issue
Block a user