Warn on charset decoding issues (#3568)

This commit is contained in:
WithoutPants
2023-03-24 09:04:48 +11:00
committed by GitHub
parent 7e66741998
commit 1f578db2d6

View File

@@ -58,30 +58,32 @@ func newZipFS(fs FS, path string, info fs.FileInfo) (*ZipFS, error) {
// Detect encoding // Detect encoding
d, err := chardet.NewTextDetector().DetectBest(buffer.Bytes()) d, err := chardet.NewTextDetector().DetectBest(buffer.Bytes())
if err != nil { if err != nil {
reader.Close() // If we can't detect the encoding, just assume it's UTF8
return nil, fmt.Errorf("unable to detect decoding: %w", err) logger.Warnf("Unable to detect decoding for %s: %w", path, err)
} }
// If the charset is not UTF8, decode'em // If the charset is not UTF8, decode'em
if d.Charset != "UTF-8" { if d != nil && d.Charset != "UTF-8" {
logger.Debugf("Detected non-utf8 zip charset %s (%s): %s", d.Charset, d.Language, path) logger.Debugf("Detected non-utf8 zip charset %s (%s): %s", d.Charset, d.Language, path)
e, _ := charset.Lookup(d.Charset) e, _ := charset.Lookup(d.Charset)
if e == nil { if e == nil {
reader.Close() // if we can't find the encoding, just assume it's UTF8
return nil, fmt.Errorf("failed to lookup charset %s, language %s", d.Charset, d.Language) logger.Warnf("Failed to lookup charset %s, language %s", d.Charset, d.Language)
} } else {
decoder := e.NewDecoder() decoder := e.NewDecoder()
for _, f := range zipReader.File { for _, f := range zipReader.File {
f.Name, _, err = transform.String(decoder, f.Name) newName, _, err := transform.String(decoder, f.Name)
if err != nil { if err != nil {
reader.Close() reader.Close()
return nil, fmt.Errorf("failed to decode %v: %w", []byte(f.Name), err) logger.Warnf("Failed to decode %v: %v", []byte(f.Name), err)
} else {
f.Name = newName
} }
// Comments are not decoded cuz stash doesn't use that // Comments are not decoded cuz stash doesn't use that
} }
} }
}
return &ZipFS{ return &ZipFS{
Reader: zipReader, Reader: zipReader,