mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Handle more encoding conversion edge cases
This commit is contained in:
parent
5ff0630726
commit
ae1dc1a91e
24 changed files with 3151 additions and 40 deletions
|
@ -7,6 +7,7 @@ package encoding // import "miniflux.app/reader/encoding"
|
|||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
|
@ -19,20 +20,20 @@ import (
|
|||
//
|
||||
// Several edge cases could exist:
|
||||
//
|
||||
// - Feeds with charset specified only in Content-Type header and not in XML document
|
||||
// - Feeds with charset specified in both places
|
||||
// - Feeds with charset specified only in XML document and not in HTTP header
|
||||
// - Feeds with encoding specified only in Content-Type header and not in XML document
|
||||
// - Feeds with encoding specified in both places
|
||||
// - Feeds with encoding specified only in XML document and not in HTTP header
|
||||
// - Feeds with wrong encoding defined and already in UTF-8
|
||||
func CharsetReader(label string, input io.Reader) (io.Reader, error) {
|
||||
var buf1, buf2 bytes.Buffer
|
||||
w := io.MultiWriter(&buf1, &buf2)
|
||||
io.Copy(w, input)
|
||||
r := bytes.NewReader(buf2.Bytes())
|
||||
buffer, _ := ioutil.ReadAll(input)
|
||||
r := bytes.NewReader(buffer)
|
||||
|
||||
if !utf8.Valid(buf1.Bytes()) {
|
||||
// Transform document to UTF-8 from the specified XML encoding.
|
||||
return charset.NewReaderLabel(label, r)
|
||||
// The document is already UTF-8, do not do anything (avoid double-encoding).
|
||||
// That means the specified encoding in XML prolog is wrong.
|
||||
if utf8.Valid(buffer) {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// The document is already UTF-8, do not do anything (avoid double-encoding)
|
||||
return r, nil
|
||||
// Transform document to UTF-8 from the specified encoding in XML prolog.
|
||||
return charset.NewReaderLabel(label, r)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue