mirror of
https://github.com/miniflux/v2.git
synced 2025-09-30 19:22:11 +00:00
Merge 69be57fc9d
into 8adcaed29e
This commit is contained in:
commit
ad68df2b82
1 changed file with 23 additions and 22 deletions
|
@@ -8,7 +8,6 @@ import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
"miniflux.app/v2/internal/reader/encoding"
|
"miniflux.app/v2/internal/reader/encoding"
|
||||||
|
@@ -23,19 +22,24 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||||
io.Copy(buffer, data)
|
io.Copy(buffer, data)
|
||||||
|
|
||||||
enc := getEncoding(buffer.Bytes())
|
enc := getEncoding(buffer.Bytes())
|
||||||
if enc == "" || strings.EqualFold(enc, "utf-8") {
|
if enc == nil || bytes.EqualFold(enc, []byte("utf-8")) {
|
||||||
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
// filter invalid chars now, since decoder.CharsetReader isn't called for utf-8 content
|
||||||
filteredBytes := filterValidXMLChars(buffer.Bytes())
|
filteredBytes := filterValidXMLChars(buffer.Bytes())
|
||||||
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
||||||
} else {
|
} else {
|
||||||
// filter invalid chars later within decoder.CharsetReader
|
|
||||||
data.Seek(0, io.SeekStart)
|
data.Seek(0, io.SeekStart)
|
||||||
decoder = xml.NewDecoder(data)
|
decoder = xml.NewDecoder(data)
|
||||||
|
// invalid characters will be filtered later via decoder.CharsetReader
|
||||||
|
decoder.CharsetReader = charsetReaderFilterInvalidUtf8
|
||||||
}
|
}
|
||||||
|
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
decoder.Strict = false
|
decoder.Strict = false
|
||||||
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
|
||||||
|
return decoder
|
||||||
|
}
|
||||||
|
|
||||||
|
func charsetReaderFilterInvalidUtf8(charset string, input io.Reader) (io.Reader, error) {
|
||||||
utf8Reader, err := encoding.CharsetReader(charset, input)
|
utf8Reader, err := encoding.CharsetReader(charset, input)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@@ -48,9 +52,6 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||||
return bytes.NewReader(filteredBytes), nil
|
return bytes.NewReader(filteredBytes), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return decoder
|
|
||||||
}
|
|
||||||
|
|
||||||
// filterValidXMLChars filters invalid XML characters in place.
|
// filterValidXMLChars filters invalid XML characters in place.
|
||||||
// This function is inspired by bytes.Map
|
// This function is inspired by bytes.Map
|
||||||
func filterValidXMLChars(s []byte) []byte {
|
func filterValidXMLChars(s []byte) []byte {
|
||||||
|
@@ -89,23 +90,23 @@ func filterValidXMLChar(r rune) rune {
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function is copied from encoding/xml's procInst and adapted for []byte instead of string
|
// This function is copied from encoding/xml's procInst and adapted for []byte instead of string
|
||||||
func getEncoding(b []byte) string {
|
func getEncoding(b []byte) []byte {
|
||||||
// This parsing is somewhat lame and not exact.
|
// This parsing is somewhat lame and not exact.
|
||||||
// It works for all actual cases, though.
|
// It works for all actual cases, though.
|
||||||
idx := bytes.Index(b, []byte("encoding="))
|
idx := bytes.Index(b, []byte("encoding="))
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return ""
|
return nil
|
||||||
}
|
}
|
||||||
v := b[idx+len("encoding="):]
|
v := b[idx+len("encoding="):]
|
||||||
if len(v) == 0 {
|
if len(v) == 0 {
|
||||||
return ""
|
return nil
|
||||||
}
|
}
|
||||||
if v[0] != '\'' && v[0] != '"' {
|
if v[0] != '\'' && v[0] != '"' {
|
||||||
return ""
|
return nil
|
||||||
}
|
}
|
||||||
idx = bytes.IndexRune(v[1:], rune(v[0]))
|
idx = bytes.IndexRune(v[1:], rune(v[0]))
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return ""
|
return nil
|
||||||
}
|
}
|
||||||
return string(v[1 : idx+1])
|
return v[1 : idx+1]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue