1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-30 19:22:11 +00:00
This commit is contained in:
Julien Voisin 2025-09-30 08:54:46 +02:00 committed by GitHub
commit ad68df2b82
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -8,7 +8,6 @@ import (
"encoding/xml" "encoding/xml"
"fmt" "fmt"
"io" "io"
"strings"
"unicode/utf8" "unicode/utf8"
"miniflux.app/v2/internal/reader/encoding" "miniflux.app/v2/internal/reader/encoding"
@ -23,19 +22,24 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
io.Copy(buffer, data) io.Copy(buffer, data)
enc := getEncoding(buffer.Bytes()) enc := getEncoding(buffer.Bytes())
if enc == "" || strings.EqualFold(enc, "utf-8") { if enc == nil || bytes.EqualFold(enc, []byte("utf-8")) {
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content // filter invalid chars now, since decoder.CharsetReader isn't called for utf-8 content
filteredBytes := filterValidXMLChars(buffer.Bytes()) filteredBytes := filterValidXMLChars(buffer.Bytes())
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes)) decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
} else { } else {
// filter invalid chars later within decoder.CharsetReader
data.Seek(0, io.SeekStart) data.Seek(0, io.SeekStart)
decoder = xml.NewDecoder(data) decoder = xml.NewDecoder(data)
// invalid characters will be filtered later via decoder.CharsetReader
decoder.CharsetReader = charsetReaderFilterInvalidUtf8
} }
decoder.Entity = xml.HTMLEntity decoder.Entity = xml.HTMLEntity
decoder.Strict = false decoder.Strict = false
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
return decoder
}
func charsetReaderFilterInvalidUtf8(charset string, input io.Reader) (io.Reader, error) {
utf8Reader, err := encoding.CharsetReader(charset, input) utf8Reader, err := encoding.CharsetReader(charset, input)
if err != nil { if err != nil {
return nil, err return nil, err
@ -46,9 +50,6 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
} }
filteredBytes := filterValidXMLChars(rawData) filteredBytes := filterValidXMLChars(rawData)
return bytes.NewReader(filteredBytes), nil return bytes.NewReader(filteredBytes), nil
}
return decoder
} }
// filterValidXMLChars filters inplace invalid XML characters. // filterValidXMLChars filters inplace invalid XML characters.
@ -89,23 +90,23 @@ func filterValidXMLChar(r rune) rune {
} }
// This function is copied from encoding/xml's procInst and adapted for []bytes instead of string // This function is copied from encoding/xml's procInst and adapted for []bytes instead of string
func getEncoding(b []byte) string { func getEncoding(b []byte) []byte {
// This parsing is somewhat lame and not exact. // This parsing is somewhat lame and not exact.
// It works for all actual cases, though. // It works for all actual cases, though.
idx := bytes.Index(b, []byte("encoding=")) idx := bytes.Index(b, []byte("encoding="))
if idx == -1 { if idx == -1 {
return "" return nil
} }
v := b[idx+len("encoding="):] v := b[idx+len("encoding="):]
if len(v) == 0 { if len(v) == 0 {
return "" return nil
} }
if v[0] != '\'' && v[0] != '"' { if v[0] != '\'' && v[0] != '"' {
return "" return nil
} }
idx = bytes.IndexRune(v[1:], rune(v[0])) idx = bytes.IndexRune(v[1:], rune(v[0]))
if idx == -1 { if idx == -1 {
return "" return nil
} }
return string(v[1 : idx+1]) return v[1 : idx+1]
} }