mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
perf(xml): optimize xml filtering
Instead of using bytes.Map, which returns a copy of the provided []byte, use a custom in-place implementation, as the bytes.Map call accounts for around 25% of rss.Parse.
This commit is contained in:
parent
49085daefe
commit
d59990f1dd
1 changed files with 22 additions and 2 deletions
|
@ -9,6 +9,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"miniflux.app/v2/internal/reader/encoding"
|
"miniflux.app/v2/internal/reader/encoding"
|
||||||
)
|
)
|
||||||
|
@ -24,7 +25,7 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||||
enc := getEncoding(buffer.Bytes())
|
enc := getEncoding(buffer.Bytes())
|
||||||
if enc == "" || strings.EqualFold(enc, "utf-8") {
|
if enc == "" || strings.EqualFold(enc, "utf-8") {
|
||||||
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
||||||
filteredBytes := bytes.Map(filterValidXMLChar, buffer.Bytes())
|
filteredBytes := filterValidXMLChars(buffer.Bytes())
|
||||||
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
||||||
} else {
|
} else {
|
||||||
// filter invalid chars later within decoder.CharsetReader
|
// filter invalid chars later within decoder.CharsetReader
|
||||||
|
@ -43,13 +44,32 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("encoding: unable to read data: %w", err)
|
return nil, fmt.Errorf("encoding: unable to read data: %w", err)
|
||||||
}
|
}
|
||||||
filteredBytes := bytes.Map(filterValidXMLChar, rawData)
|
filteredBytes := filterValidXMLChars(rawData)
|
||||||
return bytes.NewReader(filteredBytes), nil
|
return bytes.NewReader(filteredBytes), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return decoder
|
return decoder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// filterValidXMLChars filters inplace invalid XML characters.
|
||||||
|
// This function is inspired from bytes.Map
|
||||||
|
func filterValidXMLChars(s []byte) []byte {
|
||||||
|
j := 0
|
||||||
|
for i := 0; i < len(s); {
|
||||||
|
wid := 1
|
||||||
|
r := rune(s[i])
|
||||||
|
if r >= utf8.RuneSelf {
|
||||||
|
r, wid = utf8.DecodeRune(s[i:])
|
||||||
|
}
|
||||||
|
if r = filterValidXMLChar(r); r >= 0 {
|
||||||
|
utf8.EncodeRune(s[j:], r)
|
||||||
|
j += wid
|
||||||
|
}
|
||||||
|
i += wid
|
||||||
|
}
|
||||||
|
return s[:j]
|
||||||
|
}
|
||||||
|
|
||||||
// This function is copied from encoding/xml package,
|
// This function is copied from encoding/xml package,
|
||||||
// and is used to check if all the characters are legal.
|
// and is used to check if all the characters are legal.
|
||||||
func filterValidXMLChar(r rune) rune {
|
func filterValidXMLChar(r rune) rune {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue