1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

perf(xml): optimized NewXMLDecoder

io.ReadAll grows the underlying buffer progressively, while
io.Copy is able to allocate it in one go, which is significantly faster.
io.ReadAll currently accounts for around 10% of the CPU time of rss.Parse.
This commit is contained in:
jvoisin 2025-06-09 15:15:40 +02:00 committed by Frédéric Guillot
parent 5872710d22
commit 49085daefe

View file

@ -16,11 +16,15 @@ import (
// NewXMLDecoder returns an XML decoder that filters illegal characters.
func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
var decoder *xml.Decoder
buffer, _ := io.ReadAll(data)
enc := getEncoding(buffer)
// This is much faster than io.ReadAll(data), as the buffer can be allocated in one go instead of being grown dynamically.
buffer := &bytes.Buffer{}
io.Copy(buffer, data)
enc := getEncoding(buffer.Bytes())
if enc == "" || strings.EqualFold(enc, "utf-8") {
// filter invalid chars now, since decoder.CharsetReader is not called for utf-8 content
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
filteredBytes := bytes.Map(filterValidXMLChar, buffer.Bytes())
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
} else {
// filter invalid chars later within decoder.CharsetReader