From 49085daefe4673001f0f211f22ac7f59a2f692bb Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Mon, 9 Jun 2025 15:15:40 +0200
Subject: [PATCH] perf(xml): optimized NewXMLDecoder

io.ReadAll grows the underlying buffer progressively, while io.Copy is
able to allocate it in one go, which is significantly faster.
io.ReadAll currently accounts for around 10% of the CPU time of
rss.Parse.
---
 internal/reader/xml/decoder.go | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/internal/reader/xml/decoder.go b/internal/reader/xml/decoder.go
index 13a238a9..13af32cb 100644
--- a/internal/reader/xml/decoder.go
+++ b/internal/reader/xml/decoder.go
@@ -16,11 +16,15 @@ import (
 // NewXMLDecoder returns a XML decoder that filters illegal characters.
 func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
 	var decoder *xml.Decoder
-	buffer, _ := io.ReadAll(data)
-	enc := getEncoding(buffer)
+
+	// This is way faster than io.ReadAll(data): the buffer can be allocated in one go instead of being grown dynamically.
+	buffer := &bytes.Buffer{}
+	io.Copy(buffer, data)
+
+	enc := getEncoding(buffer.Bytes())
 	if enc == "" || strings.EqualFold(enc, "utf-8") {
 		// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
-		filteredBytes := bytes.Map(filterValidXMLChar, buffer)
+		filteredBytes := bytes.Map(filterValidXMLChar, buffer.Bytes())
 		decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
 	} else {
 		// filter invalid chars later within decoder.CharsetReader
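
Reviewer note (not part of the patch): below is a minimal benchmark sketch of the claim in the commit message, contrasting io.ReadAll with io.Copy into a bytes.Buffer. It assumes the file sits next to internal/reader/xml/decoder.go (package name assumed to match the directory); the sample payload, its size, and the benchmark names are made up for illustration.

// Hypothetical micro-benchmark, not part of this patch.
package xml

import (
	"bytes"
	"io"
	"testing"
)

// sampleFeed stands in for a typical feed body; the size is an arbitrary assumption.
var sampleFeed = bytes.Repeat([]byte("<item><title>hello</title></item>\n"), 10000)

func BenchmarkReadAll(b *testing.B) {
	for i := 0; i < b.N; i++ {
		r := bytes.NewReader(sampleFeed)
		// io.ReadAll appends into a slice that is regrown repeatedly as the input is read.
		if _, err := io.ReadAll(r); err != nil {
			b.Fatal(err)
		}
	}
}

func BenchmarkCopyToBuffer(b *testing.B) {
	for i := 0; i < b.N; i++ {
		r := bytes.NewReader(sampleFeed)
		var buf bytes.Buffer
		// bytes.Reader implements io.WriterTo, so io.Copy hands the whole
		// payload to the buffer in a single Write and it grows at most once.
		if _, err := io.Copy(&buf, r); err != nil {
			b.Fatal(err)
		}
		_ = buf.Bytes()
	}
}

Running something like "go test -bench=. ./internal/reader/xml/" would compare the two approaches on the assumed payload.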