mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
perf(xml): optimized NewXMLDecoder
io.ReadAll grows the underlying buffer progressively, while io.Copy is able to allocate it in one go, which is significantly faster. io.ReadAll currently accounts for around 10% of the CPU time of rss.Parse.
This commit is contained in:
parent
5872710d22
commit
49085daefe
1 changed files with 7 additions and 3 deletions
|
@ -16,11 +16,15 @@ import (
|
|||
// NewXMLDecoder returns an XML decoder that filters illegal characters.
|
||||
func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||
var decoder *xml.Decoder
|
||||
buffer, _ := io.ReadAll(data)
|
||||
enc := getEncoding(buffer)
|
||||
|
||||
// This is way faster than io.ReadAll(data), as the buffer can be allocated in one go instead of being grown dynamically.
|
||||
buffer := &bytes.Buffer{}
|
||||
io.Copy(buffer, data)
|
||||
|
||||
enc := getEncoding(buffer.Bytes())
|
||||
if enc == "" || strings.EqualFold(enc, "utf-8") {
|
||||
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
||||
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
|
||||
filteredBytes := bytes.Map(filterValidXMLChar, buffer.Bytes())
|
||||
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
||||
} else {
|
||||
// filter invalid chars later within decoder.CharsetReader
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue