From 9e4c5e4cb508bba7c4ccd50d4c1f7d606092c7bd Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 29 Aug 2025 13:25:37 +0200 Subject: [PATCH] perf(reader): speed up filterValidXMLChars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As kindly explained by @randall77 in https://github.com/golang/go/issues/75184#issuecomment-3234418162, there is a bounds check in the for loop because `i` is a signed integer and could therefore become negative. Changing its type to an unsigned integer removes that check. ``` goos: linux goarch: arm64 pkg: miniflux.app/v2/internal/reader/parser │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Parse-8 40.91m ± 3% 39.30m ± 2% -3.94% (p=0.000 n=50) ``` --- internal/reader/xml/decoder.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/internal/reader/xml/decoder.go b/internal/reader/xml/decoder.go index 13bceeb1..efe4fdcc 100644 --- a/internal/reader/xml/decoder.go +++ b/internal/reader/xml/decoder.go @@ -54,8 +54,10 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder { // filterValidXMLChars filters inplace invalid XML characters. // This function is inspired from bytes.Map func filterValidXMLChars(s []byte) []byte { - j := 0 - for i := 0; i < len(s); { + var i uint // declaring it as an uint removes a bound check in the loop. + var j int + + for i = 0; i < uint(len(s)); { wid := 1 r := rune(s[i]) if r >= utf8.RuneSelf { @@ -67,7 +69,7 @@ func filterValidXMLChars(s []byte) []byte { j += wid } } - i += wid + i += uint(wid) } return s[:j] }