1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

perf(reader): speed up filterValidXMLChars

As kindly explained by @randall77 in https://github.com/golang/go/issues/75184#issuecomment-3234418162,
there is a bounds check in the for loop because `i` is a signed integer and
could therefore become negative. Changing its type to an unsigned integer
removes that bounds check.

```
goos: linux
goarch: arm64
pkg: miniflux.app/v2/internal/reader/parser
        │   old.txt   │              new.txt               │
        │   sec/op    │   sec/op     vs base               │
Parse-8   40.91m ± 3%   39.30m ± 2%  -3.94% (p=0.000 n=50)
```
This commit is contained in:
jvoisin 2025-08-29 13:25:37 +02:00
parent e8f5c2446c
commit 9e4c5e4cb5

View file

@ -54,8 +54,10 @@ func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
// filterValidXMLChars filters inplace invalid XML characters. // filterValidXMLChars filters inplace invalid XML characters.
// This function is inspired from bytes.Map // This function is inspired from bytes.Map
func filterValidXMLChars(s []byte) []byte { func filterValidXMLChars(s []byte) []byte {
j := 0 var i uint // declaring it as an uint removes a bound check in the loop.
for i := 0; i < len(s); { var j int
for i = 0; i < uint(len(s)); {
wid := 1 wid := 1
r := rune(s[i]) r := rune(s[i])
if r >= utf8.RuneSelf { if r >= utf8.RuneSelf {
@ -67,7 +69,7 @@ func filterValidXMLChars(s []byte) []byte {
j += wid j += wid
} }
} }
i += wid i += uint(wid)
} }
return s[:j] return s[:j]
} }