1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-16 18:01:37 +00:00

refactor(xml): improve the performances of NewXMLDecoder

- Invert a condition to make the code more readable
- Extract the encoding directly from the slice of bytes instead of converting
  it to string first.
This commit is contained in:
Julien Voisin 2025-01-31 03:37:06 +00:00 committed by GitHub
parent 3ebeb38ade
commit b193bc212a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -17,15 +17,15 @@ import (
func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
var decoder *xml.Decoder
buffer, _ := io.ReadAll(data)
enc := procInst("encoding", string(buffer))
if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
// filter invalid chars later within decoder.CharsetReader
data.Seek(0, io.SeekStart)
decoder = xml.NewDecoder(data)
} else {
enc := getEncoding(buffer)
if enc == "" || strings.EqualFold(enc, "utf-8") {
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
} else {
// filter invalid chars later within decoder.CharsetReader
data.Seek(0, io.SeekStart)
decoder = xml.NewDecoder(data)
}
decoder.Entity = xml.HTMLEntity
@ -60,27 +60,24 @@ func filterValidXMLChar(r rune) rune {
return -1
}
// This function is copied from encoding/xml package,
// procInst parses the `param="..."` or `param='...'`
// value out of the provided string, returning "" if not found.
func procInst(param, s string) string {
// This function is copied from encoding/xml's procInst and adapted for []bytes instead of string
func getEncoding(b []byte) string {
// TODO: this parsing is somewhat lame and not exact.
// It works for all actual cases, though.
param += "="
idx := strings.Index(s, param)
idx := bytes.Index(b, []byte("encoding="))
if idx == -1 {
return ""
}
v := s[idx+len(param):]
if v == "" {
v := b[idx+len("encoding="):]
if len(v) == 0 {
return ""
}
if v[0] != '\'' && v[0] != '"' {
return ""
}
idx = strings.IndexRune(v[1:], rune(v[0]))
idx = bytes.IndexRune(v[1:], rune(v[0]))
if idx == -1 {
return ""
}
return v[1 : idx+1]
return string(v[1 : idx+1])
}