mirror of
https://github.com/miniflux/v2.git
synced 2025-08-16 18:01:37 +00:00
refactor(xml): improve the performance of NewXMLDecoder
- Invert a condition to make the code more readable.
- Extract the encoding directly from the byte slice instead of converting it to a string first.
This commit is contained in:
parent
3ebeb38ade
commit
b193bc212a
1 changed files with 13 additions and 16 deletions
|
@ -17,15 +17,15 @@ import (
|
||||||
func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
func NewXMLDecoder(data io.ReadSeeker) *xml.Decoder {
|
||||||
var decoder *xml.Decoder
|
var decoder *xml.Decoder
|
||||||
buffer, _ := io.ReadAll(data)
|
buffer, _ := io.ReadAll(data)
|
||||||
enc := procInst("encoding", string(buffer))
|
enc := getEncoding(buffer)
|
||||||
if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
|
if enc == "" || strings.EqualFold(enc, "utf-8") {
|
||||||
// filter invalid chars later within decoder.CharsetReader
|
|
||||||
data.Seek(0, io.SeekStart)
|
|
||||||
decoder = xml.NewDecoder(data)
|
|
||||||
} else {
|
|
||||||
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
||||||
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
|
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
|
||||||
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
||||||
|
} else {
|
||||||
|
// filter invalid chars later within decoder.CharsetReader
|
||||||
|
data.Seek(0, io.SeekStart)
|
||||||
|
decoder = xml.NewDecoder(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
decoder.Entity = xml.HTMLEntity
|
decoder.Entity = xml.HTMLEntity
|
||||||
|
@ -60,27 +60,24 @@ func filterValidXMLChar(r rune) rune {
|
||||||
return -1
|
return -1
|
||||||
}
|
}
|
||||||
|
|
||||||
// This function is copied from encoding/xml package,
|
// This function is copied from encoding/xml's procInst and adapted for []byte instead of string
|
||||||
// procInst parses the `param="..."` or `param='...'`
|
func getEncoding(b []byte) string {
|
||||||
// value out of the provided string, returning "" if not found.
|
|
||||||
func procInst(param, s string) string {
|
|
||||||
// TODO: this parsing is somewhat lame and not exact.
|
// TODO: this parsing is somewhat lame and not exact.
|
||||||
// It works for all actual cases, though.
|
// It works for all actual cases, though.
|
||||||
param += "="
|
idx := bytes.Index(b, []byte("encoding="))
|
||||||
idx := strings.Index(s, param)
|
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
v := s[idx+len(param):]
|
v := b[idx+len("encoding="):]
|
||||||
if v == "" {
|
if len(v) == 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
if v[0] != '\'' && v[0] != '"' {
|
if v[0] != '\'' && v[0] != '"' {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
idx = strings.IndexRune(v[1:], rune(v[0]))
|
idx = bytes.IndexRune(v[1:], rune(v[0]))
|
||||||
if idx == -1 {
|
if idx == -1 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
return v[1 : idx+1]
|
return string(v[1 : idx+1])
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue