mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Improve XML decoder to remove illegal characters
This commit is contained in:
parent
7409bba0d8
commit
2eb2441f2b
7 changed files with 85 additions and 19 deletions
50
reader/xml/decoder.go
Normal file
50
reader/xml/decoder.go
Normal file
|
@ -0,0 +1,50 @@
|
|||
// Copyright 2019 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xml // import "miniflux.app/reader/xml"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
|
||||
"miniflux.app/reader/encoding"
|
||||
)
|
||||
|
||||
// NewDecoder returns a XML decoder that filters illegal characters.
|
||||
func NewDecoder(data io.Reader) *xml.Decoder {
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
||||
utf8Reader, err := encoding.CharsetReader(charset, input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rawData, err := ioutil.ReadAll(utf8Reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to read data: %q", err)
|
||||
}
|
||||
filteredBytes := bytes.Map(filterValidXMLChar, rawData)
|
||||
return bytes.NewReader(filteredBytes), nil
|
||||
}
|
||||
|
||||
return decoder
|
||||
}
|
||||
|
||||
// filterValidXMLChar is the mapping function passed to bytes.Map: it returns
// r unchanged when r is a legal XML character and -1 otherwise, and bytes.Map
// drops runes mapped to a negative value.
//
// The accepted ranges are copied from the encoding/xml package: tab, line
// feed, carriage return, U+0020..U+D7FF, U+E000..U+FFFD and
// U+10000..U+10FFFF.
func filterValidXMLChar(r rune) rune {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		// Permitted whitespace control characters.
		return r
	case 0x20 <= r && r <= 0xD7FF:
		return r
	case 0xE000 <= r && r <= 0xFFFD:
		return r
	case 0x10000 <= r && r <= 0x10FFFF:
		return r
	default:
		// Everything else (other control characters, surrogates, U+FFFE,
		// U+FFFF, out-of-range values) is removed.
		return -1
	}
}
|
Loading…
Add table
Add a link
Reference in a new issue