1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

Improve XML decoder to remove illegal characters

This commit is contained in:
Tony Wang 2019-10-23 11:27:27 +08:00 committed by Frédéric Guillot
parent 7409bba0d8
commit 2eb2441f2b
7 changed files with 85 additions and 19 deletions

View file

@ -5,22 +5,17 @@
package atom // import "miniflux.app/reader/atom"
import (
"encoding/xml"
"io"
"miniflux.app/errors"
"miniflux.app/model"
"miniflux.app/reader/encoding"
"miniflux.app/reader/xml"
)
// Parse returns a normalized feed struct from an Atom feed.
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
atomFeed := new(atomFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(atomFeed)
if err != nil {
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err)

View file

@ -5,22 +5,17 @@
package rdf // import "miniflux.app/reader/rdf"
import (
"encoding/xml"
"io"
"miniflux.app/errors"
"miniflux.app/model"
"miniflux.app/reader/encoding"
"miniflux.app/reader/xml"
)
// Parse returns a normalized feed struct from an RDF feed.
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rdfFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)
if err != nil {
return nil, errors.NewLocalizedError("Unable to parse RDF feed: %q", err)

View file

@ -5,22 +5,17 @@
package rss // import "miniflux.app/reader/rss"
import (
"encoding/xml"
"io"
"miniflux.app/errors"
"miniflux.app/model"
"miniflux.app/reader/encoding"
"miniflux.app/reader/xml"
)
// Parse returns a normalized feed struct from an RSS feed.
func Parse(data io.Reader) (*model.Feed, *errors.LocalizedError) {
feed := new(rssFeed)
decoder := xml.NewDecoder(data)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = encoding.CharsetReader
err := decoder.Decode(feed)
if err != nil {
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %q", err)

50
reader/xml/decoder.go Normal file
View file

@ -0,0 +1,50 @@
// Copyright 2019 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package xml // import "miniflux.app/reader/xml"
import (
"bytes"
"encoding/xml"
"fmt"
"io"
"io/ioutil"
"miniflux.app/reader/encoding"
)
// NewDecoder returns an XML decoder that filters illegal characters.
//
// The decoder is configured in non-strict mode and resolves HTML entities.
// Its CharsetReader first converts the input through encoding.CharsetReader,
// then reads the whole converted stream into memory and drops every rune
// rejected by filterValidXMLChar before handing the bytes back to the parser.
//
// NOTE(review): encoding/xml documents CharsetReader as the hook used to
// convert a non-UTF-8 charset into UTF-8, so the filtering presumably does
// not run for documents that do not declare such a charset — confirm.
func NewDecoder(data io.Reader) *xml.Decoder {
	sanitize := func(charset string, input io.Reader) (io.Reader, error) {
		converted, err := encoding.CharsetReader(charset, input)
		if err != nil {
			return nil, err
		}

		// The filter works on a byte slice, so the converted stream has to be
		// buffered entirely before it can be cleaned.
		content, err := ioutil.ReadAll(converted)
		if err != nil {
			return nil, fmt.Errorf("Unable to read data: %q", err)
		}

		return bytes.NewReader(bytes.Map(filterValidXMLChar, content)), nil
	}

	d := xml.NewDecoder(data)
	d.Strict = false
	d.Entity = xml.HTMLEntity
	d.CharsetReader = sanitize
	return d
}
// filterValidXMLChar is a rune-mapping function: it returns r unchanged when
// r is a legal XML character and -1 otherwise, so that bytes.Map drops the
// rune from its output.
//
// The accepted ranges are copied from the character check of the
// encoding/xml package: tab, line feed, carriage return, U+0020..U+D7FF,
// U+E000..U+FFFD and U+10000..U+10FFFF.
func filterValidXMLChar(r rune) rune {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return r
	case 0x20 <= r && r <= 0xD7FF:
		return r
	case 0xE000 <= r && r <= 0xFFFD:
		return r
	case 0x10000 <= r && r <= 0x10FFFF:
		return r
	default:
		return -1
	}
}

View file

@ -0,0 +1,29 @@
// Copyright 2019 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package xml // import "miniflux.app/reader/xml"
import (
"encoding/xml"
"fmt"
"strings"
"testing"
)
// TestIllegalCharacters checks that a windows-1251 document whose title
// contains the control byte 0x10 can be decoded without an error when it goes
// through NewDecoder.
func TestIllegalCharacters(t *testing.T) {
	type rssDocument struct {
		XMLName xml.Name `xml:"rss"`
		Version string   `xml:"version,attr"`
		Title   string   `xml:"title"`
	}

	const template = `<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>%s</title></rss>`
	payload := fmt.Sprintf(template, "\x10")

	var doc rssDocument
	if err := NewDecoder(strings.NewReader(payload)).Decode(&doc); err != nil {
		t.Error(err)
	}
}