1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-07-22 17:18:37 +00:00

Handle more encoding edge cases

- Feeds with charset specified only in Content-Type header and not in XML document
- Feeds with charset specified in both places
- Feeds with charset specified only in XML document and not in HTTP header
This commit is contained in:
Frédéric Guillot 2018-01-19 22:42:55 -08:00
parent 3b62f904d6
commit 713b38e34c
10 changed files with 87 additions and 21 deletions

View file

@@ -14,12 +14,11 @@ import (
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/atom"
"github.com/miniflux/miniflux/reader/encoding"
"github.com/miniflux/miniflux/reader/json"
"github.com/miniflux/miniflux/reader/rdf"
"github.com/miniflux/miniflux/reader/rss"
"github.com/miniflux/miniflux/timer"
"golang.org/x/net/html/charset"
)
// List of feed formats.
@@ -32,14 +31,14 @@ const (
)
// DetectFeedFormat detects the feed format from input data.
func DetectFeedFormat(data io.Reader) string {
func DetectFeedFormat(r io.Reader) string {
defer timer.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
var buffer bytes.Buffer
tee := io.TeeReader(data, &buffer)
tee := io.TeeReader(r, &buffer)
decoder := xml.NewDecoder(tee)
decoder.CharsetReader = charset.NewReaderLabel
decoder.CharsetReader = encoding.CharsetReader
for {
token, _ := decoder.Token()
@@ -66,11 +65,11 @@ func DetectFeedFormat(data io.Reader) string {
return FormatUnknown
}
func parseFeed(data io.Reader) (*model.Feed, error) {
func parseFeed(r io.Reader) (*model.Feed, error) {
defer timer.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
var buffer bytes.Buffer
io.Copy(&buffer, data)
io.Copy(&buffer, r)
reader := bytes.NewReader(buffer.Bytes())
format := DetectFeedFormat(reader)