mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Simplify feed parser and format detection
- Avoid doing multiple buffer copies
- Move parser and format detection logic to its own package
This commit is contained in:
parent
d5ff4191b6
commit
5870f04260
11 changed files with 229 additions and 221 deletions
58
reader/parser/parser.go
Normal file
58
reader/parser/parser.go
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2018 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package parser // import "miniflux.app/reader/parser"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"miniflux.app/errors"
|
||||
"miniflux.app/logger"
|
||||
"miniflux.app/model"
|
||||
"miniflux.app/reader/atom"
|
||||
"miniflux.app/reader/json"
|
||||
"miniflux.app/reader/rdf"
|
||||
"miniflux.app/reader/rss"
|
||||
)
|
||||
|
||||
// ParseFeed analyzes the input data and returns a normalized feed object.
|
||||
func ParseFeed(data string) (*model.Feed, *errors.LocalizedError) {
|
||||
data = stripInvalidXMLCharacters(data)
|
||||
|
||||
switch DetectFeedFormat(data) {
|
||||
case FormatAtom:
|
||||
return atom.Parse(strings.NewReader(data))
|
||||
case FormatRSS:
|
||||
return rss.Parse(strings.NewReader(data))
|
||||
case FormatJSON:
|
||||
return json.Parse(strings.NewReader(data))
|
||||
case FormatRDF:
|
||||
return rdf.Parse(strings.NewReader(data))
|
||||
default:
|
||||
return nil, errors.NewLocalizedError("Unsupported feed format")
|
||||
}
|
||||
}
|
||||
|
||||
func stripInvalidXMLCharacters(input string) string {
|
||||
return strings.Map(func(r rune) rune {
|
||||
if isInCharacterRange(r) {
|
||||
return r
|
||||
}
|
||||
|
||||
logger.Debug("Strip invalid XML characters: %U", r)
|
||||
return -1
|
||||
}, input)
|
||||
}
|
||||
|
||||
// isInCharacterRange reports whether the given rune is in the XML Character
// Range, per the Char production of http://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters:
//
//	Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
//
// Tab, line feed and carriage return are the only control characters
// accepted; the surrogate block U+D800..U+DFFF and the non-characters
// U+FFFE and U+FFFF are rejected.
func isInCharacterRange(r rune) (inrange bool) {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		// Upper bound is 0xD7FF, the last code point before the surrogate block.
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}
|
Loading…
Add table
Add a link
Reference in a new issue