mirror of
https://github.com/miniflux/v2.git
synced 2025-08-11 17:51:01 +00:00
Refactor feed discovery and avoid an extra HTTP request if the provided URL is already the feed
This commit is contained in:
parent
14e25ab9fe
commit
eeaab72a9f
31 changed files with 455 additions and 200 deletions
|
@ -4,8 +4,9 @@
|
|||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
"io"
|
||||
|
||||
rxml "miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
@ -20,12 +21,16 @@ const (
|
|||
)
|
||||
|
||||
// DetectFeedFormat tries to guess the feed format from input data.
|
||||
func DetectFeedFormat(data string) string {
|
||||
if strings.HasPrefix(strings.TrimSpace(data), "{") {
|
||||
func DetectFeedFormat(r io.ReadSeeker) string {
|
||||
data := make([]byte, 512)
|
||||
r.Read(data)
|
||||
|
||||
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
|
||||
return FormatJSON
|
||||
}
|
||||
|
||||
decoder := rxml.NewDecoder(strings.NewReader(data))
|
||||
r.Seek(0, io.SeekStart)
|
||||
decoder := rxml.NewDecoder(r)
|
||||
|
||||
for {
|
||||
token, _ := decoder.Token()
|
||||
|
|
|
@ -4,12 +4,13 @@
|
|||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectRDF(t *testing.T) {
|
||||
data := `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"></rdf:RDF>`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatRDF {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRDF)
|
||||
|
@ -18,7 +19,7 @@ func TestDetectRDF(t *testing.T) {
|
|||
|
||||
func TestDetectRSS(t *testing.T) {
|
||||
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatRSS {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRSS)
|
||||
|
@ -27,7 +28,7 @@ func TestDetectRSS(t *testing.T) {
|
|||
|
||||
func TestDetectAtom10(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
|
@ -36,7 +37,7 @@ func TestDetectAtom10(t *testing.T) {
|
|||
|
||||
func TestDetectAtom03(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
|
@ -45,7 +46,7 @@ func TestDetectAtom03(t *testing.T) {
|
|||
|
||||
func TestDetectAtomWithISOCharset(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
|
@ -59,7 +60,7 @@ func TestDetectJSON(t *testing.T) {
|
|||
"title" : "Example"
|
||||
}
|
||||
`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
|
@ -70,7 +71,7 @@ func TestDetectUnknown(t *testing.T) {
|
|||
data := `
|
||||
<!DOCTYPE html> <html> </html>
|
||||
`
|
||||
format := DetectFeedFormat(data)
|
||||
format := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatUnknown {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||
|
|
|
@ -5,7 +5,7 @@ package parser // import "miniflux.app/v2/internal/reader/parser"
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/atom"
|
||||
|
@ -17,16 +17,21 @@ import (
|
|||
var ErrFeedFormatNotDetected = errors.New("parser: unable to detect feed format")
|
||||
|
||||
// ParseFeed analyzes the input data and returns a normalized feed object.
|
||||
func ParseFeed(baseURL, data string) (*model.Feed, error) {
|
||||
switch DetectFeedFormat(data) {
|
||||
func ParseFeed(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
|
||||
r.Seek(0, io.SeekStart)
|
||||
switch DetectFeedFormat(r) {
|
||||
case FormatAtom:
|
||||
return atom.Parse(baseURL, strings.NewReader(data))
|
||||
r.Seek(0, io.SeekStart)
|
||||
return atom.Parse(baseURL, r)
|
||||
case FormatRSS:
|
||||
return rss.Parse(baseURL, strings.NewReader(data))
|
||||
r.Seek(0, io.SeekStart)
|
||||
return rss.Parse(baseURL, r)
|
||||
case FormatJSON:
|
||||
return json.Parse(baseURL, strings.NewReader(data))
|
||||
r.Seek(0, io.SeekStart)
|
||||
return json.Parse(baseURL, r)
|
||||
case FormatRDF:
|
||||
return rdf.Parse(baseURL, strings.NewReader(data))
|
||||
r.Seek(0, io.SeekStart)
|
||||
return rdf.Parse(baseURL, r)
|
||||
default:
|
||||
return nil, ErrFeedFormatNotDetected
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -29,7 +30,7 @@ func TestParseAtom(t *testing.T) {
|
|||
|
||||
</feed>`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/", data)
|
||||
feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -57,7 +58,7 @@ func TestParseAtomFeedWithRelativeURL(t *testing.T) {
|
|||
|
||||
</feed>`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/blog/atom.xml", data)
|
||||
feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -91,7 +92,7 @@ func TestParseRSS(t *testing.T) {
|
|||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", data)
|
||||
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -117,7 +118,7 @@ func TestParseRSSFeedWithRelativeURL(t *testing.T) {
|
|||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/rss.xml", data)
|
||||
feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -158,7 +159,7 @@ func TestParseRDF(t *testing.T) {
|
|||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/", data)
|
||||
feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -187,7 +188,7 @@ func TestParseRDFWithRelativeURL(t *testing.T) {
|
|||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/rdf.xml", data)
|
||||
feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -225,7 +226,7 @@ func TestParseJson(t *testing.T) {
|
|||
]
|
||||
}`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/feed.json", data)
|
||||
feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -250,7 +251,7 @@ func TestParseJsonFeedWithRelativeURL(t *testing.T) {
|
|||
]
|
||||
}`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/blog/feed.json", data)
|
||||
feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
@ -285,14 +286,14 @@ func TestParseUnknownFeed(t *testing.T) {
|
|||
</html>
|
||||
`
|
||||
|
||||
_, err := ParseFeed("https://example.org/", data)
|
||||
_, err := ParseFeed("https://example.org/", strings.NewReader(data))
|
||||
if err == nil {
|
||||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEmptyFeed(t *testing.T) {
|
||||
_, err := ParseFeed("", "")
|
||||
_, err := ParseFeed("", strings.NewReader(""))
|
||||
if err == nil {
|
||||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue