1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-11 17:51:01 +00:00

Refactor feed discovery and avoid an extra HTTP request if the provided URL is already the feed

This commit is contained in:
Frédéric Guillot 2023-10-22 16:07:06 -07:00
parent 14e25ab9fe
commit eeaab72a9f
31 changed files with 455 additions and 200 deletions

View file

@ -4,8 +4,9 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"bytes"
"encoding/xml"
"strings"
"io"
rxml "miniflux.app/v2/internal/reader/xml"
)
@ -20,12 +21,16 @@ const (
)
// DetectFeedFormat tries to guess the feed format from input data.
func DetectFeedFormat(data string) string {
if strings.HasPrefix(strings.TrimSpace(data), "{") {
func DetectFeedFormat(r io.ReadSeeker) string {
data := make([]byte, 512)
r.Read(data)
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
return FormatJSON
}
decoder := rxml.NewDecoder(strings.NewReader(data))
r.Seek(0, io.SeekStart)
decoder := rxml.NewDecoder(r)
for {
token, _ := decoder.Token()

View file

@ -4,12 +4,13 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"strings"
"testing"
)
func TestDetectRDF(t *testing.T) {
data := `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"></rdf:RDF>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatRDF {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRDF)
@ -18,7 +19,7 @@ func TestDetectRDF(t *testing.T) {
func TestDetectRSS(t *testing.T) {
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatRSS {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRSS)
@ -27,7 +28,7 @@ func TestDetectRSS(t *testing.T) {
func TestDetectAtom10(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@ -36,7 +37,7 @@ func TestDetectAtom10(t *testing.T) {
func TestDetectAtom03(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@ -45,7 +46,7 @@ func TestDetectAtom03(t *testing.T) {
func TestDetectAtomWithISOCharset(t *testing.T) {
data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatAtom {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
@ -59,7 +60,7 @@ func TestDetectJSON(t *testing.T) {
"title" : "Example"
}
`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatJSON {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
@ -70,7 +71,7 @@ func TestDetectUnknown(t *testing.T) {
data := `
<!DOCTYPE html> <html> </html>
`
format := DetectFeedFormat(data)
format := DetectFeedFormat(strings.NewReader(data))
if format != FormatUnknown {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)

View file

@ -5,7 +5,7 @@ package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"errors"
"strings"
"io"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/atom"
@ -17,16 +17,21 @@ import (
var ErrFeedFormatNotDetected = errors.New("parser: unable to detect feed format")
// ParseFeed analyzes the input data and returns a normalized feed object.
func ParseFeed(baseURL, data string) (*model.Feed, error) {
switch DetectFeedFormat(data) {
func ParseFeed(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
r.Seek(0, io.SeekStart)
switch DetectFeedFormat(r) {
case FormatAtom:
return atom.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return atom.Parse(baseURL, r)
case FormatRSS:
return rss.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return rss.Parse(baseURL, r)
case FormatJSON:
return json.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return json.Parse(baseURL, r)
case FormatRDF:
return rdf.Parse(baseURL, strings.NewReader(data))
r.Seek(0, io.SeekStart)
return rdf.Parse(baseURL, r)
default:
return nil, ErrFeedFormatNotDetected
}

View file

@ -4,6 +4,7 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"strings"
"testing"
)
@ -29,7 +30,7 @@ func TestParseAtom(t *testing.T) {
</feed>`
feed, err := ParseFeed("https://example.org/", data)
feed, err := ParseFeed("https://example.org/", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -57,7 +58,7 @@ func TestParseAtomFeedWithRelativeURL(t *testing.T) {
</feed>`
feed, err := ParseFeed("https://example.org/blog/atom.xml", data)
feed, err := ParseFeed("https://example.org/blog/atom.xml", strings.NewReader(data))
if err != nil {
t.Fatal(err)
}
@ -91,7 +92,7 @@ func TestParseRSS(t *testing.T) {
</channel>
</rss>`
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", data)
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -117,7 +118,7 @@ func TestParseRSSFeedWithRelativeURL(t *testing.T) {
</channel>
</rss>`
feed, err := ParseFeed("http://example.org/rss.xml", data)
feed, err := ParseFeed("http://example.org/rss.xml", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -158,7 +159,7 @@ func TestParseRDF(t *testing.T) {
</item>
</rdf:RDF>`
feed, err := ParseFeed("http://example.org/", data)
feed, err := ParseFeed("http://example.org/", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -187,7 +188,7 @@ func TestParseRDFWithRelativeURL(t *testing.T) {
</item>
</rdf:RDF>`
feed, err := ParseFeed("http://example.org/rdf.xml", data)
feed, err := ParseFeed("http://example.org/rdf.xml", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -225,7 +226,7 @@ func TestParseJson(t *testing.T) {
]
}`
feed, err := ParseFeed("https://example.org/feed.json", data)
feed, err := ParseFeed("https://example.org/feed.json", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -250,7 +251,7 @@ func TestParseJsonFeedWithRelativeURL(t *testing.T) {
]
}`
feed, err := ParseFeed("https://example.org/blog/feed.json", data)
feed, err := ParseFeed("https://example.org/blog/feed.json", strings.NewReader(data))
if err != nil {
t.Error(err)
}
@ -285,14 +286,14 @@ func TestParseUnknownFeed(t *testing.T) {
</html>
`
_, err := ParseFeed("https://example.org/", data)
_, err := ParseFeed("https://example.org/", strings.NewReader(data))
if err == nil {
t.Error("ParseFeed must returns an error")
}
}
func TestParseEmptyFeed(t *testing.T) {
_, err := ParseFeed("", "")
_, err := ParseFeed("", strings.NewReader(""))
if err == nil {
t.Error("ParseFeed must returns an error")
}