1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

When detecting the format, detect its version as well

There is no need to detect the format and then the version when both can be
done at the same time.

Add a benchmark as well, on large and small Atom and RSS files.
This commit is contained in:
jvoisin 2024-03-12 13:11:56 +01:00 committed by Frédéric Guillot
parent 688b73b7ae
commit 45d486b919
12 changed files with 3608 additions and 160 deletions

View file

@ -27,7 +27,7 @@ func TestParseAtom03(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -87,7 +87,7 @@ func TestParseAtom03WithoutFeedTitle(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -110,7 +110,7 @@ func TestParseAtom03WithoutEntryTitleButWithLink(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -138,7 +138,7 @@ func TestParseAtom03WithoutEntryTitleButWithSummary(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -166,7 +166,7 @@ func TestParseAtom03WithoutEntryTitleButWithXMLContent(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -197,7 +197,7 @@ func TestParseAtom03WithSummaryOnly(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -228,7 +228,7 @@ func TestParseAtom03WithXMLContent(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}
@ -259,7 +259,7 @@ func TestParseAtom03WithBase64Content(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("http://diveintomark.org/", bytes.NewReader([]byte(data)), "0.3")
if err != nil {
t.Fatal(err)
}

View file

@ -31,7 +31,7 @@ func TestParseAtomSample(t *testing.T) {
</feed>`
feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)))
feed, err := Parse("http://example.org/feed.xml", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -93,7 +93,7 @@ func TestParseFeedWithoutTitle(t *testing.T) {
<updated>2003-12-13T18:30:02Z</updated>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -123,7 +123,7 @@ func TestParseEntryWithoutTitleButWithURL(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -154,7 +154,7 @@ func TestParseEntryWithoutTitleButWithSummary(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -187,7 +187,7 @@ func TestParseEntryWithoutTitleButWithXHTMLContent(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -206,7 +206,7 @@ func TestParseFeedURL(t *testing.T) {
<updated>2003-12-13T18:30:02Z</updated>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -238,7 +238,7 @@ func TestParseFeedWithRelativeURL(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -272,7 +272,7 @@ func TestParseEntryWithRelativeURL(t *testing.T) {
</feed>`
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -298,7 +298,7 @@ func TestParseEntryURLWithTextHTMLType(t *testing.T) {
</feed>`
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -324,7 +324,7 @@ func TestParseEntryURLWithNoRelAndNoType(t *testing.T) {
</feed>`
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -350,7 +350,7 @@ func TestParseEntryURLWithAlternateRel(t *testing.T) {
</feed>`
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.net/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -378,7 +378,7 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -412,7 +412,7 @@ func TestParseEntryWithPlainTextTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -459,7 +459,7 @@ func TestParseEntryWithHTMLTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -497,7 +497,7 @@ func TestParseEntryWithXHTMLTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -524,7 +524,7 @@ func TestParseEntryWithEmptyXHTMLTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -551,7 +551,7 @@ func TestParseEntryWithXHTMLTitleWithoutDiv(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -577,7 +577,7 @@ func TestParseEntryWithNumericCharacterReferenceTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -603,7 +603,7 @@ func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -629,7 +629,7 @@ func TestParseEntryWithXHTMLSummary(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -671,7 +671,7 @@ func TestParseEntryWithHTMLSummary(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -723,7 +723,7 @@ func TestParseEntryWithTextSummary(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -776,7 +776,7 @@ func TestParseEntryWithTextContent(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -821,7 +821,7 @@ func TestParseEntryWithHTMLContent(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -852,7 +852,7 @@ func TestParseEntryWithXHTMLContent(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -881,7 +881,7 @@ func TestParseEntryWithAuthorName(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -910,7 +910,7 @@ func TestParseEntryWithoutAuthorName(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -941,7 +941,7 @@ func TestParseEntryWithMultipleAuthors(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -969,7 +969,7 @@ func TestParseEntryWithoutAuthor(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1000,7 +1000,7 @@ func TestParseFeedWithMultipleAuthors(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1025,7 +1025,7 @@ func TestParseFeedWithoutAuthor(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1075,7 +1075,7 @@ func TestParseEntryWithEnclosures(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1135,7 +1135,7 @@ func TestParseEntryWithoutEnclosureURL(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1168,7 +1168,7 @@ func TestParseEntryWithPublished(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1194,7 +1194,7 @@ func TestParseEntryWithPublishedAndUpdated(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1206,7 +1206,7 @@ func TestParseEntryWithPublishedAndUpdated(t *testing.T) {
func TestParseInvalidXml(t *testing.T) {
data := `garbage`
_, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
_, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err == nil {
t.Error("Parse should returns an error")
}
@ -1221,7 +1221,7 @@ func TestParseTitleWithSingleQuote(t *testing.T) {
</feed>
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1240,7 +1240,7 @@ func TestParseTitleWithEncodedSingleQuote(t *testing.T) {
</feed>
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1259,7 +1259,7 @@ func TestParseTitleWithSingleQuoteAndHTMLType(t *testing.T) {
</feed>
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1278,7 +1278,7 @@ func TestParseWithHTMLEntity(t *testing.T) {
</feed>
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1297,7 +1297,7 @@ func TestParseWithInvalidCharacterEntity(t *testing.T) {
</feed>
`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1330,7 +1330,7 @@ A website: http://example.org/</media:description>
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1396,7 +1396,7 @@ A website: http://example.org/</media:description>
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1467,7 +1467,7 @@ func TestParseRepliesLinkRelationWithHTMLType(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1511,7 +1511,7 @@ func TestParseRepliesLinkRelationWithXHTMLType(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1550,7 +1550,7 @@ func TestParseRepliesLinkRelationWithNoType(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1590,7 +1590,7 @@ func TestAbsoluteCommentsURL(t *testing.T) {
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1631,7 +1631,7 @@ func TestParseFeedWithCategories(t *testing.T) {
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
@ -1661,7 +1661,7 @@ func TestParseFeedWithIconURL(t *testing.T) {
<icon>http://example.org/icon.png</icon>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}

View file

@ -4,7 +4,6 @@
package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"encoding/xml"
"fmt"
"io"
@ -17,14 +16,13 @@ type atomFeed interface {
}
// Parse returns a normalized feed struct from an Atom feed.
func Parse(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
func Parse(baseURL string, r io.ReadSeeker, version string) (*model.Feed, error) {
var rawFeed atomFeed
if getAtomFeedVersion(r) == "0.3" {
if version == "0.3" {
rawFeed = new(atom03Feed)
} else {
rawFeed = new(atom10Feed)
}
r.Seek(0, io.SeekStart)
if err := xml_decoder.NewXMLDecoder(r).Decode(rawFeed); err != nil {
return nil, fmt.Errorf("atom: unable to parse feed: %w", err)
@ -32,25 +30,3 @@ func Parse(baseURL string, r io.ReadSeeker) (*model.Feed, error) {
return rawFeed.Transform(baseURL), nil
}
// getAtomFeedVersion scans the XML tokens read from data and reports the Atom
// version declared on the first start element whose local name is "feed".
//
// It returns "0.3" only when that element carries a "version" attribute whose
// value is exactly "0.3". In every other case it returns "1.0": the feed
// element has no such attribute, or the decoder yields a nil token before any
// feed element is seen (presumably end of input or a decoding error; the
// error value itself is discarded on purpose).
//
// The function never seeks on data, so the reader is left wherever the
// decoder stopped; callers that want to re-read the document must rewind it.
func getAtomFeedVersion(data io.ReadSeeker) string {
	const (
		legacyVersion  = "0.3"
		defaultVersion = "1.0"
	)

	dec := xml_decoder.NewXMLDecoder(data)
	for {
		// Best effort: a nil token ends the scan regardless of the reason.
		tok, _ := dec.Token()
		if tok == nil {
			return defaultVersion
		}

		start, isStart := tok.(xml.StartElement)
		if !isStart || start.Name.Local != "feed" {
			continue
		}

		// Only the first feed element is inspected.
		for i := range start.Attr {
			if a := start.Attr[i]; a.Name.Local == "version" && a.Value == legacyVersion {
				return legacyVersion
			}
		}
		return defaultVersion
	}
}

View file

@ -1,61 +0,0 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package atom // import "miniflux.app/v2/internal/reader/atom"
import (
"bytes"
"testing"
)
// TestDetectAtom10 verifies that getAtomFeedVersion reports "1.0" for a feed
// whose root <feed> element carries no version attribute.
func TestDetectAtom10(t *testing.T) {
	const feedXML = `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<updated>2003-12-13T18:30:02Z</updated>
<author>
<name>John Doe</name>
</author>
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`

	if got := getAtomFeedVersion(bytes.NewReader([]byte(feedXML))); got != "1.0" {
		t.Errorf(`Invalid Atom version detected: %s`, got)
	}
}
// TestDetectAtom03 verifies that getAtomFeedVersion reports "0.3" for a feed
// whose root <feed> element declares version="0.3".
func TestDetectAtom03(t *testing.T) {
	const feedXML = `<?xml version="1.0" encoding="utf-8"?>
<feed version="0.3" xmlns="http://purl.org/atom/ns#">
<title>dive into mark</title>
<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
<modified>2003-12-13T18:30:02Z</modified>
<author><name>Mark Pilgrim</name></author>
<entry>
<title>Atom 0.3 snapshot</title>
<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
<id>tag:diveintomark.org,2003:3.2397</id>
<issued>2003-12-13T08:29:29-04:00</issued>
<modified>2003-12-13T18:30:02Z</modified>
<summary type="text/plain">This is a test</summary>
<content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content>
</entry>
</feed>`

	if got := getAtomFeedVersion(bytes.NewReader([]byte(feedXML))); got != "0.3" {
		t.Errorf(`Invalid Atom version detected: %s`, got)
	}
}