mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Move feed parsers packages in reader package
This commit is contained in:
parent
c26787f476
commit
d5838b6734
14 changed files with 7 additions and 7 deletions
29
reader/rss/parser.go
Normal file
29
reader/rss/parser.go
Normal file
|
@ -0,0 +1,29 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct from a RSS feed.
|
||||
func Parse(data io.Reader) (*model.Feed, error) {
|
||||
feed := new(rssFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
err := decoder.Decode(feed)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v.", err)
|
||||
}
|
||||
|
||||
return feed.Transform(), nil
|
||||
}
|
550
reader/rss/parser_test.go
Normal file
550
reader/rss/parser_test.go
Normal file
|
@ -0,0 +1,550 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
)
|
||||
|
||||
func TestParseRss2Sample(t *testing.T) {
|
||||
data := `
|
||||
<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<description>Liftoff to Space Exploration.</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
|
||||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<generator>Weblog Editor 2.0</generator>
|
||||
<managingEditor>editor@example.com</managingEditor>
|
||||
<webMaster>webmaster@example.com</webMaster>
|
||||
<item>
|
||||
<title>Star City</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
<item>
|
||||
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
|
||||
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>The Engine That Does More</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
|
||||
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
|
||||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Astronauts' Dirty Laundry</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
|
||||
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
|
||||
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Liftoff News" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 4 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
|
||||
if !feed.Entries[0].Date.Equal(expectedDate) {
|
||||
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Star City" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect feed title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<link>https://example.org/item</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<guid isPermaLink="false">1234</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/" {
|
||||
t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAtomLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<atom:link href="https://example.org/item" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<atom:link rel="payment" href="https://example.org/a" />
|
||||
<atom:link rel="http://foobar.tld" href="https://example.org/b" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/b" {
|
||||
t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedURLWithAtomLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/rss" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAtomAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<author xmlns:author="http://www.w3.org/2005/Atom">
|
||||
<name>Foo Bar</name>
|
||||
<title>Vice President</title>
|
||||
<department/>
|
||||
<company>FooBar Inc.</company>
|
||||
</author>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Foo Bar" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<dc:creator>Me (me@example.com)</dc:creator>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Me (me@example.com)" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithItunesAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<itunes:author>Someone</itunes:author>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Someone" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithItunesAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<itunes:author>Someone</itunes:author>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Someone" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithDublinCoreDate(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<description>Description.</description>
|
||||
<guid isPermaLink="false">UUID</guid>
|
||||
<dc:date>2002-09-29T23:40:06-05:00</dc:date>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
location, _ := time.LoadLocation("EST")
|
||||
expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
|
||||
if !feed.Entries[0].Date.Equal(expectedDate) {
|
||||
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithContentEncoded(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<description>Description.</description>
|
||||
<guid isPermaLink="false">UUID</guid>
|
||||
<content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithFeedBurnerLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<feedburner:origLink>http://example.org/original</feedburner:origLink>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/original" {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org</link>
|
||||
<item>
|
||||
<title>
|
||||
Some Title
|
||||
</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Some Title" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>My Podcast Feed</title>
|
||||
<link>http://example.org</link>
|
||||
<author>some.email@example.org</author>
|
||||
<item>
|
||||
<title>Podcasting with RSS</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<description>An overview of RSS podcasting</description>
|
||||
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
|
||||
<guid isPermaLink="true">http://www.example.org/entries/1</guid>
|
||||
<enclosure url="http://www.example.org/myaudiofile.mp3"
|
||||
length="12345"
|
||||
type="audio/mpeg" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 12345 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
|
||||
<channel>
|
||||
<title>My Example Feed</title>
|
||||
<link>http://example.org</link>
|
||||
<author>some.email@example.org</author>
|
||||
<item>
|
||||
<title>Example Item</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<enclosure
|
||||
url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
|
||||
length="76192460"
|
||||
type="audio/mpeg" />
|
||||
<feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 76192460 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXml(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should returns an error")
|
||||
}
|
||||
|
||||
if _, ok := err.(errors.LocalizedError); !ok {
|
||||
t.Error("The error returned must be a LocalizedError")
|
||||
}
|
||||
}
|
235
reader/rss/rss.go
Normal file
235
reader/rss/rss.go
Normal file
|
@ -0,0 +1,235 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"log"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/date"
|
||||
"github.com/miniflux/miniflux2/reader/processor"
|
||||
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||
)
|
||||
|
||||
type rssFeed struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Title string `xml:"channel>title"`
|
||||
Links []rssLink `xml:"channel>link"`
|
||||
Language string `xml:"channel>language"`
|
||||
Description string `xml:"channel>description"`
|
||||
PubDate string `xml:"channel>pubDate"`
|
||||
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
|
||||
Items []rssItem `xml:"channel>item"`
|
||||
}
|
||||
|
||||
type rssLink struct {
|
||||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
Href string `xml:"href,attr"`
|
||||
Rel string `xml:"rel,attr"`
|
||||
}
|
||||
|
||||
type rssItem struct {
|
||||
GUID string `xml:"guid"`
|
||||
Title string `xml:"title"`
|
||||
Links []rssLink `xml:"link"`
|
||||
OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
|
||||
Description string `xml:"description"`
|
||||
Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
||||
PubDate string `xml:"pubDate"`
|
||||
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
||||
Authors []rssAuthor `xml:"author"`
|
||||
Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
||||
Enclosures []rssEnclosure `xml:"enclosure"`
|
||||
OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
|
||||
}
|
||||
|
||||
type rssAuthor struct {
|
||||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
Name string `xml:"name"`
|
||||
}
|
||||
|
||||
type rssEnclosure struct {
|
||||
URL string `xml:"url,attr"`
|
||||
Type string `xml:"type,attr"`
|
||||
Length string `xml:"length,attr"`
|
||||
}
|
||||
|
||||
func (r *rssFeed) GetSiteURL() string {
|
||||
for _, element := range r.Links {
|
||||
if element.XMLName.Space == "" {
|
||||
return element.Data
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssFeed) GetFeedURL() string {
|
||||
for _, element := range r.Links {
|
||||
if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
|
||||
return element.Href
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssFeed) Transform() *model.Feed {
|
||||
feed := new(model.Feed)
|
||||
feed.SiteURL = r.GetSiteURL()
|
||||
feed.FeedURL = r.GetFeedURL()
|
||||
feed.Title = sanitizer.StripTags(r.Title)
|
||||
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, item := range r.Items {
|
||||
entry := item.Transform()
|
||||
|
||||
if entry.Author == "" && r.ItunesAuthor != "" {
|
||||
entry.Author = r.ItunesAuthor
|
||||
}
|
||||
entry.Author = sanitizer.StripTags(entry.Author)
|
||||
|
||||
if entry.URL == "" {
|
||||
entry.URL = feed.SiteURL
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
func (r *rssItem) GetDate() time.Time {
|
||||
value := r.PubDate
|
||||
if r.Date != "" {
|
||||
value = r.Date
|
||||
}
|
||||
|
||||
if value != "" {
|
||||
result, err := date.Parse(value)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (r *rssItem) GetAuthor() string {
|
||||
for _, element := range r.Authors {
|
||||
if element.Name != "" {
|
||||
return element.Name
|
||||
}
|
||||
|
||||
if element.Data != "" {
|
||||
return element.Data
|
||||
}
|
||||
}
|
||||
|
||||
return r.Creator
|
||||
}
|
||||
|
||||
func (r *rssItem) GetHash() string {
|
||||
for _, value := range []string{r.GUID, r.GetURL()} {
|
||||
if value != "" {
|
||||
return helper.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssItem) GetContent() string {
|
||||
if r.Content != "" {
|
||||
return r.Content
|
||||
}
|
||||
|
||||
return r.Description
|
||||
}
|
||||
|
||||
func (r *rssItem) GetURL() string {
|
||||
if r.OriginalLink != "" {
|
||||
return r.OriginalLink
|
||||
}
|
||||
|
||||
for _, link := range r.Links {
|
||||
if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
|
||||
return link.Href
|
||||
}
|
||||
|
||||
if link.Data != "" {
|
||||
return link.Data
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssItem) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, enclosure := range r.Enclosures {
|
||||
length, _ := strconv.Atoi(enclosure.Length)
|
||||
enclosureURL := enclosure.URL
|
||||
|
||||
if r.OrigEnclosureLink != "" {
|
||||
filename := path.Base(r.OrigEnclosureLink)
|
||||
if strings.Contains(enclosureURL, filename) {
|
||||
enclosureURL = r.OrigEnclosureLink
|
||||
}
|
||||
}
|
||||
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: enclosureURL,
|
||||
MimeType: enclosure.Type,
|
||||
Size: length,
|
||||
})
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (r *rssItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = r.GetURL()
|
||||
entry.Date = r.GetDate()
|
||||
entry.Author = r.GetAuthor()
|
||||
entry.Hash = r.GetHash()
|
||||
entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
|
||||
entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t"))
|
||||
entry.Enclosures = r.GetEnclosures()
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
func isValidLinkRelation(rel string) bool {
|
||||
switch rel {
|
||||
case "", "alternate", "enclosure", "related", "self", "via":
|
||||
return true
|
||||
default:
|
||||
if strings.HasPrefix(rel, "http") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue