diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go
index 2909fc6b..531cc53f 100644
--- a/internal/reader/rss/adapter.go
+++ b/internal/reader/rss/adapter.go
@@ -39,7 +39,7 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
// Try to find the feed URL from the Atom links.
for _, atomLink := range r.rss.Channel.AtomLinks.Links {
- atomLinkHref := strings.TrimSpace(atomLink.URL)
+ atomLinkHref := strings.TrimSpace(atomLink.Href)
if atomLinkHref != "" && atomLink.Rel == "self" {
if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
feed.FeedURL = absoluteFeedURL
@@ -170,8 +170,8 @@ func findEntryURL(rssItem *RSSItem) string {
}
for _, atomLink := range rssItem.AtomLinks.Links {
- if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
- return strings.TrimSpace(atomLink.URL)
+ if atomLink.Href != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") {
+ return strings.TrimSpace(atomLink.Href)
}
}
@@ -233,8 +233,8 @@ func findEntryAuthor(rssItem *RSSItem) string {
author = rssItem.ItunesAuthor
case rssItem.DublinCoreCreator != "":
author = rssItem.DublinCoreCreator
- case rssItem.AtomAuthor.String() != "":
- author = rssItem.AtomAuthor.String()
+ case rssItem.AtomAuthor.PersonName() != "":
+ author = rssItem.AtomAuthor.PersonName()
case strings.Contains(rssItem.Author.Inner, "
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ this is <b>bold</b>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if feed.Entries[0].Content != `this is bold` {
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// TestParseEntryWithDescriptionWithHTMLCDATA parses data and compares the first entry's Content with the expected literal; scenario from https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryWithDescriptionWithHTMLCDATA(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ bold]]>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) // NOTE(review): the markup inside data looks stripped in this view — confirm against the real file
+ if err != nil {
+ t.Fatal(err) // stop here: there is no feed to inspect when parsing fails
+ }
+
+ if feed.Entries[0].Content != `this is bold` { // only the first entry is checked
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// TestParseEntryDescriptionWithEncodingAngleBracketsInText parses data and compares the first entry's Content with the expected literal; scenario from https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryDescriptionWithEncodingAngleBracketsInText(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+ 5 < 8, ticker symbol <BIGCO>
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) // NOTE(review): the markup inside data looks stripped in this view — confirm against the real file
+ if err != nil {
+ t.Fatal(err) // stop here: there is no feed to inspect when parsing fails
+ }
+
+ if feed.Entries[0].Content != `5 < 8, ticker symbol <BIGCO>` { // only the first entry is checked
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
+// TestParseEntryDescriptionWithEncodingAngleBracketsWithinCDATASection parses data and compares the first entry's Content with the expected literal; scenario from https://www.rssboard.org/rss-encoding-examples
+func TestParseEntryDescriptionWithEncodingAngleBracketsWithinCDATASection(t *testing.T) {
+ data := `
+
+
+ Example
+ http://example.org/
+
+ Item 1
+ http://example.org/item1
+
+
+
+ `
+
+ feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) // NOTE(review): the markup inside data looks stripped in this view — confirm against the real file
+ if err != nil {
+ t.Fatal(err) // stop here: there is no feed to inspect when parsing fails
+ }
+
+ if feed.Entries[0].Content != `5 < 8, ticker symbol <BIGCO>` { // only the first entry is checked
+ t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content)
+ }
+}
+
func TestParseEntryWithFeedBurnerLink(t *testing.T) {
data := `
diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go
index 7935166d..bc99b461 100644
--- a/internal/reader/rss/rss.go
+++ b/internal/reader/rss/rss.go
@@ -16,29 +16,75 @@ import (
// Specs: https://www.rssboard.org/rss-specification
type RSS struct {
- Version string `xml:"rss version,attr"`
+ // Version is read from the version attribute of the rss element.
+ Version string `xml:"rss version,attr"`
+
+ // Channel holds the contents of the channel element, the main container for the RSS feed.
Channel RSSChannel `xml:"rss channel"`
}
type RSSChannel struct {
- Title string `xml:"rss title"`
- Link string `xml:"rss link"`
- Description string `xml:"rss description"`
- Language string `xml:"rss language"`
- Copyright string `xml:"rss copyRight"`
- ManagingEditor string `xml:"rss managingEditor"`
- Webmaster string `xml:"rss webMaster"`
- PubDate string `xml:"rss pubDate"`
- LastBuildDate string `xml:"rss lastBuildDate"`
- Categories []string `xml:"rss category"`
- Generator string `xml:"rss generator"`
- Docs string `xml:"rss docs"`
- Cloud *RSSCloud `xml:"rss cloud"`
- Image *RSSImage `xml:"rss image"`
- TTL string `xml:"rss ttl"`
- SkipHours []string `xml:"rss skipHours>hour"`
- SkipDays []string `xml:"rss skipDays>day"`
- Items []RSSItem `xml:"rss item"`
+ // Title is the name of the channel.
+ Title string `xml:"rss title"`
+
+ // Link is the URL to the HTML website corresponding to the channel.
+ Link string `xml:"rss link"`
+
+ // Description is a phrase or sentence describing the channel.
+ Description string `xml:"rss description"`
+
+ // Language is the language the channel is written in.
+ // A list of allowable values for this element, as provided by Netscape, is here: https://www.rssboard.org/rss-language-codes.
+ // You may also use values defined by the W3C: https://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes.
+ Language string `xml:"rss language"`
+
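+ // Copyright is a string indicating the copyright. NOTE(review): the tag below says copyRight while the RSS specification names the element copyright, and encoding/xml matches element names case-sensitively — confirm this field is ever populated.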
+ Copyright string `xml:"rss copyRight"`
+
+ // ManagingEditor is the email address for the person responsible for editorial content.
+ ManagingEditor string `xml:"rss managingEditor"`
+
+ // Webmaster is the email address for the person responsible for technical issues relating to the channel.
+ Webmaster string `xml:"rss webMaster"`
+
+ // PubDate is the publication date for the content in the channel.
+ // All date-times in RSS conform to the Date and Time Specification of RFC 822, with the exception that the year may be expressed with two characters or four characters (four preferred).
+ PubDate string `xml:"rss pubDate"`
+
+ // LastBuildDate is the last time the content of the channel changed.
+ LastBuildDate string `xml:"rss lastBuildDate"`
+
+ // Categories is a collection of categories to which the channel belongs.
+ Categories []string `xml:"rss category"`
+
+ // Generator is a string indicating the program used to generate the channel.
+ Generator string `xml:"rss generator"`
+
+ // DocumentationURL is a URL that points to the documentation for the format used in the RSS file.
+ DocumentationURL string `xml:"rss docs"`
+
+ // Cloud is a web service that supports the rssCloud interface which can be implemented in HTTP-POST, XML-RPC or SOAP 1.1.
+ Cloud *RSSCloud `xml:"rss cloud"`
+
+ // Image specifies a GIF, JPEG or PNG image that can be displayed with the channel.
+ Image *RSSImage `xml:"rss image"`
+
+ // TTL is a number of minutes that indicates how long a channel can be cached before refreshing from the source.
+ TTL string `xml:"rss ttl"`
+
+ // SkipHours is a hint for aggregators telling them which hours they can skip.
+ // An XML element that contains up to 24 sub-elements whose value is a number between 0 and 23,
+ // representing a time in GMT, when aggregators,
+ // if they support the feature, may not read the channel on hours listed in the skipHours element.
+ SkipHours []string `xml:"rss skipHours>hour"`
+
+ // SkipDays is a hint for aggregators telling them which days they can skip.
+ // An XML element that contains up to seven sub-elements whose value is Monday, Tuesday, Wednesday, Thursday, Friday, Saturday or Sunday.
+ SkipDays []string `xml:"rss skipDays>day"`
+
+ // Items is a collection of items.
+ Items []RSSItem `xml:"rss item"`
+
AtomLinks
itunes.ItunesChannelElement
googleplay.GooglePlayChannelElement
@@ -64,16 +110,56 @@ type RSSImage struct {
}
type RSSItem struct {
- Title string `xml:"rss title"`
- Link string `xml:"rss link"`
- Description string `xml:"rss description"`
- Author RSSAuthor `xml:"rss author"`
- Categories []string `xml:"rss category"`
- CommentsURL string `xml:"rss comments"`
- Enclosures []RSSEnclosure `xml:"rss enclosure"`
- GUID RSSGUID `xml:"rss guid"`
- PubDate string `xml:"rss pubDate"`
- Source RSSSource `xml:"rss source"`
+ // Title is the title of the item.
+ Title string `xml:"rss title"`
+
+ // Link is the URL of the item.
+ Link string `xml:"rss link"`
+
+ // Description is the item synopsis.
+ Description string `xml:"rss description"`
+
+ // Author is the email address of the author of the item.
+ Author RSSAuthor `xml:"rss author"`
+
+ // <category> is an optional sub-element of <item>.
+ // It has one optional attribute, domain, a string that identifies a categorization taxonomy.
+ Categories []string `xml:"rss category"`
+
+ // <comments> is an optional sub-element of <item>.
+ // If present, it contains the URL of the comments page for the item.
+ CommentsURL string `xml:"rss comments"`
+
+ // <enclosure> is an optional sub-element of <item>.
+ // It has three required attributes. url says where the enclosure is located,
+ // length says how big it is in bytes, and type says what its type is, a standard MIME type.
+ Enclosures []RSSEnclosure `xml:"rss enclosure"`
+
+ // <guid> is an optional sub-element of <item>.
+ // It's a string that uniquely identifies the item.
+ // When present, an aggregator may choose to use this string to determine if an item is new.
+ //
+ // There are no rules for the syntax of a guid.
+ // Aggregators must view them as a string.
+ // It's up to the source of the feed to establish the uniqueness of the string.
+ //
+ // If the guid element has an attribute named isPermaLink with a value of true,
+ // the reader may assume that it is a permalink to the item, that is, a url that can be opened in a Web browser,
+ // that points to the full item described by the <item> element.
+ //
+ // isPermaLink is optional, its default value is true.
+ // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
+ GUID RSSGUID `xml:"rss guid"`
+
+ // <pubDate> is the publication date of the item.
+ // Its value is a string in RFC 822 format.
+ PubDate string `xml:"rss pubDate"`
+
+ // <source> is an optional sub-element of <item>.
+ // Its value is the name of the RSS channel that the item came from, derived from its <title>.
+ // It has one required attribute, url, which contains the URL of the RSS channel.
+ Source RSSSource `xml:"rss source"`
+
dublincore.DublinCoreItemElement
FeedBurnerItemElement
media.MediaItemElement