1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-26 18:21:01 +00:00

Parse <category> from Feeds (RSS, Atom and JSON)

This commit is contained in:
privatmamtora 2023-02-25 04:52:45 +00:00 committed by GitHub
parent ff8d68c151
commit 8f9ccc6540
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 252 additions and 11 deletions

View file

@ -80,14 +80,15 @@ func (a *atom10Feed) Transform(baseURL string) *model.Feed {
}
type atom10Entry struct {
ID string `xml:"id"`
Title atom10Text `xml:"title"`
Published string `xml:"published"`
Updated string `xml:"updated"`
Links atomLinks `xml:"link"`
Summary atom10Text `xml:"summary"`
Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
Authors atomAuthors `xml:"author"`
ID string `xml:"id"`
Title atom10Text `xml:"title"`
Published string `xml:"published"`
Updated string `xml:"updated"`
Links atomLinks `xml:"link"`
Summary atom10Text `xml:"summary"`
Content atom10Text `xml:"http://www.w3.org/2005/Atom content"`
Authors atomAuthors `xml:"author"`
Categories []atom10Category `xml:"category"`
media.Element
}
@ -101,6 +102,7 @@ func (a *atom10Entry) Transform() *model.Entry {
entry.Title = a.entryTitle()
entry.Enclosures = a.entryEnclosures()
entry.CommentsURL = a.entryCommentsURL()
entry.Tags = a.entryCategories()
return entry
}
@ -214,6 +216,20 @@ func (a *atom10Entry) entryEnclosures() model.EnclosureList {
return enclosures
}
func (r *atom10Entry) entryCategories() []string {
var categoryList []string
for _, atomCategory := range r.Categories {
if strings.TrimSpace(atomCategory.Label) != "" {
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label))
} else {
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term))
}
}
return categoryList
}
// See https://tools.ietf.org/html/rfc4685#section-4
// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
@ -232,6 +248,11 @@ type atom10Text struct {
XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
}
type atom10Category struct {
Term string `xml:"term,attr"`
Label string `xml:"label,attr"`
}
// Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1
// HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2
// XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3

View file

@ -1604,3 +1604,48 @@ func TestAbsoluteCommentsURL(t *testing.T) {
t.Errorf("Incorrect entry comments URL, got: %s", feed.Entries[0].CommentsURL)
}
}
func TestParseFeedWithCategories(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<author>
<name>Alice</name>
</author>
<author>
<name>Bob</name>
</author>
<entry>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
<category term='Tech' />
<category term='Technology' label='Science' />
</entry>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries[0].Tags) != 2 {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := "Tech"
result := feed.Entries[0].Tags[0]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
expected = "Science"
result = feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
}

View file

@ -43,6 +43,7 @@ type jsonItem struct {
Authors []jsonAuthor `json:"authors"`
Author jsonAuthor `json:"author"`
Attachments []jsonAttachment `json:"attachments"`
Tags []string `json:"tags"`
}
type jsonAttachment struct {
@ -181,6 +182,7 @@ func (j *jsonItem) Transform() *model.Entry {
entry.Content = j.GetContent()
entry.Title = strings.TrimSpace(j.GetTitle())
entry.Enclosures = j.GetEnclosures()
entry.Tags = j.Tags
return entry
}

View file

@ -575,3 +575,45 @@ func TestParseInvalidJSON(t *testing.T) {
t.Error("Parse should returns an error")
}
}
func TestParseTags(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
"title": "Brent Simmonss Microblog",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"author": {
"name": "Brent Simmons",
"url": "http://example.org/",
"avatar": "https://example.org/avatar.png"
},
"items": [
{
"id": "2347259",
"url": "https://example.org/2347259",
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
"date_published": "2016-02-09T14:22:00-07:00",
"tags": [
"tag 1",
"tag 2"
]
}
]
}`
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries[0].Tags) != 2 {
t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags))
}
expected := "tag 2"
result := feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected)
}
}

View file

@ -1426,3 +1426,69 @@ func TestEntryDescriptionFromGooglePlayDescription(t *testing.T) {
t.Errorf(`Unexpected podcast content, got %q instead of %q`, result, expected)
}
}
func TestParseEntryWithCategoryAndInnerHTML(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<category>Category 1</category>
<category>Category 2</category>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries[0].Tags) != 2 {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := "Category 2"
result := feed.Entries[0].Tags[1]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
}
func TestParseEntryWithCategoryAndCDATA(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<author>
by <![CDATA[Foo Bar]]>
</author>
<category>Sample Category</category>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries[0].Tags) != 1 {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := "Sample Category"
result := feed.Entries[0].Tags[0]
if result != expected {
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
}
}

View file

@ -156,6 +156,12 @@ type rssEnclosure struct {
Length string `xml:"length,attr"`
}
type rssCategory struct {
XMLName xml.Name
Data string `xml:",chardata"`
Inner string `xml:",innerxml"`
}
func (enclosure *rssEnclosure) Size() int64 {
if enclosure.Length == "" {
return 0
@ -173,6 +179,7 @@ type rssItem struct {
Authors []rssAuthor `xml:"author"`
CommentLinks []rssCommentLink `xml:"comments"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
Categories []rssCategory `xml:"category"`
DublinCoreElement
FeedBurnerElement
PodcastEntryElement
@ -189,6 +196,8 @@ func (r *rssItem) Transform() *model.Entry {
entry.Content = r.entryContent()
entry.Title = r.entryTitle()
entry.Enclosures = r.entryEnclosures()
entry.Tags = r.entryCategories()
return entry
}
@ -372,6 +381,20 @@ func (r *rssItem) entryEnclosures() model.EnclosureList {
return enclosures
}
func (r *rssItem) entryCategories() []string {
var categoryList []string
for _, rssCategory := range r.Categories {
if strings.Contains(rssCategory.Inner, "<![CDATA[") {
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
} else {
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
}
}
return categoryList
}
func (r *rssItem) entryCommentsURL() string {
for _, commentLink := range r.CommentLinks {
if commentLink.XMLName.Space == "" {