1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

test(reader): ensure consistent tags parsing across feed formats

This commit is contained in:
Frédéric Guillot 2025-07-07 20:00:04 -07:00
parent d6d18a2d61
commit 2e26f5ca75
7 changed files with 98 additions and 70 deletions

View file

@ -7,6 +7,7 @@ import (
"html"
"log/slog"
"path"
"slices"
"strconv"
"strings"
"time"
@ -124,31 +125,13 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
}
// Populate entry categories.
for _, tag := range item.Categories {
if tag != "" {
entry.Tags = append(entry.Tags, tag)
}
}
for _, tag := range item.MediaCategories.Labels() {
if tag != "" {
entry.Tags = append(entry.Tags, tag)
}
}
entry.Tags = findEntryTags(&item)
if len(entry.Tags) == 0 {
for _, tag := range r.rss.Channel.Categories {
if tag != "" {
entry.Tags = append(entry.Tags, tag)
}
}
for _, tag := range r.rss.Channel.GetItunesCategories() {
if tag != "" {
entry.Tags = append(entry.Tags, tag)
}
}
if r.rss.Channel.GooglePlayCategory.Text != "" {
entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text)
}
entry.Tags = findFeedTags(&r.rss.Channel)
}
// Sort and deduplicate tags.
slices.Sort(entry.Tags)
entry.Tags = slices.Compact(entry.Tags)
feed.Entries = append(feed.Entries, entry)
}
@ -176,6 +159,30 @@ func findFeedAuthor(rssChannel *RSSChannel) string {
return strings.TrimSpace(sanitizer.StripTags(author))
}
// findFeedTags collects the feed-level tags declared on the RSS channel.
//
// Candidates are gathered in this order: the channel's Categories, the values
// returned by GetItunesCategories, and finally the GooglePlayCategory text.
// Every candidate is trimmed with strings.TrimSpace and discarded when nothing
// is left. The returned slice is never nil; no sorting or deduplication is
// performed here.
func findFeedTags(rssChannel *RSSChannel) []string {
	feedTags := []string{}

	// keep appends the candidate once surrounding whitespace is removed,
	// ignoring candidates that end up empty.
	keep := func(candidate string) {
		if trimmed := strings.TrimSpace(candidate); trimmed != "" {
			feedTags = append(feedTags, trimmed)
		}
	}

	for _, category := range rssChannel.Categories {
		keep(category)
	}
	for _, category := range rssChannel.GetItunesCategories() {
		keep(category)
	}
	keep(rssChannel.GooglePlayCategory.Text)

	return feedTags
}
func findEntryTitle(rssItem *RSSItem) string {
title := rssItem.Title.Content
@ -270,6 +277,26 @@ func findEntryAuthor(rssItem *RSSItem) string {
return strings.TrimSpace(sanitizer.StripTags(author))
}
// findEntryTags collects the tags declared on a single RSS item.
//
// The item's Categories are read first, followed by the labels returned by
// MediaCategories.Labels. Each value is trimmed with strings.TrimSpace and
// skipped when it is empty afterwards. The returned slice is never nil; no
// sorting or deduplication is performed here.
func findEntryTags(rssItem *RSSItem) []string {
	entryTags := []string{}

	for _, category := range rssItem.Categories {
		label := strings.TrimSpace(category)
		if label == "" {
			continue
		}
		entryTags = append(entryTags, label)
	}

	for _, mediaLabel := range rssItem.MediaCategories.Labels() {
		label := strings.TrimSpace(mediaLabel)
		if label == "" {
			continue
		}
		entryTags = append(entryTags, label)
	}

	return entryTags
}
func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList {
enclosures := make(model.EnclosureList, 0)
duplicates := make(map[string]bool)

View file

@ -1971,6 +1971,9 @@ func TestParseEntryWithCategories(t *testing.T) {
<link>https://example.org/item</link>
<category>Category 1</category>
<category><![CDATA[Category 2]]></category>
<category>Category 2</category>
<category>Category 0</category>
<category> </category>
</item>
</channel>
</rss>`
@ -1980,11 +1983,11 @@ func TestParseEntryWithCategories(t *testing.T) {
t.Fatal(err)
}
if len(feed.Entries[0].Tags) != 2 {
if len(feed.Entries[0].Tags) != 3 {
t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := []string{"Category 1", "Category 2"}
expected := []string{"Category 0", "Category 1", "Category 2"}
result := feed.Entries[0].Tags
for i, tag := range result {
@ -2022,7 +2025,7 @@ func TestParseFeedWithItunesCategories(t *testing.T) {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
expected := []string{"Documentary", "Health", "Mental Health", "Society & Culture"}
result := feed.Entries[0].Tags
for i, tag := range result {
@ -2091,12 +2094,12 @@ func TestParseEntryWithMediaCategories(t *testing.T) {
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
}
expected := []string{"Visual Art", "Ace Ventura - Pet Detective"}
expected := []string{"Ace Ventura - Pet Detective", "Visual Art"}
result := feed.Entries[0].Tags
for i, tag := range result {
if tag != expected[i] {
t.Errorf("Incorrect tag, got: %q", tag)
t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
}
}
}