mirror of
https://github.com/miniflux/v2.git
synced 2025-08-06 17:41:00 +00:00
test(reader): ensure consistent tags parsing across feed formats
This commit is contained in:
parent
d6d18a2d61
commit
2e26f5ca75
7 changed files with 98 additions and 70 deletions
|
@ -137,6 +137,8 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries {
|
|||
if len(categories) == 0 {
|
||||
categories = a.atomFeed.Categories.CategoryNames()
|
||||
}
|
||||
|
||||
// Sort and deduplicate categories.
|
||||
sort.Strings(categories)
|
||||
entry.Tags = slices.Compact(categories)
|
||||
|
||||
|
|
|
@ -1761,6 +1761,8 @@ func TestParseItemWithCategories(t *testing.T) {
|
|||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<category term='ZZZZ' />
|
||||
<category term='ZZZZ' />
|
||||
<category term=" " />
|
||||
<category term='Technology' label='Science' />
|
||||
</entry>
|
||||
</feed>`
|
||||
|
@ -1774,16 +1776,13 @@ func TestParseItemWithCategories(t *testing.T) {
|
|||
t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Science"
|
||||
result := feed.Entries[0].Tags[0]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
}
|
||||
expected := []string{"Science", "ZZZZ"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
expected = "ZZZZ"
|
||||
result = feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1792,9 +1791,10 @@ func TestParseFeedWithCategories(t *testing.T) {
|
|||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<category term='Test' label='Some Label' />
|
||||
<category term='Test' label='Some Label' />
|
||||
<category term='Test' label='Some Label' />
|
||||
<category term='C term' label='C label' />
|
||||
<category term='B term' label='B label' />
|
||||
<category term='B term' label='B label' />
|
||||
<category term='A term' label='A label' />
|
||||
<entry>
|
||||
<link href="http://www.example.org/entries/1" />
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
|
@ -1807,14 +1807,16 @@ func TestParseFeedWithCategories(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 1 {
|
||||
if len(feed.Entries[0].Tags) != 3 {
|
||||
t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "Some Label"
|
||||
result := feed.Entries[0].Tags[0]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry category, got %q instead of %q", result, expected)
|
||||
expected := []string{"A label", "B label", "C label"}
|
||||
result := feed.Entries[0].Tags
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -157,6 +157,10 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
|
|||
}
|
||||
}
|
||||
|
||||
// Sort and deduplicate tags.
|
||||
slices.Sort(entry.Tags)
|
||||
entry.Tags = slices.Compact(entry.Tags)
|
||||
|
||||
// Generate a hash for the entry.
|
||||
for _, value := range []string{item.ID, item.URL, item.ContentText + item.ContentHTML + item.Summary} {
|
||||
value = strings.TrimSpace(value)
|
||||
|
|
|
@ -790,7 +790,9 @@ func TestParseItemTags(t *testing.T) {
|
|||
"tags": [
|
||||
" tag 1",
|
||||
" ",
|
||||
"tag 2"
|
||||
"tag 2",
|
||||
"tag 2",
|
||||
"aaa"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
@ -801,14 +803,19 @@ func TestParseItemTags(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 3 {
|
||||
t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "tag 2"
|
||||
result := feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected)
|
||||
expected := []string{"aaa", "tag 1", "tag 2"}
|
||||
for i, tag := range feed.Entries[0].Tags {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import (
|
|||
"html"
|
||||
"log/slog"
|
||||
"path"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -124,31 +125,13 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
|
|||
}
|
||||
|
||||
// Populate entry categories.
|
||||
for _, tag := range item.Categories {
|
||||
if tag != "" {
|
||||
entry.Tags = append(entry.Tags, tag)
|
||||
}
|
||||
}
|
||||
for _, tag := range item.MediaCategories.Labels() {
|
||||
if tag != "" {
|
||||
entry.Tags = append(entry.Tags, tag)
|
||||
}
|
||||
}
|
||||
entry.Tags = findEntryTags(&item)
|
||||
if len(entry.Tags) == 0 {
|
||||
for _, tag := range r.rss.Channel.Categories {
|
||||
if tag != "" {
|
||||
entry.Tags = append(entry.Tags, tag)
|
||||
}
|
||||
}
|
||||
for _, tag := range r.rss.Channel.GetItunesCategories() {
|
||||
if tag != "" {
|
||||
entry.Tags = append(entry.Tags, tag)
|
||||
}
|
||||
}
|
||||
if r.rss.Channel.GooglePlayCategory.Text != "" {
|
||||
entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text)
|
||||
}
|
||||
entry.Tags = findFeedTags(&r.rss.Channel)
|
||||
}
|
||||
// Sort and deduplicate tags.
|
||||
slices.Sort(entry.Tags)
|
||||
entry.Tags = slices.Compact(entry.Tags)
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
@ -176,6 +159,30 @@ func findFeedAuthor(rssChannel *RSSChannel) string {
|
|||
return strings.TrimSpace(sanitizer.StripTags(author))
|
||||
}
|
||||
|
||||
func findFeedTags(rssChannel *RSSChannel) []string {
|
||||
tags := make([]string, 0)
|
||||
|
||||
for _, tag := range rssChannel.Categories {
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag != "" {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
for _, tag := range rssChannel.GetItunesCategories() {
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag != "" {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
if tag := strings.TrimSpace(rssChannel.GooglePlayCategory.Text); tag != "" {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
|
||||
return tags
|
||||
}
|
||||
|
||||
func findEntryTitle(rssItem *RSSItem) string {
|
||||
title := rssItem.Title.Content
|
||||
|
||||
|
@ -270,6 +277,26 @@ func findEntryAuthor(rssItem *RSSItem) string {
|
|||
return strings.TrimSpace(sanitizer.StripTags(author))
|
||||
}
|
||||
|
||||
func findEntryTags(rssItem *RSSItem) []string {
|
||||
tags := make([]string, 0)
|
||||
|
||||
for _, tag := range rssItem.Categories {
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag != "" {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
for _, tag := range rssItem.MediaCategories.Labels() {
|
||||
tag = strings.TrimSpace(tag)
|
||||
if tag != "" {
|
||||
tags = append(tags, tag)
|
||||
}
|
||||
}
|
||||
|
||||
return tags
|
||||
}
|
||||
|
||||
func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
duplicates := make(map[string]bool)
|
||||
|
|
|
@ -1971,6 +1971,9 @@ func TestParseEntryWithCategories(t *testing.T) {
|
|||
<link>https://example.org/item</link>
|
||||
<category>Category 1</category>
|
||||
<category><![CDATA[Category 2]]></category>
|
||||
<category>Category 2</category>
|
||||
<category>Category 0</category>
|
||||
<category> </category>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
@ -1980,11 +1983,11 @@ func TestParseEntryWithCategories(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
if len(feed.Entries[0].Tags) != 3 {
|
||||
t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := []string{"Category 1", "Category 2"}
|
||||
expected := []string{"Category 0", "Category 1", "Category 2"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
|
@ -2022,7 +2025,7 @@ func TestParseFeedWithItunesCategories(t *testing.T) {
|
|||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"}
|
||||
expected := []string{"Documentary", "Health", "Mental Health", "Society & Culture"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
|
@ -2091,12 +2094,12 @@ func TestParseEntryWithMediaCategories(t *testing.T) {
|
|||
t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := []string{"Visual Art", "Ace Ventura - Pet Detective"}
|
||||
expected := []string{"Ace Ventura - Pet Detective", "Visual Art"}
|
||||
result := feed.Entries[0].Tags
|
||||
|
||||
for i, tag := range result {
|
||||
if tag != expected[i] {
|
||||
t.Errorf("Incorrect tag, got: %q", tag)
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,8 +8,6 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
|
@ -142,7 +140,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.ReadingTime,
|
||||
pq.Array(removeEmpty(removeDuplicates(entry.Tags))),
|
||||
pq.Array(entry.Tags),
|
||||
).Scan(
|
||||
&entry.ID,
|
||||
&entry.Status,
|
||||
|
@ -198,7 +196,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error {
|
|||
entry.UserID,
|
||||
entry.FeedID,
|
||||
entry.Hash,
|
||||
pq.Array(removeEmpty(removeDuplicates(entry.Tags))),
|
||||
pq.Array(entry.Tags),
|
||||
).Scan(&entry.ID)
|
||||
|
||||
if err != nil {
|
||||
|
@ -630,21 +628,6 @@ func (s *Storage) UnshareEntry(userID int64, entryID int64) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// removeDuplicates returns the distinct values of l in ascending order.
//
// The work is done on a copy of l. slices.Sort reorders in place and
// slices.Compact shifts elements within the same backing array, so running
// them directly on the argument would leave a caller that keeps using its
// own slice header with reordered contents and a stale tail. A nil input
// yields a nil result.
func removeDuplicates(l []string) []string {
	unique := slices.Clone(l)
	slices.Sort(unique)
	return slices.Compact(unique)
}
|
||||
|
||||
// removeEmpty returns the elements of l that contain at least one
// non-whitespace character, in their original order.
//
// Retained elements are copied verbatim; they are not trimmed. When nothing
// is retained (including for nil or empty input) the result is nil.
func removeEmpty(l []string) []string {
	var kept []string
	for i := range l {
		// Blank or whitespace-only values carry no information.
		if strings.TrimSpace(l[i]) == "" {
			continue
		}
		kept = append(kept, l[i])
	}
	return kept
}
|
||||
|
||||
func truncateString(s string) string {
|
||||
if len(s) > truncationLen {
|
||||
return s[:truncationLen]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue