diff --git a/internal/reader/atom/atom_10_adapter.go b/internal/reader/atom/atom_10_adapter.go index 6c7d4043..7045faf9 100644 --- a/internal/reader/atom/atom_10_adapter.go +++ b/internal/reader/atom/atom_10_adapter.go @@ -137,6 +137,8 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries { if len(categories) == 0 { categories = a.atomFeed.Categories.CategoryNames() } + + // Sort and deduplicate categories. sort.Strings(categories) entry.Tags = slices.Compact(categories) diff --git a/internal/reader/atom/atom_10_test.go b/internal/reader/atom/atom_10_test.go index 6c664faf..d7382606 100644 --- a/internal/reader/atom/atom_10_test.go +++ b/internal/reader/atom/atom_10_test.go @@ -1761,6 +1761,8 @@ func TestParseItemWithCategories(t *testing.T) { 2003-12-13T18:30:02Z Some text. + + ` @@ -1774,16 +1776,13 @@ func TestParseItemWithCategories(t *testing.T) { t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := "Science" - result := feed.Entries[0].Tags[0] - if result != expected { - t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) - } + expected := []string{"Science", "ZZZZ"} + result := feed.Entries[0].Tags - expected = "ZZZZ" - result = feed.Entries[0].Tags[1] - if result != expected { - t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) + for i, tag := range result { + if tag != expected[i] { + t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i]) + } } } @@ -1792,9 +1791,10 @@ func TestParseFeedWithCategories(t *testing.T) { Example Feed - - - + + + + 2003-12-13T18:30:02Z @@ -1807,14 +1807,16 @@ func TestParseFeedWithCategories(t *testing.T) { t.Fatal(err) } - if len(feed.Entries[0].Tags) != 1 { + if len(feed.Entries[0].Tags) != 3 { t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := "Some Label" - result := feed.Entries[0].Tags[0] - if result != expected { - t.Errorf("Incorrect entry category, got %q instead of %q", result, expected) + expected := []string{"A label", "B label", "C label"} + result := feed.Entries[0].Tags + for i, tag := range result { + if tag != expected[i] { + t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i]) + } } } diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go index 8d63e518..e1a8bdda 100644 --- a/internal/reader/json/adapter.go +++ b/internal/reader/json/adapter.go @@ -157,6 +157,10 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed { } } + // Sort and deduplicate tags. + slices.Sort(entry.Tags) + entry.Tags = slices.Compact(entry.Tags) + // Generate a hash for the entry. for _, value := range []string{item.ID, item.URL, item.ContentText + item.ContentHTML + item.Summary} { value = strings.TrimSpace(value) diff --git a/internal/reader/json/parser_test.go b/internal/reader/json/parser_test.go index 6e2038b3..dc628527 100644 --- a/internal/reader/json/parser_test.go +++ b/internal/reader/json/parser_test.go @@ -790,7 +790,9 @@ func TestParseItemTags(t *testing.T) { "tags": [ " tag 1", " ", - "tag 2" + "tag 2", + "tag 2", + "aaa" ] } ] @@ -801,14 +803,19 @@ func TestParseItemTags(t *testing.T) { t.Fatal(err) } - if len(feed.Entries[0].Tags) != 2 { + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if len(feed.Entries[0].Tags) != 3 { t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags)) } - expected := "tag 2" - result := feed.Entries[0].Tags[1] - if result != expected { - t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected) + expected := []string{"aaa", "tag 1", "tag 2"} + for i, tag := range feed.Entries[0].Tags { + if tag != expected[i] { + t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i]) + } } } diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go index 845b103b..b26636f6 100644 --- a/internal/reader/rss/adapter.go +++ b/internal/reader/rss/adapter.go @@ -7,6 +7,7 @@ import ( "html" "log/slog" "path" + "slices" "strconv" "strings" "time" @@ -124,31 +125,13 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed { } // Populate entry categories. - for _, tag := range item.Categories { - if tag != "" { - entry.Tags = append(entry.Tags, tag) - } - } - for _, tag := range item.MediaCategories.Labels() { - if tag != "" { - entry.Tags = append(entry.Tags, tag) - } - } + entry.Tags = findEntryTags(&item) if len(entry.Tags) == 0 { - for _, tag := range r.rss.Channel.Categories { - if tag != "" { - entry.Tags = append(entry.Tags, tag) - } - } - for _, tag := range r.rss.Channel.GetItunesCategories() { - if tag != "" { - entry.Tags = append(entry.Tags, tag) - } - } - if r.rss.Channel.GooglePlayCategory.Text != "" { - entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text) - } + entry.Tags = findFeedTags(&r.rss.Channel) } + // Sort and deduplicate tags. + slices.Sort(entry.Tags) + entry.Tags = slices.Compact(entry.Tags) feed.Entries = append(feed.Entries, entry) } @@ -176,6 +159,30 @@ func findFeedAuthor(rssChannel *RSSChannel) string { return strings.TrimSpace(sanitizer.StripTags(author)) } +func findFeedTags(rssChannel *RSSChannel) []string { + tags := make([]string, 0) + + for _, tag := range rssChannel.Categories { + tag = strings.TrimSpace(tag) + if tag != "" { + tags = append(tags, tag) + } + } + + for _, tag := range rssChannel.GetItunesCategories() { + tag = strings.TrimSpace(tag) + if tag != "" { + tags = append(tags, tag) + } + } + + if tag := strings.TrimSpace(rssChannel.GooglePlayCategory.Text); tag != "" { + tags = append(tags, tag) + } + + return tags +} + func findEntryTitle(rssItem *RSSItem) string { title := rssItem.Title.Content @@ -270,6 +277,26 @@ func findEntryAuthor(rssItem *RSSItem) string { return strings.TrimSpace(sanitizer.StripTags(author)) } +func findEntryTags(rssItem *RSSItem) []string { + tags := make([]string, 0) + + for _, tag := range rssItem.Categories { + tag = strings.TrimSpace(tag) + if tag != "" { + tags = append(tags, tag) + } + } + + for _, tag := range rssItem.MediaCategories.Labels() { + tag = strings.TrimSpace(tag) + if tag != "" { + tags = append(tags, tag) + } + } + + return tags +} + func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList { enclosures := make(model.EnclosureList, 0) duplicates := make(map[string]bool) diff --git a/internal/reader/rss/parser_test.go b/internal/reader/rss/parser_test.go index efce4575..791eeab5 100644 --- a/internal/reader/rss/parser_test.go +++ b/internal/reader/rss/parser_test.go @@ -1971,6 +1971,9 @@ func TestParseEntryWithCategories(t *testing.T) { https://example.org/item Category 1 + Category 2 + Category 0 + ` @@ -1980,11 +1983,11 @@ func TestParseEntryWithCategories(t *testing.T) { t.Fatal(err) } - if len(feed.Entries[0].Tags) != 2 { + if len(feed.Entries[0].Tags) != 3 { t.Fatalf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := []string{"Category 1", "Category 2"} + expected := []string{"Category 0", "Category 1", "Category 2"} result := feed.Entries[0].Tags for i, tag := range result { @@ -2022,7 +2025,7 @@ func TestParseFeedWithItunesCategories(t *testing.T) { t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := []string{"Society & Culture", "Documentary", "Health", "Mental Health"} + expected := []string{"Documentary", "Health", "Mental Health", "Society & Culture"} result := feed.Entries[0].Tags for i, tag := range result { @@ -2091,12 +2094,12 @@ func TestParseEntryWithMediaCategories(t *testing.T) { t.Errorf("Incorrect number of tags, got: %d", len(feed.Entries[0].Tags)) } - expected := []string{"Visual Art", "Ace Ventura - Pet Detective"} + expected := []string{"Ace Ventura - Pet Detective", "Visual Art"} result := feed.Entries[0].Tags for i, tag := range result { if tag != expected[i] { - t.Errorf("Incorrect tag, got: %q", tag) + t.Errorf("Incorrect entry tag, got %q instead of %q", tag, expected[i]) } } } diff --git a/internal/storage/entry.go b/internal/storage/entry.go index 75248643..a148f63d 100644 --- a/internal/storage/entry.go +++ b/internal/storage/entry.go @@ -8,8 +8,6 @@ import ( "errors" "fmt" "log/slog" - "slices" - "strings" "time" "miniflux.app/v2/internal/crypto" @@ -142,7 +140,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error { entry.UserID, entry.FeedID, entry.ReadingTime, - pq.Array(removeEmpty(removeDuplicates(entry.Tags))), + pq.Array(entry.Tags), ).Scan( &entry.ID, &entry.Status, @@ -198,7 +196,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error { entry.UserID, entry.FeedID, entry.Hash, - pq.Array(removeEmpty(removeDuplicates(entry.Tags))), + pq.Array(entry.Tags), ).Scan(&entry.ID) if err != nil { @@ -630,21 +628,6 @@ func (s *Storage) UnshareEntry(userID int64, entryID int64) (err error) { return } -func removeDuplicates(l []string) []string { - slices.Sort(l) - return slices.Compact(l) -} - -func removeEmpty(l []string) []string { - var finalSlice []string - for _, item := range l { - if strings.TrimSpace(item) != "" { - finalSlice = append(finalSlice, item) - } - } - return finalSlice -} - func truncateString(s string) string { if len(s) > truncationLen { return s[:truncationLen]