1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

feat(feed): implement global deduplication

Add a per-feed boolean that decides whether the feed's entries should be
deduplicated against the entries of all other feeds. This is useful for
aggregators like lobste.rs or Hacker News.

This should close #797
This commit is contained in:
jvoisin 2025-06-09 23:06:42 +02:00
parent d2212dee12
commit e8234033fd
8 changed files with 41 additions and 3 deletions

View file

@ -171,6 +171,7 @@ type Feed struct {
HideGlobally bool `json:"hide_globally"` HideGlobally bool `json:"hide_globally"`
DisableHTTP2 bool `json:"disable_http2"` DisableHTTP2 bool `json:"disable_http2"`
ProxyURL string `json:"proxy_url"` ProxyURL string `json:"proxy_url"`
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
} }
// FeedCreationRequest represents the request to create a feed. // FeedCreationRequest represents the request to create a feed.
@ -193,6 +194,7 @@ type FeedCreationRequest struct {
HideGlobally bool `json:"hide_globally"` HideGlobally bool `json:"hide_globally"`
DisableHTTP2 bool `json:"disable_http2"` DisableHTTP2 bool `json:"disable_http2"`
ProxyURL string `json:"proxy_url"` ProxyURL string `json:"proxy_url"`
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
} }
// FeedModificationRequest represents the request to update a feed. // FeedModificationRequest represents the request to update a feed.
@ -217,6 +219,7 @@ type FeedModificationRequest struct {
HideGlobally *bool `json:"hide_globally"` HideGlobally *bool `json:"hide_globally"`
DisableHTTP2 *bool `json:"disable_http2"` DisableHTTP2 *bool `json:"disable_http2"`
ProxyURL *string `json:"proxy_url"` ProxyURL *string `json:"proxy_url"`
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
} }
// FeedIcon represents the feed icon. // FeedIcon represents the feed icon.

View file

@ -60,6 +60,7 @@ type Feed struct {
PushoverEnabled bool `json:"pushover_enabled"` PushoverEnabled bool `json:"pushover_enabled"`
PushoverPriority int `json:"pushover_priority"` PushoverPriority int `json:"pushover_priority"`
ProxyURL string `json:"proxy_url"` ProxyURL string `json:"proxy_url"`
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
// Non-persisted attributes // Non-persisted attributes
Category *Category `json:"category,omitempty"` Category *Category `json:"category,omitempty"`
@ -170,6 +171,7 @@ type FeedCreationRequest struct {
UrlRewriteRules string `json:"urlrewrite_rules"` UrlRewriteRules string `json:"urlrewrite_rules"`
DisableHTTP2 bool `json:"disable_http2"` DisableHTTP2 bool `json:"disable_http2"`
ProxyURL string `json:"proxy_url"` ProxyURL string `json:"proxy_url"`
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
} }
type FeedCreationRequestFromSubscriptionDiscovery struct { type FeedCreationRequestFromSubscriptionDiscovery struct {
@ -205,6 +207,7 @@ type FeedModificationRequest struct {
HideGlobally *bool `json:"hide_globally"` HideGlobally *bool `json:"hide_globally"`
DisableHTTP2 *bool `json:"disable_http2"` DisableHTTP2 *bool `json:"disable_http2"`
ProxyURL *string `json:"proxy_url"` ProxyURL *string `json:"proxy_url"`
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
} }
// Patch updates a feed with modified values. // Patch updates a feed with modified values.
@ -300,6 +303,10 @@ func (f *FeedModificationRequest) Patch(feed *Feed) {
if f.ProxyURL != nil { if f.ProxyURL != nil {
feed.ProxyURL = *f.ProxyURL feed.ProxyURL = *f.ProxyURL
} }
if f.DeduplicateAgainstAll != nil {
feed.DeduplicateAgainstAll = *f.DeduplicateAgainstAll
}
} }
// Feeds is a list of feed // Feeds is a list of feed

View file

@ -336,7 +336,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh // We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh
updateExistingEntries := forceRefresh || !originalFeed.Crawler updateExistingEntries := forceRefresh || !originalFeed.Crawler
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries) newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries, originalFeed.DeduplicateAgainstAll)
if storeErr != nil { if storeErr != nil {
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
user, storeErr := store.UserByID(userID) user, storeErr := store.UserByID(userID)

View file

@ -225,6 +225,19 @@ func (s *Storage) entryExists(tx *sql.Tx, entry *model.Entry) (bool, error) {
return result, nil return result, nil
} }
// entryExistsAnywhere reports whether an entry with the same hash is already
// stored in any feed owned by the entry's user.
//
// The lookup is scoped to entry.UserID on purpose: without an owner filter, an
// entry stored for one account would be reported as "existing" for every
// other account, so their copies would be treated as duplicates and the check
// would reveal what other users have in their feeds.
//
// NOTE(review): assumes entry.UserID is populated by the caller before this
// runs and that the entries table has a user_id column — confirm against
// RefreshFeedEntries and the schema.
//
// It returns (false, nil) when no row matches, and (false, err) when the query
// fails for any reason other than sql.ErrNoRows.
func (s *Storage) entryExistsAnywhere(tx *sql.Tx, entry *model.Entry) (bool, error) {
	var result bool

	// LIMIT 1: a single match is enough, the same hash may be present in
	// several feeds.
	err := tx.QueryRow(
		`SELECT true FROM entries WHERE user_id=$1 AND hash=$2 LIMIT 1`,
		entry.UserID,
		entry.Hash,
	).Scan(&result)

	if err != nil && err != sql.ErrNoRows {
		return false, fmt.Errorf(`store: unable to check if entry exists: %v`, err)
	}

	return result, nil
}
func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool { func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool {
var result bool var result bool
s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result) s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result)
@ -268,7 +281,7 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
} }
// RefreshFeedEntries updates feed entries while refreshing a feed. // RefreshFeedEntries updates feed entries while refreshing a feed.
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) { func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool, deduplicateAgainstAll bool) (newEntries model.Entries, err error) {
var entryHashes []string var entryHashes []string
for _, entry := range entries { for _, entry := range entries {
@ -280,7 +293,16 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
return nil, fmt.Errorf(`store: unable to start transaction: %v`, err) return nil, fmt.Errorf(`store: unable to start transaction: %v`, err)
} }
entryExists, err := s.entryExists(tx, entry) entryExists := false
if deduplicateAgainstAll {
entryExists, err = s.entryExistsAnywhere(tx, entry)
// maybe another feed was refreshed and has this entry as well,
// so we need to mark it as removed here.
updateExistingEntries = true
entry.Status = model.EntryStatusRemoved
} else {
entryExists, err = s.entryExists(tx, entry)
}
if err != nil { if err != nil {
if rollbackErr := tx.Rollback(); rollbackErr != nil { if rollbackErr := tx.Rollback(); rollbackErr != nil {
return nil, fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err) return nil, fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)

View file

@ -253,6 +253,7 @@ func (f *FeedQueryBuilder) GetFeeds() (model.Feeds, error) {
&feed.PushoverEnabled, &feed.PushoverEnabled,
&feed.PushoverPriority, &feed.PushoverPriority,
&feed.ProxyURL, &feed.ProxyURL,
&feed.DeduplicateAgainstAll,
) )
if err != nil { if err != nil {

View file

@ -72,6 +72,7 @@
<label><input type="checkbox" name="no_media_player" {{ if .form.NoMediaPlayer }}checked{{ end }} value="1" > {{ t "form.feed.label.no_media_player" }} </label> <label><input type="checkbox" name="no_media_player" {{ if .form.NoMediaPlayer }}checked{{ end }} value="1" > {{ t "form.feed.label.no_media_player" }} </label>
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label> <label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
<label><input type="checkbox" name="deduplicate_against_all" value="1" {{ if .form.DeduplicateAgainstAll }}checked{{ end }}> {{ t "form.feed.label.deduplicate_against_all" }}</label>
<div class="buttons"> <div class="buttons">
<button type="submit" class="button button-primary" data-label-loading="{{ t "form.submit.saving" }}">{{ t "action.update" }}</button> <button type="submit" class="button button-primary" data-label-loading="{{ t "form.submit.saving" }}">{{ t "action.update" }}</button>

View file

@ -71,6 +71,7 @@ func (h *handler) showEditFeedPage(w http.ResponseWriter, r *http.Request) {
PushoverEnabled: feed.PushoverEnabled, PushoverEnabled: feed.PushoverEnabled,
PushoverPriority: feed.PushoverPriority, PushoverPriority: feed.PushoverPriority,
ProxyURL: feed.ProxyURL, ProxyURL: feed.ProxyURL,
DeduplicateAgainstAll: feed.DeduplicateAgainstAll,
} }
sess := session.New(h.store, request.SessionID(r)) sess := session.New(h.store, request.SessionID(r))

View file

@ -43,6 +43,7 @@ type FeedForm struct {
PushoverEnabled bool PushoverEnabled bool
PushoverPriority int PushoverPriority int
ProxyURL string ProxyURL string
DeduplicateAgainstAll bool
} }
// Merge updates the fields of the given feed. // Merge updates the fields of the given feed.
@ -79,6 +80,7 @@ func (f FeedForm) Merge(feed *model.Feed) *model.Feed {
feed.PushoverEnabled = f.PushoverEnabled feed.PushoverEnabled = f.PushoverEnabled
feed.PushoverPriority = f.PushoverPriority feed.PushoverPriority = f.PushoverPriority
feed.ProxyURL = f.ProxyURL feed.ProxyURL = f.ProxyURL
feed.DeduplicateAgainstAll = f.DeduplicateAgainstAll
return feed return feed
} }
@ -130,5 +132,6 @@ func NewFeedForm(r *http.Request) *FeedForm {
PushoverEnabled: r.FormValue("pushover_enabled") == "1", PushoverEnabled: r.FormValue("pushover_enabled") == "1",
PushoverPriority: pushoverPriority, PushoverPriority: pushoverPriority,
ProxyURL: r.FormValue("proxy_url"), ProxyURL: r.FormValue("proxy_url"),
DeduplicateAgainstAll: r.FormValue("deduplicate_against_all") == "1",
} }
} }