mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
feat(feed): implement global deduplication
Add a per-feed boolean to decide if the feed's entries should be deduplicated against all others. This is useful for aggregators like lobste.rs or Hacker News. This should close #797.
This commit is contained in:
parent
d2212dee12
commit
e8234033fd
8 changed files with 41 additions and 3 deletions
|
@ -171,6 +171,7 @@ type Feed struct {
|
|||
HideGlobally bool `json:"hide_globally"`
|
||||
DisableHTTP2 bool `json:"disable_http2"`
|
||||
ProxyURL string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||
}
|
||||
|
||||
// FeedCreationRequest represents the request to create a feed.
|
||||
|
@ -193,6 +194,7 @@ type FeedCreationRequest struct {
|
|||
HideGlobally bool `json:"hide_globally"`
|
||||
DisableHTTP2 bool `json:"disable_http2"`
|
||||
ProxyURL string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||
}
|
||||
|
||||
// FeedModificationRequest represents the request to update a feed.
|
||||
|
@ -217,6 +219,7 @@ type FeedModificationRequest struct {
|
|||
HideGlobally *bool `json:"hide_globally"`
|
||||
DisableHTTP2 *bool `json:"disable_http2"`
|
||||
ProxyURL *string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
|
||||
}
|
||||
|
||||
// FeedIcon represents the feed icon.
|
||||
|
|
|
@ -60,6 +60,7 @@ type Feed struct {
|
|||
PushoverEnabled bool `json:"pushover_enabled"`
|
||||
PushoverPriority int `json:"pushover_priority"`
|
||||
ProxyURL string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||
|
||||
// Non-persisted attributes
|
||||
Category *Category `json:"category,omitempty"`
|
||||
|
@ -170,6 +171,7 @@ type FeedCreationRequest struct {
|
|||
UrlRewriteRules string `json:"urlrewrite_rules"`
|
||||
DisableHTTP2 bool `json:"disable_http2"`
|
||||
ProxyURL string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||
}
|
||||
|
||||
type FeedCreationRequestFromSubscriptionDiscovery struct {
|
||||
|
@ -205,6 +207,7 @@ type FeedModificationRequest struct {
|
|||
HideGlobally *bool `json:"hide_globally"`
|
||||
DisableHTTP2 *bool `json:"disable_http2"`
|
||||
ProxyURL *string `json:"proxy_url"`
|
||||
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
|
||||
}
|
||||
|
||||
// Patch updates a feed with modified values.
|
||||
|
@ -300,6 +303,10 @@ func (f *FeedModificationRequest) Patch(feed *Feed) {
|
|||
if f.ProxyURL != nil {
|
||||
feed.ProxyURL = *f.ProxyURL
|
||||
}
|
||||
|
||||
if f.DeduplicateAgainstAll != nil {
|
||||
feed.DeduplicateAgainstAll = *f.DeduplicateAgainstAll
|
||||
}
|
||||
}
|
||||
|
||||
// Feeds is a list of feed
|
||||
|
|
|
@ -336,7 +336,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
|
||||
// We don't update existing entries when the crawler is enabled (we only crawl entries that don't exist yet), unless a refresh is forced.
|
||||
updateExistingEntries := forceRefresh || !originalFeed.Crawler
|
||||
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries)
|
||||
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries, originalFeed.DeduplicateAgainstAll)
|
||||
if storeErr != nil {
|
||||
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
user, storeErr := store.UserByID(userID)
|
||||
|
|
|
@ -225,6 +225,19 @@ func (s *Storage) entryExists(tx *sql.Tx, entry *model.Entry) (bool, error) {
|
|||
return result, nil
|
||||
}
|
||||
|
||||
// entryExistsAnywhere checks if an entry already exists in other feeds based on its hash.
|
||||
func (s *Storage) entryExistsAnywhere(tx *sql.Tx, entry *model.Entry) (bool, error) {
|
||||
var result bool
|
||||
|
||||
err := tx.QueryRow(`SELECT true FROM entries WHERE hash=$1`, entry.Hash).Scan(&result)
|
||||
|
||||
if err != nil && err != sql.ErrNoRows {
|
||||
return result, fmt.Errorf(`store: unable to check if entry exists: %v`, err)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool {
|
||||
var result bool
|
||||
s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result)
|
||||
|
@ -268,7 +281,7 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
|
|||
}
|
||||
|
||||
// RefreshFeedEntries updates feed entries while refreshing a feed.
|
||||
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) {
|
||||
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool, deduplicateAgainstAll bool) (newEntries model.Entries, err error) {
|
||||
var entryHashes []string
|
||||
|
||||
for _, entry := range entries {
|
||||
|
@ -280,7 +293,16 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
|
|||
return nil, fmt.Errorf(`store: unable to start transaction: %v`, err)
|
||||
}
|
||||
|
||||
entryExists, err := s.entryExists(tx, entry)
|
||||
entryExists := false
|
||||
if deduplicateAgainstAll {
|
||||
entryExists, err = s.entryExistsAnywhere(tx, entry)
|
||||
// maybe another feed was refreshed and has this entry as well,
|
||||
// so we need to mark it as removed here.
|
||||
updateExistingEntries = true
|
||||
entry.Status = model.EntryStatusRemoved
|
||||
} else {
|
||||
entryExists, err = s.entryExists(tx, entry)
|
||||
}
|
||||
if err != nil {
|
||||
if rollbackErr := tx.Rollback(); rollbackErr != nil {
|
||||
return nil, fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)
|
||||
|
|
|
@ -253,6 +253,7 @@ func (f *FeedQueryBuilder) GetFeeds() (model.Feeds, error) {
|
|||
&feed.PushoverEnabled,
|
||||
&feed.PushoverPriority,
|
||||
&feed.ProxyURL,
|
||||
&feed.DeduplicateAgainstAll,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
|
|
|
@ -72,6 +72,7 @@
|
|||
|
||||
<label><input type="checkbox" name="no_media_player" {{ if .form.NoMediaPlayer }}checked{{ end }} value="1" > {{ t "form.feed.label.no_media_player" }} </label>
|
||||
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
|
||||
<label><input type="checkbox" name="deduplicate_against_all" value="1" {{ if .form.DeduplicateAgainstAll }}checked{{ end }}> {{ t "form.feed.label.deduplicate_against_all" }}</label>
|
||||
|
||||
<div class="buttons">
|
||||
<button type="submit" class="button button-primary" data-label-loading="{{ t "form.submit.saving" }}">{{ t "action.update" }}</button>
|
||||
|
|
|
@ -71,6 +71,7 @@ func (h *handler) showEditFeedPage(w http.ResponseWriter, r *http.Request) {
|
|||
PushoverEnabled: feed.PushoverEnabled,
|
||||
PushoverPriority: feed.PushoverPriority,
|
||||
ProxyURL: feed.ProxyURL,
|
||||
DeduplicateAgainstAll: feed.DeduplicateAgainstAll,
|
||||
}
|
||||
|
||||
sess := session.New(h.store, request.SessionID(r))
|
||||
|
|
|
@ -43,6 +43,7 @@ type FeedForm struct {
|
|||
PushoverEnabled bool
|
||||
PushoverPriority int
|
||||
ProxyURL string
|
||||
DeduplicateAgainstAll bool
|
||||
}
|
||||
|
||||
// Merge updates the fields of the given feed.
|
||||
|
@ -79,6 +80,7 @@ func (f FeedForm) Merge(feed *model.Feed) *model.Feed {
|
|||
feed.PushoverEnabled = f.PushoverEnabled
|
||||
feed.PushoverPriority = f.PushoverPriority
|
||||
feed.ProxyURL = f.ProxyURL
|
||||
feed.DeduplicateAgainstAll = f.DeduplicateAgainstAll
|
||||
return feed
|
||||
}
|
||||
|
||||
|
@ -130,5 +132,6 @@ func NewFeedForm(r *http.Request) *FeedForm {
|
|||
PushoverEnabled: r.FormValue("pushover_enabled") == "1",
|
||||
PushoverPriority: pushoverPriority,
|
||||
ProxyURL: r.FormValue("proxy_url"),
|
||||
DeduplicateAgainstAll: r.FormValue("deduplicate_against_all") == "1",
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue