mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
feat(feed): implement global deduplication
Add a per-feed boolean that controls whether the feed's entries are deduplicated against the entries of all other feeds. This is useful for aggregators like lobste.rs or Hacker News. This should close #797.
This commit is contained in:
parent
d2212dee12
commit
e8234033fd
8 changed files with 41 additions and 3 deletions
|
@ -171,6 +171,7 @@ type Feed struct {
|
||||||
HideGlobally bool `json:"hide_globally"`
|
HideGlobally bool `json:"hide_globally"`
|
||||||
DisableHTTP2 bool `json:"disable_http2"`
|
DisableHTTP2 bool `json:"disable_http2"`
|
||||||
ProxyURL string `json:"proxy_url"`
|
ProxyURL string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// FeedCreationRequest represents the request to create a feed.
|
// FeedCreationRequest represents the request to create a feed.
|
||||||
|
@ -193,6 +194,7 @@ type FeedCreationRequest struct {
|
||||||
HideGlobally bool `json:"hide_globally"`
|
HideGlobally bool `json:"hide_globally"`
|
||||||
DisableHTTP2 bool `json:"disable_http2"`
|
DisableHTTP2 bool `json:"disable_http2"`
|
||||||
ProxyURL string `json:"proxy_url"`
|
ProxyURL string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// FeedModificationRequest represents the request to update a feed.
|
// FeedModificationRequest represents the request to update a feed.
|
||||||
|
@ -217,6 +219,7 @@ type FeedModificationRequest struct {
|
||||||
HideGlobally *bool `json:"hide_globally"`
|
HideGlobally *bool `json:"hide_globally"`
|
||||||
DisableHTTP2 *bool `json:"disable_http2"`
|
DisableHTTP2 *bool `json:"disable_http2"`
|
||||||
ProxyURL *string `json:"proxy_url"`
|
ProxyURL *string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// FeedIcon represents the feed icon.
|
// FeedIcon represents the feed icon.
|
||||||
|
|
|
@ -60,6 +60,7 @@ type Feed struct {
|
||||||
PushoverEnabled bool `json:"pushover_enabled"`
|
PushoverEnabled bool `json:"pushover_enabled"`
|
||||||
PushoverPriority int `json:"pushover_priority"`
|
PushoverPriority int `json:"pushover_priority"`
|
||||||
ProxyURL string `json:"proxy_url"`
|
ProxyURL string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||||
|
|
||||||
// Non-persisted attributes
|
// Non-persisted attributes
|
||||||
Category *Category `json:"category,omitempty"`
|
Category *Category `json:"category,omitempty"`
|
||||||
|
@ -170,6 +171,7 @@ type FeedCreationRequest struct {
|
||||||
UrlRewriteRules string `json:"urlrewrite_rules"`
|
UrlRewriteRules string `json:"urlrewrite_rules"`
|
||||||
DisableHTTP2 bool `json:"disable_http2"`
|
DisableHTTP2 bool `json:"disable_http2"`
|
||||||
ProxyURL string `json:"proxy_url"`
|
ProxyURL string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll bool `json:"deduplicate_against_all"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type FeedCreationRequestFromSubscriptionDiscovery struct {
|
type FeedCreationRequestFromSubscriptionDiscovery struct {
|
||||||
|
@ -205,6 +207,7 @@ type FeedModificationRequest struct {
|
||||||
HideGlobally *bool `json:"hide_globally"`
|
HideGlobally *bool `json:"hide_globally"`
|
||||||
DisableHTTP2 *bool `json:"disable_http2"`
|
DisableHTTP2 *bool `json:"disable_http2"`
|
||||||
ProxyURL *string `json:"proxy_url"`
|
ProxyURL *string `json:"proxy_url"`
|
||||||
|
DeduplicateAgainstAll *bool `json:"deduplicate_against_all"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Patch updates a feed with modified values.
|
// Patch updates a feed with modified values.
|
||||||
|
@ -300,6 +303,10 @@ func (f *FeedModificationRequest) Patch(feed *Feed) {
|
||||||
if f.ProxyURL != nil {
|
if f.ProxyURL != nil {
|
||||||
feed.ProxyURL = *f.ProxyURL
|
feed.ProxyURL = *f.ProxyURL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if f.DeduplicateAgainstAll != nil {
|
||||||
|
feed.DeduplicateAgainstAll = *f.DeduplicateAgainstAll
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Feeds is a list of feed
|
// Feeds is a list of feed
|
||||||
|
|
|
@ -336,7 +336,7 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
||||||
|
|
||||||
// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh
|
// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh
|
||||||
updateExistingEntries := forceRefresh || !originalFeed.Crawler
|
updateExistingEntries := forceRefresh || !originalFeed.Crawler
|
||||||
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries)
|
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries, originalFeed.DeduplicateAgainstAll)
|
||||||
if storeErr != nil {
|
if storeErr != nil {
|
||||||
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||||
user, storeErr := store.UserByID(userID)
|
user, storeErr := store.UserByID(userID)
|
||||||
|
|
|
@ -225,6 +225,19 @@ func (s *Storage) entryExists(tx *sql.Tx, entry *model.Entry) (bool, error) {
|
||||||
return result, nil
|
return result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// entryExistsAnywhere reports whether any row in the entries table has the
// same hash as the given entry. The lookup runs inside tx and matches on the
// hash alone, so a match may come from any feed, including the entry's own.
// NOTE(review): the query has no other predicate (e.g. feed or user) — confirm
// that matching this broadly is intended.
//
// It returns true when a matching row is found, false when there is none, and
// a non-nil error when the query fails for any reason other than sql.ErrNoRows.
|
||||||
|
func (s *Storage) entryExistsAnywhere(tx *sql.Tx, entry *model.Entry) (bool, error) {
|
||||||
|
var result bool
|
||||||
|
|
||||||
|
err := tx.QueryRow(`SELECT true FROM entries WHERE hash=$1`, entry.Hash).Scan(&result)
|
||||||
|
|
||||||
|
// sql.ErrNoRows only means that no matching row was found: result keeps its
// zero value (false) and no error is reported to the caller.
if err != nil && err != sql.ErrNoRows {
|
||||||
|
return result, fmt.Errorf(`store: unable to check if entry exists: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool {
|
func (s *Storage) IsNewEntry(feedID int64, entryHash string) bool {
|
||||||
var result bool
|
var result bool
|
||||||
s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result)
|
s.db.QueryRow(`SELECT true FROM entries WHERE feed_id=$1 AND hash=$2`, feedID, entryHash).Scan(&result)
|
||||||
|
@ -268,7 +281,7 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// RefreshFeedEntries updates feed entries while refreshing a feed.
|
// RefreshFeedEntries updates feed entries while refreshing a feed.
|
||||||
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) {
|
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool, deduplicateAgainstAll bool) (newEntries model.Entries, err error) {
|
||||||
var entryHashes []string
|
var entryHashes []string
|
||||||
|
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
|
@ -280,7 +293,16 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
|
||||||
return nil, fmt.Errorf(`store: unable to start transaction: %v`, err)
|
return nil, fmt.Errorf(`store: unable to start transaction: %v`, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
entryExists, err := s.entryExists(tx, entry)
|
entryExists := false
|
||||||
|
if deduplicateAgainstAll {
|
||||||
|
entryExists, err = s.entryExistsAnywhere(tx, entry)
|
||||||
|
// maybe another feed was refreshed and has this entry as well,
|
||||||
|
// so we need to mark it as removed here.
|
||||||
|
updateExistingEntries = true
|
||||||
|
entry.Status = model.EntryStatusRemoved
|
||||||
|
} else {
|
||||||
|
entryExists, err = s.entryExists(tx, entry)
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if rollbackErr := tx.Rollback(); rollbackErr != nil {
|
if rollbackErr := tx.Rollback(); rollbackErr != nil {
|
||||||
return nil, fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)
|
return nil, fmt.Errorf(`store: unable to rollback transaction: %v (rolled back due to: %v)`, rollbackErr, err)
|
||||||
|
|
|
@ -253,6 +253,7 @@ func (f *FeedQueryBuilder) GetFeeds() (model.Feeds, error) {
|
||||||
&feed.PushoverEnabled,
|
&feed.PushoverEnabled,
|
||||||
&feed.PushoverPriority,
|
&feed.PushoverPriority,
|
||||||
&feed.ProxyURL,
|
&feed.ProxyURL,
|
||||||
|
&feed.DeduplicateAgainstAll,
|
||||||
)
|
)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -72,6 +72,7 @@
|
||||||
|
|
||||||
<label><input type="checkbox" name="no_media_player" {{ if .form.NoMediaPlayer }}checked{{ end }} value="1" > {{ t "form.feed.label.no_media_player" }} </label>
|
<label><input type="checkbox" name="no_media_player" {{ if .form.NoMediaPlayer }}checked{{ end }} value="1" > {{ t "form.feed.label.no_media_player" }} </label>
|
||||||
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
|
<label><input type="checkbox" name="disabled" value="1" {{ if .form.Disabled }}checked{{ end }}> {{ t "form.feed.label.disabled" }}</label>
|
||||||
|
<label><input type="checkbox" name="deduplicate_against_all" value="1" {{ if .form.DeduplicateAgainstAll }}checked{{ end }}> {{ t "form.feed.label.deduplicate_against_all" }}</label>
|
||||||
|
|
||||||
<div class="buttons">
|
<div class="buttons">
|
||||||
<button type="submit" class="button button-primary" data-label-loading="{{ t "form.submit.saving" }}">{{ t "action.update" }}</button>
|
<button type="submit" class="button button-primary" data-label-loading="{{ t "form.submit.saving" }}">{{ t "action.update" }}</button>
|
||||||
|
|
|
@ -71,6 +71,7 @@ func (h *handler) showEditFeedPage(w http.ResponseWriter, r *http.Request) {
|
||||||
PushoverEnabled: feed.PushoverEnabled,
|
PushoverEnabled: feed.PushoverEnabled,
|
||||||
PushoverPriority: feed.PushoverPriority,
|
PushoverPriority: feed.PushoverPriority,
|
||||||
ProxyURL: feed.ProxyURL,
|
ProxyURL: feed.ProxyURL,
|
||||||
|
DeduplicateAgainstAll: feed.DeduplicateAgainstAll,
|
||||||
}
|
}
|
||||||
|
|
||||||
sess := session.New(h.store, request.SessionID(r))
|
sess := session.New(h.store, request.SessionID(r))
|
||||||
|
|
|
@ -43,6 +43,7 @@ type FeedForm struct {
|
||||||
PushoverEnabled bool
|
PushoverEnabled bool
|
||||||
PushoverPriority int
|
PushoverPriority int
|
||||||
ProxyURL string
|
ProxyURL string
|
||||||
|
DeduplicateAgainstAll bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge updates the fields of the given feed.
|
// Merge updates the fields of the given feed.
|
||||||
|
@ -79,6 +80,7 @@ func (f FeedForm) Merge(feed *model.Feed) *model.Feed {
|
||||||
feed.PushoverEnabled = f.PushoverEnabled
|
feed.PushoverEnabled = f.PushoverEnabled
|
||||||
feed.PushoverPriority = f.PushoverPriority
|
feed.PushoverPriority = f.PushoverPriority
|
||||||
feed.ProxyURL = f.ProxyURL
|
feed.ProxyURL = f.ProxyURL
|
||||||
|
feed.DeduplicateAgainstAll = f.DeduplicateAgainstAll
|
||||||
return feed
|
return feed
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,5 +132,6 @@ func NewFeedForm(r *http.Request) *FeedForm {
|
||||||
PushoverEnabled: r.FormValue("pushover_enabled") == "1",
|
PushoverEnabled: r.FormValue("pushover_enabled") == "1",
|
||||||
PushoverPriority: pushoverPriority,
|
PushoverPriority: pushoverPriority,
|
||||||
ProxyURL: r.FormValue("proxy_url"),
|
ProxyURL: r.FormValue("proxy_url"),
|
||||||
|
DeduplicateAgainstAll: r.FormValue("deduplicate_against_all") == "1",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue