mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Add feed filters (Keeplist and Blocklist)
This commit is contained in:
parent
3afdf25012
commit
84b83fc3c8
34 changed files with 359 additions and 89 deletions
|
@ -5,6 +5,7 @@
|
|||
package processor
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"miniflux.app/config"
|
||||
|
@ -19,9 +20,11 @@ import (
|
|||
|
||||
// ProcessFeedEntries downloads the original web page for entries and applies filters.
|
||||
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
|
||||
|
||||
filterFeedEntries(feed)
|
||||
|
||||
for _, entry := range feed.Entries {
|
||||
logger.Debug("[Feed #%d] Processing entry %s", feed.ID, entry.URL)
|
||||
|
||||
if feed.Crawler {
|
||||
if !store.EntryURLExists(feed.ID, entry.URL) {
|
||||
startTime := time.Now()
|
||||
|
@ -51,6 +54,37 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed) {
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Filters feed entries based on regex rules
|
||||
First we filter based on our keep list, then we remove those entries that match the block list
|
||||
*/
|
||||
func filterFeedEntries(feed *model.Feed) {
|
||||
var filteredEntries []*model.Entry
|
||||
|
||||
if len(feed.KeeplistRules) > 0 {
|
||||
for _, entry := range feed.Entries {
|
||||
match, _ := regexp.MatchString(feed.KeeplistRules, entry.Title)
|
||||
if match == true {
|
||||
filteredEntries = append(filteredEntries, entry)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
filteredEntries = feed.Entries
|
||||
}
|
||||
if len(feed.BlocklistRules) > 0 {
|
||||
k := 0
|
||||
for _, entry := range filteredEntries {
|
||||
match, _ := regexp.MatchString(feed.BlocklistRules, entry.Title)
|
||||
if match != true {
|
||||
filteredEntries[k] = entry
|
||||
k++
|
||||
}
|
||||
}
|
||||
filteredEntries = filteredEntries[:k]
|
||||
}
|
||||
feed.Entries = filteredEntries
|
||||
}
|
||||
|
||||
// ProcessEntryWebPage downloads the entry web page and applies rewrite rules.
|
||||
func ProcessEntryWebPage(entry *model.Entry) error {
|
||||
startTime := time.Now()
|
||||
|
|
88
reader/processor/processor_test.go
Normal file
88
reader/processor/processor_test.go
Normal file
|
@ -0,0 +1,88 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package processor // import "miniflux.app/reader/processor"
|
||||
|
||||
import (
|
||||
"miniflux.app/reader/parser"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestKeeplistRules(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>SomeGood News</title>
|
||||
<link>http://foo.bar/</link>
|
||||
<item>
|
||||
<title>Kitten News</title>
|
||||
<link>http://kitties.today/daily-kitten</link>
|
||||
<description>Kitten picture of the day.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://kitties.today</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Daily Covid DoomScrolling News</title>
|
||||
<link>http://covid.doom/daily-panic-dose</link>
|
||||
<description>Did you know that you can get COVID IN YOUR DREAMS?.</description>
|
||||
<pubDate>Tue, 03 Jun 2020 09:39:21 GMT</pubDate>
|
||||
<guid>http://covid.doom</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := parser.ParseFeed(data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Error parsing feed")
|
||||
}
|
||||
|
||||
//case insensitive
|
||||
feed.KeeplistRules = "(?i)kitten"
|
||||
filterFeedEntries(feed)
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Keeplist filter rule did not properly filter the feed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlocklistRules(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>SomeGood News</title>
|
||||
<link>http://foo.bar/</link>
|
||||
<item>
|
||||
<title>Kitten News</title>
|
||||
<link>http://kitties.today/daily-kitten</link>
|
||||
<description>Kitten picture of the day.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://kitties.today</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Daily Covid DoomScrolling News</title>
|
||||
<link>http://covid.doom/daily-panic-dose</link>
|
||||
<description>Did you know that you can get COVID IN YOUR DREAMS?.</description>
|
||||
<pubDate>Tue, 03 Jun 2020 09:39:21 GMT</pubDate>
|
||||
<guid>http://covid.doom</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := parser.ParseFeed(data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Error parsing feed")
|
||||
}
|
||||
|
||||
//case insensitive
|
||||
feed.BlocklistRules = "(?i)covid"
|
||||
filterFeedEntries(feed)
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Keeplist filter rule did not properly filter the feed")
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue