Mirror of https://github.com/miniflux/v2.git, synced 2025-06-27 16:36:00 +00:00
Add the possibility to enable crawler for feeds
parent 33445e5b68
commit ef097f02fe
22 changed files with 77 additions and 25 deletions
@@ -5,9 +5,12 @@
 package processor
 
 import (
+	"log"
+
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/rewrite"
 	"github.com/miniflux/miniflux2/reader/sanitizer"
+	"github.com/miniflux/miniflux2/reader/scraper"
 )
 
 // FeedProcessor handles the processing of feed contents.
@@ -15,6 +18,12 @@ type FeedProcessor struct {
 	feed         *model.Feed
 	scraperRules string
 	rewriteRules string
+	crawler      bool
 }
 
+// WithCrawler enables the crawler.
+func (f *FeedProcessor) WithCrawler(value bool) {
+	f.crawler = value
+}
+
 // WithScraperRules adds scraper rules to the processing.
@@ -30,6 +39,15 @@ func (f *FeedProcessor) WithRewriteRules(rules string) {
 // Process applies rewrite and scraper rules.
 func (f *FeedProcessor) Process() {
 	for _, entry := range f.feed.Entries {
+		if f.crawler {
+			content, err := scraper.Fetch(entry.URL, f.scraperRules)
+			if err != nil {
+				log.Println("[FeedProcessor]", err)
+			} else {
+				entry.Content = content
+			}
+		}
+
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
 	}
@@ -37,5 +55,5 @@ func (f *FeedProcessor) Process() {
 
 // NewFeedProcessor returns a new FeedProcessor.
 func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
-	return &FeedProcessor{feed: feed}
+	return &FeedProcessor{feed: feed, crawler: false}
 }
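For context, here is a minimal sketch of how a caller might enable the new crawler before processing a feed. The import path for the processor package, the feed contents, and the scraper-rule string are illustrative assumptions, not part of this commit:

package main

import (
	"github.com/miniflux/miniflux2/model"
	"github.com/miniflux/miniflux2/reader/processor" // assumed package path
)

func main() {
	// Hypothetical feed with a single entry (illustrative values only).
	feed := &model.Feed{}
	feed.Entries = append(feed.Entries, &model.Entry{
		URL:     "https://example.org/post/1",
		Content: "<p>Summary only</p>",
	})

	// Build the processor and opt in to full-content crawling.
	p := processor.NewFeedProcessor(feed)
	p.WithCrawler(true)

	// Optional: scraper rules are forwarded to scraper.Fetch when
	// crawling (rule syntax assumed here).
	p.WithScraperRules("article")

	// With the crawler enabled, Process fetches each entry's page,
	// then sanitizes and rewrites the content as before.
	p.Process()
}

Note that NewFeedProcessor now initializes crawler to false explicitly, so fetching full content stays opt-in per feed via WithCrawler.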