mirror of https://github.com/miniflux/v2.git

Add the possibility to enable crawler for feeds

Frédéric Guillot 2017-12-12 19:19:36 -08:00
parent 33445e5b68
commit ef097f02fe
22 changed files with 77 additions and 25 deletions


@@ -5,9 +5,12 @@
 package processor

 import (
+	"log"
+
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/rewrite"
 	"github.com/miniflux/miniflux2/reader/sanitizer"
+	"github.com/miniflux/miniflux2/reader/scraper"
 )

 // FeedProcessor handles the processing of feed contents.
@@ -15,6 +18,12 @@ type FeedProcessor struct {
 	feed         *model.Feed
 	scraperRules string
 	rewriteRules string
+	crawler      bool
+}
+
+// WithCrawler enables the crawler.
+func (f *FeedProcessor) WithCrawler(value bool) {
+	f.crawler = value
 }

 // WithScraperRules adds scraper rules to the processing.
@@ -30,6 +39,15 @@ func (f *FeedProcessor) WithRewriteRules(rules string) {
 // Process applies rewrite and scraper rules.
 func (f *FeedProcessor) Process() {
 	for _, entry := range f.feed.Entries {
+		if f.crawler {
+			content, err := scraper.Fetch(entry.URL, f.scraperRules)
+			if err != nil {
+				log.Println("[FeedProcessor]", err)
+			} else {
+				entry.Content = content
+			}
+		}
+
 		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
 		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
 	}
@@ -37,5 +55,5 @@ func (f *FeedProcessor) Process() {

 // NewFeedProcessor returns a new FeedProcessor.
 func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
-	return &FeedProcessor{feed: feed}
+	return &FeedProcessor{feed: feed, crawler: false}
 }
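
For reference, a minimal sketch of how a caller might enable the crawler through the API added here. The refreshFeed helper and the Crawler and ScraperRules fields on model.Feed are assumptions for illustration; only NewFeedProcessor, WithCrawler, WithScraperRules, and Process appear in this diff.

package example

import (
	"github.com/miniflux/miniflux2/model"
	"github.com/miniflux/miniflux2/reader/processor"
)

// refreshFeed is a hypothetical caller showing the new option in context.
func refreshFeed(feed *model.Feed) {
	p := processor.NewFeedProcessor(feed)
	p.WithCrawler(feed.Crawler)           // assumed per-feed flag on model.Feed
	p.WithScraperRules(feed.ScraperRules) // assumed field; WithScraperRules itself predates this commit
	p.Process()                           // fetches full content when the crawler is enabled, then sanitizes and rewrites
}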