1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

refactor(processor): move RewriteEntryURL function to rewrite package

This commit is contained in:
Frédéric Guillot 2025-06-19 13:14:22 -07:00
parent c12476c1a9
commit cb59944d6b
7 changed files with 347 additions and 40 deletions

View file

@ -6,7 +6,6 @@ package processor // import "miniflux.app/v2/internal/reader/processor"
import (
"log/slog"
"net/url"
"regexp"
"slices"
"time"
@ -24,8 +23,6 @@ import (
"miniflux.app/v2/internal/storage"
)
// customReplaceRuleRegex matches a URL rewrite rule written as
// rewrite("<search>"|"<replacement>"). Capture group 1 is the search
// expression and capture group 2 is the replacement; both must be non-empty
// and neither may contain a double quote.
var customReplaceRuleRegex = regexp.MustCompile(`rewrite\("([^"]+)"\|"([^"]+)"\)`)
// ProcessFeedEntries downloads original web page for entries and apply filters.
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, forceRefresh bool) {
var filteredEntries model.Entries
@ -60,7 +57,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
}
webpageBaseURL := ""
entry.URL = rewriteEntryURL(feed, entry)
entry.URL = rewrite.RewriteEntryURL(feed, entry)
entryIsNew := store.IsNewEntry(feed.ID, entry.Hash)
if feed.Crawler && (entryIsNew || forceRefresh) {
slog.Debug("Scraping entry",
@ -143,7 +140,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
startTime := time.Now()
entry.URL = rewriteEntryURL(feed, entry)
entry.URL = rewrite.RewriteEntryURL(feed, entry)
requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithUserAgent(feed.UserAgent, config.Opts.HTTPClientUserAgent())
@ -187,41 +184,6 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
return nil
}
// rewriteEntryURL returns the entry URL after applying the feed's URL rewrite
// rule, if any.
//
// The rule stored in feed.UrlRewriteRules is parsed with
// customReplaceRuleRegex: the first captured group is compiled as a regular
// expression and every match of it in entry.URL is replaced with the second
// captured group. The entry URL is returned unchanged when the feed has no
// rule, when the rule cannot be parsed, or when the search expression fails
// to compile. The entry itself is never modified here.
func rewriteEntryURL(feed *model.Feed, entry *model.Entry) string {
	// No rule configured: nothing to do and nothing to log.
	if feed.UrlRewriteRules == "" {
		return entry.URL
	}

	// A successful match yields the full match plus the two captured groups.
	match := customReplaceRuleRegex.FindStringSubmatch(feed.UrlRewriteRules)
	if len(match) < 3 {
		slog.Debug("Cannot find search and replace terms for replace rule",
			slog.String("original_entry_url", entry.URL),
			slog.String("rewritten_entry_url", entry.URL),
			slog.Int64("feed_id", feed.ID),
			slog.String("feed_url", feed.FeedURL),
			slog.String("url_rewrite_rules", feed.UrlRewriteRules),
		)
		return entry.URL
	}

	searchPattern, replacement := match[1], match[2]

	// The search expression comes from the feed settings, so it may be invalid.
	searchRegex, err := regexp.Compile(searchPattern)
	if err != nil {
		slog.Error("Failed on regexp compilation",
			slog.String("url_rewrite_rules", feed.UrlRewriteRules),
			slog.Any("error", err),
		)
		return entry.URL
	}

	newURL := searchRegex.ReplaceAllString(entry.URL, replacement)
	slog.Debug("Rewriting entry URL",
		slog.String("original_entry_url", entry.URL),
		slog.String("rewritten_entry_url", newURL),
		slog.Int64("feed_id", feed.ID),
		slog.String("feed_url", feed.FeedURL),
	)
	return newURL
}
func isRecentEntry(entry *model.Entry) bool {
if config.Opts.FilterEntryMaxAgeDays() == 0 || entry.Date.After(time.Now().AddDate(0, 0, -config.Opts.FilterEntryMaxAgeDays())) {
return true