
Merge branch 'main' into sitemap

Commit 893ae2f822 by Julien Voisin, 2024-12-25 23:35:52 +00:00 (committed via GitHub)
45 changed files with 676 additions and 258 deletions


@ -31,11 +31,6 @@ func CreateFeedFromSubscriptionDiscovery(store *storage.Storage, userID int64, f
slog.String("feed_url", feedCreationRequest.FeedURL),
)
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) {
return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found")
}
@ -71,7 +66,7 @@ func CreateFeedFromSubscriptionDiscovery(store *storage.Storage, userID int64, f
subscription.WithCategoryID(feedCreationRequest.CategoryID)
subscription.CheckedNow()
processor.ProcessFeedEntries(store, subscription, user, true)
processor.ProcessFeedEntries(store, subscription, userID, true)
if storeErr := store.CreateFeed(subscription); storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
@ -105,11 +100,6 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
slog.String("feed_url", feedCreationRequest.FeedURL),
)
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) {
return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found")
}
@ -170,7 +160,7 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
subscription.WithCategoryID(feedCreationRequest.CategoryID)
subscription.CheckedNow()
processor.ProcessFeedEntries(store, subscription, user, true)
processor.ProcessFeedEntries(store, subscription, userID, true)
if storeErr := store.CreateFeed(subscription); storeErr != nil {
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
@ -195,11 +185,6 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
slog.Bool("force_refresh", forceRefresh),
)
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed, storeErr := store.FeedByID(userID, feedID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
@ -256,6 +241,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error()))
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
store.UpdateFeedError(originalFeed)
return localizedError
@ -263,6 +252,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
if store.AnotherFeedURLExists(userID, originalFeed.ID, responseHandler.EffectiveURL()) {
localizedError := locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed")
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
store.UpdateFeedError(originalFeed)
return localizedError
@ -289,6 +282,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
if errors.Is(parseErr, parser.ErrFeedFormatNotDetected) {
localizedError = locale.NewLocalizedErrorWrapper(parseErr, "error.feed_format_not_detected", parseErr)
}
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
store.UpdateFeedError(originalFeed)
@ -309,13 +306,17 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
)
originalFeed.Entries = updatedFeed.Entries
processor.ProcessFeedEntries(store, originalFeed, user, forceRefresh)
processor.ProcessFeedEntries(store, originalFeed, userID, forceRefresh)
// We don't update existing entries when the crawler is enabled (only new entries are crawled), unless the refresh is forced.
updateExistingEntries := forceRefresh || !originalFeed.Crawler
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries)
if storeErr != nil {
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
store.UpdateFeedError(originalFeed)
return localizedError
@ -359,6 +360,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
if storeErr := store.UpdateFeed(originalFeed); storeErr != nil {
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
user, storeErr := store.UserByID(userID)
if storeErr != nil {
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
}
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
store.UpdateFeedError(originalFeed)
return localizedError
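
The recurring edit in this file is one three-part refactor: the eager store.UserByID call at the top of CreateFeedFromSubscriptionDiscovery, CreateFeed, and RefreshFeed is removed, processor.ProcessFeedEntries now receives userID instead of a *model.User, and the user is loaded only on the error paths that need user.Language to translate the message. A simplified sketch of the pattern (hypothetical fetch and translate helpers, not the literal Miniflux code):

func refreshSketch(store *storage.Storage, userID, feedID int64) error {
	feed, storeErr := store.FeedByID(userID, feedID)
	if storeErr != nil {
		return storeErr
	}
	if fetchErr := fetch(feed); fetchErr != nil { // hypothetical fetch step
		// The user is loaded lazily, only once an error must be translated.
		user, storeErr := store.UserByID(userID)
		if storeErr != nil {
			return storeErr
		}
		feed.WithTranslatedErrorMessage(translate(fetchErr, user.Language)) // hypothetical helper
		store.UpdateFeedError(feed)
		return fetchErr
	}
	return nil
}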


@ -4,12 +4,18 @@
package icon // import "miniflux.app/v2/internal/reader/icon"
import (
"bytes"
"encoding/base64"
"fmt"
"image"
"image/gif"
"image/jpeg"
"image/png"
"io"
"log/slog"
"net/url"
"regexp"
"slices"
"strings"
"miniflux.app/v2/internal/config"
@ -19,6 +25,7 @@ import (
"miniflux.app/v2/internal/urllib"
"github.com/PuerkitoBio/goquery"
"golang.org/x/image/draw"
"golang.org/x/net/html/charset"
)
@ -180,9 +187,59 @@ func (f *IconFinder) DownloadIcon(iconURL string) (*model.Icon, error) {
Content: responseBody,
}
icon = resizeIcon(icon)
return icon, nil
}
func resizeIcon(icon *model.Icon) *model.Icon {
r := bytes.NewReader(icon.Content)
if !slices.Contains([]string{"image/jpeg", "image/png", "image/gif"}, icon.MimeType) {
slog.Info("icon isn't a png/gif/jpeg, can't resize", slog.String("mimetype", icon.MimeType))
return icon
}
// Don't resize icons that we can't decode, or that already have the right size.
config, _, err := image.DecodeConfig(r)
if err != nil {
slog.Warn("unable to decode the metadata of the icon", slog.Any("error", err))
return icon
}
if config.Height <= 32 && config.Width <= 32 {
slog.Debug("icon doesn't need to be rescaled", slog.Int("height", config.Height), slog.Int("width", config.Width))
return icon
}
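// Rewind the reader: image.DecodeConfig above consumed its prefix.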
r.Seek(0, io.SeekStart)
var src image.Image
switch icon.MimeType {
case "image/jpeg":
src, err = jpeg.Decode(r)
case "image/png":
src, err = png.Decode(r)
case "image/gif":
src, err = gif.Decode(r)
}
if err != nil {
slog.Warn("unable to decode the icon", slog.Any("error", err))
return icon
}
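// Downscale onto a fixed 32x32 RGBA canvas using bilinear interpolation.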
dst := image.NewRGBA(image.Rect(0, 0, 32, 32))
draw.BiLinear.Scale(dst, dst.Rect, src, src.Bounds(), draw.Over, nil)
var b bytes.Buffer
if err = png.Encode(&b, dst); err != nil {
slog.Warn("unable to encode the new icon", slog.Any("error", err))
return icon
}
icon.Content = b.Bytes()
icon.MimeType = "image/png"
return icon
}
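// Illustrative usage (not part of this change): small or undecodable icons
// pass through untouched, anything larger is re-encoded as a 32x32 PNG.
//
//	icon := &model.Icon{Content: raw, MimeType: "image/png"}
//	icon = resizeIcon(icon) // e.g. a 33x33 PNG in => 32x32 PNG out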
func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string, error) {
queries := []string{
"link[rel='icon' i]",


@ -4,8 +4,13 @@
package icon // import "miniflux.app/v2/internal/reader/icon"
import (
"bytes"
"encoding/base64"
"image"
"strings"
"testing"
"miniflux.app/v2/internal/model"
)
func TestParseImageDataURL(t *testing.T) {
@ -125,3 +130,52 @@ func TestParseDocumentWithWhitespaceIconURL(t *testing.T) {
t.Errorf(`Invalid icon URL, got %q`, iconURLs[0])
}
}
func TestResizeIconSmallGif(t *testing.T) {
data, err := base64.StdEncoding.DecodeString("R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==")
if err != nil {
t.Fatal(err)
}
icon := model.Icon{
Content: data,
MimeType: "image/gif",
}
if !bytes.Equal(icon.Content, resizeIcon(&icon).Content) {
t.Fatalf("Converted gif smaller than 16x16")
}
}
func TestResizeIconPng(t *testing.T) {
data, err := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAACEAAAAhCAYAAABX5MJvAAAALUlEQVR42u3OMQEAAAgDoJnc6BpjDyRgcrcpGwkJCQkJCQkJCQkJCQkJCYmyB7NfUj/Kk4FkAAAAAElFTkSuQmCC")
if err != nil {
t.Fatal(err)
}
icon := model.Icon{
Content: data,
MimeType: "image/png",
}
resizedIcon := resizeIcon(&icon)
if bytes.Equal(data, resizedIcon.Content) {
t.Fatalf("Didn't convert png of 33x33")
}
config, _, err := image.DecodeConfig(bytes.NewReader(resizedIcon.Content))
if err != nil {
t.Fatalf("Couln't decode resulting png: %v", err)
}
if config.Height != 32 || config.Width != 32 {
t.Fatalf("Was expecting an image of 16x16, got %dx%d", config.Width, config.Height)
}
}
func TestResizeInvalidImage(t *testing.T) {
icon := model.Icon{
Content: []byte("invalid data"),
MimeType: "image/gif",
}
if !bytes.Equal(icon.Content, resizeIcon(&icon).Content) {
t.Fatalf("Tried to convert an invalid image")
}
}


@ -7,7 +7,7 @@ import (
"errors"
"fmt"
"log/slog"
"regexp"
"net/url"
"strconv"
"github.com/PuerkitoBio/goquery"
@ -17,14 +17,17 @@ import (
"miniflux.app/v2/internal/reader/fetcher"
)
var nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
if !config.Opts.FetchNebulaWatchTime() {
return false
}
matches := nebulaRegex.FindStringSubmatch(entry.URL)
return matches != nil
u, err := url.Parse(entry.URL)
if err != nil {
return false
}
return u.Hostname() == "nebula.tv"
}
func fetchNebulaWatchTime(websiteURL string) (int, error) {
@ -45,7 +48,7 @@ func fetchNebulaWatchTime(websiteURL string) (int, error) {
return 0, docErr
}
durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
durs, exists := doc.FindMatcher(goquery.Single(`meta[property="video:duration"]`)).Attr("content")
// durs contains video watch time in seconds
if !exists {
return 0, errors.New("duration not found")
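
This hunk (and the identical one in the Odysee processor below) replaces a prefix regex with an exact hostname comparison. The old pattern `^https://nebula\.tv` is anchored only at the front of the string, so look-alike hosts slip through. A standalone sketch, with a hypothetical look-alike URL:

package main

import (
	"fmt"
	"net/url"
	"regexp"
)

func main() {
	oldWay := regexp.MustCompile(`^https://nebula\.tv`)
	for _, raw := range []string{
		"https://nebula.tv/videos/some-video",
		"https://nebula.tv.attacker.example/videos/some-video", // hypothetical
	} {
		u, err := url.Parse(raw)
		newWay := err == nil && u.Hostname() == "nebula.tv"
		fmt.Printf("%s\n  regex=%v hostname=%v\n", raw, oldWay.MatchString(raw), newWay)
	}
}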


@ -7,7 +7,7 @@ import (
"errors"
"fmt"
"log/slog"
"regexp"
"net/url"
"strconv"
"github.com/PuerkitoBio/goquery"
@ -17,14 +17,17 @@ import (
"miniflux.app/v2/internal/reader/fetcher"
)
var odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
if !config.Opts.FetchOdyseeWatchTime() {
return false
}
matches := odyseeRegex.FindStringSubmatch(entry.URL)
return matches != nil
u, err := url.Parse(entry.URL)
if err != nil {
return false
}
return u.Hostname() == "odysee.com"
}
func fetchOdyseeWatchTime(websiteURL string) (int, error) {
@ -45,7 +48,7 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
return 0, docErr
}
durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
durs, exists := doc.FindMatcher(goquery.Single(`meta[property="og:video:duration"]`)).Attr("content")
// durs contains video watch time in seconds
if !exists {
return 0, errors.New("duration not found")


@ -10,6 +10,9 @@ import (
"strings"
"time"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/metric"
"miniflux.app/v2/internal/model"
@ -20,17 +23,20 @@ import (
"miniflux.app/v2/internal/reader/scraper"
"miniflux.app/v2/internal/reader/urlcleaner"
"miniflux.app/v2/internal/storage"
"github.com/tdewolff/minify/v2"
"github.com/tdewolff/minify/v2/html"
)
var customReplaceRuleRegex = regexp.MustCompile(`rewrite\("([^"]+)"\|"([^"]+)"\)`)
// ProcessFeedEntries downloads original web page for entries and apply filters.
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, forceRefresh bool) {
var filteredEntries model.Entries
user, storeErr := store.UserByID(userID)
if storeErr != nil {
slog.Error("Database error", slog.Any("error", storeErr))
return
}
// Process older entries first
for i := len(feed.Entries) - 1; i >= 0; i-- {
entry := feed.Entries[i]
@ -135,6 +141,9 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool
var match bool
switch parts[0] {
case "EntryDate":
datePattern := parts[1]
match = isDateMatchingPattern(entry.Date, datePattern)
case "EntryTitle":
match, _ = regexp.MatchString(parts[1], entry.Title)
case "EntryURL":
@ -205,6 +214,9 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool
var match bool
switch parts[0] {
case "EntryDate":
datePattern := parts[1]
match = isDateMatchingPattern(entry.Date, datePattern)
case "EntryTitle":
match, _ = regexp.MatchString(parts[1], entry.Title)
case "EntryURL":
@ -456,3 +468,44 @@ func minifyEntryContent(entryContent string) string {
return entryContent
}
func isDateMatchingPattern(entryDate time.Time, pattern string) bool {
if pattern == "future" {
return entryDate.After(time.Now())
}
parts := strings.SplitN(pattern, ":", 2)
if len(parts) != 2 {
return false
}
operator := parts[0]
dateStr := parts[1]
switch operator {
case "before":
targetDate, err := time.Parse("2006-01-02", dateStr)
if err != nil {
return false
}
return entryDate.Before(targetDate)
case "after":
targetDate, err := time.Parse("2006-01-02", dateStr)
if err != nil {
return false
}
return entryDate.After(targetDate)
case "between":
dates := strings.Split(dateStr, ",")
if len(dates) != 2 {
return false
}
startDate, err1 := time.Parse("2006-01-02", dates[0])
endDate, err2 := time.Parse("2006-01-02", dates[1])
if err1 != nil || err2 != nil {
return false
}
return entryDate.After(startDate) && entryDate.Before(endDate)
}
return false
}
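
For reference, the pattern grammar accepted above, as a sketch in the same package as isDateMatchingPattern (dates are hypothetical; the between bounds are exclusive because the code uses After and Before):

func exampleDatePatterns() {
	d := time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)
	isDateMatchingPattern(d, "future")                        // false: d is in the past
	isDateMatchingPattern(d, "before:2024-03-15")             // true
	isDateMatchingPattern(d, "after:2024-03-15")              // false
	isDateMatchingPattern(d, "between:2024-03-01,2024-03-15") // true
}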


@ -75,6 +75,12 @@ func TestAllowEntries(t *testing.T) {
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(-24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, false},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:2024-03-15"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:2024-03-15"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, true},
{&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, false},
}
for _, tc := range scenarios {


@ -60,7 +60,7 @@ func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) {
return 0, docErr
}
durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
durs, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
if !exists {
return 0, errors.New("duration not found")
}


@ -4,11 +4,9 @@
package readability // import "miniflux.app/v2/internal/reader/readability"
import (
"bytes"
"fmt"
"io"
"log/slog"
"math"
"regexp"
"strings"
@ -24,9 +22,7 @@ const (
var (
divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
sentenceRegexp = regexp.MustCompile(`\.( |$)`)
blacklistCandidatesRegexp = regexp.MustCompile(`popupbody|-ad|g-plus`)
okMaybeItsACandidateRegexp = regexp.MustCompile(`and|article|body|column|main|shadow`)
unlikelyCandidatesRegexp = regexp.MustCompile(`banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`)
@ -77,16 +73,14 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
return "", "", err
}
if hrefValue, exists := document.Find("head base").First().Attr("href"); exists {
if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) {
baseURL = hrefValue
}
}
document.Find("script,style").Each(func(i int, s *goquery.Selection) {
removeNodes(s)
})
document.Find("script,style").Remove()
transformMisusedDivsIntoParagraphs(document)
removeUnlikelyCandidates(document)
@ -107,8 +101,9 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
// Now that we have the top candidate, look through its siblings for content that might also be related.
// Things like preambles, content split by ads that we removed, etc.
func getArticle(topCandidate *candidate, candidates candidateList) string {
output := bytes.NewBufferString("<div>")
siblingScoreThreshold := float32(math.Max(10, float64(topCandidate.score*.2)))
var output strings.Builder
output.WriteString("<div>")
siblingScoreThreshold := max(10, topCandidate.score*.2)
topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) {
append := false
@ -125,10 +120,14 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
content := s.Text()
contentLength := len(content)
if contentLength >= 80 && linkDensity < .25 {
append = true
} else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
append = true
if contentLength >= 80 {
if linkDensity < .25 {
append = true
}
} else {
if linkDensity == 0 && containsSentence(content) {
append = true
}
}
}
@ -139,7 +138,7 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
}
html, _ := s.Html()
fmt.Fprintf(output, "<%s>%s</%s>", tag, html, tag)
output.WriteString("<" + tag + ">" + html + "</" + tag + ">")
}
})
@ -148,18 +147,29 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
}
func removeUnlikelyCandidates(document *goquery.Document) {
var shouldRemove = func(str string) bool {
str = strings.ToLower(str)
if strings.Contains(str, "popupbody") || strings.Contains(str, "-ad") || strings.Contains(str, "g-plus") {
return true
} else if unlikelyCandidatesRegexp.MatchString(str) && !okMaybeItsACandidateRegexp.MatchString(str) {
return true
}
return false
}
document.Find("*").Each(func(i int, s *goquery.Selection) {
if s.Length() == 0 || s.Get(0).Data == "html" || s.Get(0).Data == "body" {
return
}
class, _ := s.Attr("class")
id, _ := s.Attr("id")
str := strings.ToLower(class + id)
if blacklistCandidatesRegexp.MatchString(str) {
removeNodes(s)
} else if unlikelyCandidatesRegexp.MatchString(str) && !okMaybeItsACandidateRegexp.MatchString(str) {
removeNodes(s)
if class, ok := s.Attr("class"); ok {
if shouldRemove(class) {
s.Remove()
}
} else if id, ok := s.Attr("id"); ok {
if shouldRemove(id) {
s.Remove()
}
}
})
}
@ -223,7 +233,7 @@ func getCandidates(document *goquery.Document) candidateList {
contentScore += float32(strings.Count(text, ",") + 1)
// For every 100 characters in this paragraph, add another point. Up to 3 points.
contentScore += float32(math.Min(float64(int(len(text)/100.0)), 3))
contentScore += float32(min(len(text)/100.0, 3))
candidates[parentNode].score += contentScore
if grandParentNode != nil {
@ -262,13 +272,14 @@ func scoreNode(s *goquery.Selection) *candidate {
// Get the density of links as a percentage of the content
// This is the amount of text that is inside a link divided by the total text in the node.
func getLinkDensity(s *goquery.Selection) float32 {
linkLength := len(s.Find("a").Text())
textLength := len(s.Text())
if textLength == 0 {
return 0
}
linkLength := len(s.Find("a").Text())
return float32(linkLength) / float32(textLength)
}
@ -276,28 +287,21 @@ func getLinkDensity(s *goquery.Selection) float32 {
// element looks good or bad.
func getClassWeight(s *goquery.Selection) float32 {
weight := 0
class, _ := s.Attr("class")
id, _ := s.Attr("id")
class = strings.ToLower(class)
id = strings.ToLower(id)
if class != "" {
if class, ok := s.Attr("class"); ok {
class = strings.ToLower(class)
if negativeRegexp.MatchString(class) {
weight -= 25
}
if positiveRegexp.MatchString(class) {
} else if positiveRegexp.MatchString(class) {
weight += 25
}
}
if id != "" {
if id, ok := s.Attr("id"); ok {
id = strings.ToLower(id)
if negativeRegexp.MatchString(id) {
weight -= 25
}
if positiveRegexp.MatchString(id) {
} else if positiveRegexp.MatchString(id) {
weight += 25
}
}
@ -315,11 +319,6 @@ func transformMisusedDivsIntoParagraphs(document *goquery.Document) {
})
}
func removeNodes(s *goquery.Selection) {
s.Each(func(i int, s *goquery.Selection) {
parent := s.Parent()
if parent.Length() > 0 {
parent.Get(0).RemoveChild(s.Get(0))
}
})
func containsSentence(content string) bool {
return strings.HasSuffix(content, ".") || strings.Contains(content, ". ")
}


@ -4,6 +4,8 @@
package readability // import "miniflux.app/v2/internal/reader/readability"
import (
"bytes"
"os"
"strings"
"testing"
)
@ -100,3 +102,83 @@ func TestWithoutBaseURL(t *testing.T) {
t.Errorf(`Unexpected base URL, got %q instead of ""`, baseURL)
}
}
func TestRemoveStyleScript(t *testing.T) {
html := `
<html>
<head>
<title>Test</title>
<script src="tololo.js"></script>
</head>
<body>
<script src="tololo.js"></script>
<style>
h1 {color:red;}
p {color:blue;}
</style>
<article>Some content</article>
</body>
</html>`
want := `<div><div><article>Somecontent</article></div></div>`
_, content, err := ExtractContent(strings.NewReader(html))
if err != nil {
t.Fatal(err)
}
content = strings.ReplaceAll(content, "\n", "")
content = strings.ReplaceAll(content, " ", "")
content = strings.ReplaceAll(content, "\t", "")
if content != want {
t.Errorf(`Invalid content, got %s instead of %s`, content, want)
}
}
func TestRemoveBlacklist(t *testing.T) {
html := `
<html>
<head>
<title>Test</title>
</head>
<body>
<article class="super-ad">Some content</article>
<article class="g-plus-crap">Some other thing</article>
<article class="stuff popupbody">And more</article>
<article class="legit">Valid!</article>
</body>
</html>`
want := `<div><div><articleclass="legit">Valid!</article></div></div>`
_, content, err := ExtractContent(strings.NewReader(html))
if err != nil {
t.Fatal(err)
}
content = strings.ReplaceAll(content, "\n", "")
content = strings.ReplaceAll(content, " ", "")
content = strings.ReplaceAll(content, "\t", "")
if content != want {
t.Errorf(`Invalid content, got %s instead of %s`, content, want)
}
}
func BenchmarkExtractContent(b *testing.B) {
var testCases = map[string][]byte{
"miniflux_github.html": {},
"miniflux_wikipedia.html": {},
}
for filename := range testCases {
data, err := os.ReadFile("testdata/" + filename)
if err != nil {
b.Fatalf(`Unable to read file %q: %v`, filename, err)
}
testCases[filename] = data
}
for range b.N {
for _, v := range testCases {
ExtractContent(bytes.NewReader(v))
}
}
}


@ -0,0 +1 @@
../../reader/sanitizer/testdata/


@ -19,7 +19,7 @@ func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed in
sanitizedContent := sanitizer.StripTags(content)
// Literature on language detection says that around 100 signs is enough; we're safe here.
truncationPoint := int(math.Min(float64(len(sanitizedContent)), 250))
truncationPoint := min(len(sanitizedContent), 250)
// We're only interested in identifying Japanese/Chinese/Korean
options := whatlanggo.Options{


@ -0,0 +1,67 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import (
"testing"
)
func TestGetRefererForURL(t *testing.T) {
testCases := []struct {
name string
url string
expected string
}{
{
name: "Weibo Image URL",
url: "https://wx1.sinaimg.cn/large/example.jpg",
expected: "https://weibo.com",
},
{
name: "Pixiv Image URL",
url: "https://i.pximg.net/img-master/example.jpg",
expected: "https://www.pixiv.net",
},
{
name: "SSPai CDN URL",
url: "https://cdnfile.sspai.com/example.png",
expected: "https://sspai.com",
},
{
name: "Instagram CDN URL",
url: "https://scontent-sjc3-1.cdninstagram.com/example.jpg",
expected: "https://www.instagram.com",
},
{
name: "Piokok URL",
url: "https://sp1.piokok.com/example.jpg",
expected: "https://sp1.piokok.com",
},
{
name: "Weibo Video URL",
url: "https://f.video.weibocdn.com/example.mp4",
expected: "https://weibo.com",
},
{
name: "HelloGithub Image URL",
url: "https://img.hellogithub.com/example.png",
expected: "https://hellogithub.com",
},
{
name: "Non-matching URL",
url: "https://example.com/image.jpg",
expected: "",
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
result := GetRefererForURL(tc.url)
if result != tc.expected {
t.Errorf("GetRefererForURL(%s): expected %s, got %s",
tc.url, tc.expected, result)
}
})
}
}


@ -11,6 +11,7 @@ import (
"net/url"
"regexp"
"strings"
"unicode"
"miniflux.app/v2/internal/config"
@ -23,11 +24,28 @@ var (
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
invidioRegex = regexp.MustCompile(`https?://(.*)/watch\?v=(.*)`)
imgRegex = regexp.MustCompile(`<img [^>]+>`)
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
)
func addImageTitle(entryURL, entryContent string) string {
// titlelize returns a copy of the string s with all Unicode letters that begin words
// mapped to their Unicode title case.
func titlelize(s string) string {
// A closure is used here to remember the previous character
// so that we can check if there is a space preceding the current
// character.
previous := ' '
return strings.Map(
func(current rune) rune {
if unicode.IsSpace(previous) {
previous = current
return unicode.ToTitle(current)
}
previous = current
return current
}, strings.ToLower(s))
}
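// For instance (illustrative, not part of the change):
// titlelize("an ALL-CAPS headline, explained") == "An All-caps Headline, Explained"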
func addImageTitle(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
@ -44,14 +62,14 @@ func addImageTitle(entryURL, entryContent string) string {
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
})
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
return entryContent
}
func addMailtoSubject(entryURL, entryContent string) string {
func addMailtoSubject(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
@ -76,18 +94,19 @@ func addMailtoSubject(entryURL, entryContent string) string {
a.AppendHtml(" [" + html.EscapeString(subject) + "]")
})
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
return entryContent
}
func addDynamicImage(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
func addDynamicImage(entryContent string) string {
parserHtml, err := nethtml.ParseWithOptions(strings.NewReader(entryContent), nethtml.ParseOptionEnableScripting(false))
if err != nil {
return entryContent
}
doc := goquery.NewDocumentFromNode(parserHtml)
// Ordered most preferred to least preferred.
candidateAttrs := []string{
@ -149,25 +168,22 @@ func addDynamicImage(entryURL, entryContent string) string {
if !changed {
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
matches := imgRegex.FindAllString(noscript.Text(), 2)
if len(matches) == 1 {
if img := noscript.Find("img"); img.Length() == 1 {
img.Unwrap()
changed = true
noscript.ReplaceWithHtml(matches[0])
}
})
}
if changed {
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
return entryContent
}
func addDynamicIframe(entryURL, entryContent string) string {
func addDynamicIframe(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
@ -197,14 +213,14 @@ func addDynamicIframe(entryURL, entryContent string) string {
})
if changed {
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
return entryContent
}
func fixMediumImages(entryURL, entryContent string) string {
func fixMediumImages(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
@ -217,11 +233,11 @@ func fixMediumImages(entryURL, entryContent string) string {
}
})
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
func useNoScriptImages(entryURL, entryContent string) string {
func useNoScriptImages(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
@ -239,7 +255,7 @@ func useNoScriptImages(entryURL, entryContent string) string {
}
})
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
@ -317,7 +333,7 @@ func removeCustom(entryContent string, selector string) string {
doc.Find(selector).Remove()
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
@ -344,7 +360,7 @@ func applyFuncOnTextContent(entryContent string, selector string, repl func(stri
doc.Find(selector).Each(treatChildren)
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
@ -401,7 +417,7 @@ func addHackerNewsLinksUsing(entryContent, app string) string {
}
})
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}
@ -420,7 +436,7 @@ func removeTables(entryContent string) string {
for _, selector := range selectors {
for {
loopElement = doc.Find(selector).First()
loopElement = doc.FindMatcher(goquery.Single(selector))
if loopElement.Length() == 0 {
break
@ -436,6 +452,6 @@ func removeTables(entryContent string) string {
}
}
output, _ := doc.Find("body").First().Html()
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return output
}


@ -11,9 +11,6 @@ import (
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/urllib"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
type rule struct {
@ -24,13 +21,13 @@ type rule struct {
func (rule rule) applyRule(entryURL string, entry *model.Entry) {
switch rule.name {
case "add_image_title":
entry.Content = addImageTitle(entryURL, entry.Content)
entry.Content = addImageTitle(entry.Content)
case "add_mailto_subject":
entry.Content = addMailtoSubject(entryURL, entry.Content)
entry.Content = addMailtoSubject(entry.Content)
case "add_dynamic_image":
entry.Content = addDynamicImage(entryURL, entry.Content)
entry.Content = addDynamicImage(entry.Content)
case "add_dynamic_iframe":
entry.Content = addDynamicIframe(entryURL, entry.Content)
entry.Content = addDynamicIframe(entry.Content)
case "add_youtube_video":
entry.Content = addYoutubeVideo(entryURL, entry.Content)
case "add_invidious_video":
@ -46,9 +43,9 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
case "convert_text_link", "convert_text_links":
entry.Content = replaceTextLinks(entry.Content)
case "fix_medium_images":
entry.Content = fixMediumImages(entryURL, entry.Content)
entry.Content = fixMediumImages(entry.Content)
case "use_noscript_figure_images":
entry.Content = useNoScriptImages(entryURL, entry.Content)
entry.Content = useNoScriptImages(entry.Content)
case "replace":
// Format: replace("search-term"|"replace-term")
if len(rule.args) >= 2 {
@ -94,7 +91,7 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
case "remove_tables":
entry.Content = removeTables(entry.Content)
case "remove_clickbait":
entry.Title = cases.Title(language.English).String(strings.ToLower(entry.Title))
entry.Title = titlelize(entry.Title)
}
}


@ -256,7 +256,7 @@ func TestRewriteWithNoLazyImage(t *testing.T) {
func TestRewriteWithLazyImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
}
testEntry := &model.Entry{
Title: `A title`,
@ -272,7 +272,7 @@ func TestRewriteWithLazyImage(t *testing.T) {
func TestRewriteWithLazyDivImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
}
testEntry := &model.Entry{
Title: `A title`,


@ -3,7 +3,10 @@
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import "regexp"
import (
"net/url"
"strings"
)
// List of predefined rewrite rules (alphabetically sorted)
// Available rules: "add_image_title", "add_youtube_video"
@ -39,49 +42,40 @@ var predefinedRules = map[string]string{
"youtube.com": "add_youtube_video",
}
type RefererRule struct {
URLPattern *regexp.Regexp
Referer string
}
// List of predefined referer rules
var PredefinedRefererRules = []RefererRule{
{
URLPattern: regexp.MustCompile(`^https://\w+\.sinaimg\.cn`),
Referer: "https://weibo.com",
},
{
URLPattern: regexp.MustCompile(`^https://i\.pximg\.net`),
Referer: "https://www.pixiv.net",
},
{
URLPattern: regexp.MustCompile(`^https://cdnfile\.sspai\.com`),
Referer: "https://sspai.com",
},
{
URLPattern: regexp.MustCompile(`^https://(?:\w|-)+\.cdninstagram\.com`),
Referer: "https://www.instagram.com",
},
{
URLPattern: regexp.MustCompile(`^https://sp1\.piokok\.com`),
Referer: "https://sp1.piokok.com",
},
{
URLPattern: regexp.MustCompile(`^https://f\.video\.weibocdn\.com`),
Referer: "https://weibo.com",
},
{
URLPattern: regexp.MustCompile(`^https://img\.hellogithub\.com`),
Referer: "https://hellogithub.com",
},
}
// GetRefererForURL returns the referer for the given URL if it exists, otherwise an empty string.
func GetRefererForURL(url string) string {
for _, rule := range PredefinedRefererRules {
if rule.URLPattern.MatchString(url) {
return rule.Referer
}
func GetRefererForURL(u string) string {
parsedUrl, err := url.Parse(u)
if err != nil {
return ""
}
switch parsedUrl.Hostname() {
case "moyu.im":
return "https://i.jandan.net"
case "i.pximg.net":
return "https://www.pixiv.net"
case "sp1.piokok.com":
return "https://sp1.piokok.com"
case "cdnfile.sspai.com":
return "https://sspai.com"
case "f.video.weibocdn.com":
return "https://weibo.com"
case "img.hellogithub.com":
return "https://hellogithub.com"
case "bjp.org.cn":
return "https://bjp.org.cn"
case "appinn.com":
return "https://appinn.com"
}
switch {
case strings.HasSuffix(parsedUrl.Hostname(), ".sinaimg.cn"):
return "https://weibo.com"
case strings.HasSuffix(parsedUrl.Hostname(), ".cdninstagram.com"):
return "https://www.instagram.com"
case strings.HasSuffix(parsedUrl.Hostname(), ".moyu.im"):
return "https://i.jandan.net"
}
return ""
}
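
The lookup now tries exact hostnames first, then falls back to domain-suffix matches. Illustrative calls (URLs are hypothetical):

GetRefererForURL("https://i.pximg.net/img-master/pic.jpg") // "https://www.pixiv.net" (exact host)
GetRefererForURL("https://wx1.sinaimg.cn/large/pic.jpg")   // "https://weibo.com" (suffix .sinaimg.cn)
GetRefererForURL("https://example.com/pic.jpg")            // "" (no rule matches)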


@ -5,7 +5,7 @@ package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
import (
"io"
"regexp"
"net/url"
"slices"
"strconv"
"strings"
@ -18,8 +18,7 @@ import (
)
var (
youtubeEmbedRegex = regexp.MustCompile(`^(?:https?:)?//(?:www\.)?youtube\.com/embed/(.+)$`)
tagAllowList = map[string][]string{
tagAllowList = map[string][]string{
"a": {"href", "title", "id"},
"abbr": {"title"},
"acronym": {"title"},
@ -397,9 +396,27 @@ func isValidIframeSource(baseURL, src string) bool {
}
func rewriteIframeURL(link string) string {
matches := youtubeEmbedRegex.FindStringSubmatch(link)
if len(matches) == 2 {
return config.Opts.YouTubeEmbedUrlOverride() + matches[1]
u, err := url.Parse(link)
if err != nil {
return link
}
switch strings.TrimPrefix(u.Hostname(), "www.") {
case "youtube.com":
if strings.HasPrefix(u.Path, "/embed/") {
if len(u.RawQuery) > 0 {
return config.Opts.YouTubeEmbedUrlOverride() + strings.TrimPrefix(u.Path, "/embed/") + "?" + u.RawQuery
}
return config.Opts.YouTubeEmbedUrlOverride() + strings.TrimPrefix(u.Path, "/embed/")
}
case "player.vimeo.com":
// See https://help.vimeo.com/hc/en-us/articles/12426260232977-About-Player-parameters
if strings.HasPrefix(u.Path, "/video/") {
if len(u.RawQuery) > 0 {
return link + "&dnt=1"
}
return link + "?dnt=1"
}
}
return link
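
Roughly, the new behavior (the YouTube output assumes the default value of the YouTube embed URL override option, https://www.youtube-nocookie.com/embed/; video IDs are hypothetical):

rewriteIframeURL("https://www.youtube.com/embed/abc123?start=10")
// => "https://www.youtube-nocookie.com/embed/abc123?start=10"
rewriteIframeURL("https://player.vimeo.com/video/123456?title=0")
// => "https://player.vimeo.com/video/123456?title=0&dnt=1"
rewriteIframeURL("https://example.org/embed/42")
// => unchanged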


@ -611,9 +611,9 @@ func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
}
}
func TestReplaceIframeURL(t *testing.T) {
func TestReplaceIframeVimeoDNTURL(t *testing.T) {
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0&amp;dnt=1" sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" loading="lazy"></iframe>`
output := Sanitize("http://example.org/", input)
if expected != output {


@ -75,7 +75,7 @@ func findContentUsingCustomRules(page io.Reader, rules string) (baseURL string,
return "", "", err
}
if hrefValue, exists := document.Find("head base").First().Attr("href"); exists {
if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) {
baseURL = hrefValue


@ -26,7 +26,6 @@ import (
)
var (
youtubeHostRegex = regexp.MustCompile(`youtube\.com$`)
youtubeChannelRegex = regexp.MustCompile(`channel/(.*)$`)
)
@ -156,7 +155,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
}
if hrefValue, exists := doc.Find("head base").First().Attr("href"); exists {
if hrefValue, exists := doc.FindMatcher(goquery.Single("head base")).Attr("href"); exists {
hrefValue = strings.TrimSpace(hrefValue)
if urllib.IsAbsoluteURL(hrefValue) {
websiteURL = hrefValue
@ -295,7 +294,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromYouTubeChannelPage(websiteURL
return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
}
if !youtubeHostRegex.MatchString(decodedUrl.Host) {
if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
return nil, nil
}
@ -314,7 +313,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromYouTubePlaylistPage(websiteURL
return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
}
if !youtubeHostRegex.MatchString(decodedUrl.Host) {
if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
return nil, nil
}