1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-10-05 19:31:01 +00:00

feat(rewrite): add remove_img_blur_params rule

Adds a new content rewrite rule, `remove_img_blur_params`, that strips the entire query string from an image URL when it contains a `blur` parameter with a positive integer value.

This addresses sites, such as Belgian national news outlets, that serve blurry placeholder images which are later replaced with high-quality versions. With this rule, Miniflux fetches the original images instead of the placeholders.
This commit is contained in:
Axel Verhaeghe 2025-10-02 05:41:08 +02:00 committed by GitHub
parent 04a360a536
commit b8bc367a00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 200 additions and 3 deletions

View file

@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
entry.Title = titlelize(entry.Title)
case "fix_ghost_cards":
entry.Content = fixGhostCards(entry.Content)
case "remove_img_blur_params":
entry.Content = removeImgBlurParams(entry.Content)
}
}
@ -130,7 +132,7 @@ func parseRules(rulesText string) (rules []rule) {
rules[l].args = append(rules[l].args, text)
}
case scanner.EOF:
return
return rules
}
}
}

View file

@ -10,6 +10,7 @@ import (
"log/slog"
"net/url"
"regexp"
"strconv"
"strings"
"unicode"
@ -547,3 +548,43 @@ func fixGhostCards(entryContent string) string {
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return strings.TrimSpace(output)
}
// removeImgBlurParams strips the whole query string from the src attribute of
// every <img> element whose URL carries a "blur" query parameter holding a
// positive integer. Images whose blur value is missing, zero, negative, or not
// an integer are left untouched.
//
// When no image is rewritten, or when the content cannot be parsed or
// re-serialized, entryContent is returned unchanged. Otherwise the inner HTML
// of the parsed document's <body> is returned, so the markup may be normalized
// by the HTML serializer.
func removeImgBlurParams(entryContent string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if err != nil {
		return entryContent
	}

	changed := false
	doc.Find("img[src]").Each(func(_ int, img *goquery.Selection) {
		srcAttr, exists := img.Attr("src")
		if !exists {
			return
		}

		parsedURL, err := url.Parse(srcAttr)
		if err != nil || parsedURL.RawQuery == "" {
			return
		}

		// url.Values.Get is case-sensitive, so only a lowercase "blur" key is
		// considered. A missing key yields "", which Atoi rejects.
		blurLevel, err := strconv.Atoi(parsedURL.Query().Get("blur"))
		if err != nil || blurLevel <= 0 {
			return
		}

		// Drop every query parameter, not just "blur", to obtain the URL of
		// the original image.
		parsedURL.RawQuery = ""
		img.SetAttr("src", parsedURL.String())
		changed = true
	})

	if !changed {
		return entryContent
	}

	output, err := doc.FindMatcher(goquery.Single("body")).Html()
	if err != nil {
		// Never replace the entry content with an empty string when
		// serialization fails; keep the original markup instead.
		return entryContent
	}
	return output
}

View file

@ -133,7 +133,6 @@ func TestRewriteYoutubeLinkAndCustomEmbedURL(t *testing.T) {
var err error
parser := config.NewConfigParser()
config.Opts, err = parser.ParseEnvironmentVariables()
if err != nil {
t.Fatalf(`Parsing failure: %v`, err)
}
@ -241,7 +240,6 @@ func TestAddYoutubeVideoFromIdWithCustomEmbedURL(t *testing.T) {
var err error
parser := config.NewConfigParser()
config.Opts, err = parser.ParseEnvironmentVariables()
if err != nil {
t.Fatalf(`Parsing failure: %v`, err)
}
@ -797,6 +795,7 @@ func TestRewriteRemoveCustom(t *testing.T) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteRemoveQuotedSelector(t *testing.T) {
controlEntry := &model.Entry{
URL: "https://example.org/article",
@ -1248,3 +1247,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
// TestStripImageQueryParams checks that the remove_img_blur_params rule drops
// the query string of images carrying a positive blur parameter while leaving
// every other image URL intact.
func TestStripImageQueryParams(t *testing.T) {
	const input = `
<article>
<p>Article content with images having query parameters:</p>
<img src="https://example.org/images/image1.jpg?width=200&height=113&q=80&blur=90" alt="Image with params">
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params">
<p>More images with various query parameters:</p>
<img src="https://example.org/image123.jpg?blur=50&size=small&format=webp" alt="Complex query params">
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params">
<p>Image without query parameters:</p>
<img src="https://example.org/single-image.jpg" alt="Clean image">
<p>Images with various other params:</p>
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1">
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2">
</article>`

	const expected = `<article>
<p>Article content with images having query parameters:</p>
<img src="https://example.org/images/image1.jpg" alt="Image with params"/>
<img src="https://example.org/images/image2.jpg?width=800&amp;height=600&amp;q=85" alt="Another image with params"/>
<p>More images with various query parameters:</p>
<img src="https://example.org/image123.jpg" alt="Complex query params"/>
<img src="https://example.org/image123.jpg?size=large&amp;quality=95&amp;cache=123" alt="Different params"/>
<p>Image without query parameters:</p>
<img src="https://example.org/single-image.jpg" alt="Clean image"/>
<p>Images with various other params:</p>
<img src="https://example.org/normal1.jpg?width=300&amp;format=jpg" alt="Normal 1"/>
<img src="https://example.org/normal1.jpg?width=600&amp;quality=high" alt="Normal 2"/>
</article>`

	got := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `News Article Title`,
		Content: input,
	}
	want := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `News Article Title`,
		Content: expected,
	}

	ApplyContentRewriteRules(got, `remove_img_blur_params`)

	if !reflect.DeepEqual(got, want) {
		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
	}
}
// TestStripImageQueryParamsNoChanges checks that content without any image is
// returned untouched by the remove_img_blur_params rule.
func TestStripImageQueryParamsNoChanges(t *testing.T) {
	const content = `<p>No images here:</p>
<div>Just some text content</div>
<a href="https://example.org">A link</a>`

	got := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Article Without Images`,
		Content: content,
	}
	want := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Article Without Images`,
		Content: content,
	}

	ApplyContentRewriteRules(got, `remove_img_blur_params`)

	if !reflect.DeepEqual(got, want) {
		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
	}
}
// TestStripImageQueryParamsEdgeCases covers a blur parameter in first and
// middle position, an uppercase BLUR key (which must not trigger stripping),
// and images without a blur parameter or without any query string.
func TestStripImageQueryParamsEdgeCases(t *testing.T) {
	const input = `
<p>Edge cases for image query parameter stripping:</p>
<!-- Various query parameters -->
<img src="https://example.org/image1.jpg?blur=80&width=300" alt="Multiple params">
<!-- Complex query parameters -->
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params">
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params">
<!-- Query params in middle of string -->
<img src="https://example.org/image4.jpg?size=large&blur=30&format=webp&quality=90" alt="Middle params">
<!-- Image without query params -->
<img src="https://example.org/clean.jpg" alt="Clean image">
`

	const expected = `<p>Edge cases for image query parameter stripping:</p>
<!-- Various query parameters -->
<img src="https://example.org/image1.jpg" alt="Multiple params"/>
<!-- Complex query parameters -->
<img src="https://example.org/image2.jpg?BLUR=60&amp;format=webp&amp;cache=123" alt="Complex params"/>
<img src="https://example.org/image3.jpg?quality=high&amp;version=2" alt="Other params"/>
<!-- Query params in middle of string -->
<img src="https://example.org/image4.jpg" alt="Middle params"/>
<!-- Image without query params -->
<img src="https://example.org/clean.jpg" alt="Clean image"/>
`

	got := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Edge Cases`,
		Content: input,
	}
	want := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Edge Cases`,
		Content: expected,
	}

	ApplyContentRewriteRules(got, `remove_img_blur_params`)

	if !reflect.DeepEqual(got, want) {
		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
	}
}
// TestStripImageQueryParamsSimple checks the four basic cases: blur=0 (kept),
// a positive blur value (query string removed), no blur parameter (kept), and
// no query string at all (kept).
func TestStripImageQueryParamsSimple(t *testing.T) {
	const input = `
<p>Testing query parameter stripping:</p>
<!-- Images with various query parameters -->
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero">
<img src="https://example.org/test2.jpg?blur=50&width=300&format=webp" alt="With blur fifty">
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param">
<img src="https://example.org/test4.jpg" alt="No params at all">
`

	const expected = `<p>Testing query parameter stripping:</p>
<!-- Images with various query parameters -->
<img src="https://example.org/test1.jpg?blur=0&amp;width=300" alt="With blur zero"/>
<img src="https://example.org/test2.jpg" alt="With blur fifty"/>
<img src="https://example.org/test3.jpg?width=800&amp;quality=high" alt="No blur param"/>
<img src="https://example.org/test4.jpg" alt="No params at all"/>
`

	got := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Simple Test`,
		Content: input,
	}
	want := &model.Entry{
		URL:     "https://example.org/article",
		Title:   `Simple Test`,
		Content: expected,
	}

	ApplyContentRewriteRules(got, `remove_img_blur_params`)

	if !reflect.DeepEqual(got, want) {
		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
	}
}