From b8bc367a00441072527e0a8c109cededf627b490 Mon Sep 17 00:00:00 2001
From: Axel Verhaeghe <36919424+AxelVerhaeghe@users.noreply.github.com>
Date: Thu, 2 Oct 2025 05:41:08 +0200
Subject: [PATCH] feat(rewrite): add `remove_img_blur_params` rule

Adds a new content rewrite rule to strip image URL query parameters from
blurred images. This addresses issues with sites like Belgian national
news that use blurry placeholder images which get replaced with
high-quality versions, allowing Miniflux to fetch the original images
instead of the placeholders.
---
 internal/reader/rewrite/content_rewrite.go    |   4 +-
 .../rewrite/content_rewrite_functions.go      |  41 +++++
 .../reader/rewrite/content_rewrite_test.go    | 158 +++++++++++++++++-
 3 files changed, 200 insertions(+), 3 deletions(-)

diff --git a/internal/reader/rewrite/content_rewrite.go b/internal/reader/rewrite/content_rewrite.go
index 4d806dad..db6db6e2 100644
--- a/internal/reader/rewrite/content_rewrite.go
+++ b/internal/reader/rewrite/content_rewrite.go
@@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
 		entry.Title = titlelize(entry.Title)
 	case "fix_ghost_cards":
 		entry.Content = fixGhostCards(entry.Content)
+	case "remove_img_blur_params":
+		entry.Content = removeImgBlurParams(entry.Content)
 	}
 }
 
@@ -130,7 +132,7 @@ func parseRules(rulesText string) (rules []rule) {
 				rules[l].args = append(rules[l].args, text)
 			}
 		case scanner.EOF:
-			return
+			return rules
 		}
 	}
 }
diff --git a/internal/reader/rewrite/content_rewrite_functions.go b/internal/reader/rewrite/content_rewrite_functions.go
index a3858270..c2551fe7 100644
--- a/internal/reader/rewrite/content_rewrite_functions.go
+++ b/internal/reader/rewrite/content_rewrite_functions.go
@@ -10,6 +10,7 @@ import (
 	"log/slog"
 	"net/url"
 	"regexp"
+	"strconv"
 	"strings"
 	"unicode"
 
@@ -547,3 +548,43 @@ func fixGhostCards(entryContent string) string {
 	output, _ := doc.FindMatcher(goquery.Single("body")).Html()
 	return strings.TrimSpace(output)
 }
+
+// removeImgBlurParams drops the whole query string from the src of every <img>
+// whose URL carries a "blur" parameter with a positive integer value, so that
+// the original image is used instead of its blurred placeholder. The content is
+// returned unchanged when it cannot be parsed or when no image was rewritten.
+func removeImgBlurParams(entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	changed := false
+	doc.Find("img[src]").Each(func(_ int, img *goquery.Selection) {
+		parsedURL, err := url.Parse(img.AttrOr("src", ""))
+		if err != nil || parsedURL.RawQuery == "" {
+			return
+		}
+
+		// A missing, non-numeric or zero blur value is not a placeholder.
+		blur, err := strconv.Atoi(parsedURL.Query().Get("blur"))
+		if err != nil || blur <= 0 {
+			return
+		}
+
+		parsedURL.RawQuery = ""
+		img.SetAttr("src", parsedURL.String())
+		changed = true
+	})
+
+	if !changed {
+		return entryContent
+	}
+
+	// Keep the original content rather than emptying it if rendering fails.
+	output, err := doc.FindMatcher(goquery.Single("body")).Html()
+	if err != nil {
+		return entryContent
+	}
+	return output
+}
diff --git a/internal/reader/rewrite/content_rewrite_test.go b/internal/reader/rewrite/content_rewrite_test.go
index 307dd747..2aa9d6b4 100644
--- a/internal/reader/rewrite/content_rewrite_test.go
+++ b/internal/reader/rewrite/content_rewrite_test.go
@@ -133,7 +133,6 @@ func TestRewriteYoutubeLinkAndCustomEmbedURL(t *testing.T) {
 	var err error
 	parser := config.NewConfigParser()
 	config.Opts, err = parser.ParseEnvironmentVariables()
-
 	if err != nil {
 		t.Fatalf(`Parsing failure: %v`, err)
 	}
@@ -241,7 +240,6 @@ func TestAddYoutubeVideoFromIdWithCustomEmbedURL(t *testing.T) {
 	var err error
 	parser := config.NewConfigParser()
 	config.Opts, err = parser.ParseEnvironmentVariables()
-
 	if err != nil {
 		t.Fatalf(`Parsing failure: %v`, err)
 	}
@@ -797,6 +795,7 @@ func TestRewriteRemoveCustom(t *testing.T) {
 		t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
 	}
 }
+
 func TestRewriteRemoveQuotedSelector(t *testing.T) {
 	controlEntry := &model.Entry{
 		URL:     "https://example.org/article",
@@ -1248,3 +1247,158 @@ func 
TestFixGhostCardMultipleSplit(t *testing.T) { t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } + +func TestStripImageQueryParams(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `News Article Title`, + Content: ` +
+

Article content with images having query parameters:

+ Image with params + Another image with params + +

More images with various query parameters:

+ Complex query params + Different params + +

Image without query parameters:

+ Clean image + +

Images with various other params:

+ Normal 1 + Normal 2 +
`, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `News Article Title`, + Content: `
+

Article content with images having query parameters:

+ Image with params + Another image with params + +

More images with various query parameters:

+ Complex query params + Different params + +

Image without query parameters:

+ Clean image + +

Images with various other params:

+ Normal 1 + Normal 2 +
`, + } + ApplyContentRewriteRules(testEntry, `remove_img_blur_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsNoChanges(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Article Without Images`, + Content: `

No images here:

+
Just some text content
+ A link`, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Article Without Images`, + Content: `

No images here:

+
Just some text content
+ A link`, + } + ApplyContentRewriteRules(testEntry, `remove_img_blur_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsEdgeCases(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Edge Cases`, + Content: ` +

Edge cases for image query parameter stripping:

+ + + Multiple params + + + Complex params + Other params + + + Middle params + + + Clean image + `, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Edge Cases`, + Content: `

Edge cases for image query parameter stripping:

+ + + Multiple params + + + Complex params + Other params + + + Middle params + + + Clean image + `, + } + ApplyContentRewriteRules(testEntry, `remove_img_blur_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsSimple(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Simple Test`, + Content: ` +

Testing query parameter stripping:

+ + + With blur zero + With blur fifty + No blur param + No params at all + `, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Simple Test`, + Content: `

Testing query parameter stripping:

+ + + With blur zero + With blur fifty + No blur param + No params at all + `, + } + ApplyContentRewriteRules(testEntry, `remove_img_blur_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +}