From a8d539ec62825569bcd303ac3fed3ce55acdb834 Mon Sep 17 00:00:00 2001 From: Axel Verhaeghe Date: Mon, 29 Sep 2025 22:34:54 +0200 Subject: [PATCH] feat(reader): add content rewrite rule to strip query params from blurry placeholder images --- internal/reader/rewrite/content_rewrite.go | 2 + .../rewrite/content_rewrite_functions.go | 48 ++++++ .../reader/rewrite/content_rewrite_test.go | 155 ++++++++++++++++++ 3 files changed, 205 insertions(+) diff --git a/internal/reader/rewrite/content_rewrite.go b/internal/reader/rewrite/content_rewrite.go index 4d806dad..fab9cedb 100644 --- a/internal/reader/rewrite/content_rewrite.go +++ b/internal/reader/rewrite/content_rewrite.go @@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) { entry.Title = titlelize(entry.Title) case "fix_ghost_cards": entry.Content = fixGhostCards(entry.Content) + case "strip_image_query_params": + entry.Content = stripImageQueryParams(entry.Content) } } diff --git a/internal/reader/rewrite/content_rewrite_functions.go b/internal/reader/rewrite/content_rewrite_functions.go index a3858270..6416fe79 100644 --- a/internal/reader/rewrite/content_rewrite_functions.go +++ b/internal/reader/rewrite/content_rewrite_functions.go @@ -10,6 +10,7 @@ import ( "log/slog" "net/url" "regexp" + "strconv" "strings" "unicode" @@ -547,3 +548,50 @@ func fixGhostCards(entryContent string) string { output, _ := doc.FindMatcher(goquery.Single("body")).Html() return strings.TrimSpace(output) } + +func stripImageQueryParams(entryContent string) string { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent)) + if err != nil { + return entryContent + } + + changed := false + + doc.Find("img").Each(func(i int, img *goquery.Selection) { + srcAttr, exists := img.Attr("src") + if !exists { + return + } + + parsedURL, err := url.Parse(srcAttr) + if err != nil { + return + } + + // Only strip query parameters if this is a blurry placeholder image + if parsedURL.RawQuery != "" { + queryParams, err := url.ParseQuery(parsedURL.RawQuery) + if err != nil { + return + } + + // Check if there's a blur parameter with a non-zero value + blurValues, hasBlur := queryParams["blur"] + if hasBlur && len(blurValues) > 0 { + blurValue, err := strconv.Atoi(blurValues[0]) + if err == nil && blurValue > 0 { + parsedURL.RawQuery = "" + img.SetAttr("src", parsedURL.String()) + changed = true + } + } + } + }) + + if changed { + output, _ := doc.FindMatcher(goquery.Single("body")).Html() + return output + } + + return entryContent +} diff --git a/internal/reader/rewrite/content_rewrite_test.go b/internal/reader/rewrite/content_rewrite_test.go index 307dd747..9106fea9 100644 --- a/internal/reader/rewrite/content_rewrite_test.go +++ b/internal/reader/rewrite/content_rewrite_test.go @@ -1248,3 +1248,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) { t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } + +func TestStripImageQueryParams(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `News Article Title`, + Content: ` +
+

Article content with images having query parameters:

+ Image with params + Another image with params + +

More images with various query parameters:

+ Complex query params + Different params + +

Image without query parameters:

+ Clean image + +

Images with various other params:

+ Normal 1 + Normal 2 +
`, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `News Article Title`, + Content: `
+

Article content with images having query parameters:

+ Image with params + Another image with params + +

More images with various query parameters:

+ Complex query params + Different params + +

Image without query parameters:

+ Clean image + +

Images with various other params:

+ Normal 1 + Normal 2 +
`, + } + ApplyContentRewriteRules(testEntry, `strip_image_query_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsNoChanges(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Article Without Images`, + Content: `

No images here:

+
Just some text content
+ A link`, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Article Without Images`, + Content: `

No images here:

+
Just some text content
+ A link`, + } + ApplyContentRewriteRules(testEntry, `strip_image_query_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsEdgeCases(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Edge Cases`, + Content: ` +

Edge cases for image query parameter stripping:

+ + + Multiple params + + + Complex params + Other params + + + Middle params + + + Clean image + `, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Edge Cases`, + Content: `

Edge cases for image query parameter stripping:

+ + + Multiple params + + + Complex params + Other params + + + Middle params + + + Clean image + `, + } + ApplyContentRewriteRules(testEntry, `strip_image_query_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestStripImageQueryParamsSimple(t *testing.T) { + testEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Simple Test`, + Content: ` +

Testing query parameter stripping:

+ + + With blur zero + With blur fifty + No blur param + No params at all + `, + } + + controlEntry := &model.Entry{ + URL: "https://example.org/article", + Title: `Simple Test`, + Content: `

Testing query parameter stripping:

+ + + With blur zero + With blur fifty + No blur param + No params at all + `, + } + ApplyContentRewriteRules(testEntry, `strip_image_query_params`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +}