1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-30 19:22:11 +00:00

feat(reader): add content rewrite rule to strip query params from blurry placeholder images

This commit is contained in:
Axel Verhaeghe 2025-09-29 22:34:54 +02:00
parent e279b955c4
commit a8d539ec62
3 changed files with 205 additions and 0 deletions

View file

@@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
entry.Title = titlelize(entry.Title)
case "fix_ghost_cards":
entry.Content = fixGhostCards(entry.Content)
case "strip_image_query_params":
entry.Content = stripImageQueryParams(entry.Content)
}
}

View file

@@ -10,6 +10,7 @@ import (
"log/slog"
"net/url"
"regexp"
"strconv"
"strings"
"unicode"
@@ -547,3 +548,50 @@ func fixGhostCards(entryContent string) string {
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
return strings.TrimSpace(output)
}
// stripImageQueryParams removes the entire query string from the src
// attribute of every <img> element that looks like a blurry placeholder, i.e.
// whose URL carries a "blur" query parameter with a positive integer value.
//
// Images without a src attribute, without a query string, without a "blur"
// parameter, with "blur=0", or with a non-numeric blur value are left
// untouched. The parameter name is matched exactly as written ("BLUR" does
// not count as "blur").
//
// If no image is rewritten, or if the document cannot be parsed or serialized
// back, entryContent is returned unchanged. Otherwise the result of calling
// Html() on the parsed document's <body> is returned.
func stripImageQueryParams(entryContent string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if err != nil {
		return entryContent
	}

	changed := false
	doc.Find("img").Each(func(_ int, img *goquery.Selection) {
		srcAttr, exists := img.Attr("src")
		if !exists {
			return
		}

		parsedURL, err := url.Parse(srcAttr)
		if err != nil || parsedURL.RawQuery == "" {
			return
		}

		queryParams, err := url.ParseQuery(parsedURL.RawQuery)
		if err != nil {
			return
		}

		// Only the first "blur" value is inspected. A missing parameter yields
		// an empty string, which Atoi rejects, so the image is skipped.
		blurValue, err := strconv.Atoi(queryParams.Get("blur"))
		if err != nil || blurValue <= 0 {
			return
		}

		parsedURL.RawQuery = ""
		img.SetAttr("src", parsedURL.String())
		changed = true
	})

	// Avoid re-serializing (and thereby normalizing) content that was not
	// modified.
	if !changed {
		return entryContent
	}

	output, err := doc.FindMatcher(goquery.Single("body")).Html()
	if err != nil {
		// Keep the original content rather than replacing it with an empty
		// string when the document cannot be rendered.
		return entryContent
	}
	return output
}

View file

@@ -1248,3 +1248,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
// TestStripImageQueryParams applies the strip_image_query_params rule to an
// article mixing images with and without a positive "blur" query parameter.
// Only the images carrying such a parameter are expected to lose their query
// string; every other image keeps its URL (in serialized form).
func TestStripImageQueryParams(t *testing.T) {
	const (
		entryURL   = "https://example.org/article"
		entryTitle = `News Article Title`
	)

	// Expected entry after the rule has run.
	want := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `<article>
<p>Article content with images having query parameters:</p>
<img src="https://example.org/images/image1.jpg" alt="Image with params"/>
<img src="https://example.org/images/image2.jpg?width=800&amp;height=600&amp;q=85" alt="Another image with params"/>
<p>More images with various query parameters:</p>
<img src="https://example.org/image123.jpg" alt="Complex query params"/>
<img src="https://example.org/image123.jpg?size=large&amp;quality=95&amp;cache=123" alt="Different params"/>
<p>Image without query parameters:</p>
<img src="https://example.org/single-image.jpg" alt="Clean image"/>
<p>Images with various other params:</p>
<img src="https://example.org/normal1.jpg?width=300&amp;format=jpg" alt="Normal 1"/>
<img src="https://example.org/normal1.jpg?width=600&amp;quality=high" alt="Normal 2"/>
</article>`,
	}

	// Entry that is rewritten in place by the rule.
	got := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `
<article>
<p>Article content with images having query parameters:</p>
<img src="https://example.org/images/image1.jpg?width=200&height=113&q=80&blur=90" alt="Image with params">
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params">
<p>More images with various query parameters:</p>
<img src="https://example.org/image123.jpg?blur=50&size=small&format=webp" alt="Complex query params">
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params">
<p>Image without query parameters:</p>
<img src="https://example.org/single-image.jpg" alt="Clean image">
<p>Images with various other params:</p>
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1">
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2">
</article>`,
	}

	ApplyContentRewriteRules(got, `strip_image_query_params`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}
// TestStripImageQueryParamsNoChanges checks that content without any <img>
// element comes out of the strip_image_query_params rule exactly as it went
// in.
func TestStripImageQueryParamsNoChanges(t *testing.T) {
	const (
		entryURL   = "https://example.org/article"
		entryTitle = `Article Without Images`
	)

	// Expected entry: identical to the input.
	want := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `<p>No images here:</p>
<div>Just some text content</div>
<a href="https://example.org">A link</a>`,
	}

	// Entry handed to the rule.
	got := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `<p>No images here:</p>
<div>Just some text content</div>
<a href="https://example.org">A link</a>`,
	}

	ApplyContentRewriteRules(got, `strip_image_query_params`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}
// TestStripImageQueryParamsEdgeCases covers less common inputs for the
// strip_image_query_params rule: a "blur" parameter in first or middle
// position is expected to trigger stripping, while an upper-case "BLUR"
// parameter and URLs without any blur parameter are expected to be kept.
func TestStripImageQueryParamsEdgeCases(t *testing.T) {
	const (
		entryURL   = "https://example.org/article"
		entryTitle = `Edge Cases`
	)

	// Expected entry after the rule has run.
	want := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `<p>Edge cases for image query parameter stripping:</p>
<!-- Various query parameters -->
<img src="https://example.org/image1.jpg" alt="Multiple params"/>
<!-- Complex query parameters -->
<img src="https://example.org/image2.jpg?BLUR=60&amp;format=webp&amp;cache=123" alt="Complex params"/>
<img src="https://example.org/image3.jpg?quality=high&amp;version=2" alt="Other params"/>
<!-- Query params in middle of string -->
<img src="https://example.org/image4.jpg" alt="Middle params"/>
<!-- Image without query params -->
<img src="https://example.org/clean.jpg" alt="Clean image"/>
`,
	}

	// Entry that is rewritten in place by the rule.
	got := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `
<p>Edge cases for image query parameter stripping:</p>
<!-- Various query parameters -->
<img src="https://example.org/image1.jpg?blur=80&width=300" alt="Multiple params">
<!-- Complex query parameters -->
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params">
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params">
<!-- Query params in middle of string -->
<img src="https://example.org/image4.jpg?size=large&blur=30&format=webp&quality=90" alt="Middle params">
<!-- Image without query params -->
<img src="https://example.org/clean.jpg" alt="Clean image">
`,
	}

	ApplyContentRewriteRules(got, `strip_image_query_params`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}
// TestStripImageQueryParamsSimple exercises the four basic cases of the
// strip_image_query_params rule: blur=0 (kept), a positive blur value
// (query string removed), a query string without blur (kept), and a URL
// without any query string (kept).
func TestStripImageQueryParamsSimple(t *testing.T) {
	const (
		entryURL   = "https://example.org/article"
		entryTitle = `Simple Test`
	)

	// Expected entry after the rule has run.
	want := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `<p>Testing query parameter stripping:</p>
<!-- Images with various query parameters -->
<img src="https://example.org/test1.jpg?blur=0&amp;width=300" alt="With blur zero"/>
<img src="https://example.org/test2.jpg" alt="With blur fifty"/>
<img src="https://example.org/test3.jpg?width=800&amp;quality=high" alt="No blur param"/>
<img src="https://example.org/test4.jpg" alt="No params at all"/>
`,
	}

	// Entry that is rewritten in place by the rule.
	got := &model.Entry{
		URL:   entryURL,
		Title: entryTitle,
		Content: `
<p>Testing query parameter stripping:</p>
<!-- Images with various query parameters -->
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero">
<img src="https://example.org/test2.jpg?blur=50&width=300&format=webp" alt="With blur fifty">
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param">
<img src="https://example.org/test4.jpg" alt="No params at all">
`,
	}

	ApplyContentRewriteRules(got, `strip_image_query_params`)

	if reflect.DeepEqual(got, want) {
		return
	}
	t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, got, want)
}