mirror of
https://github.com/miniflux/v2.git
synced 2025-09-30 19:22:11 +00:00
feat(reader): add content rewrite rule to strip query params from blurry placeholder images
This commit is contained in:
parent
e279b955c4
commit
a8d539ec62
3 changed files with 205 additions and 0 deletions
|
@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
|
|||
entry.Title = titlelize(entry.Title)
|
||||
case "fix_ghost_cards":
|
||||
entry.Content = fixGhostCards(entry.Content)
|
||||
case "strip_image_query_params":
|
||||
entry.Content = stripImageQueryParams(entry.Content)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"log/slog"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode"
|
||||
|
||||
|
@ -547,3 +548,50 @@ func fixGhostCards(entryContent string) string {
|
|||
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
|
||||
return strings.TrimSpace(output)
|
||||
}
|
||||
|
||||
func stripImageQueryParams(entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
changed := false
|
||||
|
||||
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
||||
srcAttr, exists := img.Attr("src")
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
|
||||
parsedURL, err := url.Parse(srcAttr)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Only strip query parameters if this is a blurry placeholder image
|
||||
if parsedURL.RawQuery != "" {
|
||||
queryParams, err := url.ParseQuery(parsedURL.RawQuery)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if there's a blur parameter with a non-zero value
|
||||
blurValues, hasBlur := queryParams["blur"]
|
||||
if hasBlur && len(blurValues) > 0 {
|
||||
blurValue, err := strconv.Atoi(blurValues[0])
|
||||
if err == nil && blurValue > 0 {
|
||||
parsedURL.RawQuery = ""
|
||||
img.SetAttr("src", parsedURL.String())
|
||||
changed = true
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
if changed {
|
||||
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
|
|
@ -1248,3 +1248,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
|
|||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStripImageQueryParams applies the `strip_image_query_params` rewrite rule
// to an entry containing several images with assorted query strings. The
// control entry expects the query string to be removed only from the two
// images whose query contains a blur parameter (blur=90 and blur=50); every
// other image URL is expected to be unchanged. The control fixture writes the
// img tags in self-closing form.
func TestStripImageQueryParams(t *testing.T) {
|
||||
testEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `News Article Title`,
|
||||
Content: `
|
||||
<article>
|
||||
<p>Article content with images having query parameters:</p>
|
||||
<img src="https://example.org/images/image1.jpg?width=200&height=113&q=80&blur=90" alt="Image with params">
|
||||
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params">
|
||||
|
||||
<p>More images with various query parameters:</p>
|
||||
<img src="https://example.org/image123.jpg?blur=50&size=small&format=webp" alt="Complex query params">
|
||||
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params">
|
||||
|
||||
<p>Image without query parameters:</p>
|
||||
<img src="https://example.org/single-image.jpg" alt="Clean image">
|
||||
|
||||
<p>Images with various other params:</p>
|
||||
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1">
|
||||
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2">
|
||||
</article>`,
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `News Article Title`,
|
||||
Content: `<article>
|
||||
<p>Article content with images having query parameters:</p>
|
||||
<img src="https://example.org/images/image1.jpg" alt="Image with params"/>
|
||||
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params"/>
|
||||
|
||||
<p>More images with various query parameters:</p>
|
||||
<img src="https://example.org/image123.jpg" alt="Complex query params"/>
|
||||
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params"/>
|
||||
|
||||
<p>Image without query parameters:</p>
|
||||
<img src="https://example.org/single-image.jpg" alt="Clean image"/>
|
||||
|
||||
<p>Images with various other params:</p>
|
||||
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1"/>
|
||||
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2"/>
|
||||
</article>`,
|
||||
}
|
||||
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStripImageQueryParamsNoChanges applies the `strip_image_query_params`
// rewrite rule to an entry whose content contains no img elements and expects
// the entry to be identical to the control entry, i.e. the content is left
// as it was.
func TestStripImageQueryParamsNoChanges(t *testing.T) {
|
||||
testEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Article Without Images`,
|
||||
Content: `<p>No images here:</p>
|
||||
<div>Just some text content</div>
|
||||
<a href="https://example.org">A link</a>`,
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Article Without Images`,
|
||||
Content: `<p>No images here:</p>
|
||||
<div>Just some text content</div>
|
||||
<a href="https://example.org">A link</a>`,
|
||||
}
|
||||
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStripImageQueryParamsEdgeCases applies the `strip_image_query_params`
// rewrite rule to images mixed with HTML comments. The control entry expects:
//   - blur=80 (first parameter) and blur=30 (in the middle of the query) to
//     have their whole query string removed;
//   - the upper-case BLUR=60 URL to be left unchanged;
//   - URLs without a blur parameter, or without any query, to be unchanged;
//   - the HTML comments to be preserved.
func TestStripImageQueryParamsEdgeCases(t *testing.T) {
|
||||
testEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Edge Cases`,
|
||||
Content: `
|
||||
<p>Edge cases for image query parameter stripping:</p>
|
||||
|
||||
<!-- Various query parameters -->
|
||||
<img src="https://example.org/image1.jpg?blur=80&width=300" alt="Multiple params">
|
||||
|
||||
<!-- Complex query parameters -->
|
||||
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params">
|
||||
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params">
|
||||
|
||||
<!-- Query params in middle of string -->
|
||||
<img src="https://example.org/image4.jpg?size=large&blur=30&format=webp&quality=90" alt="Middle params">
|
||||
|
||||
<!-- Image without query params -->
|
||||
<img src="https://example.org/clean.jpg" alt="Clean image">
|
||||
`,
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Edge Cases`,
|
||||
Content: `<p>Edge cases for image query parameter stripping:</p>
|
||||
|
||||
<!-- Various query parameters -->
|
||||
<img src="https://example.org/image1.jpg" alt="Multiple params"/>
|
||||
|
||||
<!-- Complex query parameters -->
|
||||
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params"/>
|
||||
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params"/>
|
||||
|
||||
<!-- Query params in middle of string -->
|
||||
<img src="https://example.org/image4.jpg" alt="Middle params"/>
|
||||
|
||||
<!-- Image without query params -->
|
||||
<img src="https://example.org/clean.jpg" alt="Clean image"/>
|
||||
`,
|
||||
}
|
||||
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
// TestStripImageQueryParamsSimple applies the `strip_image_query_params`
// rewrite rule to four images and expects, per the control entry:
//   - blur=0 to keep its query string;
//   - blur=50 to have its whole query string removed;
//   - a URL without a blur parameter to be unchanged;
//   - a URL without any query string to be unchanged.
func TestStripImageQueryParamsSimple(t *testing.T) {
|
||||
testEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Simple Test`,
|
||||
Content: `
|
||||
<p>Testing query parameter stripping:</p>
|
||||
|
||||
<!-- Images with various query parameters -->
|
||||
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero">
|
||||
<img src="https://example.org/test2.jpg?blur=50&width=300&format=webp" alt="With blur fifty">
|
||||
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param">
|
||||
<img src="https://example.org/test4.jpg" alt="No params at all">
|
||||
`,
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
URL: "https://example.org/article",
|
||||
Title: `Simple Test`,
|
||||
Content: `<p>Testing query parameter stripping:</p>
|
||||
|
||||
<!-- Images with various query parameters -->
|
||||
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero"/>
|
||||
<img src="https://example.org/test2.jpg" alt="With blur fifty"/>
|
||||
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param"/>
|
||||
<img src="https://example.org/test4.jpg" alt="No params at all"/>
|
||||
`,
|
||||
}
|
||||
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue