mirror of
https://github.com/miniflux/v2.git
synced 2025-09-30 19:22:11 +00:00
feat(reader): add content rewrite rule to strip query params from blurry placeholder images
This commit is contained in:
parent
e279b955c4
commit
a8d539ec62
3 changed files with 205 additions and 0 deletions
|
@ -94,6 +94,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
|
||||||
entry.Title = titlelize(entry.Title)
|
entry.Title = titlelize(entry.Title)
|
||||||
case "fix_ghost_cards":
|
case "fix_ghost_cards":
|
||||||
entry.Content = fixGhostCards(entry.Content)
|
entry.Content = fixGhostCards(entry.Content)
|
||||||
|
case "strip_image_query_params":
|
||||||
|
entry.Content = stripImageQueryParams(entry.Content)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
|
||||||
|
@ -547,3 +548,50 @@ func fixGhostCards(entryContent string) string {
|
||||||
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
|
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
|
||||||
return strings.TrimSpace(output)
|
return strings.TrimSpace(output)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func stripImageQueryParams(entryContent string) string {
|
||||||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||||
|
if err != nil {
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
||||||
|
changed := false
|
||||||
|
|
||||||
|
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
||||||
|
srcAttr, exists := img.Attr("src")
|
||||||
|
if !exists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
parsedURL, err := url.Parse(srcAttr)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only strip query parameters if this is a blurry placeholder image
|
||||||
|
if parsedURL.RawQuery != "" {
|
||||||
|
queryParams, err := url.ParseQuery(parsedURL.RawQuery)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if there's a blur parameter with a non-zero value
|
||||||
|
blurValues, hasBlur := queryParams["blur"]
|
||||||
|
if hasBlur && len(blurValues) > 0 {
|
||||||
|
blurValue, err := strconv.Atoi(blurValues[0])
|
||||||
|
if err == nil && blurValue > 0 {
|
||||||
|
parsedURL.RawQuery = ""
|
||||||
|
img.SetAttr("src", parsedURL.String())
|
||||||
|
changed = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if changed {
|
||||||
|
output, _ := doc.FindMatcher(goquery.Single("body")).Html()
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
|
@ -1248,3 +1248,158 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
|
||||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestStripImageQueryParams applies the `strip_image_query_params` rewrite rule
// to an article mixing images with and without a positive blur query parameter.
// It expects the query string to be dropped only from the images carrying
// blur=90 and blur=50, while every other image URL stays as it was.
func TestStripImageQueryParams(t *testing.T) {
|
||||||
|
testEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `News Article Title`,
|
||||||
|
Content: `
|
||||||
|
<article>
|
||||||
|
<p>Article content with images having query parameters:</p>
|
||||||
|
<img src="https://example.org/images/image1.jpg?width=200&height=113&q=80&blur=90" alt="Image with params">
|
||||||
|
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params">
|
||||||
|
|
||||||
|
<p>More images with various query parameters:</p>
|
||||||
|
<img src="https://example.org/image123.jpg?blur=50&size=small&format=webp" alt="Complex query params">
|
||||||
|
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params">
|
||||||
|
|
||||||
|
<p>Image without query parameters:</p>
|
||||||
|
<img src="https://example.org/single-image.jpg" alt="Clean image">
|
||||||
|
|
||||||
|
<p>Images with various other params:</p>
|
||||||
|
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1">
|
||||||
|
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2">
|
||||||
|
</article>`,
|
||||||
|
}
|
||||||
|
|
||||||
|
controlEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `News Article Title`,
|
||||||
|
Content: `<article>
|
||||||
|
<p>Article content with images having query parameters:</p>
|
||||||
|
<img src="https://example.org/images/image1.jpg" alt="Image with params"/>
|
||||||
|
<img src="https://example.org/images/image2.jpg?width=800&height=600&q=85" alt="Another image with params"/>
|
||||||
|
|
||||||
|
<p>More images with various query parameters:</p>
|
||||||
|
<img src="https://example.org/image123.jpg" alt="Complex query params"/>
|
||||||
|
<img src="https://example.org/image123.jpg?size=large&quality=95&cache=123" alt="Different params"/>
|
||||||
|
|
||||||
|
<p>Image without query parameters:</p>
|
||||||
|
<img src="https://example.org/single-image.jpg" alt="Clean image"/>
|
||||||
|
|
||||||
|
<p>Images with various other params:</p>
|
||||||
|
<img src="https://example.org/normal1.jpg?width=300&format=jpg" alt="Normal 1"/>
|
||||||
|
<img src="https://example.org/normal1.jpg?width=600&quality=high" alt="Normal 2"/>
|
||||||
|
</article>`,
|
||||||
|
}
|
||||||
|
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||||
|
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStripImageQueryParamsNoChanges applies the `strip_image_query_params`
// rewrite rule to content that contains no <img> elements and expects the
// entry to come back identical to the input.
func TestStripImageQueryParamsNoChanges(t *testing.T) {
|
||||||
|
testEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Article Without Images`,
|
||||||
|
Content: `<p>No images here:</p>
|
||||||
|
<div>Just some text content</div>
|
||||||
|
<a href="https://example.org">A link</a>`,
|
||||||
|
}
|
||||||
|
|
||||||
|
controlEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Article Without Images`,
|
||||||
|
Content: `<p>No images here:</p>
|
||||||
|
<div>Just some text content</div>
|
||||||
|
<a href="https://example.org">A link</a>`,
|
||||||
|
}
|
||||||
|
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||||
|
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStripImageQueryParamsEdgeCases applies the `strip_image_query_params`
// rewrite rule to images whose blur parameter appears first, in the middle of
// the query string, or under an upper-case `BLUR` key. The expected output
// strips the query for the lower-case blur cases only: the `BLUR=60` image and
// the images without any blur parameter keep their original URLs.
func TestStripImageQueryParamsEdgeCases(t *testing.T) {
|
||||||
|
testEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Edge Cases`,
|
||||||
|
Content: `
|
||||||
|
<p>Edge cases for image query parameter stripping:</p>
|
||||||
|
|
||||||
|
<!-- Various query parameters -->
|
||||||
|
<img src="https://example.org/image1.jpg?blur=80&width=300" alt="Multiple params">
|
||||||
|
|
||||||
|
<!-- Complex query parameters -->
|
||||||
|
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params">
|
||||||
|
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params">
|
||||||
|
|
||||||
|
<!-- Query params in middle of string -->
|
||||||
|
<img src="https://example.org/image4.jpg?size=large&blur=30&format=webp&quality=90" alt="Middle params">
|
||||||
|
|
||||||
|
<!-- Image without query params -->
|
||||||
|
<img src="https://example.org/clean.jpg" alt="Clean image">
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
|
||||||
|
controlEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Edge Cases`,
|
||||||
|
Content: `<p>Edge cases for image query parameter stripping:</p>
|
||||||
|
|
||||||
|
<!-- Various query parameters -->
|
||||||
|
<img src="https://example.org/image1.jpg" alt="Multiple params"/>
|
||||||
|
|
||||||
|
<!-- Complex query parameters -->
|
||||||
|
<img src="https://example.org/image2.jpg?BLUR=60&format=webp&cache=123" alt="Complex params"/>
|
||||||
|
<img src="https://example.org/image3.jpg?quality=high&version=2" alt="Other params"/>
|
||||||
|
|
||||||
|
<!-- Query params in middle of string -->
|
||||||
|
<img src="https://example.org/image4.jpg" alt="Middle params"/>
|
||||||
|
|
||||||
|
<!-- Image without query params -->
|
||||||
|
<img src="https://example.org/clean.jpg" alt="Clean image"/>
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||||
|
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestStripImageQueryParamsSimple applies the `strip_image_query_params`
// rewrite rule to four images: blur=0, blur=50, a query without blur, and no
// query at all. Only the blur=50 image is expected to lose its query string;
// blur=0 is expected to be left untouched.
func TestStripImageQueryParamsSimple(t *testing.T) {
|
||||||
|
testEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Simple Test`,
|
||||||
|
Content: `
|
||||||
|
<p>Testing query parameter stripping:</p>
|
||||||
|
|
||||||
|
<!-- Images with various query parameters -->
|
||||||
|
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero">
|
||||||
|
<img src="https://example.org/test2.jpg?blur=50&width=300&format=webp" alt="With blur fifty">
|
||||||
|
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param">
|
||||||
|
<img src="https://example.org/test4.jpg" alt="No params at all">
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
|
||||||
|
controlEntry := &model.Entry{
|
||||||
|
URL: "https://example.org/article",
|
||||||
|
Title: `Simple Test`,
|
||||||
|
Content: `<p>Testing query parameter stripping:</p>
|
||||||
|
|
||||||
|
<!-- Images with various query parameters -->
|
||||||
|
<img src="https://example.org/test1.jpg?blur=0&width=300" alt="With blur zero"/>
|
||||||
|
<img src="https://example.org/test2.jpg" alt="With blur fifty"/>
|
||||||
|
<img src="https://example.org/test3.jpg?width=800&quality=high" alt="No blur param"/>
|
||||||
|
<img src="https://example.org/test4.jpg" alt="No params at all"/>
|
||||||
|
`,
|
||||||
|
}
|
||||||
|
ApplyContentRewriteRules(testEntry, `strip_image_query_params`)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||||
|
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue