1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-01 17:38:37 +00:00

feat: remove well-known URL parameter trackers

This commit is contained in:
Frédéric Guillot 2024-07-19 21:03:33 -07:00
parent 11cafec863
commit c0f6e32a99
5 changed files with 252 additions and 17 deletions

View file

@ -12,6 +12,7 @@ import (
"strings"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/reader/urlcleaner"
"miniflux.app/v2/internal/urllib"
"golang.org/x/net/html"
@ -211,6 +212,10 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
if !hasValidURIScheme(value) || isBlockedResource(value) {
continue
}
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(value); err == nil {
value = cleanedURL
}
}
}

View file

@ -490,6 +490,26 @@ func TestBlacklistedLink(t *testing.T) {
}
}
// TestLinkWithTrackers feeds Sanitize an anchor whose href carries a
// utm_source query parameter and expects the emitted href to no longer
// contain it, alongside the rel/target/referrerpolicy attributes listed in
// the expected markup.
func TestLinkWithTrackers(t *testing.T) {
	const (
		rawHTML = `<p>This link has trackers <a href="https://example.com/page?utm_source=newsletter">Test</a></p>`
		want    = `<p>This link has trackers <a href="https://example.com/page" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Test</a></p>`
	)

	if got := Sanitize("http://example.org/", rawHTML); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestImageSrcWithTrackers feeds Sanitize an image whose src mixes a regular
// query parameter (id) with utm_source, utm_medium and fbclid, and expects
// only id to remain in the emitted src, together with the loading="lazy"
// attribute shown in the expected markup.
func TestImageSrcWithTrackers(t *testing.T) {
	const (
		rawHTML = `<p>This image has trackers <img src="https://example.org/?id=123&utm_source=newsletter&utm_medium=email&fbclid=abc123"></p>`
		want    = `<p>This image has trackers <img src="https://example.org/?id=123" loading="lazy"></p>`
	)

	if got := Sanitize("http://example.org/", rawHTML); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
func TestPixelTracker(t *testing.T) {
input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
expected := `<p> and </p>`