1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00
jvoisin 2025-04-04 16:49:52 +02:00
parent 3de9629a49
commit 8e345ccd7b
4 changed files with 68 additions and 4 deletions

View file

@ -52,7 +52,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
continue continue
} }
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(entry.URL); err == nil { if cleanedURL, err := urlcleaner.RemoveTrackingParameters(feed.FeedURL, feed.SiteURL, entry.URL); err == nil {
entry.URL = cleanedURL entry.URL = cleanedURL
} }

View file

@ -217,7 +217,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
continue continue
} }
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(value); err == nil { // TODO use feedURL instead of baseURL twice.
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(baseURL, baseURL, value); err == nil {
value = cleanedURL value = cleanedURL
} }
} }

View file

@ -89,7 +89,13 @@ var trackingParams = map[string]bool{
"_branch_referrer": true, "_branch_referrer": true,
} }
func RemoveTrackingParameters(inputURL string) (string, error) { // Outbound tracking parameters are added by a website to its outbound links and carry that website's own hostname as their value.
var trackingParamsOutbound = map[string]bool{
// Ghost: "ref" is only removed by RemoveTrackingParameters when one of its
// values equals the hostname of the base URL or of the feed URL; any other
// value (e.g. a referral to an unrelated site) is left untouched.
"ref": true,
}
func RemoveTrackingParameters(baseUrl, feedUrl, inputURL string) (string, error) {
parsedURL, err := url.Parse(inputURL) parsedURL, err := url.Parse(inputURL)
if err != nil { if err != nil {
return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err) return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
@ -99,6 +105,15 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
return inputURL, nil return inputURL, nil
} }
parsedBaseUrl, err := url.Parse(baseUrl)
if err != nil {
return "", fmt.Errorf("urlcleaner: error parsing base URL: %v", err)
}
parsedFeedUrl, err := url.Parse(feedUrl)
if err != nil {
return "", fmt.Errorf("urlcleaner: error parsing feed URL: %v", err)
}
queryParams := parsedURL.Query() queryParams := parsedURL.Query()
hasTrackers := false hasTrackers := false
@ -109,6 +124,16 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
queryParams.Del(param) queryParams.Del(param)
hasTrackers = true hasTrackers = true
} }
if trackingParamsOutbound[lowerParam] {
// handle duplicate parameters like ?a=b&a=c&a=d…
for _, value := range queryParams[param] {
if value == parsedBaseUrl.Hostname() || value == parsedFeedUrl.Hostname() {
queryParams.Del(param)
hasTrackers = true
break
}
}
}
} }
// Do not modify the URL if there are no tracking parameters // Do not modify the URL if there are no tracking parameters

View file

@ -14,6 +14,8 @@ func TestRemoveTrackingParams(t *testing.T) {
name string name string
input string input string
expected string expected string
baseUrl string
feedUrl string
strictComparison bool strictComparison bool
}{ }{
{ {
@ -62,28 +64,64 @@ func TestRemoveTrackingParams(t *testing.T) {
input: "https://example.com/page?name=John%20Doe&utm_source=newsletter", input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
expected: "https://example.com/page?name=John+Doe", expected: "https://example.com/page?name=John+Doe",
}, },
{
name: "ref parameter for another url",
input: "https://example.com/page?ref=test.com",
baseUrl: "https://example.com/page",
expected: "https://example.com/page?ref=test.com",
},
{
name: "ref parameter for feed url",
input: "https://example.com/page?ref=feed.com",
baseUrl: "https://example.com/page",
expected: "https://example.com/page",
feedUrl: "http://feed.com",
},
{
name: "ref parameter for site url",
input: "https://example.com/page?ref=example.com",
baseUrl: "https://example.com/page",
expected: "https://example.com/page",
},
{
name: "ref parameter for base url",
input: "https://example.com/page?ref=example.com",
expected: "https://example.com/page",
baseUrl: "https://example.com",
feedUrl: "https://feedburned.com/example",
},
{
name: "ref parameter for base url on subdomain",
input: "https://blog.exploits.club/some-path?ref=blog.exploits.club",
expected: "https://blog.exploits.club/some-path",
baseUrl: "https://blog.exploits.club/some-path",
feedUrl: "https://feedburned.com/exploit.club",
},
{ {
name: "Non-standard URL parameter with no tracker", name: "Non-standard URL parameter with no tracker",
input: "https://example.com/foo.jpg?crop/1420x708/format/webp", input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
expected: "https://example.com/foo.jpg?crop/1420x708/format/webp", expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
baseUrl: "https://example.com/page",
strictComparison: true, strictComparison: true,
}, },
{ {
name: "Invalid URL", name: "Invalid URL",
input: "https://example|org/", input: "https://example|org/",
baseUrl: "https://example.com/page",
expected: "", expected: "",
}, },
{ {
name: "Non-HTTP URL", name: "Non-HTTP URL",
input: "mailto:user@example.org", input: "mailto:user@example.org",
expected: "mailto:user@example.org", expected: "mailto:user@example.org",
baseUrl: "https://example.com/page",
strictComparison: true, strictComparison: true,
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result, err := RemoveTrackingParameters(tt.input) result, err := RemoveTrackingParameters(tt.baseUrl, tt.feedUrl, tt.input)
if tt.expected == "" { if tt.expected == "" {
if err == nil { if err == nil {
t.Errorf("Expected an error for invalid URL, but got none") t.Errorf("Expected an error for invalid URL, but got none")