mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
feat: remove the ref
parameter from url
The ref parameter is used by (at least) Ghost (https://forum.ghost.org/t/ref-parameter-being-added-to-links/38335). Examples: - https://blog.exploits.club/exploits-club-weekly-newsletter-66-mitigations-galore-dirtycow-revisited-program-analysis-for-uafs-and-more/ - https://labs.watchtowr.com/is-the-sofistication-in-the-room-with-us-x-forwarded-for-and-ivanti-connect-secure-cve-2025-22457/
This commit is contained in:
parent
a5e3719773
commit
ff2dfe977b
4 changed files with 68 additions and 4 deletions
|
@ -52,7 +52,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
|
|||
continue
|
||||
}
|
||||
|
||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(entry.URL); err == nil {
|
||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(feed.FeedURL, feed.SiteURL, entry.URL); err == nil {
|
||||
entry.URL = cleanedURL
|
||||
}
|
||||
|
||||
|
|
|
@ -217,7 +217,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
|||
continue
|
||||
}
|
||||
|
||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(value); err == nil {
|
||||
// TODO: pass the feed URL as the second argument instead of passing baseURL twice.
|
||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(baseURL, baseURL, value); err == nil {
|
||||
value = cleanedURL
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,7 +89,13 @@ var trackingParams = map[string]bool{
|
|||
"_branch_referrer": true,
|
||||
}
|
||||
|
||||
func RemoveTrackingParameters(inputURL string) (string, error) {
|
||||
// Outbound tracking parameters append the website's URL to outbound links.
|
||||
var trackingParamsOutbound = map[string]bool{
|
||||
// Ghost
|
||||
"ref": true,
|
||||
}
|
||||
|
||||
func RemoveTrackingParameters(baseUrl, feedUrl, inputURL string) (string, error) {
|
||||
parsedURL, err := url.Parse(inputURL)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
|
||||
|
@ -99,6 +105,15 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
|
|||
return inputURL, nil
|
||||
}
|
||||
|
||||
parsedBaseUrl, err := url.Parse(baseUrl)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("urlcleaner: error parsing base URL: %v", err)
|
||||
}
|
||||
parsedFeedUrl, err := url.Parse(feedUrl)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("urlcleaner: error parsing feed URL: %v", err)
|
||||
}
|
||||
|
||||
queryParams := parsedURL.Query()
|
||||
hasTrackers := false
|
||||
|
||||
|
@ -109,6 +124,16 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
|
|||
queryParams.Del(param)
|
||||
hasTrackers = true
|
||||
}
|
||||
if trackingParamsOutbound[lowerParam] {
|
||||
// handle duplicate parameters like ?a=b&a=c&a=d…
|
||||
for _, value := range queryParams[param] {
|
||||
if value == parsedBaseUrl.Hostname() || value == parsedFeedUrl.Hostname() {
|
||||
queryParams.Del(param)
|
||||
hasTrackers = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Do not modify the URL if there are no tracking parameters
|
||||
|
|
|
@ -14,6 +14,8 @@ func TestRemoveTrackingParams(t *testing.T) {
|
|||
name string
|
||||
input string
|
||||
expected string
|
||||
baseUrl string
|
||||
feedUrl string
|
||||
strictComparison bool
|
||||
}{
|
||||
{
|
||||
|
@ -62,28 +64,64 @@ func TestRemoveTrackingParams(t *testing.T) {
|
|||
input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
|
||||
expected: "https://example.com/page?name=John+Doe",
|
||||
},
|
||||
{
|
||||
name: "ref parameter for another url",
|
||||
input: "https://example.com/page?ref=test.com",
|
||||
baseUrl: "https://example.com/page",
|
||||
expected: "https://example.com/page?ref=test.com",
|
||||
},
|
||||
{
|
||||
name: "ref parameter for feed url",
|
||||
input: "https://example.com/page?ref=feed.com",
|
||||
baseUrl: "https://example.com/page",
|
||||
expected: "https://example.com/page",
|
||||
feedUrl: "http://feed.com",
|
||||
},
|
||||
{
|
||||
name: "ref parameter for site url",
|
||||
input: "https://example.com/page?ref=example.com",
|
||||
baseUrl: "https://example.com/page",
|
||||
expected: "https://example.com/page",
|
||||
},
|
||||
{
|
||||
name: "ref parameter for base url",
|
||||
input: "https://example.com/page?ref=example.com",
|
||||
expected: "https://example.com/page",
|
||||
baseUrl: "https://example.com",
|
||||
feedUrl: "https://feedburned.com/example",
|
||||
},
|
||||
{
|
||||
name: "ref parameter for base url on subdomain",
|
||||
input: "https://blog.exploits.club/some-path?ref=blog.exploits.club",
|
||||
expected: "https://blog.exploits.club/some-path",
|
||||
baseUrl: "https://blog.exploits.club/some-path",
|
||||
feedUrl: "https://feedburned.com/exploit.club",
|
||||
},
|
||||
{
|
||||
name: "Non-standard URL parameter with no tracker",
|
||||
input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
||||
expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
||||
baseUrl: "https://example.com/page",
|
||||
strictComparison: true,
|
||||
},
|
||||
{
|
||||
name: "Invalid URL",
|
||||
input: "https://example|org/",
|
||||
baseUrl: "https://example.com/page",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "Non-HTTP URL",
|
||||
input: "mailto:user@example.org",
|
||||
expected: "mailto:user@example.org",
|
||||
baseUrl: "https://example.com/page",
|
||||
strictComparison: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := RemoveTrackingParameters(tt.input)
|
||||
result, err := RemoveTrackingParameters(tt.baseUrl, tt.feedUrl, tt.input)
|
||||
if tt.expected == "" {
|
||||
if err == nil {
|
||||
t.Errorf("Expected an error for invalid URL, but got none")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue