mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
feat: remove the ref
parameter from url
The ref parameter is added to outbound links by (at least) Ghost (https://forum.ghost.org/t/ref-parameter-being-added-to-links/38335). Examples: - https://blog.exploits.club/exploits-club-weekly-newsletter-66-mitigations-galore-dirtycow-revisited-program-analysis-for-uafs-and-more/ - https://labs.watchtowr.com/is-the-sofistication-in-the-room-with-us-x-forwarded-for-and-ivanti-connect-secure-cve-2025-22457/
This commit is contained in:
parent
3de9629a49
commit
8e345ccd7b
4 changed files with 68 additions and 4 deletions
|
@ -52,7 +52,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(entry.URL); err == nil {
|
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(feed.FeedURL, feed.SiteURL, entry.URL); err == nil {
|
||||||
entry.URL = cleanedURL
|
entry.URL = cleanedURL
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -217,7 +217,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(value); err == nil {
|
// TODO: pass the feed URL as the second argument instead of passing baseURL twice.
|
||||||
|
if cleanedURL, err := urlcleaner.RemoveTrackingParameters(baseURL, baseURL, value); err == nil {
|
||||||
value = cleanedURL
|
value = cleanedURL
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,7 +89,13 @@ var trackingParams = map[string]bool{
|
||||||
"_branch_referrer": true,
|
"_branch_referrer": true,
|
||||||
}
|
}
|
||||||
|
|
||||||
func RemoveTrackingParameters(inputURL string) (string, error) {
|
// Outbound tracking parameters append the originating website's URL to outbound links.
|
||||||
|
var trackingParamsOutbound = map[string]bool{
|
||||||
|
// Ghost
|
||||||
|
"ref": true,
|
||||||
|
}
|
||||||
|
|
||||||
|
func RemoveTrackingParameters(baseUrl, feedUrl, inputURL string) (string, error) {
|
||||||
parsedURL, err := url.Parse(inputURL)
|
parsedURL, err := url.Parse(inputURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
|
return "", fmt.Errorf("urlcleaner: error parsing URL: %v", err)
|
||||||
|
@ -99,6 +105,15 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
|
||||||
return inputURL, nil
|
return inputURL, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parsedBaseUrl, err := url.Parse(baseUrl)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("urlcleaner: error parsing base URL: %v", err)
|
||||||
|
}
|
||||||
|
parsedFeedUrl, err := url.Parse(feedUrl)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("urlcleaner: error parsing feed URL: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
queryParams := parsedURL.Query()
|
queryParams := parsedURL.Query()
|
||||||
hasTrackers := false
|
hasTrackers := false
|
||||||
|
|
||||||
|
@ -109,6 +124,16 @@ func RemoveTrackingParameters(inputURL string) (string, error) {
|
||||||
queryParams.Del(param)
|
queryParams.Del(param)
|
||||||
hasTrackers = true
|
hasTrackers = true
|
||||||
}
|
}
|
||||||
|
if trackingParamsOutbound[lowerParam] {
|
||||||
|
// handle duplicate parameters like ?a=b&a=c&a=d…
|
||||||
|
for _, value := range queryParams[param] {
|
||||||
|
if value == parsedBaseUrl.Hostname() || value == parsedFeedUrl.Hostname() {
|
||||||
|
queryParams.Del(param)
|
||||||
|
hasTrackers = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do not modify the URL if there are no tracking parameters
|
// Do not modify the URL if there are no tracking parameters
|
||||||
|
|
|
@ -14,6 +14,8 @@ func TestRemoveTrackingParams(t *testing.T) {
|
||||||
name string
|
name string
|
||||||
input string
|
input string
|
||||||
expected string
|
expected string
|
||||||
|
baseUrl string
|
||||||
|
feedUrl string
|
||||||
strictComparison bool
|
strictComparison bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
|
@ -62,28 +64,64 @@ func TestRemoveTrackingParams(t *testing.T) {
|
||||||
input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
|
input: "https://example.com/page?name=John%20Doe&utm_source=newsletter",
|
||||||
expected: "https://example.com/page?name=John+Doe",
|
expected: "https://example.com/page?name=John+Doe",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "ref parameter for another url",
|
||||||
|
input: "https://example.com/page?ref=test.com",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
|
expected: "https://example.com/page?ref=test.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ref parameter for feed url",
|
||||||
|
input: "https://example.com/page?ref=feed.com",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
|
expected: "https://example.com/page",
|
||||||
|
feedUrl: "http://feed.com",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ref parameter for site url",
|
||||||
|
input: "https://example.com/page?ref=example.com",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
|
expected: "https://example.com/page",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ref parameter for base url",
|
||||||
|
input: "https://example.com/page?ref=example.com",
|
||||||
|
expected: "https://example.com/page",
|
||||||
|
baseUrl: "https://example.com",
|
||||||
|
feedUrl: "https://feedburned.com/example",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "ref parameter for base url on subdomain",
|
||||||
|
input: "https://blog.exploits.club/some-path?ref=blog.exploits.club",
|
||||||
|
expected: "https://blog.exploits.club/some-path",
|
||||||
|
baseUrl: "https://blog.exploits.club/some-path",
|
||||||
|
feedUrl: "https://feedburned.com/exploit.club",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "Non-standard URL parameter with no tracker",
|
name: "Non-standard URL parameter with no tracker",
|
||||||
input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
input: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
||||||
expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
expected: "https://example.com/foo.jpg?crop/1420x708/format/webp",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
strictComparison: true,
|
strictComparison: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Invalid URL",
|
name: "Invalid URL",
|
||||||
input: "https://example|org/",
|
input: "https://example|org/",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
expected: "",
|
expected: "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Non-HTTP URL",
|
name: "Non-HTTP URL",
|
||||||
input: "mailto:user@example.org",
|
input: "mailto:user@example.org",
|
||||||
expected: "mailto:user@example.org",
|
expected: "mailto:user@example.org",
|
||||||
|
baseUrl: "https://example.com/page",
|
||||||
strictComparison: true,
|
strictComparison: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
result, err := RemoveTrackingParameters(tt.input)
|
result, err := RemoveTrackingParameters(tt.baseUrl, tt.feedUrl, tt.input)
|
||||||
if tt.expected == "" {
|
if tt.expected == "" {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Errorf("Expected an error for invalid URL, but got none")
|
t.Errorf("Expected an error for invalid URL, but got none")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue