From b296f21e98d284869d0e12d0e177caa8dbc658ad Mon Sep 17 00:00:00 2001 From: jvoisin Date: Tue, 17 Jun 2025 16:34:13 +0200 Subject: [PATCH] refactor(internal): add an urllib.DomainWithoutWWW function --- internal/reader/sanitizer/sanitizer.go | 2 +- internal/reader/scraper/scraper.go | 3 +-- internal/urllib/url.go | 5 +++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go index 677fc218..41191e2f 100644 --- a/internal/reader/sanitizer/sanitizer.go +++ b/internal/reader/sanitizer/sanitizer.go @@ -486,7 +486,7 @@ func isBlockedResource(absoluteURL string) bool { } func isValidIframeSource(iframeSourceURL string) bool { - iframeSourceDomain := strings.TrimPrefix(urllib.Domain(iframeSourceURL), "www.") + iframeSourceDomain := urllib.DomainWithoutWWW(iframeSourceURL) if _, ok := iframeAllowList[iframeSourceDomain]; ok { return true diff --git a/internal/reader/scraper/scraper.go b/internal/reader/scraper/scraper.go index cf17e0c5..6e2e9237 100644 --- a/internal/reader/scraper/scraper.go +++ b/internal/reader/scraper/scraper.go @@ -93,8 +93,7 @@ func findContentUsingCustomRules(page io.Reader, rules string) (baseURL string, } func getPredefinedScraperRules(websiteURL string) string { - urlDomain := urllib.Domain(websiteURL) - urlDomain = strings.TrimPrefix(urlDomain, "www.") + urlDomain := urllib.DomainWithoutWWW(websiteURL) if rules, ok := predefinedRules[urlDomain]; ok { return rules diff --git a/internal/urllib/url.go b/internal/urllib/url.go index 56d41d96..259b7a9b 100644 --- a/internal/urllib/url.go +++ b/internal/urllib/url.go @@ -83,6 +83,11 @@ func Domain(websiteURL string) string { return parsedURL.Host } +// DomainWithoutWWW returns only the domain part of the given URL, with the "www." prefix removed if present. +func DomainWithoutWWW(websiteURL string) string { + return strings.TrimPrefix(Domain(websiteURL), "www.") +} + // JoinBaseURLAndPath returns a URL string with the provided path elements joined together. func JoinBaseURLAndPath(baseURL, path string) (string, error) { if baseURL == "" {