From 5a97bf8b5ee0c33cf9a678a64865f6830df0608f Mon Sep 17 00:00:00 2001 From: Julien Voisin Date: Tue, 30 Sep 2025 04:42:45 +0200 Subject: [PATCH] refactor(sanitizer): simplify `hasValidURIScheme` and `isBlockedResource` functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - use an array instead of a map for the schemes, as the overwhelming majority of them will be either http or https, which we can place in front of the array. This is faster than using a map. - Simplify hasValidURIScheme by using strings.HasPrefix instead of doing strings.IndexByte - Simplify isBlockedResource by using a simple for loop, instead of a weird slices.ContainsFunc+strings.Contains construct. On my noisy system: ``` goos: linux goarch: arm64 pkg: miniflux.app/v2/internal/reader/sanitizer │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ Sanitize-8 22.19m ± 4% 21.97m ± 4% ~ (p=0.948 n=50) ``` --- internal/reader/sanitizer/sanitizer.go | 106 +++++++++++++------------ 1 file changed, 55 insertions(+), 51 deletions(-) diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go index 62d230c7..0551f0ee 100644 --- a/internal/reader/sanitizer/sanitizer.go +++ b/internal/reader/sanitizer/sanitizer.go @@ -138,46 +138,51 @@ var ( "linkedin.com/shareArticle", } - validURISchemes = map[string]struct{}{ - "apt": {}, - "bitcoin": {}, - "callto": {}, - "dav": {}, - "davs": {}, - "ed2k": {}, - "facetime": {}, - "feed": {}, - "ftp": {}, - "geo": {}, - "git": {}, - "gopher": {}, - "http": {}, - "https": {}, - "irc": {}, - "irc6": {}, - "ircs": {}, - "itms-apps": {}, - "itms": {}, - "magnet": {}, - "mailto": {}, - "news": {}, - "nntp": {}, - "rtmp": {}, - "sftp": {}, - "sip": {}, - "sips": {}, - "skype": {}, - "spotify": {}, - "ssh": {}, - "steam": {}, - "svn": {}, - "svn+ssh": {}, - "tel": {}, - "webcal": {}, - "xmpp": {}, + // See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml + validURISchemes = []string{ + 
// Most common schemes on top. + "https:", + "http:", + + // Then the rest. + "apt:", + "bitcoin:", + "callto:", + "dav:", + "davs:", + "ed2k:", + "facetime:", + "feed:", + "ftp:", + "geo:", + "git:", + "gopher:", + "irc:", + "irc6:", + "ircs:", + "itms-apps:", + "itms:", + "magnet:", + "mailto:", + "news:", + "nntp:", + "rtmp:", + "sftp:", + "sip:", + "sips:", + "skype:", + "spotify:", + "ssh:", + "steam:", + "svn:", + "svn+ssh:", + "tel:", + "webcal:", + "xmpp:", + // iOS Apps - "opener": {}, // https://www.opener.link - "hack": {}, // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB + "opener:", // https://www.opener.link + "hack:", // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB } dataAttributeAllowedPrefixes = []string{ @@ -467,23 +472,22 @@ func hasRequiredAttributes(tagName string, attributes []string) bool { } } -// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml func hasValidURIScheme(absoluteURL string) bool { - colonIndex := strings.IndexByte(absoluteURL, ':') - // Scheme must exist (colonIndex > 0). An empty scheme (e.g. ":foo") is not allowed. - if colonIndex <= 0 { - return false + for _, scheme := range validURISchemes { + if strings.HasPrefix(absoluteURL, scheme) { + return true + } } - - scheme := absoluteURL[:colonIndex] - _, ok := validURISchemes[strings.ToLower(scheme)] - return ok + return false } func isBlockedResource(absoluteURL string) bool { - return slices.ContainsFunc(blockedResourceURLSubstrings, func(element string) bool { - return strings.Contains(absoluteURL, element) - }) + for _, blockedURL := range blockedResourceURLSubstrings { + if strings.Contains(absoluteURL, blockedURL) { + return true + } + } + return false } func isValidIframeSource(iframeSourceURL string) bool {