1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-30 19:22:11 +00:00

refactor(sanitizer): minor optimization

- Use a slice instead of a map for the schemes, as the overwhelming majority
  of them will be either http or https, which we can place at the front of the
  slice. This is faster than using a map.
- Simplify hasValidURIScheme by using strings.HasPrefix instead of doing
  strings.IndexByte
- Simplify isBlockedResource by using a simple for loop, instead of a weird
  slices.ContainsFunc+strings.Contains construct.

On my noisy system:

```
goos: linux
goarch: arm64
pkg: miniflux.app/v2/internal/reader/sanitizer
           │   old.txt   │            new.txt            │
           │   sec/op    │   sec/op     vs base          │
Sanitize-8   22.19m ± 4%   21.97m ± 4%  ~ (p=0.948 n=50)
```
This commit is contained in:
jvoisin 2025-09-29 21:11:11 +02:00
parent e279b955c4
commit 512564993c

View file

@ -138,46 +138,51 @@ var (
"linkedin.com/shareArticle",
}
validURISchemes = map[string]struct{}{
"apt": {},
"bitcoin": {},
"callto": {},
"dav": {},
"davs": {},
"ed2k": {},
"facetime": {},
"feed": {},
"ftp": {},
"geo": {},
"git": {},
"gopher": {},
"http": {},
"https": {},
"irc": {},
"irc6": {},
"ircs": {},
"itms-apps": {},
"itms": {},
"magnet": {},
"mailto": {},
"news": {},
"nntp": {},
"rtmp": {},
"sftp": {},
"sip": {},
"sips": {},
"skype": {},
"spotify": {},
"ssh": {},
"steam": {},
"svn": {},
"svn+ssh": {},
"tel": {},
"webcal": {},
"xmpp": {},
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
validURISchemes = []string{
// Most common schemes on top.
"https:",
"http:",
// Then the rest.
"apt:",
"bitcoin:",
"callto:",
"dav:",
"davs:",
"ed2k:",
"facetime:",
"feed:",
"ftp:",
"geo:",
"git:",
"gopher:",
"irc:",
"irc6:",
"ircs:",
"itms-apps:",
"itms:",
"magnet:",
"mailto:",
"news:",
"nntp:",
"rtmp:",
"sftp:",
"sip:",
"sips:",
"skype:",
"spotify:",
"ssh:",
"steam:",
"svn:",
"svn+ssh:",
"tel:",
"webcal:",
"xmpp:",
// iOS Apps
"opener": {}, // https://www.opener.link
"hack": {}, // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
"opener:", // https://www.opener.link
"hack:", // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
}
dataAttributeAllowedPrefixes = []string{
@ -467,23 +472,22 @@ func hasRequiredAttributes(tagName string, attributes []string) bool {
}
}
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
func hasValidURIScheme(absoluteURL string) bool {
colonIndex := strings.IndexByte(absoluteURL, ':')
// Scheme must exist (colonIndex > 0). An empty scheme (e.g. ":foo") is not allowed.
if colonIndex <= 0 {
return false
for _, scheme := range validURISchemes {
if strings.HasPrefix(absoluteURL, scheme) {
return true
}
}
scheme := absoluteURL[:colonIndex]
_, ok := validURISchemes[strings.ToLower(scheme)]
return ok
return false
}
func isBlockedResource(absoluteURL string) bool {
return slices.ContainsFunc(blockedResourceURLSubstrings, func(element string) bool {
return strings.Contains(absoluteURL, element)
})
for _, blockedURL := range blockedResourceURLSubstrings {
if strings.Contains(absoluteURL, blockedURL) {
return true
}
}
return false
}
func isValidIframeSource(iframeSourceURL string) bool {