mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
refactor(sanitizer): use global variables to avoid recreating slices on every call
This commit is contained in:
parent
ac44507af2
commit
3538c4271b
2 changed files with 119 additions and 79 deletions
|
@ -18,7 +18,7 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
tagAllowList = map[string][]string{
|
||||
allowedHTMLTagsAndAttributes = map[string][]string{
|
||||
"a": {"href", "title", "id"},
|
||||
"abbr": {"title"},
|
||||
"acronym": {"title"},
|
||||
|
@ -125,6 +125,78 @@ var (
|
|||
"youtube-nocookie.com": {},
|
||||
"youtube.com": {},
|
||||
}
|
||||
|
||||
blockedResourceURLSubstrings = []string{
|
||||
"api.flattr.com",
|
||||
"feeds.feedburner.com",
|
||||
"feedsportal.com",
|
||||
"pinterest.com/pin/create/button/",
|
||||
"stats.wordpress.com",
|
||||
"twitter.com/intent/tweet",
|
||||
"twitter.com/share",
|
||||
"www.facebook.com/sharer.php",
|
||||
"www.linkedin.com/shareArticle",
|
||||
}
|
||||
|
||||
validURISchemes = map[string]struct{}{
|
||||
"apt": {},
|
||||
"bitcoin": {},
|
||||
"callto": {},
|
||||
"dav": {},
|
||||
"davs": {},
|
||||
"ed2k": {},
|
||||
"facetime": {},
|
||||
"feed": {},
|
||||
"ftp": {},
|
||||
"geo": {},
|
||||
"git": {},
|
||||
"gopher": {},
|
||||
"http": {},
|
||||
"https": {},
|
||||
"irc": {},
|
||||
"irc6": {},
|
||||
"ircs": {},
|
||||
"itms-apps": {},
|
||||
"itms": {},
|
||||
"magnet": {},
|
||||
"mailto": {},
|
||||
"news": {},
|
||||
"nntp": {},
|
||||
"rtmp": {},
|
||||
"sftp": {},
|
||||
"sip": {},
|
||||
"sips": {},
|
||||
"skype": {},
|
||||
"spotify": {},
|
||||
"ssh": {},
|
||||
"steam": {},
|
||||
"svn": {},
|
||||
"svn+ssh": {},
|
||||
"tel": {},
|
||||
"webcal": {},
|
||||
"xmpp": {},
|
||||
// iOS Apps
|
||||
"opener": {}, // https://www.opener.link
|
||||
"hack": {}, // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
|
||||
}
|
||||
|
||||
blockedTags = map[string]struct{}{
|
||||
"noscript": {},
|
||||
"script": {},
|
||||
"style": {},
|
||||
}
|
||||
|
||||
dataAttributeAllowedPrefixes = []string{
|
||||
"data:image/avif",
|
||||
"data:image/apng",
|
||||
"data:image/png",
|
||||
"data:image/svg",
|
||||
"data:image/svg+xml",
|
||||
"data:image/jpg",
|
||||
"data:image/jpeg",
|
||||
"data:image/gif",
|
||||
"data:image/webp",
|
||||
}
|
||||
)
|
||||
|
||||
type SanitizerOptions struct {
|
||||
|
@ -345,12 +417,12 @@ func getExtraAttributes(tagName string, sanitizerOptions *SanitizerOptions) ([]s
|
|||
}
|
||||
|
||||
func isValidTag(tagName string) bool {
|
||||
_, ok := tagAllowList[tagName]
|
||||
_, ok := allowedHTMLTagsAndAttributes[tagName]
|
||||
return ok
|
||||
}
|
||||
|
||||
func isValidAttribute(tagName, attributeName string) bool {
|
||||
if attributes, ok := tagAllowList[tagName]; ok {
|
||||
if attributes, ok := allowedHTMLTagsAndAttributes[tagName]; ok {
|
||||
return slices.Contains(attributes, attributeName)
|
||||
}
|
||||
return false
|
||||
|
@ -400,66 +472,21 @@ func hasRequiredAttributes(tagName string, attributes []string) bool {
|
|||
}
|
||||
|
||||
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
||||
func hasValidURIScheme(src string) bool {
|
||||
whitelist := []string{
|
||||
"apt:",
|
||||
"bitcoin:",
|
||||
"callto:",
|
||||
"dav:",
|
||||
"davs:",
|
||||
"ed2k://",
|
||||
"facetime://",
|
||||
"feed:",
|
||||
"ftp://",
|
||||
"geo:",
|
||||
"gopher://",
|
||||
"git://",
|
||||
"http://",
|
||||
"https://",
|
||||
"irc://",
|
||||
"irc6://",
|
||||
"ircs://",
|
||||
"itms://",
|
||||
"itms-apps://",
|
||||
"magnet:",
|
||||
"mailto:",
|
||||
"news:",
|
||||
"nntp:",
|
||||
"rtmp://",
|
||||
"sip:",
|
||||
"sips:",
|
||||
"skype:",
|
||||
"spotify:",
|
||||
"ssh://",
|
||||
"sftp://",
|
||||
"steam://",
|
||||
"svn://",
|
||||
"svn+ssh://",
|
||||
"tel:",
|
||||
"webcal://",
|
||||
"xmpp:",
|
||||
|
||||
// iOS Apps
|
||||
"opener://", // https://www.opener.link
|
||||
"hack://", // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
|
||||
func hasValidURIScheme(absoluteURL string) bool {
|
||||
colonIndex := strings.IndexByte(absoluteURL, ':')
|
||||
// Scheme must exist (colonIndex > 0). An empty scheme (e.g. ":foo") is not allowed.
|
||||
if colonIndex <= 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
return slices.ContainsFunc(whitelist, func(prefix string) bool {
|
||||
return strings.HasPrefix(src, prefix)
|
||||
})
|
||||
scheme := absoluteURL[:colonIndex]
|
||||
_, ok := validURISchemes[strings.ToLower(scheme)]
|
||||
return ok
|
||||
}
|
||||
|
||||
func isBlockedResource(src string) bool {
|
||||
blacklist := []string{
|
||||
"feedsportal.com",
|
||||
"api.flattr.com",
|
||||
"stats.wordpress.com",
|
||||
"twitter.com/share",
|
||||
"feeds.feedburner.com",
|
||||
}
|
||||
|
||||
return slices.ContainsFunc(blacklist, func(element string) bool {
|
||||
return strings.Contains(src, element)
|
||||
func isBlockedResource(absoluteURL string) bool {
|
||||
return slices.ContainsFunc(blockedResourceURLSubstrings, func(element string) bool {
|
||||
return strings.Contains(absoluteURL, element)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -509,13 +536,8 @@ func rewriteIframeURL(link string) string {
|
|||
}
|
||||
|
||||
func isBlockedTag(tagName string) bool {
|
||||
blacklist := []string{
|
||||
"noscript",
|
||||
"script",
|
||||
"style",
|
||||
}
|
||||
|
||||
return slices.Contains(blacklist, tagName)
|
||||
_, ok := blockedTags[tagName]
|
||||
return ok
|
||||
}
|
||||
|
||||
func sanitizeSrcsetAttr(baseURL, value string) string {
|
||||
|
@ -531,20 +553,12 @@ func sanitizeSrcsetAttr(baseURL, value string) string {
|
|||
}
|
||||
|
||||
func isValidDataAttribute(value string) bool {
|
||||
var dataAttributeAllowList = []string{
|
||||
"data:image/avif",
|
||||
"data:image/apng",
|
||||
"data:image/png",
|
||||
"data:image/svg",
|
||||
"data:image/svg+xml",
|
||||
"data:image/jpg",
|
||||
"data:image/jpeg",
|
||||
"data:image/gif",
|
||||
"data:image/webp",
|
||||
for _, prefix := range dataAttributeAllowedPrefixes {
|
||||
if strings.HasPrefix(value, prefix) {
|
||||
return true
|
||||
}
|
||||
return slices.ContainsFunc(dataAttributeAllowList, func(prefix string) bool {
|
||||
return strings.HasPrefix(value, prefix)
|
||||
})
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isPositiveInteger(value string) bool {
|
||||
|
|
|
@ -887,3 +887,29 @@ func TestInvalidMathMLXMLNamespace(t *testing.T) {
|
|||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockedResourcesSubstrings(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||
expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||
expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue