mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
refactor(sanitizer): use global variables to avoid recreating slices on every call
This commit is contained in:
parent
ac44507af2
commit
3538c4271b
2 changed files with 119 additions and 79 deletions
|
@ -18,7 +18,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
tagAllowList = map[string][]string{
|
allowedHTMLTagsAndAttributes = map[string][]string{
|
||||||
"a": {"href", "title", "id"},
|
"a": {"href", "title", "id"},
|
||||||
"abbr": {"title"},
|
"abbr": {"title"},
|
||||||
"acronym": {"title"},
|
"acronym": {"title"},
|
||||||
|
@ -125,6 +125,78 @@ var (
|
||||||
"youtube-nocookie.com": {},
|
"youtube-nocookie.com": {},
|
||||||
"youtube.com": {},
|
"youtube.com": {},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
blockedResourceURLSubstrings = []string{
|
||||||
|
"api.flattr.com",
|
||||||
|
"feeds.feedburner.com",
|
||||||
|
"feedsportal.com",
|
||||||
|
"pinterest.com/pin/create/button/",
|
||||||
|
"stats.wordpress.com",
|
||||||
|
"twitter.com/intent/tweet",
|
||||||
|
"twitter.com/share",
|
||||||
|
"www.facebook.com/sharer.php",
|
||||||
|
"www.linkedin.com/shareArticle",
|
||||||
|
}
|
||||||
|
|
||||||
|
validURISchemes = map[string]struct{}{
|
||||||
|
"apt": {},
|
||||||
|
"bitcoin": {},
|
||||||
|
"callto": {},
|
||||||
|
"dav": {},
|
||||||
|
"davs": {},
|
||||||
|
"ed2k": {},
|
||||||
|
"facetime": {},
|
||||||
|
"feed": {},
|
||||||
|
"ftp": {},
|
||||||
|
"geo": {},
|
||||||
|
"git": {},
|
||||||
|
"gopher": {},
|
||||||
|
"http": {},
|
||||||
|
"https": {},
|
||||||
|
"irc": {},
|
||||||
|
"irc6": {},
|
||||||
|
"ircs": {},
|
||||||
|
"itms-apps": {},
|
||||||
|
"itms": {},
|
||||||
|
"magnet": {},
|
||||||
|
"mailto": {},
|
||||||
|
"news": {},
|
||||||
|
"nntp": {},
|
||||||
|
"rtmp": {},
|
||||||
|
"sftp": {},
|
||||||
|
"sip": {},
|
||||||
|
"sips": {},
|
||||||
|
"skype": {},
|
||||||
|
"spotify": {},
|
||||||
|
"ssh": {},
|
||||||
|
"steam": {},
|
||||||
|
"svn": {},
|
||||||
|
"svn+ssh": {},
|
||||||
|
"tel": {},
|
||||||
|
"webcal": {},
|
||||||
|
"xmpp": {},
|
||||||
|
// iOS Apps
|
||||||
|
"opener": {}, // https://www.opener.link
|
||||||
|
"hack": {}, // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
|
||||||
|
}
|
||||||
|
|
||||||
|
blockedTags = map[string]struct{}{
|
||||||
|
"noscript": {},
|
||||||
|
"script": {},
|
||||||
|
"style": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
dataAttributeAllowedPrefixes = []string{
|
||||||
|
"data:image/avif",
|
||||||
|
"data:image/apng",
|
||||||
|
"data:image/png",
|
||||||
|
"data:image/svg",
|
||||||
|
"data:image/svg+xml",
|
||||||
|
"data:image/jpg",
|
||||||
|
"data:image/jpeg",
|
||||||
|
"data:image/gif",
|
||||||
|
"data:image/webp",
|
||||||
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
type SanitizerOptions struct {
|
type SanitizerOptions struct {
|
||||||
|
@ -345,12 +417,12 @@ func getExtraAttributes(tagName string, sanitizerOptions *SanitizerOptions) ([]s
|
||||||
}
|
}
|
||||||
|
|
||||||
func isValidTag(tagName string) bool {
|
func isValidTag(tagName string) bool {
|
||||||
_, ok := tagAllowList[tagName]
|
_, ok := allowedHTMLTagsAndAttributes[tagName]
|
||||||
return ok
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func isValidAttribute(tagName, attributeName string) bool {
|
func isValidAttribute(tagName, attributeName string) bool {
|
||||||
if attributes, ok := tagAllowList[tagName]; ok {
|
if attributes, ok := allowedHTMLTagsAndAttributes[tagName]; ok {
|
||||||
return slices.Contains(attributes, attributeName)
|
return slices.Contains(attributes, attributeName)
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
@ -400,66 +472,21 @@ func hasRequiredAttributes(tagName string, attributes []string) bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
|
||||||
func hasValidURIScheme(src string) bool {
|
func hasValidURIScheme(absoluteURL string) bool {
|
||||||
whitelist := []string{
|
colonIndex := strings.IndexByte(absoluteURL, ':')
|
||||||
"apt:",
|
// Scheme must exist (colonIndex > 0). An empty scheme (e.g. ":foo") is not allowed.
|
||||||
"bitcoin:",
|
if colonIndex <= 0 {
|
||||||
"callto:",
|
return false
|
||||||
"dav:",
|
|
||||||
"davs:",
|
|
||||||
"ed2k://",
|
|
||||||
"facetime://",
|
|
||||||
"feed:",
|
|
||||||
"ftp://",
|
|
||||||
"geo:",
|
|
||||||
"gopher://",
|
|
||||||
"git://",
|
|
||||||
"http://",
|
|
||||||
"https://",
|
|
||||||
"irc://",
|
|
||||||
"irc6://",
|
|
||||||
"ircs://",
|
|
||||||
"itms://",
|
|
||||||
"itms-apps://",
|
|
||||||
"magnet:",
|
|
||||||
"mailto:",
|
|
||||||
"news:",
|
|
||||||
"nntp:",
|
|
||||||
"rtmp://",
|
|
||||||
"sip:",
|
|
||||||
"sips:",
|
|
||||||
"skype:",
|
|
||||||
"spotify:",
|
|
||||||
"ssh://",
|
|
||||||
"sftp://",
|
|
||||||
"steam://",
|
|
||||||
"svn://",
|
|
||||||
"svn+ssh://",
|
|
||||||
"tel:",
|
|
||||||
"webcal://",
|
|
||||||
"xmpp:",
|
|
||||||
|
|
||||||
// iOS Apps
|
|
||||||
"opener://", // https://www.opener.link
|
|
||||||
"hack://", // https://apps.apple.com/it/app/hack-for-hacker-news-reader/id1464477788?l=en-GB
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return slices.ContainsFunc(whitelist, func(prefix string) bool {
|
scheme := absoluteURL[:colonIndex]
|
||||||
return strings.HasPrefix(src, prefix)
|
_, ok := validURISchemes[strings.ToLower(scheme)]
|
||||||
})
|
return ok
|
||||||
}
|
}
|
||||||
|
|
||||||
func isBlockedResource(src string) bool {
|
func isBlockedResource(absoluteURL string) bool {
|
||||||
blacklist := []string{
|
return slices.ContainsFunc(blockedResourceURLSubstrings, func(element string) bool {
|
||||||
"feedsportal.com",
|
return strings.Contains(absoluteURL, element)
|
||||||
"api.flattr.com",
|
|
||||||
"stats.wordpress.com",
|
|
||||||
"twitter.com/share",
|
|
||||||
"feeds.feedburner.com",
|
|
||||||
}
|
|
||||||
|
|
||||||
return slices.ContainsFunc(blacklist, func(element string) bool {
|
|
||||||
return strings.Contains(src, element)
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -509,13 +536,8 @@ func rewriteIframeURL(link string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func isBlockedTag(tagName string) bool {
|
func isBlockedTag(tagName string) bool {
|
||||||
blacklist := []string{
|
_, ok := blockedTags[tagName]
|
||||||
"noscript",
|
return ok
|
||||||
"script",
|
|
||||||
"style",
|
|
||||||
}
|
|
||||||
|
|
||||||
return slices.Contains(blacklist, tagName)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func sanitizeSrcsetAttr(baseURL, value string) string {
|
func sanitizeSrcsetAttr(baseURL, value string) string {
|
||||||
|
@ -531,20 +553,12 @@ func sanitizeSrcsetAttr(baseURL, value string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func isValidDataAttribute(value string) bool {
|
func isValidDataAttribute(value string) bool {
|
||||||
var dataAttributeAllowList = []string{
|
for _, prefix := range dataAttributeAllowedPrefixes {
|
||||||
"data:image/avif",
|
if strings.HasPrefix(value, prefix) {
|
||||||
"data:image/apng",
|
return true
|
||||||
"data:image/png",
|
|
||||||
"data:image/svg",
|
|
||||||
"data:image/svg+xml",
|
|
||||||
"data:image/jpg",
|
|
||||||
"data:image/jpeg",
|
|
||||||
"data:image/gif",
|
|
||||||
"data:image/webp",
|
|
||||||
}
|
}
|
||||||
return slices.ContainsFunc(dataAttributeAllowList, func(prefix string) bool {
|
}
|
||||||
return strings.HasPrefix(value, prefix)
|
return false
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func isPositiveInteger(value string) bool {
|
func isPositiveInteger(value string) bool {
|
||||||
|
|
|
@ -887,3 +887,29 @@ func TestInvalidMathMLXMLNamespace(t *testing.T) {
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBlockedResourcesSubstrings(t *testing.T) {
|
||||||
|
input := `<p>Before paragraph.</p><img src="http://stats.wordpress.com/something.php" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||||
|
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||||
|
output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
|
}
|
||||||
|
|
||||||
|
input = `<p>Before paragraph.</p><img src="http://twitter.com/share?text=This+is+google+a+search+engine&url=https%3A%2F%2Fwww.google.com" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||||
|
expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||||
|
output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
|
}
|
||||||
|
|
||||||
|
input = `<p>Before paragraph.</p><img src="http://www.facebook.com/sharer.php?u=https%3A%2F%2Fwww.google.com%[title]=This+Is%2C+Google+a+search+engine" alt="Blocked Resource"><p>After paragraph.</p>`
|
||||||
|
expected = `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||||
|
output = SanitizeHTMLWithDefaultOptions("http://example.org/", input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue