mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Improve sanitizer to remove style tag contents.
See #157. Refactored how blacklisted tags are handled so they're easier to manage in the future.
This commit is contained in:
parent
d847b10e32
commit
c9131b0e89
2 changed files with 31 additions and 8 deletions
|
@ -25,7 +25,7 @@ func Sanitize(baseURL, input string) string {
|
||||||
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
||||||
var buffer bytes.Buffer
|
var buffer bytes.Buffer
|
||||||
var tagStack []string
|
var tagStack []string
|
||||||
scriptTagDepth := 0
|
blacklistedTagDepth := 0
|
||||||
|
|
||||||
for {
|
for {
|
||||||
if tokenizer.Next() == html.ErrorToken {
|
if tokenizer.Next() == html.ErrorToken {
|
||||||
|
@ -40,7 +40,7 @@ func Sanitize(baseURL, input string) string {
|
||||||
token := tokenizer.Token()
|
token := tokenizer.Token()
|
||||||
switch token.Type {
|
switch token.Type {
|
||||||
case html.TextToken:
|
case html.TextToken:
|
||||||
if scriptTagDepth > 0 {
|
if blacklistedTagDepth > 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,15 +60,15 @@ func Sanitize(baseURL, input string) string {
|
||||||
|
|
||||||
tagStack = append(tagStack, tagName)
|
tagStack = append(tagStack, tagName)
|
||||||
}
|
}
|
||||||
} else if isScriptTag(tagName) {
|
} else if isBlacklistedTag(tagName) {
|
||||||
scriptTagDepth++
|
blacklistedTagDepth++
|
||||||
}
|
}
|
||||||
case html.EndTagToken:
|
case html.EndTagToken:
|
||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
if isValidTag(tagName) && inList(tagName, tagStack) {
|
if isValidTag(tagName) && inList(tagName, tagStack) {
|
||||||
buffer.WriteString(fmt.Sprintf("</%s>", tagName))
|
buffer.WriteString(fmt.Sprintf("</%s>", tagName))
|
||||||
} else if isScriptTag(tagName) {
|
} else if isBlacklistedTag(tagName) {
|
||||||
scriptTagDepth--
|
blacklistedTagDepth--
|
||||||
}
|
}
|
||||||
case html.SelfClosingTagToken:
|
case html.SelfClosingTagToken:
|
||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
|
@ -394,6 +394,19 @@ func rewriteIframeURL(link string) string {
|
||||||
return link
|
return link
|
||||||
}
|
}
|
||||||
|
|
||||||
func isScriptTag(tagName string) bool {
|
// Blacklisted tags remove the tag and all descendants.
|
||||||
return tagName == "script" || tagName == "noscript"
|
func isBlacklistedTag(tagName string) bool {
|
||||||
|
blacklist := []string{
|
||||||
|
"noscript",
|
||||||
|
"script",
|
||||||
|
"style",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, element := range blacklist {
|
||||||
|
if element == tagName {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,3 +232,13 @@ func TestReplaceScript(t *testing.T) {
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReplaceStyle(t *testing.T) {
|
||||||
|
input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
|
||||||
|
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||||
|
output := Sanitize("http://example.org/", input)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue