1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

Improve HTML sanitizer

This commit is contained in:
Frédéric Guillot 2017-11-25 18:08:59 -08:00
parent 1f015d5dfe
commit bd663b43a0
4 changed files with 28 additions and 7 deletions

View file

@ -7,10 +7,11 @@ package sanitizer
import (
"bytes"
"fmt"
"github.com/miniflux/miniflux2/reader/url"
"io"
"strings"
"github.com/miniflux/miniflux2/reader/url"
"golang.org/x/net/html"
)
@ -33,7 +34,7 @@ func Sanitize(baseURL, input string) string {
token := tokenizer.Token()
switch token.Type {
case html.TextToken:
buffer.WriteString(token.Data)
buffer.WriteString(html.EscapeString(token.Data))
case html.StartTagToken:
tagName := token.DataAtom.String()
@ -72,8 +73,8 @@ func Sanitize(baseURL, input string) string {
}
}
func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (attrNames []string, html string) {
var htmlAttrs []string
func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([]string, string) {
var htmlAttrs, attrNames []string
var err error
for _, attribute := range attributes {
@ -99,7 +100,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (a
}
attrNames = append(attrNames, attribute.Key)
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, value))
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
}
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)

View file

@ -142,3 +142,23 @@ func TestPixelTracker(t *testing.T) {
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
}
}
// TestXmlEntities runs Sanitize over a <pre> element whose text holds raw
// double quotes and an existing &gt; entity. The expected output has the
// quotes written as &#34; while the &gt; entity is still present exactly once.
func TestXmlEntities(t *testing.T) {
	const (
		source = `<pre>echo "test" &gt; /etc/hosts</pre>`
		want   = `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
	)

	got := Sanitize("http://example.org/", source)
	if got == want {
		return
	}

	t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
}
// TestEspaceAttributes runs Sanitize over a <td> whose rowspan attribute value
// contains markup (<b>test</b>). The expected output keeps the attribute but
// with the angle brackets in its value written as &lt; and &gt;.
func TestEspaceAttributes(t *testing.T) {
	const baseURL = "http://example.org/"

	markup := `<td rowspan="<b>test</b>">test</td>`
	want := `<td rowspan="&lt;b&gt;test&lt;/b&gt;">test</td>`

	if got := Sanitize(baseURL, markup); want != got {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}