1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

sanitizer: handle image URLs that contain commas in the srcset attribute

This commit is contained in:
Frédéric Guillot 2022-07-04 12:48:48 -07:00
parent d85908e3de
commit 806a069785
6 changed files with 212 additions and 88 deletions

View file

@ -5,18 +5,16 @@
package proxy // import "miniflux.app/proxy"
import (
"regexp"
"strings"
"miniflux.app/config"
"miniflux.app/reader/sanitizer"
"miniflux.app/url"
"github.com/PuerkitoBio/goquery"
"github.com/gorilla/mux"
)
// regexSplitSrcset matches a comma followed by one or more whitespace
// characters. It is used by the regex-based proxifySourceSet implementation
// to split a srcset attribute value into individual sources.
var regexSplitSrcset = regexp.MustCompile(`,\s+`)
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
func ImageProxyRewriter(router *mux.Router, data string) string {
proxyImages := config.Opts.ProxyImages()
@ -30,24 +28,20 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
}
doc.Find("img").Each(func(i int, img *goquery.Selection) {
if srcAttr, ok := img.Attr("src"); ok {
if !isDataURL(srcAttr) && (proxyImages == "all" || !url.IsHTTPS(srcAttr)) {
img.SetAttr("src", ProxifyURL(router, srcAttr))
if srcAttrValue, ok := img.Attr("src"); ok {
if !isDataURL(srcAttrValue) && (proxyImages == "all" || !url.IsHTTPS(srcAttrValue)) {
img.SetAttr("src", ProxifyURL(router, srcAttrValue))
}
}
if srcsetAttr, ok := img.Attr("srcset"); ok {
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
proxifySourceSet(img, router, srcsetAttr)
}
if srcsetAttrValue, ok := img.Attr("srcset"); ok {
proxifySourceSet(img, router, proxyImages, srcsetAttrValue)
}
})
doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
if srcsetAttr, ok := sourceElement.Attr("srcset"); ok {
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
proxifySourceSet(sourceElement, router, srcsetAttr)
}
if srcsetAttrValue, ok := sourceElement.Attr("srcset"); ok {
proxifySourceSet(sourceElement, router, proxyImages, srcsetAttrValue)
}
})
@ -59,30 +53,16 @@ func ImageProxyRewriter(router *mux.Router, data string) string {
return output
}
// proxifySourceSet rewrites the image URLs contained in a srcset attribute
// value and stores the result in the "srcset" attribute of element.
//
// NOTE(review): this span is a rendered diff hunk without +/- markers, so two
// versions of the function are interleaved below. The three-argument,
// regex-splitting version appears to be the one being removed and the
// four-argument version built on sanitizer.ParseSrcSetAttribute the one being
// added (the call sites above pass proxyImages) — confirm against the
// repository before relying on this.
func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) {
var proxifiedSources []string
func proxifySourceSet(element *goquery.Selection, router *mux.Router, proxyImages, srcsetAttrValue string) {
imageCandidates := sanitizer.ParseSrcSetAttribute(srcsetAttrValue)
// Regex-based version: split the value on a comma followed by whitespace,
// then treat the first space-separated token of each source as the URL and
// the second token, when present, as its descriptor.
for _, source := range regexSplitSrcset.Split(attributeValue, -1) {
parts := strings.Split(strings.TrimSpace(source), " ")
nbParts := len(parts)
if nbParts > 0 {
rewrittenSource := parts[0]
// URLs for which isDataURL returns true are kept unchanged.
if !isDataURL(rewrittenSource) {
rewrittenSource = ProxifyURL(router, rewrittenSource)
}
if nbParts > 1 {
rewrittenSource += " " + parts[1]
}
proxifiedSources = append(proxifiedSources, rewrittenSource)
// Parser-based version: a candidate is passed through ProxifyURL only when
// isDataURL returns false for it and either proxyImages is "all" or
// url.IsHTTPS returns false for its URL.
for _, imageCandidate := range imageCandidates {
if !isDataURL(imageCandidate.ImageURL) && (proxyImages == "all" || !url.IsHTTPS(imageCandidate.ImageURL)) {
imageCandidate.ImageURL = ProxifyURL(router, imageCandidate.ImageURL)
}
}
// Regex-based version: the attribute is only overwritten when at least one
// source was collected.
if len(proxifiedSources) > 0 {
element.SetAttr("srcset", strings.Join(proxifiedSources, ", "))
}
// Parser-based version: the attribute is always set to the String() form of
// the candidates.
element.SetAttr("srcset", imageCandidates.String())
}
func isDataURL(s string) bool {

View file

@ -234,8 +234,31 @@ func TestProxyFilterWithPictureSource(t *testing.T) {
r := mux.NewRouter()
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w"></picture>`
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w"/></picture>`
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w, https://website/some,image.png 2x"></picture>`
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w, /proxy/aHR0cHM6Ly93ZWJzaXRlL3NvbWUsaW1hZ2UucG5n 2x"/></picture>`
output := ImageProxyRewriter(r, input)
if expected != output {
t.Errorf(`Not expected output: got %s`, output)
}
}
func TestProxyFilterOnlyNonHTTPWithPictureSource(t *testing.T) {
os.Clearenv()
os.Setenv("PROXY_IMAGES", "https")
var err error
parser := config.NewParser()
config.Opts, err = parser.ParseEnvironmentVariables()
if err != nil {
t.Fatalf(`Parsing failure: %v`, err)
}
r := mux.NewRouter()
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
input := `<picture><source srcset="http://website/folder/image2.png 656w, https://website/some,image.png 2x"></picture>`
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, https://website/some,image.png 2x"/></picture>`
output := ImageProxyRewriter(r, input)
if expected != output {