diff --git a/go.mod b/go.mod index af171c2e..fbbe8860 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/prometheus/procfs v0.2.0 // indirect github.com/rylans/getlang v0.0.0-20200505200108-4c3188ff8a2d github.com/stretchr/testify v1.6.1 // indirect + github.com/tdewolff/minify/v2 v2.9.7 // indirect golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/net v0.0.0-20200625001655-4c5254603344 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d diff --git a/go.sum b/go.sum index 62a61bcc..3236b211 100644 --- a/go.sum +++ b/go.sum @@ -34,6 +34,7 @@ github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QH github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cheekybits/is v0.0.0-20150225183255-68e9c0620927/go.mod h1:h/aW8ynjgkuj+NQRlZcDbAbM1ORAbXjXX77sX7T289U= github.com/clbanning/x2j v0.0.0-20191024224557-825249438eec/go.mod h1:jMjuTZXRI4dUb/I5gc9Hdhagfvm9+RyrPryS/auMzxE= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cockroachdb/datadriven v0.0.0-20190809214429-80d97fb3cbaa/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8= @@ -51,6 +52,7 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= +github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs= github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU= github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I= @@ -62,6 +64,7 @@ github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5Kwzbycv github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= @@ -164,6 +167,7 @@ github.com/lib/pq v1.8.0/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20190605223551-bc2310a04743/go.mod h1:qklhhLq1aX+mtWk9cPHPzaBjWImj5ULL6C7HFJtXQMM= github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0UBX0ZE6WURAspgAczcDHrL4= github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ= +github.com/matryer/try v0.0.0-20161228173917-9ac251b645a2/go.mod h1:0KeJpeMD6o+O4hW7qJOT7vyQPKrWmj26uf5wMc/IiIs= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= @@ -268,6 +272,7 @@ github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4k github.com/sony/gobreaker v0.4.1/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= github.com/spf13/cobra v0.0.3/go.mod h1:1l0Ry5zgKvJasoi3XT1TypsSe7PqH0Sj9dhYf7v3XqQ= github.com/spf13/pflag v1.0.1/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/amqp v0.0.0-20190827072141-edfb9018d271/go.mod h1:AZpEONHx3DKn8O/DFsRAY58/XVQiIPMTMB1SddzLXVw= github.com/streadway/handy v0.0.0-20190108123426-d5acb3125c2a/go.mod h1:qNTQ5P5JnDBl6z3cMAg/SywNDC5ABu5ApDIw6lUbRmI= @@ -278,6 +283,13 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/tdewolff/minify v1.1.0 h1:nxHQi1ML+g3ZbZHffiZ6eC7vMqNvSRfX3KB5Y5y/kfw= +github.com/tdewolff/minify v2.3.6+incompatible h1:2hw5/9ZvxhWLvBUnHE06gElGYz+Jv9R4Eys0XUzItYo= +github.com/tdewolff/minify/v2 v2.9.7 h1:r8ewdcX8VYUoNj+s9WSy4FtNNNqNPevWOkb/MksAtzQ= +github.com/tdewolff/minify/v2 v2.9.7/go.mod h1:AcJ/ggtHex5N/QiafLI8rlIO3qwSlgbPNLi27VZSYz8= +github.com/tdewolff/parse/v2 v2.5.4 h1:ggaQ1SVE8wErRrZwUs49I6iQ1zL/tFlb7KtYsk2I8Yk= +github.com/tdewolff/parse/v2 v2.5.4/go.mod h1:WzaJpRSbwq++EIQHYIRTpbYKNA3gn9it1Ik++q4zyho= +github.com/tdewolff/test v1.0.6/go.mod h1:6DAvZliBAAnD7rhVgwaM7DE5/d9NMOAJ09SqYqeK4QE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/urfave/cli v1.20.0/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= @@ -356,12 +368,14 @@ golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200724161237-0e2f3a69832c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c h1:38q6VNPWR010vN82/SB121GujZNIfAUb4YttE2rhGuc= golang.org/x/sys v0.0.0-20200926100807-9d91bd62050c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= diff --git a/proxy/image_proxy.go b/proxy/image_proxy.go index 56109599..07b087b9 100644 --- a/proxy/image_proxy.go +++ b/proxy/image_proxy.go @@ -5,6 +5,7 @@ package proxy // import "miniflux.app/proxy" import ( + "regexp" "strings" "miniflux.app/config" @@ -14,6 +15,8 @@ import ( "github.com/gorilla/mux" ) +var regexSplitSrcset = regexp.MustCompile(`,\s+`) + // ImageProxyRewriter replaces image URLs with internal proxy URLs. func ImageProxyRewriter(router *mux.Router, data string) string { proxyImages := config.Opts.ProxyImages() @@ -28,7 +31,7 @@ func ImageProxyRewriter(router *mux.Router, data string) string { doc.Find("img").Each(func(i int, img *goquery.Selection) { if srcAttr, ok := img.Attr("src"); ok { - if proxyImages == "all" || !url.IsHTTPS(srcAttr) { + if !isDataURL(srcAttr) && (proxyImages == "all" || !url.IsHTTPS(srcAttr)) { img.SetAttr("src", ProxifyURL(router, srcAttr)) } } @@ -59,18 +62,21 @@ func ImageProxyRewriter(router *mux.Router, data string) string { func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) { var proxifiedSources []string - for _, source := range strings.Split(attributeValue, ",") { + for _, source := range regexSplitSrcset.Split(attributeValue, -1) { parts := strings.Split(strings.TrimSpace(source), " ") nbParts := len(parts) if nbParts > 0 { - source = ProxifyURL(router, parts[0]) - - if nbParts > 1 { - source += " " + parts[1] + rewrittenSource := parts[0] + if !isDataURL(rewrittenSource) { + rewrittenSource = ProxifyURL(router, rewrittenSource) } - proxifiedSources = append(proxifiedSources, source) + if nbParts > 1 { + rewrittenSource += " " + parts[1] + } + + proxifiedSources = append(proxifiedSources, rewrittenSource) } } @@ -78,3 +84,7 @@ func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeV element.SetAttr("srcset", strings.Join(proxifiedSources, ", ")) } } + +func isDataURL(s string) bool { + return strings.HasPrefix(s, "data:") +} diff --git a/proxy/image_proxy_test.go b/proxy/image_proxy_test.go index d336a7e5..3b91c0fd 100644 --- a/proxy/image_proxy_test.go +++ b/proxy/image_proxy_test.go @@ -234,7 +234,7 @@ func TestProxyFilterWithPictureSource(t *testing.T) { r := mux.NewRouter() r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy") - input := `` + input := `` expected := `` output := ImageProxyRewriter(r, input) @@ -242,3 +242,49 @@ func TestProxyFilterWithPictureSource(t *testing.T) { t.Errorf(`Not expected output: got %s`, output) } } + +func TestImageProxyWithImageDataURL(t *testing.T) { + os.Clearenv() + os.Setenv("PROXY_IMAGES", "all") + + var err error + parser := config.NewParser() + config.Opts, err = parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + r := mux.NewRouter() + r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy") + + input := `` + expected := `` + output := ImageProxyRewriter(r, input) + + if expected != output { + t.Errorf(`Not expected output: got %s`, output) + } +} + +func TestImageProxyWithImageSourceDataURL(t *testing.T) { + os.Clearenv() + os.Setenv("PROXY_IMAGES", "all") + + var err error + parser := config.NewParser() + config.Opts, err = parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + r := mux.NewRouter() + r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy") + + input := `` + expected := `` + output := ImageProxyRewriter(r, input) + + if expected != output { + t.Errorf(`Not expected output: got %s`, output) + } +} diff --git a/reader/sanitizer/sanitizer.go b/reader/sanitizer/sanitizer.go index b361cbe2..477c98e2 100644 --- a/reader/sanitizer/sanitizer.go +++ b/reader/sanitizer/sanitizer.go @@ -19,6 +19,7 @@ import ( var ( youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`) + splitSrcsetRegex = regexp.MustCompile(`,\s+`) ) // Sanitize returns safe HTML. @@ -110,6 +111,8 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([ } else { continue } + } else if tagName == "img" && attribute.Key == "src" && strings.HasPrefix(attribute.Val, "data:") { + value = attribute.Val } else { value, err = url.AbsoluteURL(baseURL, value) if err != nil { @@ -439,15 +442,19 @@ Each string is composed of: */ func sanitizeSrcsetAttr(baseURL, value string) string { var sanitizedSources []string - rawSources := strings.Split(value, ",") + rawSources := splitSrcsetRegex.Split(value, -1) for _, rawSource := range rawSources { parts := strings.Split(strings.TrimSpace(rawSource), " ") nbParts := len(parts) if nbParts > 0 { - sanitizedSource, err := url.AbsoluteURL(baseURL, parts[0]) - if err != nil { - continue + sanitizedSource := parts[0] + if !strings.HasPrefix(parts[0], "data:") { + var err error + sanitizedSource, err = url.AbsoluteURL(baseURL, parts[0]) + if err != nil { + continue + } } if nbParts == 2 && isValidWidthOrDensityDescriptor(parts[1]) { diff --git a/reader/sanitizer/sanitizer_test.go b/reader/sanitizer/sanitizer_test.go index 30adeefb..f19e2cf9 100644 --- a/reader/sanitizer/sanitizer_test.go +++ b/reader/sanitizer/sanitizer_test.go @@ -15,8 +15,18 @@ func TestValidInput(t *testing.T) { } } +func TestImgWithDataURL(t *testing.T) { + input := `Example` + expected := `Example` + output := Sanitize("http://example.org/", input) + + if output != expected { + t.Errorf(`Wrong output: %s`, output) + } +} + func TestImgWithSrcset(t *testing.T) { - input := `Example` + input := `Example` expected := `Example` output := Sanitize("http://example.org/", input) @@ -25,6 +35,16 @@ func TestImgWithSrcset(t *testing.T) { } } +func TestImgWithSrcsetAndDataURL(t *testing.T) { + input := `Example` + expected := `Example` + output := Sanitize("http://example.org/", input) + + if output != expected { + t.Errorf(`Wrong output: %s`, output) + } +} + func TestSourceWithSrcsetAndMedia(t *testing.T) { input := `` expected := ``