mirror of
https://github.com/miniflux/v2.git
synced 2025-08-31 18:31:01 +00:00
Proxify articles crawled manually
This commit is contained in:
parent
997006cdd7
commit
6117a2f3cd
6 changed files with 347 additions and 305 deletions
80
proxy/image_proxy.go
Normal file
80
proxy/image_proxy.go
Normal file
|
@ -0,0 +1,80 @@
|
|||
// Copyright 2020 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package proxy // import "miniflux.app/proxy"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"miniflux.app/config"
|
||||
"miniflux.app/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
// ImageProxyRewriter replaces image URLs with internal proxy URLs.
|
||||
func ImageProxyRewriter(router *mux.Router, data string) string {
|
||||
proxyImages := config.Opts.ProxyImages()
|
||||
if proxyImages == "none" {
|
||||
return data
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
|
||||
if err != nil {
|
||||
return data
|
||||
}
|
||||
|
||||
doc.Find("img").Each(func(i int, img *goquery.Selection) {
|
||||
if srcAttr, ok := img.Attr("src"); ok {
|
||||
if proxyImages == "all" || !url.IsHTTPS(srcAttr) {
|
||||
img.SetAttr("src", ProxifyURL(router, srcAttr))
|
||||
}
|
||||
}
|
||||
|
||||
if srcsetAttr, ok := img.Attr("srcset"); ok {
|
||||
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
|
||||
proxifySourceSet(img, router, srcsetAttr)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
doc.Find("picture source").Each(func(i int, sourceElement *goquery.Selection) {
|
||||
if srcsetAttr, ok := sourceElement.Attr("srcset"); ok {
|
||||
if proxyImages == "all" || !url.IsHTTPS(srcsetAttr) {
|
||||
proxifySourceSet(sourceElement, router, srcsetAttr)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
output, err := doc.Find("body").First().Html()
|
||||
if err != nil {
|
||||
return data
|
||||
}
|
||||
|
||||
return output
|
||||
}
|
||||
|
||||
func proxifySourceSet(element *goquery.Selection, router *mux.Router, attributeValue string) {
|
||||
var proxifiedSources []string
|
||||
|
||||
for _, source := range strings.Split(attributeValue, ",") {
|
||||
parts := strings.Split(strings.TrimSpace(source), " ")
|
||||
nbParts := len(parts)
|
||||
|
||||
if nbParts > 0 {
|
||||
source = ProxifyURL(router, parts[0])
|
||||
|
||||
if nbParts > 1 {
|
||||
source += " " + parts[1]
|
||||
}
|
||||
|
||||
proxifiedSources = append(proxifiedSources, source)
|
||||
}
|
||||
}
|
||||
|
||||
if len(proxifiedSources) > 0 {
|
||||
element.SetAttr("srcset", strings.Join(proxifiedSources, ", "))
|
||||
}
|
||||
}
|
244
proxy/image_proxy_test.go
Normal file
244
proxy/image_proxy_test.go
Normal file
|
@ -0,0 +1,244 @@
|
|||
// Copyright 2020 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package proxy // import "miniflux.app/proxy"
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
"miniflux.app/config"
|
||||
)
|
||||
|
||||
func TestProxyFilterWithHttpDefault(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "http-only")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="http://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlLnBuZw==" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpsDefault(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "http-only")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpNever(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "none")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="http://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := input
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpsNever(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "none")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := input
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpAlways(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "all")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="http://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlLnBuZw==" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpsAlways(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "all")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="/proxy/aHR0cHM6Ly93ZWJzaXRlL2ZvbGRlci9pbWFnZS5wbmc=" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpInvalid(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "invalid")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="http://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlLnBuZw==" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithHttpsInvalid(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "invalid")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
expected := `<p><img src="https://website/folder/image.png" alt="Test"/></p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithSrcset(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "all")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<p><img src="http://website/folder/image.png" srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w" alt="test"></p>`
|
||||
expected := `<p><img src="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlLnBuZw==" srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w" alt="test"/></p>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got %s`, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProxyFilterWithPictureSource(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("PROXY_IMAGES", "all")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
r := mux.NewRouter()
|
||||
r.HandleFunc("/proxy/{encodedURL}", func(w http.ResponseWriter, r *http.Request) {}).Name("proxy")
|
||||
|
||||
input := `<picture><source srcset="http://website/folder/image2.png 656w, http://website/folder/image3.png 360w"></picture>`
|
||||
expected := `<picture><source srcset="/proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMi5wbmc= 656w, /proxy/aHR0cDovL3dlYnNpdGUvZm9sZGVyL2ltYWdlMy5wbmc= 360w"/></picture>`
|
||||
output := ImageProxyRewriter(r, input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got %s`, output)
|
||||
}
|
||||
}
|
18
proxy/proxy.go
Normal file
18
proxy/proxy.go
Normal file
|
@ -0,0 +1,18 @@
|
|||
// Copyright 2020 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package proxy // import "miniflux.app/proxy"
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
|
||||
"miniflux.app/http/route"
|
||||
|
||||
"github.com/gorilla/mux"
|
||||
)
|
||||
|
||||
// ProxifyURL generates an URL for a proxified resource.
|
||||
func ProxifyURL(router *mux.Router, link string) string {
|
||||
return route.Path(router, "proxy", "encodedURL", base64.URLEncoding.EncodeToString([]byte(link)))
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue