mirror of
https://github.com/miniflux/v2.git
synced 2025-07-02 16:38:37 +00:00
New add_dynamic_image
rewriter for JavaScript-loaded images.
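Searches `img` and `div` tags for various `data-*` attributes and sets the `img` tag's `src` attribute accordingly (a matching `div` is replaced by an `img` tag). Falls back to searching `noscript` elements for `img` tags when no such attribute is found. Includes unit tests.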
This commit is contained in:
parent
8ee4280461
commit
6d25e02cb5
3 changed files with 107 additions and 0 deletions
|
@ -14,6 +14,7 @@ import (
|
||||||
|
|
||||||
var (
|
var (
|
||||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||||
|
imgRegex = regexp.MustCompile(`<img [^>]+>`)
|
||||||
)
|
)
|
||||||
|
|
||||||
func addImageTitle(entryURL, entryContent string) string {
|
func addImageTitle(entryURL, entryContent string) string {
|
||||||
|
@ -40,6 +41,68 @@ func addImageTitle(entryURL, entryContent string) string {
|
||||||
return entryContent
|
return entryContent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func addDynamicImage(entryURL, entryContent string) string {
|
||||||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||||
|
if err != nil {
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ordered most preferred to least preferred.
|
||||||
|
candidateAttrs := []string{
|
||||||
|
"data-src",
|
||||||
|
"data-original",
|
||||||
|
"data-orig",
|
||||||
|
"data-url",
|
||||||
|
"data-orig-file",
|
||||||
|
"data-large-file",
|
||||||
|
"data-medium-file",
|
||||||
|
"data-2000src",
|
||||||
|
"data-1000src",
|
||||||
|
"data-800src",
|
||||||
|
"data-655src",
|
||||||
|
"data-500src",
|
||||||
|
"data-380src",
|
||||||
|
}
|
||||||
|
|
||||||
|
changed := false
|
||||||
|
|
||||||
|
doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
|
||||||
|
for _, candidateAttr := range candidateAttrs {
|
||||||
|
if srcAttr, found := img.Attr(candidateAttr); found {
|
||||||
|
changed = true
|
||||||
|
|
||||||
|
if img.Is("img") {
|
||||||
|
img.SetAttr("src",srcAttr)
|
||||||
|
} else {
|
||||||
|
altAttr := img.AttrOr("alt", "")
|
||||||
|
img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
if !changed {
|
||||||
|
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
|
||||||
|
matches := imgRegex.FindAllString(noscript.Text(), 2)
|
||||||
|
|
||||||
|
if len(matches) == 1 {
|
||||||
|
changed = true
|
||||||
|
|
||||||
|
noscript.ReplaceWithHtml(matches[0])
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
if changed {
|
||||||
|
output, _ := doc.Find("body").First().Html()
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
||||||
func addYoutubeVideo(entryURL, entryContent string) string {
|
func addYoutubeVideo(entryURL, entryContent string) string {
|
||||||
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,8 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
|
||||||
switch strings.TrimSpace(rule) {
|
switch strings.TrimSpace(rule) {
|
||||||
case "add_image_title":
|
case "add_image_title":
|
||||||
entryContent = addImageTitle(entryURL, entryContent)
|
entryContent = addImageTitle(entryURL, entryContent)
|
||||||
|
case "add_dynamic_image":
|
||||||
|
entryContent = addDynamicImage(entryURL, entryContent)
|
||||||
case "add_youtube_video":
|
case "add_youtube_video":
|
||||||
entryContent = addYoutubeVideo(entryURL, entryContent)
|
entryContent = addYoutubeVideo(entryURL, entryContent)
|
||||||
case "add_pdf_download_link":
|
case "add_pdf_download_link":
|
||||||
|
|
|
@ -40,6 +40,7 @@ func TestRewriteWithXkcdLink(t *testing.T) {
|
||||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
|
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
|
||||||
description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
|
description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
|
||||||
output := Rewriter("https://xkcd.com/1912/", description, ``)
|
output := Rewriter("https://xkcd.com/1912/", description, ``)
|
||||||
|
@ -48,6 +49,7 @@ func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
|
||||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
|
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
|
||||||
description := "test"
|
description := "test"
|
||||||
output := Rewriter("https://xkcd.com/1912/", description, ``)
|
output := Rewriter("https://xkcd.com/1912/", description, ``)
|
||||||
|
@ -76,3 +78,43 @@ func TestRewriteWithPDFLink(t *testing.T) {
|
||||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRewriteWithNoLazyImage(t *testing.T) {
|
||||||
|
description := `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`
|
||||||
|
output := Rewriter("https://example.org/article", description, "add_dynamic_image")
|
||||||
|
expected := description
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRewriteWithLazyImage(t *testing.T) {
|
||||||
|
description := `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
|
||||||
|
output := Rewriter("https://example.org/article", description, "add_dynamic_image")
|
||||||
|
expected := `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRewriteWithLazyDivImage(t *testing.T) {
|
||||||
|
description := `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
|
||||||
|
output := Rewriter("https://example.org/article", description, "add_dynamic_image")
|
||||||
|
expected := `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
|
||||||
|
description := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`
|
||||||
|
output := Rewriter("https://example.org/article", description, "add_dynamic_image")
|
||||||
|
expected := `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue