1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-07-02 16:38:37 +00:00

New add_dynamic_image rewriter for JavaScript-loaded images.

Searches `img` and `div` tags for various `data-*` attributes and sets the `img` tag's `src` attribute accordingly. If no such attribute is found, falls back to searching `noscript` elements for `img` tags.

Includes unit tests.
This commit is contained in:
dzaikos 2018-07-09 01:22:48 -04:00
parent 8ee4280461
commit 6d25e02cb5
3 changed files with 107 additions and 0 deletions

View file

@ -14,6 +14,7 @@ import (
// youtubeRegex matches a YouTube watch URL and captures everything that
// follows "v=" as its single submatch.
var youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)

// imgRegex matches one opening <img ...> tag (the literal "<img", a space,
// then at least one character up to the first ">") in raw markup.
var imgRegex = regexp.MustCompile(`<img [^>]+>`)
func addImageTitle(entryURL, entryContent string) string {
@ -40,6 +41,68 @@ func addImageTitle(entryURL, entryContent string) string {
return entryContent
}
// addDynamicImage rewrites entry content so that images which are normally
// loaded by JavaScript are exposed as plain <img> elements.
//
// It works in two passes:
//
//  1. Every <img> and <div> element is checked for the first non-empty
//     attribute in candidateAttrs. For an <img>, that value becomes its
//     "src". A <div> is replaced by a new <img> whose "src" is that value
//     and whose "alt" is copied from the <div> (empty when absent).
//  2. Only if the first pass changed nothing, each <noscript> element whose
//     text contains exactly one <img ...> tag is replaced by that tag.
//
// entryURL is not used by this rewriter. The function returns the inner HTML
// of the parsed document's <body> when something was rewritten; otherwise,
// or when parsing or serialization fails, it returns entryContent unchanged.
func addDynamicImage(entryURL, entryContent string) string {
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
	if err != nil {
		return entryContent
	}

	// Ordered most preferred to least preferred.
	candidateAttrs := []string{
		"data-src",
		"data-original",
		"data-orig",
		"data-url",
		"data-orig-file",
		"data-large-file",
		"data-medium-file",
		"data-2000src",
		"data-1000src",
		"data-800src",
		"data-655src",
		"data-500src",
		"data-380src",
	}

	// Attribute values come from feed content and are spliced into an HTML
	// string below; escape them so a quote or angle bracket cannot terminate
	// the attribute or introduce extra markup.
	attrEscaper := strings.NewReplacer(
		`&`, "&amp;",
		`"`, "&quot;",
		`<`, "&lt;",
		`>`, "&gt;",
	)

	changed := false

	doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
		for _, candidateAttr := range candidateAttrs {
			srcAttr, found := img.Attr(candidateAttr)
			// An empty value would blank out the image; try the next
			// candidate instead.
			if !found || strings.TrimSpace(srcAttr) == "" {
				continue
			}

			changed = true

			if img.Is("img") {
				img.SetAttr("src", srcAttr)
			} else {
				altAttr := img.AttrOr("alt", "")
				img.ReplaceWithHtml(`<img src="` + attrEscaper.Replace(srcAttr) + `" alt="` + attrEscaper.Replace(altAttr) + `"/>`)
			}

			// Only the most preferred attribute present is used.
			break
		}
	})

	if !changed {
		doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
			// A limit of 2 is enough to tell "exactly one" from "several".
			matches := imgRegex.FindAllString(noscript.Text(), 2)

			if len(matches) == 1 {
				changed = true

				noscript.ReplaceWithHtml(matches[0])
			}
		})
	}

	if !changed {
		return entryContent
	}

	output, err := doc.Find("body").First().Html()
	if err != nil {
		// Keep the original content rather than returning an empty entry.
		return entryContent
	}

	return output
}
func addYoutubeVideo(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)