mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
refactor(rewriter): avoid the use of regex in addDynamicImage
See https://dustri.org/b/parsing-noscript-tags-with-goquery.html for the whole story.
This commit is contained in:
parent
14a6e8ed3a
commit
cfda948c3a
2 changed files with 6 additions and 9 deletions
|
@@ -23,7 +23,6 @@ var (
|
||||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
||||||
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
|
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
|
||||||
invidioRegex = regexp.MustCompile(`https?://(.*)/watch\?v=(.*)`)
|
invidioRegex = regexp.MustCompile(`https?://(.*)/watch\?v=(.*)`)
|
||||||
imgRegex = regexp.MustCompile(`<img [^>]+>`)
|
|
||||||
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -84,10 +83,11 @@ func addMailtoSubject(entryContent string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func addDynamicImage(entryContent string) string {
|
func addDynamicImage(entryContent string) string {
|
||||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
parserHtml, err := nethtml.ParseWithOptions(strings.NewReader(entryContent), nethtml.ParseOptionEnableScripting(false))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return entryContent
|
return entryContent
|
||||||
}
|
}
|
||||||
|
doc := goquery.NewDocumentFromNode(parserHtml)
|
||||||
|
|
||||||
// Ordered most preferred to least preferred.
|
// Ordered most preferred to least preferred.
|
||||||
candidateAttrs := []string{
|
candidateAttrs := []string{
|
||||||
|
@@ -149,12 +149,9 @@ func addDynamicImage(entryContent string) string {
|
||||||
|
|
||||||
if !changed {
|
if !changed {
|
||||||
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
|
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
|
||||||
matches := imgRegex.FindAllString(noscript.Text(), 2)
|
if img := noscript.Find("img"); img.Length() == 1 {
|
||||||
|
img.Unwrap()
|
||||||
if len(matches) == 1 {
|
|
||||||
changed = true
|
changed = true
|
||||||
|
|
||||||
noscript.ReplaceWithHtml(matches[0])
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@@ -256,7 +256,7 @@ func TestRewriteWithNoLazyImage(t *testing.T) {
|
||||||
func TestRewriteWithLazyImage(t *testing.T) {
|
func TestRewriteWithLazyImage(t *testing.T) {
|
||||||
controlEntry := &model.Entry{
|
controlEntry := &model.Entry{
|
||||||
Title: `A title`,
|
Title: `A title`,
|
||||||
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
|
||||||
}
|
}
|
||||||
testEntry := &model.Entry{
|
testEntry := &model.Entry{
|
||||||
Title: `A title`,
|
Title: `A title`,
|
||||||
|
@@ -272,7 +272,7 @@ func TestRewriteWithLazyImage(t *testing.T) {
|
||||||
func TestRewriteWithLazyDivImage(t *testing.T) {
|
func TestRewriteWithLazyDivImage(t *testing.T) {
|
||||||
controlEntry := &model.Entry{
|
controlEntry := &model.Entry{
|
||||||
Title: `A title`,
|
Title: `A title`,
|
||||||
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
|
||||||
}
|
}
|
||||||
testEntry := &model.Entry{
|
testEntry := &model.Entry{
|
||||||
Title: `A title`,
|
Title: `A title`,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue