mirror of
https://github.com/miniflux/v2.git
synced 2025-07-02 16:38:37 +00:00
Add rewrite rule to remove dom elements
This commit is contained in:
parent
9fbcfc213b
commit
93596c1218
3 changed files with 112 additions and 38 deletions
|
@ -229,3 +229,15 @@ func replaceCustom(entryContent string, searchTerm string, replaceTerm string) s
|
||||||
}
|
}
|
||||||
return entryContent
|
return entryContent
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func removeCustom(entryContent string, selector string) string {
|
||||||
|
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||||
|
if err != nil {
|
||||||
|
return entryContent
|
||||||
|
}
|
||||||
|
|
||||||
|
doc.Find(selector).Remove()
|
||||||
|
|
||||||
|
output, _ := doc.Find("body").First().Html()
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
|
|
@ -5,14 +5,18 @@
|
||||||
package rewrite // import "miniflux.app/reader/rewrite"
|
package rewrite // import "miniflux.app/reader/rewrite"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"regexp"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"text/scanner"
|
||||||
|
|
||||||
"miniflux.app/logger"
|
"miniflux.app/logger"
|
||||||
"miniflux.app/url"
|
"miniflux.app/url"
|
||||||
)
|
)
|
||||||
|
|
||||||
var customReplaceRuleRegex = regexp.MustCompile(`replace\("(.*)"\|"(.*)"\)`)
|
// rule is a single parsed rewrite rule: the rule identifier followed by the
// string arguments that were written after it, in order.
type rule struct {
	// name is the rule identifier, e.g. "nl2br", "replace" or "remove".
	name string
	// args holds the unquoted string arguments of the rule; it is nil for
	// rules written without arguments.
	args []string
}
|
||||||
|
|
||||||
// Rewriter modify item contents with a set of rewriting rules.
|
// Rewriter modify item contents with a set of rewriting rules.
|
||||||
func Rewriter(entryURL, entryContent, customRewriteRules string) string {
|
func Rewriter(entryURL, entryContent, customRewriteRules string) string {
|
||||||
|
@ -21,46 +25,78 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
|
||||||
rulesList = customRewriteRules
|
rulesList = customRewriteRules
|
||||||
}
|
}
|
||||||
|
|
||||||
rules := strings.Split(rulesList, ",")
|
rules := parseRules(rulesList)
|
||||||
rules = append(rules, "add_pdf_download_link")
|
rules = append(rules, rule{name: "add_pdf_download_link"})
|
||||||
|
|
||||||
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
|
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
|
||||||
|
|
||||||
for _, rule := range rules {
|
for _, rule := range rules {
|
||||||
rule := strings.TrimSpace(rule)
|
entryContent = applyRule(entryURL, entryContent, rule)
|
||||||
switch rule {
|
}
|
||||||
case "add_image_title":
|
|
||||||
entryContent = addImageTitle(entryURL, entryContent)
|
return entryContent
|
||||||
case "add_mailto_subject":
|
}
|
||||||
entryContent = addMailtoSubject(entryURL, entryContent)
|
|
||||||
case "add_dynamic_image":
|
func parseRules(rulesText string) (rules []rule) {
|
||||||
entryContent = addDynamicImage(entryURL, entryContent)
|
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
||||||
case "add_youtube_video":
|
scan.Init(strings.NewReader(rulesText))
|
||||||
entryContent = addYoutubeVideo(entryURL, entryContent)
|
|
||||||
case "add_invidious_video":
|
for {
|
||||||
entryContent = addInvidiousVideo(entryURL, entryContent)
|
switch scan.Scan() {
|
||||||
case "add_youtube_video_using_invidious_player":
|
case scanner.Ident:
|
||||||
entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
|
rules = append(rules, rule{name: scan.TokenText()})
|
||||||
case "add_pdf_download_link":
|
|
||||||
entryContent = addPDFLink(entryURL, entryContent)
|
case scanner.String:
|
||||||
case "nl2br":
|
if l := len(rules) - 1; l >= 0 {
|
||||||
entryContent = replaceLineFeeds(entryContent)
|
text := scan.TokenText()
|
||||||
case "convert_text_link", "convert_text_links":
|
text, _ = strconv.Unquote(text)
|
||||||
entryContent = replaceTextLinks(entryContent)
|
|
||||||
case "fix_medium_images":
|
rules[l].args = append(rules[l].args, text)
|
||||||
entryContent = fixMediumImages(entryURL, entryContent)
|
|
||||||
case "use_noscript_figure_images":
|
|
||||||
entryContent = useNoScriptImages(entryURL, entryContent)
|
|
||||||
default:
|
|
||||||
if strings.Contains(rule, "replace") {
|
|
||||||
// Format: replace("search-term"|"replace-term")
|
|
||||||
args := customReplaceRuleRegex.FindStringSubmatch(rule)
|
|
||||||
if len(args) >= 3 {
|
|
||||||
entryContent = replaceCustom(entryContent, args[1], args[2])
|
|
||||||
} else {
|
|
||||||
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case scanner.EOF:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func applyRule(entryURL, entryContent string, rule rule) string {
|
||||||
|
switch rule.name {
|
||||||
|
case "add_image_title":
|
||||||
|
entryContent = addImageTitle(entryURL, entryContent)
|
||||||
|
case "add_mailto_subject":
|
||||||
|
entryContent = addMailtoSubject(entryURL, entryContent)
|
||||||
|
case "add_dynamic_image":
|
||||||
|
entryContent = addDynamicImage(entryURL, entryContent)
|
||||||
|
case "add_youtube_video":
|
||||||
|
entryContent = addYoutubeVideo(entryURL, entryContent)
|
||||||
|
case "add_invidious_video":
|
||||||
|
entryContent = addInvidiousVideo(entryURL, entryContent)
|
||||||
|
case "add_youtube_video_using_invidious_player":
|
||||||
|
entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
|
||||||
|
case "add_pdf_download_link":
|
||||||
|
entryContent = addPDFLink(entryURL, entryContent)
|
||||||
|
case "nl2br":
|
||||||
|
entryContent = replaceLineFeeds(entryContent)
|
||||||
|
case "convert_text_link", "convert_text_links":
|
||||||
|
entryContent = replaceTextLinks(entryContent)
|
||||||
|
case "fix_medium_images":
|
||||||
|
entryContent = fixMediumImages(entryURL, entryContent)
|
||||||
|
case "use_noscript_figure_images":
|
||||||
|
entryContent = useNoScriptImages(entryURL, entryContent)
|
||||||
|
case "replace":
|
||||||
|
// Format: replace("search-term"|"replace-term")
|
||||||
|
if len(rule.args) >= 2 {
|
||||||
|
entryContent = replaceCustom(entryContent, rule.args[0], rule.args[1])
|
||||||
|
} else {
|
||||||
|
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
|
||||||
|
}
|
||||||
|
case "remove":
|
||||||
|
// Format: remove("#selector > .element, .another")
|
||||||
|
if len(rule.args) >= 1 {
|
||||||
|
entryContent = removeCustom(entryContent, rule.args[0])
|
||||||
|
} else {
|
||||||
|
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,26 @@
|
||||||
package rewrite // import "miniflux.app/reader/rewrite"
|
package rewrite // import "miniflux.app/reader/rewrite"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestParseRules(t *testing.T) {
|
||||||
|
rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
|
||||||
|
expected := []rule{
|
||||||
|
{name: "add_dynamic_image"},
|
||||||
|
{name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
|
||||||
|
{name: "remove", args: []string{".spam, .ads:not(.keep)"}},
|
||||||
|
}
|
||||||
|
|
||||||
|
actual := parseRules(rulesText)
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(expected, actual) {
|
||||||
|
t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestReplaceTextLinks(t *testing.T) {
|
func TestReplaceTextLinks(t *testing.T) {
|
||||||
scenarios := map[string]string{
|
scenarios := map[string]string{
|
||||||
`This is a link to example.org`: `This is a link to example.org`,
|
`This is a link to example.org`: `This is a link to example.org`,
|
||||||
|
@ -234,7 +250,17 @@ func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
|
||||||
func TestRewriteReplaceCustom(t *testing.T) {
|
func TestRewriteReplaceCustom(t *testing.T) {
|
||||||
content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
|
content := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`
|
||||||
expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
|
expected := `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`
|
||||||
output := Rewriter("https://example.org/artcle", content, `replace("article/(.*).svg"|"article/$1.png")`)
|
output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`)
|
||||||
|
|
||||||
|
if expected != output {
|
||||||
|
t.Errorf(`Not expected output: %s`, output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRewriteRemoveCustom(t *testing.T) {
|
||||||
|
content := `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`
|
||||||
|
expected := `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`
|
||||||
|
output := Rewriter("https://example.org/article", content, `remove(".spam, .ads:not(.keep)")`)
|
||||||
|
|
||||||
if expected != output {
|
if expected != output {
|
||||||
t.Errorf(`Not expected output: %s`, output)
|
t.Errorf(`Not expected output: %s`, output)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue