diff --git a/reader/rewrite/rewrite_functions.go b/reader/rewrite/rewrite_functions.go
index bd257c90..c01545c9 100644
--- a/reader/rewrite/rewrite_functions.go
+++ b/reader/rewrite/rewrite_functions.go
@@ -229,3 +229,15 @@ func replaceCustom(entryContent string, searchTerm string, replaceTerm string) s
}
return entryContent
}
+
+// removeCustom parses entryContent as HTML, removes every element matching
+// the given selector, and returns the inner HTML of the resulting <body>.
+//
+// The content is returned unchanged when it cannot be parsed or when the
+// modified document cannot be serialized, so a failing rule never wipes the
+// entry.
+func removeCustom(entryContent string, selector string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	doc.Find(selector).Remove()
+
+	output, err := doc.Find("body").First().Html()
+	if err != nil {
+		// Keep the original content rather than returning an empty string.
+		return entryContent
+	}
+	return output
+}
diff --git a/reader/rewrite/rewriter.go b/reader/rewrite/rewriter.go
index 77ac147d..27058b55 100644
--- a/reader/rewrite/rewriter.go
+++ b/reader/rewrite/rewriter.go
@@ -5,14 +5,18 @@
package rewrite // import "miniflux.app/reader/rewrite"
import (
- "regexp"
+ "strconv"
"strings"
+ "text/scanner"
"miniflux.app/logger"
"miniflux.app/url"
)
-var customReplaceRuleRegex = regexp.MustCompile(`replace\("(.*)"\|"(.*)"\)`)
+// rule is one parsed rewrite rule: a rule name plus its string arguments.
+type rule struct {
+	name string   // identifier token, matched against the cases in applyRule
+	args []string // string arguments, in the order they appear after the name
+}
// Rewriter modify item contents with a set of rewriting rules.
func Rewriter(entryURL, entryContent, customRewriteRules string) string {
@@ -21,46 +25,78 @@ func Rewriter(entryURL, entryContent, customRewriteRules string) string {
rulesList = customRewriteRules
}
- rules := strings.Split(rulesList, ",")
- rules = append(rules, "add_pdf_download_link")
+ rules := parseRules(rulesList)
+ rules = append(rules, rule{name: "add_pdf_download_link"})
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
for _, rule := range rules {
- rule := strings.TrimSpace(rule)
- switch rule {
- case "add_image_title":
- entryContent = addImageTitle(entryURL, entryContent)
- case "add_mailto_subject":
- entryContent = addMailtoSubject(entryURL, entryContent)
- case "add_dynamic_image":
- entryContent = addDynamicImage(entryURL, entryContent)
- case "add_youtube_video":
- entryContent = addYoutubeVideo(entryURL, entryContent)
- case "add_invidious_video":
- entryContent = addInvidiousVideo(entryURL, entryContent)
- case "add_youtube_video_using_invidious_player":
- entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
- case "add_pdf_download_link":
- entryContent = addPDFLink(entryURL, entryContent)
- case "nl2br":
- entryContent = replaceLineFeeds(entryContent)
- case "convert_text_link", "convert_text_links":
- entryContent = replaceTextLinks(entryContent)
- case "fix_medium_images":
- entryContent = fixMediumImages(entryURL, entryContent)
- case "use_noscript_figure_images":
- entryContent = useNoScriptImages(entryURL, entryContent)
- default:
- if strings.Contains(rule, "replace") {
- // Format: replace("search-term"|"replace-term")
- args := customReplaceRuleRegex.FindStringSubmatch(rule)
- if len(args) >= 3 {
- entryContent = replaceCustom(entryContent, args[1], args[2])
- } else {
- logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
- }
+ entryContent = applyRule(entryURL, entryContent, rule)
+ }
+
+ return entryContent
+}
+
+// parseRules tokenizes a rule list such as
+//
+//	add_dynamic_image,replace("a"|"b"),remove(".ads")
+//
+// Every identifier starts a new rule and every double-quoted string is added
+// as an argument of the most recent rule. Any other character (commas,
+// parentheses, pipes) only acts as a separator. It returns nil when
+// rulesText contains no rule.
+func parseRules(rulesText string) (rules []rule) {
+	var scan scanner.Scanner
+	scan.Init(strings.NewReader(rulesText))
+	// Init resets Mode to GoTokens and Error to nil, so both must be
+	// configured after it, not in the struct literal.
+	scan.Mode = scanner.ScanIdents | scanner.ScanStrings
+	// Rules are user input: malformed text must not be reported on os.Stderr,
+	// which is what the scanner does when Error is nil.
+	scan.Error = func(*scanner.Scanner, string) {}
+
+	for {
+		switch scan.Scan() {
+		case scanner.Ident:
+			rules = append(rules, rule{name: scan.TokenText()})
+
+		case scanner.String:
+			if len(rules) == 0 {
+				// A string that is not preceded by a rule name has no owner.
+				continue
+			}
+
+			raw := scan.TokenText()
+			text, err := strconv.Unquote(raw)
+			if err != nil {
+				// Not a valid Go string literal (for example a regex escape
+				// such as \d, or a missing closing quote): keep the text
+				// between the quotes instead of an empty argument.
+				text = strings.TrimSuffix(strings.TrimPrefix(raw, `"`), `"`)
+			}
+
+			last := &rules[len(rules)-1]
+			last.args = append(last.args, text)
+
+		case scanner.EOF:
+			return rules
+		}
+	}
+}
+
+func applyRule(entryURL, entryContent string, rule rule) string {
+ switch rule.name {
+ case "add_image_title":
+ entryContent = addImageTitle(entryURL, entryContent)
+ case "add_mailto_subject":
+ entryContent = addMailtoSubject(entryURL, entryContent)
+ case "add_dynamic_image":
+ entryContent = addDynamicImage(entryURL, entryContent)
+ case "add_youtube_video":
+ entryContent = addYoutubeVideo(entryURL, entryContent)
+ case "add_invidious_video":
+ entryContent = addInvidiousVideo(entryURL, entryContent)
+ case "add_youtube_video_using_invidious_player":
+ entryContent = addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent)
+ case "add_pdf_download_link":
+ entryContent = addPDFLink(entryURL, entryContent)
+ case "nl2br":
+ entryContent = replaceLineFeeds(entryContent)
+ case "convert_text_link", "convert_text_links":
+ entryContent = replaceTextLinks(entryContent)
+ case "fix_medium_images":
+ entryContent = fixMediumImages(entryURL, entryContent)
+ case "use_noscript_figure_images":
+ entryContent = useNoScriptImages(entryURL, entryContent)
+ case "replace":
+ // Format: replace("search-term"|"replace-term")
+ if len(rule.args) >= 2 {
+ entryContent = replaceCustom(entryContent, rule.args[0], rule.args[1])
+ } else {
+ logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
+ }
+ case "remove":
+ // Format: remove("#selector > .element, .another")
+ if len(rule.args) >= 1 {
+ entryContent = removeCustom(entryContent, rule.args[0])
+ } else {
+ logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
}
}
diff --git a/reader/rewrite/rewriter_test.go b/reader/rewrite/rewriter_test.go
index aebaf9f9..7f63473e 100644
--- a/reader/rewrite/rewriter_test.go
+++ b/reader/rewrite/rewriter_test.go
@@ -5,10 +5,26 @@
package rewrite // import "miniflux.app/reader/rewrite"
import (
+ "reflect"
"strings"
"testing"
)
+// TestParseRules checks that a list mixing a bare rule, a two-argument rule
+// and a one-argument rule is parsed into the matching rule values, in order.
+func TestParseRules(t *testing.T) {
+	const input = `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
+
+	want := []rule{
+		{name: "add_dynamic_image"},
+		{name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
+		{name: "remove", args: []string{".spam, .ads:not(.keep)"}},
+	}
+
+	if got := parseRules(input); !reflect.DeepEqual(want, got) {
+		t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, got, want)
+	}
+}
+
func TestReplaceTextLinks(t *testing.T) {
scenarios := map[string]string{
`This is a link to example.org`: `This is a link to example.org`,
@@ -234,7 +250,17 @@ func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
func TestRewriteReplaceCustom(t *testing.T) {
content := ``
expected := `
`
- output := Rewriter("https://example.org/artcle", content, `replace("article/(.*).svg"|"article/$1.png")`)
+ output := Rewriter("https://example.org/article", content, `replace("article/(.*).svg"|"article/$1.png")`)
+
+ if expected != output {
+ t.Errorf(`Not expected output: %s`, output)
+ }
+}
+
+func TestRewriteRemoveCustom(t *testing.T) {
+ content := `