1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-01 17:38:37 +00:00

Move internal packages to an internal folder

For reference: https://go.dev/doc/go1.4#internalpackages
This commit is contained in:
Frédéric Guillot 2023-08-10 19:46:45 -07:00
parent c234903255
commit 168a870c02
433 changed files with 1121 additions and 1123 deletions

View file

@ -0,0 +1,383 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import (
"encoding/base64"
"fmt"
"html"
"net/url"
"regexp"
"strings"
"miniflux.app/v2/internal/config"
"github.com/PuerkitoBio/goquery"
"github.com/yuin/goldmark"
goldmarkhtml "github.com/yuin/goldmark/renderer/html"
)
var (
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)
imgRegex = regexp.MustCompile(`<img [^>]+>`)
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
)
func addImageTitle(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
matches := doc.Find("img[src][title]")
if matches.Length() > 0 {
matches.Each(func(i int, img *goquery.Selection) {
altAttr := img.AttrOr("alt", "")
srcAttr, _ := img.Attr("src")
titleAttr, _ := img.Attr("title")
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
})
output, _ := doc.Find("body").First().Html()
return output
}
return entryContent
}
func addMailtoSubject(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
matches := doc.Find(`a[href^="mailto:"]`)
if matches.Length() > 0 {
matches.Each(func(i int, a *goquery.Selection) {
hrefAttr, _ := a.Attr("href")
mailto, err := url.Parse(hrefAttr)
if err != nil {
return
}
subject := mailto.Query().Get("subject")
if subject == "" {
return
}
a.AppendHtml(" [" + html.EscapeString(subject) + "]")
})
output, _ := doc.Find("body").First().Html()
return output
}
return entryContent
}
func addDynamicImage(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
// Ordered most preferred to least preferred.
candidateAttrs := []string{
"data-src",
"data-original",
"data-orig",
"data-url",
"data-orig-file",
"data-large-file",
"data-medium-file",
"data-2000src",
"data-1000src",
"data-800src",
"data-655src",
"data-500src",
"data-380src",
}
candidateSrcsetAttrs := []string{
"data-srcset",
}
changed := false
doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
// Src-linked candidates
for _, candidateAttr := range candidateAttrs {
if srcAttr, found := img.Attr(candidateAttr); found {
changed = true
if img.Is("img") {
img.SetAttr("src", srcAttr)
} else {
altAttr := img.AttrOr("alt", "")
img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
}
break
}
}
// Srcset-linked candidates
for _, candidateAttr := range candidateSrcsetAttrs {
if srcAttr, found := img.Attr(candidateAttr); found {
changed = true
if img.Is("img") {
img.SetAttr("srcset", srcAttr)
} else {
altAttr := img.AttrOr("alt", "")
img.ReplaceWithHtml(`<img srcset="` + srcAttr + `" alt="` + altAttr + `"/>`)
}
break
}
}
})
if !changed {
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
matches := imgRegex.FindAllString(noscript.Text(), 2)
if len(matches) == 1 {
changed = true
noscript.ReplaceWithHtml(matches[0])
}
})
}
if changed {
output, _ := doc.Find("body").First().Html()
return output
}
return entryContent
}
func fixMediumImages(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
noscriptElement := paragraphImage.Find("noscript")
if noscriptElement.Length() > 0 {
paragraphImage.ReplaceWithHtml(noscriptElement.Text())
}
})
output, _ := doc.Find("body").First().Html()
return output
}
func useNoScriptImages(entryURL, entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
imgElement := figureElement.Find("img")
if imgElement.Length() > 0 {
noscriptElement := figureElement.Find("noscript")
if noscriptElement.Length() > 0 {
figureElement.PrependHtml(noscriptElement.Text())
imgElement.Remove()
noscriptElement.Remove()
}
}
})
output, _ := doc.Find("body").First().Html()
return output
}
func addYoutubeVideo(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)
if len(matches) == 2 {
video := `<iframe width="650" height="350" frameborder="0" src="` + config.Opts.YouTubeEmbedUrlOverride() + matches[1] + `" allowfullscreen></iframe>`
return video + `<br>` + entryContent
}
return entryContent
}
func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
matches := youtubeRegex.FindStringSubmatch(entryURL)
if len(matches) == 2 {
video := `<iframe width="650" height="350" frameborder="0" src="https://` + config.Opts.InvidiousInstance() + `/embed/` + matches[1] + `" allowfullscreen></iframe>`
return video + `<br>` + entryContent
}
return entryContent
}
func addYoutubeVideoFromId(entryContent string) string {
matches := youtubeIdRegex.FindAllStringSubmatch(entryContent, -1)
if matches == nil {
return entryContent
}
sb := strings.Builder{}
for _, match := range matches {
if len(match) == 2 {
sb.WriteString(`<iframe width="650" height="350" frameborder="0" src="`)
sb.WriteString(config.Opts.YouTubeEmbedUrlOverride())
sb.WriteString(match[1])
sb.WriteString(`" allowfullscreen></iframe><br>`)
}
}
sb.WriteString(entryContent)
return sb.String()
}
func addInvidiousVideo(entryURL, entryContent string) string {
matches := invidioRegex.FindStringSubmatch(entryURL)
if len(matches) == 3 {
video := `<iframe width="650" height="350" frameborder="0" src="https://` + matches[1] + `/embed/` + matches[2] + `" allowfullscreen></iframe>`
return video + `<br>` + entryContent
}
return entryContent
}
func addPDFLink(entryURL, entryContent string) string {
if strings.HasSuffix(entryURL, ".pdf") {
return fmt.Sprintf(`<a href="%s">PDF</a><br>%s`, entryURL, entryContent)
}
return entryContent
}
func replaceTextLinks(input string) string {
return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
}
func replaceLineFeeds(input string) string {
return strings.Replace(input, "\n", "<br>", -1)
}
func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
re, err := regexp.Compile(searchTerm)
if err == nil {
return re.ReplaceAllString(entryContent, replaceTerm)
}
return entryContent
}
func removeCustom(entryContent string, selector string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
doc.Find(selector).Remove()
output, _ := doc.Find("body").First().Html()
return output
}
func addCastopodEpisode(entryURL, entryContent string) string {
player := `<iframe width="650" frameborder="0" src="` + entryURL + `/embed/light"></iframe>`
return player + `<br>` + entryContent
}
func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string {
var treatChildren func(i int, s *goquery.Selection)
treatChildren = func(i int, s *goquery.Selection) {
if s.Nodes[0].Type == 1 {
s.ReplaceWithHtml(repl(s.Nodes[0].Data))
} else {
s.Contents().Each(treatChildren)
}
}
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
doc.Find(selector).Each(treatChildren)
output, _ := doc.Find("body").First().Html()
return output
}
func decodeBase64Content(entryContent string) string {
if ret, err := base64.StdEncoding.DecodeString(strings.TrimSpace(entryContent)); err != nil {
return entryContent
} else {
return html.EscapeString(string(ret))
}
}
func parseMarkdown(entryContent string) string {
var sb strings.Builder
md := goldmark.New(
goldmark.WithRendererOptions(
goldmarkhtml.WithUnsafe(),
),
)
if err := md.Convert([]byte(entryContent), &sb); err != nil {
return entryContent
}
return sb.String()
}
func removeTables(entryContent string) string {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
if err != nil {
return entryContent
}
selectors := []string{"table", "tbody", "thead", "td", "th", "td"}
var loopElement *goquery.Selection
for _, selector := range selectors {
for {
loopElement = doc.Find(selector).First()
if loopElement.Length() == 0 {
break
}
innerHtml, err := loopElement.Html()
if err != nil {
break
}
loopElement.Parent().AppendHtml(innerHtml)
loopElement.Remove()
}
}
output, _ := doc.Find("body").First().Html()
return output
}
func removeClickbait(entryTitle string) string {
titleWords := []string{}
for _, word := range strings.Fields(entryTitle) {
runes := []rune(word)
if len(runes) > 1 {
// keep first rune as is to keep the first capital letter
titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:])))
} else {
titleWords = append(titleWords, word)
}
}
return strings.Join(titleWords, " ")
}

View file

@ -0,0 +1,127 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import (
"strconv"
"strings"
"text/scanner"
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/url"
)
type rule struct {
name string
args []string
}
// Rewriter modify item contents with a set of rewriting rules.
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
rulesList := getPredefinedRewriteRules(entryURL)
if customRewriteRules != "" {
rulesList = customRewriteRules
}
rules := parseRules(rulesList)
rules = append(rules, rule{name: "add_pdf_download_link"})
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
for _, rule := range rules {
applyRule(entryURL, entry, rule)
}
}
func parseRules(rulesText string) (rules []rule) {
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
scan.Init(strings.NewReader(rulesText))
for {
switch scan.Scan() {
case scanner.Ident:
rules = append(rules, rule{name: scan.TokenText()})
case scanner.String:
if l := len(rules) - 1; l >= 0 {
text := scan.TokenText()
text, _ = strconv.Unquote(text)
rules[l].args = append(rules[l].args, text)
}
case scanner.EOF:
return
}
}
}
func applyRule(entryURL string, entry *model.Entry, rule rule) {
switch rule.name {
case "add_image_title":
entry.Content = addImageTitle(entryURL, entry.Content)
case "add_mailto_subject":
entry.Content = addMailtoSubject(entryURL, entry.Content)
case "add_dynamic_image":
entry.Content = addDynamicImage(entryURL, entry.Content)
case "add_youtube_video":
entry.Content = addYoutubeVideo(entryURL, entry.Content)
case "add_invidious_video":
entry.Content = addInvidiousVideo(entryURL, entry.Content)
case "add_youtube_video_using_invidious_player":
entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
case "add_youtube_video_from_id":
entry.Content = addYoutubeVideoFromId(entry.Content)
case "add_pdf_download_link":
entry.Content = addPDFLink(entryURL, entry.Content)
case "nl2br":
entry.Content = replaceLineFeeds(entry.Content)
case "convert_text_link", "convert_text_links":
entry.Content = replaceTextLinks(entry.Content)
case "fix_medium_images":
entry.Content = fixMediumImages(entryURL, entry.Content)
case "use_noscript_figure_images":
entry.Content = useNoScriptImages(entryURL, entry.Content)
case "replace":
// Format: replace("search-term"|"replace-term")
if len(rule.args) >= 2 {
entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
} else {
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
}
case "remove":
// Format: remove("#selector > .element, .another")
if len(rule.args) >= 1 {
entry.Content = removeCustom(entry.Content, rule.args[0])
} else {
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
}
case "add_castopod_episode":
entry.Content = addCastopodEpisode(entryURL, entry.Content)
case "base64_decode":
if len(rule.args) >= 1 {
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
} else {
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
}
case "parse_markdown":
entry.Content = parseMarkdown(entry.Content)
case "remove_tables":
entry.Content = removeTables(entry.Content)
case "remove_clickbait":
entry.Title = removeClickbait(entry.Title)
}
}
func getPredefinedRewriteRules(entryURL string) string {
urlDomain := url.Domain(entryURL)
for domain, rules := range predefinedRules {
if strings.Contains(urlDomain, domain) {
return rules
}
}
return ""
}

View file

@ -0,0 +1,563 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
import (
"os"
"reflect"
"strings"
"testing"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/model"
)
func TestParseRules(t *testing.T) {
rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
expected := []rule{
{name: "add_dynamic_image"},
{name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
{name: "remove", args: []string{".spam, .ads:not(.keep)"}},
}
actual := parseRules(rulesText)
if !reflect.DeepEqual(expected, actual) {
t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
}
}
func TestReplaceTextLinks(t *testing.T) {
scenarios := map[string]string{
`This is a link to example.org`: `This is a link to example.org`,
`This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
`This is a link to www.example.org`: `This is a link to www.example.org`,
`This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
`This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
`This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
`This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
`This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
}
for input, expected := range scenarios {
actual := replaceTextLinks(input)
if actual != expected {
t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
}
}
}
func TestRewriteWithNoMatchingRule(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `Some text.`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Some text.`,
}
Rewriter("https://example.org/article", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithYoutubeLink(t *testing.T) {
config.Opts = config.NewOptions()
controlEntry := &model.Entry{
Title: `A title`,
Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Video Description`,
}
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithYoutubeLinkAndCustomEmbedURL(t *testing.T) {
os.Clearenv()
os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
var err error
parser := config.NewParser()
config.Opts, err = parser.ParseEnvironmentVariables()
if err != nil {
t.Fatalf(`Parsing failure: %v`, err)
}
controlEntry := &model.Entry{
Title: `A title`,
Content: `<iframe width="650" height="350" frameborder="0" src="https://invidious.custom/embed/1234" allowfullscreen></iframe><br>Video Description`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Video Description`,
}
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithInexistingCustomRule(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `Video Description`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Video Description`,
}
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLink(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
}
Rewriter("https://xkcd.com/1912/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`,
}
Rewriter("https://xkcd.com/1912/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
}
Rewriter("https://xkcd.com/1912/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `test`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `test`,
}
Rewriter("https://xkcd.com/1912/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithXkcdAndNoImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `test`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `test`,
}
Rewriter("https://xkcd.com/1912/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteMailtoLink(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`,
}
Rewriter("https://www.qwantz.com/", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithPDFLink(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `test`,
}
Rewriter("https://example.org/document.pdf", testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithNoLazyImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithLazyImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithLazyDivImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithLazySrcset(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteWithImageAndLazySrcset(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
}
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestNewLineRewriteRule(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `A<br>B<br>C`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: "A\nB\nC",
}
Rewriter("https://example.org/article", testEntry, "nl2br")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestConvertTextLinkRewriteRule(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Test: http://example.org/a/b`,
}
Rewriter("https://example.org/article", testEntry, "convert_text_link")
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestMediumImage(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `
<figure class="ht hu hv hw hx hy cy cz paragraph-image">
<div class="hz ia ib ic aj">
<div class="cy cz hs">
<div class="ii s ib ij">
<div class="ik il s">
<div class="id ie t u v if aj bk ig ih">
<img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
</div>
<img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
<noscript>
<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
</noscript>
</div>
</div>
</div>
</div>
</figure>
`,
}
Rewriter("https://example.org/article", testEntry, "fix_medium_images")
testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`,
}
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`,
}
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteReplaceCustom(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`,
}
Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteRemoveCustom(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`,
}
Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteAddCastopodEpisode(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `Episode Description`,
}
Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteBase64Decode(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `This is some base64 encoded content`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`,
}
Rewriter("https://example.org/article", testEntry, `base64_decode`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteBase64DecodeInHTML(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
}
Rewriter("https://example.org/article", testEntry, `base64_decode`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteBase64DecodeArgs(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
}
Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRewriteRemoveTables(t *testing.T) {
controlEntry := &model.Entry{
Title: `A title`,
Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`,
}
testEntry := &model.Entry{
Title: `A title`,
Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`,
}
Rewriter("https://example.org/article", testEntry, `remove_tables`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}
func TestRemoveClickbait(t *testing.T) {
controlEntry := &model.Entry{
Title: `This Is Amazing`,
Content: `Some description`,
}
testEntry := &model.Entry{
Title: `THIS IS AMAZING`,
Content: `Some description`,
}
Rewriter("https://example.org/article", testEntry, `remove_clickbait`)
if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
}
}

View file

@ -0,0 +1,40 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
// List of predefined rewrite rules (alphabetically sorted)
// Available rules: "add_image_title", "add_youtube_video"
// domain => rule name
var predefinedRules = map[string]string{
"abstrusegoose.com": "add_image_title",
"amazingsuperpowers.com": "add_image_title",
"blog.cloudflare.com": `add_image_title,remove("figure.kg-image-card figure.kg-image + img")`,
"blog.laravel.com": "parse_markdown",
"cowbirdsinlove.com": "add_image_title",
"drawingboardcomic.com": "add_image_title",
"exocomics.com": "add_image_title",
"framatube.org": "nl2br,convert_text_link",
"happletea.com": "add_image_title",
"ilpost.it": `remove(".art_tag, #audioPlayerArticle, .author-container, .caption, .ilpostShare, .lastRecents, #mc_embed_signup, .outbrain_inread, p:has(.leggi-anche), .youtube-overlay")`,
"imogenquest.net": "add_image_title",
"lukesurl.com": "add_image_title",
"medium.com": "fix_medium_images",
"mercworks.net": "add_image_title",
"monkeyuser.com": "add_image_title",
"mrlovenstein.com": "add_image_title",
"nedroid.com": "add_image_title",
"oglaf.com": "add_image_title",
"optipess.com": "add_image_title",
"peebleslab.com": "add_image_title",
"quantamagazine.org": `add_youtube_video_from_id, remove("h6:not(.byline,.post__title__kicker), #comments, .next-post__content, .footer__section, figure .outer--content, script")`,
"sentfromthemoon.com": "add_image_title",
"thedoghousediaries.com": "add_image_title",
"theverge.com": `add_dynamic_image, remove("div.duet--recirculation--related-list")`,
"treelobsters.com": "add_image_title",
"webtoons.com": `add_dynamic_image,replace("webtoon"|"swebtoon")`,
"www.qwantz.com": "add_image_title,add_mailto_subject",
"www.recalbox.com": "parse_markdown",
"xkcd.com": "add_image_title",
"youtube.com": "add_youtube_video",
}