mirror of
https://github.com/miniflux/v2.git
synced 2025-08-01 17:38:37 +00:00
Move internal packages to an internal folder
For reference: https://go.dev/doc/go1.4#internalpackages
This commit is contained in:
parent
c234903255
commit
168a870c02
433 changed files with 1121 additions and 1123 deletions
383
internal/reader/rewrite/rewrite_functions.go
Normal file
383
internal/reader/rewrite/rewrite_functions.go
Normal file
|
@ -0,0 +1,383 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"html"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/yuin/goldmark"
|
||||
goldmarkhtml "github.com/yuin/goldmark/renderer/html"
|
||||
)
|
||||
|
||||
// Precompiled patterns used by the rewrite functions of this package.
var (
	// youtubeRegex captures everything that follows "v=" in a
	// youtube.com watch URL.
	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)

	// youtubeIdRegex captures an 11-character video ID assigned (with ':' or
	// '=') to a "youtube_id" key.
	youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)

	// invidioRegex captures what sits between the scheme and "/watch"
	// (group 1) and everything that follows "v=" (group 2).
	invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)

	// imgRegex matches a single "<img ...>" tag.
	imgRegex = regexp.MustCompile(`<img [^>]+>`)

	// textLinkRegex matches a bare http(s) URL, case-insensitively, without
	// swallowing trailing punctuation such as ',' or '.'.
	textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
)
|
||||
|
||||
func addImageTitle(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
matches := doc.Find("img[src][title]")
|
||||
|
||||
if matches.Length() > 0 {
|
||||
matches.Each(func(i int, img *goquery.Selection) {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
srcAttr, _ := img.Attr("src")
|
||||
titleAttr, _ := img.Attr("title")
|
||||
|
||||
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addMailtoSubject(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
matches := doc.Find(`a[href^="mailto:"]`)
|
||||
|
||||
if matches.Length() > 0 {
|
||||
matches.Each(func(i int, a *goquery.Selection) {
|
||||
hrefAttr, _ := a.Attr("href")
|
||||
|
||||
mailto, err := url.Parse(hrefAttr)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
subject := mailto.Query().Get("subject")
|
||||
if subject == "" {
|
||||
return
|
||||
}
|
||||
|
||||
a.AppendHtml(" [" + html.EscapeString(subject) + "]")
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addDynamicImage(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
// Ordered most preferred to least preferred.
|
||||
candidateAttrs := []string{
|
||||
"data-src",
|
||||
"data-original",
|
||||
"data-orig",
|
||||
"data-url",
|
||||
"data-orig-file",
|
||||
"data-large-file",
|
||||
"data-medium-file",
|
||||
"data-2000src",
|
||||
"data-1000src",
|
||||
"data-800src",
|
||||
"data-655src",
|
||||
"data-500src",
|
||||
"data-380src",
|
||||
}
|
||||
|
||||
candidateSrcsetAttrs := []string{
|
||||
"data-srcset",
|
||||
}
|
||||
|
||||
changed := false
|
||||
|
||||
doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
|
||||
// Src-linked candidates
|
||||
for _, candidateAttr := range candidateAttrs {
|
||||
if srcAttr, found := img.Attr(candidateAttr); found {
|
||||
changed = true
|
||||
|
||||
if img.Is("img") {
|
||||
img.SetAttr("src", srcAttr)
|
||||
} else {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Srcset-linked candidates
|
||||
for _, candidateAttr := range candidateSrcsetAttrs {
|
||||
if srcAttr, found := img.Attr(candidateAttr); found {
|
||||
changed = true
|
||||
|
||||
if img.Is("img") {
|
||||
img.SetAttr("srcset", srcAttr)
|
||||
} else {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
img.ReplaceWithHtml(`<img srcset="` + srcAttr + `" alt="` + altAttr + `"/>`)
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
if !changed {
|
||||
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
|
||||
matches := imgRegex.FindAllString(noscript.Text(), 2)
|
||||
|
||||
if len(matches) == 1 {
|
||||
changed = true
|
||||
|
||||
noscript.ReplaceWithHtml(matches[0])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if changed {
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func fixMediumImages(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
|
||||
noscriptElement := paragraphImage.Find("noscript")
|
||||
if noscriptElement.Length() > 0 {
|
||||
paragraphImage.ReplaceWithHtml(noscriptElement.Text())
|
||||
}
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func useNoScriptImages(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
|
||||
imgElement := figureElement.Find("img")
|
||||
if imgElement.Length() > 0 {
|
||||
noscriptElement := figureElement.Find("noscript")
|
||||
if noscriptElement.Length() > 0 {
|
||||
figureElement.PrependHtml(noscriptElement.Text())
|
||||
imgElement.Remove()
|
||||
noscriptElement.Remove()
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func addYoutubeVideo(entryURL, entryContent string) string {
|
||||
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="` + config.Opts.YouTubeEmbedUrlOverride() + matches[1] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
|
||||
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="https://` + config.Opts.InvidiousInstance() + `/embed/` + matches[1] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addYoutubeVideoFromId(entryContent string) string {
|
||||
matches := youtubeIdRegex.FindAllStringSubmatch(entryContent, -1)
|
||||
if matches == nil {
|
||||
return entryContent
|
||||
}
|
||||
sb := strings.Builder{}
|
||||
for _, match := range matches {
|
||||
if len(match) == 2 {
|
||||
sb.WriteString(`<iframe width="650" height="350" frameborder="0" src="`)
|
||||
sb.WriteString(config.Opts.YouTubeEmbedUrlOverride())
|
||||
sb.WriteString(match[1])
|
||||
sb.WriteString(`" allowfullscreen></iframe><br>`)
|
||||
}
|
||||
}
|
||||
sb.WriteString(entryContent)
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func addInvidiousVideo(entryURL, entryContent string) string {
|
||||
matches := invidioRegex.FindStringSubmatch(entryURL)
|
||||
if len(matches) == 3 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="https://` + matches[1] + `/embed/` + matches[2] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
// addPDFLink prepends a "PDF" link to entryContent when entryURL ends with
// ".pdf" (case-sensitive). Otherwise entryContent is returned unchanged.
func addPDFLink(entryURL, entryContent string) string {
	if !strings.HasSuffix(entryURL, ".pdf") {
		return entryContent
	}

	// The URL ends up inside an href attribute: escape it so characters such
	// as '"' or '&' cannot break the generated markup.
	return fmt.Sprintf(`<a href="%s">PDF</a><br>%s`, html.EscapeString(entryURL), entryContent)
}
|
||||
|
||||
func replaceTextLinks(input string) string {
|
||||
return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
|
||||
}
|
||||
|
||||
// replaceLineFeeds converts every "\n" of input into a "<br>" tag.
func replaceLineFeeds(input string) string {
	return strings.ReplaceAll(input, "\n", "<br>")
}
|
||||
|
||||
// replaceCustom replaces every match of the regular expression searchTerm in
// entryContent with replaceTerm ("$1"-style references are expanded).
//
// When searchTerm is not a valid regular expression, entryContent is returned
// unchanged.
func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
	pattern, err := regexp.Compile(searchTerm)
	if err != nil {
		return entryContent
	}
	return pattern.ReplaceAllString(entryContent, replaceTerm)
}
|
||||
|
||||
func removeCustom(entryContent string, selector string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find(selector).Remove()
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
// addCastopodEpisode prepends an iframe whose source is entryURL followed by
// "/embed/light" to entryContent.
func addCastopodEpisode(entryURL, entryContent string) string {
	// The URL ends up inside the src attribute: escape it so characters such
	// as '"' cannot break the generated markup.
	player := `<iframe width="650" frameborder="0" src="` + html.EscapeString(entryURL) + `/embed/light"></iframe>`

	return player + `<br>` + entryContent
}
|
||||
|
||||
func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string {
|
||||
var treatChildren func(i int, s *goquery.Selection)
|
||||
treatChildren = func(i int, s *goquery.Selection) {
|
||||
if s.Nodes[0].Type == 1 {
|
||||
s.ReplaceWithHtml(repl(s.Nodes[0].Data))
|
||||
} else {
|
||||
s.Contents().Each(treatChildren)
|
||||
}
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find(selector).Each(treatChildren)
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
// decodeBase64Content decodes entryContent (surrounding whitespace ignored)
// as standard base64 and returns the decoded text HTML-escaped.
//
// When the input is not valid base64 it is returned unchanged.
func decodeBase64Content(entryContent string) string {
	decoded, err := base64.StdEncoding.DecodeString(strings.TrimSpace(entryContent))
	if err != nil {
		return entryContent
	}
	return html.EscapeString(string(decoded))
}
|
||||
|
||||
func parseMarkdown(entryContent string) string {
|
||||
var sb strings.Builder
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
goldmarkhtml.WithUnsafe(),
|
||||
),
|
||||
)
|
||||
|
||||
if err := md.Convert([]byte(entryContent), &sb); err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func removeTables(entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
selectors := []string{"table", "tbody", "thead", "td", "th", "td"}
|
||||
|
||||
var loopElement *goquery.Selection
|
||||
|
||||
for _, selector := range selectors {
|
||||
for {
|
||||
loopElement = doc.Find(selector).First()
|
||||
|
||||
if loopElement.Length() == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
innerHtml, err := loopElement.Html()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
loopElement.Parent().AppendHtml(innerHtml)
|
||||
loopElement.Remove()
|
||||
}
|
||||
}
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
// removeClickbait tones down shouting titles: for every whitespace-separated
// word, the first rune is kept as is and the remaining runes are lower-cased.
// Words are re-joined with single spaces.
func removeClickbait(entryTitle string) string {
	words := strings.Fields(entryTitle)

	for idx, word := range words {
		chars := []rune(word)
		if len(chars) <= 1 {
			// Single-rune words are left untouched.
			continue
		}
		// Keep the first rune so an initial capital letter survives.
		words[idx] = string(chars[:1]) + strings.ToLower(string(chars[1:]))
	}

	return strings.Join(words, " ")
}
|
127
internal/reader/rewrite/rewriter.go
Normal file
127
internal/reader/rewrite/rewriter.go
Normal file
|
@ -0,0 +1,127 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/scanner"
|
||||
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
// rule is one parsed rewrite rule: an identifier plus the quoted string
// arguments that followed it in the rules text.
type rule struct {
	// name is the rule identifier, e.g. "replace" or "add_dynamic_image".
	name string
	// args holds the unquoted string arguments, in order of appearance.
	args []string
}
|
||||
|
||||
// Rewriter modify item contents with a set of rewriting rules.
|
||||
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
|
||||
rulesList := getPredefinedRewriteRules(entryURL)
|
||||
if customRewriteRules != "" {
|
||||
rulesList = customRewriteRules
|
||||
}
|
||||
|
||||
rules := parseRules(rulesList)
|
||||
rules = append(rules, rule{name: "add_pdf_download_link"})
|
||||
|
||||
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
|
||||
|
||||
for _, rule := range rules {
|
||||
applyRule(entryURL, entry, rule)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRules(rulesText string) (rules []rule) {
|
||||
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
||||
scan.Init(strings.NewReader(rulesText))
|
||||
|
||||
for {
|
||||
switch scan.Scan() {
|
||||
case scanner.Ident:
|
||||
rules = append(rules, rule{name: scan.TokenText()})
|
||||
|
||||
case scanner.String:
|
||||
if l := len(rules) - 1; l >= 0 {
|
||||
text := scan.TokenText()
|
||||
text, _ = strconv.Unquote(text)
|
||||
|
||||
rules[l].args = append(rules[l].args, text)
|
||||
}
|
||||
|
||||
case scanner.EOF:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
||||
switch rule.name {
|
||||
case "add_image_title":
|
||||
entry.Content = addImageTitle(entryURL, entry.Content)
|
||||
case "add_mailto_subject":
|
||||
entry.Content = addMailtoSubject(entryURL, entry.Content)
|
||||
case "add_dynamic_image":
|
||||
entry.Content = addDynamicImage(entryURL, entry.Content)
|
||||
case "add_youtube_video":
|
||||
entry.Content = addYoutubeVideo(entryURL, entry.Content)
|
||||
case "add_invidious_video":
|
||||
entry.Content = addInvidiousVideo(entryURL, entry.Content)
|
||||
case "add_youtube_video_using_invidious_player":
|
||||
entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
|
||||
case "add_youtube_video_from_id":
|
||||
entry.Content = addYoutubeVideoFromId(entry.Content)
|
||||
case "add_pdf_download_link":
|
||||
entry.Content = addPDFLink(entryURL, entry.Content)
|
||||
case "nl2br":
|
||||
entry.Content = replaceLineFeeds(entry.Content)
|
||||
case "convert_text_link", "convert_text_links":
|
||||
entry.Content = replaceTextLinks(entry.Content)
|
||||
case "fix_medium_images":
|
||||
entry.Content = fixMediumImages(entryURL, entry.Content)
|
||||
case "use_noscript_figure_images":
|
||||
entry.Content = useNoScriptImages(entryURL, entry.Content)
|
||||
case "replace":
|
||||
// Format: replace("search-term"|"replace-term")
|
||||
if len(rule.args) >= 2 {
|
||||
entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
|
||||
} else {
|
||||
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
|
||||
}
|
||||
case "remove":
|
||||
// Format: remove("#selector > .element, .another")
|
||||
if len(rule.args) >= 1 {
|
||||
entry.Content = removeCustom(entry.Content, rule.args[0])
|
||||
} else {
|
||||
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
|
||||
}
|
||||
case "add_castopod_episode":
|
||||
entry.Content = addCastopodEpisode(entryURL, entry.Content)
|
||||
case "base64_decode":
|
||||
if len(rule.args) >= 1 {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
|
||||
} else {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
|
||||
}
|
||||
case "parse_markdown":
|
||||
entry.Content = parseMarkdown(entry.Content)
|
||||
case "remove_tables":
|
||||
entry.Content = removeTables(entry.Content)
|
||||
case "remove_clickbait":
|
||||
entry.Title = removeClickbait(entry.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func getPredefinedRewriteRules(entryURL string) string {
|
||||
urlDomain := url.Domain(entryURL)
|
||||
for domain, rules := range predefinedRules {
|
||||
if strings.Contains(urlDomain, domain) {
|
||||
return rules
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
563
internal/reader/rewrite/rewriter_test.go
Normal file
563
internal/reader/rewrite/rewriter_test.go
Normal file
|
@ -0,0 +1,563 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/model"
|
||||
)
|
||||
|
||||
func TestParseRules(t *testing.T) {
|
||||
rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
|
||||
expected := []rule{
|
||||
{name: "add_dynamic_image"},
|
||||
{name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
|
||||
{name: "remove", args: []string{".spam, .ads:not(.keep)"}},
|
||||
}
|
||||
|
||||
actual := parseRules(rulesText)
|
||||
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceTextLinks(t *testing.T) {
|
||||
scenarios := map[string]string{
|
||||
`This is a link to example.org`: `This is a link to example.org`,
|
||||
`This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
|
||||
`This is a link to www.example.org`: `This is a link to www.example.org`,
|
||||
`This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
|
||||
`This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
|
||||
`This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
|
||||
`This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
|
||||
`This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
|
||||
}
|
||||
|
||||
for input, expected := range scenarios {
|
||||
actual := replaceTextLinks(input)
|
||||
if actual != expected {
|
||||
t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithNoMatchingRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Some text.`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Some text.`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithYoutubeLink(t *testing.T) {
|
||||
config.Opts = config.NewOptions()
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithYoutubeLinkAndCustomEmbedURL(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" height="350" frameborder="0" src="https://invidious.custom/embed/1234" allowfullscreen></iframe><br>Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithInexistingCustomRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="<foo>"/><figcaption><p><foo></p></figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdAndNoImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteMailtoLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`,
|
||||
}
|
||||
Rewriter("https://www.qwantz.com/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithPDFLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://example.org/document.pdf", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithNoLazyImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazyImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazyDivImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazySrcset(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithImageAndLazySrcset(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewLineRewriteRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `A<br>B<br>C`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: "A\nB\nC",
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "nl2br")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertTextLinkRewriteRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Test: http://example.org/a/b`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "convert_text_link")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMediumImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `
|
||||
<figure class="ht hu hv hw hx hy cy cz paragraph-image">
|
||||
<div class="hz ia ib ic aj">
|
||||
<div class="cy cz hs">
|
||||
<div class="ii s ib ij">
|
||||
<div class="ik il s">
|
||||
<div class="id ie t u v if aj bk ig ih">
|
||||
<img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
|
||||
</div>
|
||||
<img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
|
||||
<noscript>
|
||||
<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
|
||||
</noscript>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</figure>
|
||||
`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "fix_medium_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteReplaceCustom(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteRemoveCustom(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteAddCastopodEpisode(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Episode Description`,
|
||||
}
|
||||
Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64Decode(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `This is some base64 encoded content`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64DecodeInHTML(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64DecodeArgs(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteRemoveTables(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove_tables`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveClickbait(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `This Is Amazing`,
|
||||
Content: `Some description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `THIS IS AMAZING`,
|
||||
Content: `Some description`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove_clickbait`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
40
internal/reader/rewrite/rules.go
Normal file
40
internal/reader/rewrite/rules.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
// predefinedRules is the list of predefined rewrite rules, keyed by website
// domain (domain => rules). Keys are kept in alphabetical order.
//
// Each value is a comma-separated list of rule names. Some rules take quoted
// arguments, written as remove("...") or replace("..."|"...").
//
// Rules referenced in this table: "add_dynamic_image", "add_image_title",
// "add_mailto_subject", "add_youtube_video", "add_youtube_video_from_id",
// "convert_text_link", "fix_medium_images", "nl2br", "parse_markdown",
// "remove" and "replace".
//
// NOTE(review): how an entry or feed URL is matched against these domain keys
// is decided by the code that reads this map, which is not visible here.
var predefinedRules = map[string]string{
	"abstrusegoose.com":      "add_image_title",
	"amazingsuperpowers.com": "add_image_title",
	"blog.cloudflare.com":    `add_image_title,remove("figure.kg-image-card figure.kg-image + img")`,
	"blog.laravel.com":       "parse_markdown",
	"cowbirdsinlove.com":     "add_image_title",
	"drawingboardcomic.com":  "add_image_title",
	"exocomics.com":          "add_image_title",
	"framatube.org":          "nl2br,convert_text_link",
	"happletea.com":          "add_image_title",
	"ilpost.it":              `remove(".art_tag, #audioPlayerArticle, .author-container, .caption, .ilpostShare, .lastRecents, #mc_embed_signup, .outbrain_inread, p:has(.leggi-anche), .youtube-overlay")`,
	"imogenquest.net":        "add_image_title",
	"lukesurl.com":           "add_image_title",
	"medium.com":             "fix_medium_images",
	"mercworks.net":          "add_image_title",
	"monkeyuser.com":         "add_image_title",
	"mrlovenstein.com":       "add_image_title",
	"nedroid.com":            "add_image_title",
	"oglaf.com":              "add_image_title",
	"optipess.com":           "add_image_title",
	"peebleslab.com":         "add_image_title",
	"quantamagazine.org":     `add_youtube_video_from_id, remove("h6:not(.byline,.post__title__kicker), #comments, .next-post__content, .footer__section, figure .outer--content, script")`,
	"sentfromthemoon.com":    "add_image_title",
	"thedoghousediaries.com": "add_image_title",
	"theverge.com":           `add_dynamic_image, remove("div.duet--recirculation--related-list")`,
	"treelobsters.com":       "add_image_title",
	"webtoons.com":           `add_dynamic_image,replace("webtoon"|"swebtoon")`,
	"www.qwantz.com":         "add_image_title,add_mailto_subject",
	"www.recalbox.com":       "parse_markdown",
	"xkcd.com":               "add_image_title",
	"youtube.com":            "add_youtube_video",
}
|
Loading…
Add table
Add a link
Reference in a new issue