1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-07-02 16:38:37 +00:00

refactor(rewrite): rename Rewriter function to ApplyContentRewriteRules

This commit is contained in:
Frédéric Guillot 2025-06-10 20:21:47 -07:00
parent 7c857bdc72
commit 16df19b5d3
3 changed files with 149 additions and 64 deletions

View file

@ -62,9 +62,8 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
entry.URL = cleanedURL entry.URL = cleanedURL
} }
pageBaseURL := "" webpageBaseURL := ""
rewrittenURL := rewriteEntryURL(feed, entry) entry.URL = rewriteEntryURL(feed, entry)
entry.URL = rewrittenURL
entryIsNew := store.IsNewEntry(feed.ID, entry.Hash) entryIsNew := store.IsNewEntry(feed.ID, entry.Hash)
if feed.Crawler && (entryIsNew || forceRefresh) { if feed.Crawler && (entryIsNew || forceRefresh) {
slog.Debug("Scraping entry", slog.Debug("Scraping entry",
@ -76,7 +75,6 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
slog.String("feed_url", feed.FeedURL), slog.String("feed_url", feed.FeedURL),
slog.Bool("entry_is_new", entryIsNew), slog.Bool("entry_is_new", entryIsNew),
slog.Bool("force_refresh", forceRefresh), slog.Bool("force_refresh", forceRefresh),
slog.String("rewritten_url", rewrittenURL),
) )
startTime := time.Now() startTime := time.Now()
@ -94,12 +92,12 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
scrapedPageBaseURL, extractedContent, scraperErr := scraper.ScrapeWebsite( scrapedPageBaseURL, extractedContent, scraperErr := scraper.ScrapeWebsite(
requestBuilder, requestBuilder,
rewrittenURL, entry.URL,
feed.ScraperRules, feed.ScraperRules,
) )
if scrapedPageBaseURL != "" { if scrapedPageBaseURL != "" {
pageBaseURL = scrapedPageBaseURL webpageBaseURL = scrapedPageBaseURL
} }
if config.Opts.HasMetricsCollector() { if config.Opts.HasMetricsCollector() {
@ -124,14 +122,14 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
} }
} }
rewrite.Rewriter(rewrittenURL, entry, feed.RewriteRules) rewrite.ApplyContentRewriteRules(entry, feed.RewriteRules)
if pageBaseURL == "" { if webpageBaseURL == "" {
pageBaseURL = rewrittenURL webpageBaseURL = entry.URL
} }
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered out. // The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered out.
entry.Content = sanitizer.SanitizeHTML(pageBaseURL, entry.Content, &sanitizer.SanitizerOptions{OpenLinksInNewTab: user.OpenExternalLinksInNewTab}) entry.Content = sanitizer.SanitizeHTML(webpageBaseURL, entry.Content, &sanitizer.SanitizerOptions{OpenLinksInNewTab: user.OpenExternalLinksInNewTab})
updateEntryReadingTime(store, feed, entry, entryIsNew, user) updateEntryReadingTime(store, feed, entry, entryIsNew, user)
@ -148,7 +146,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
// ProcessEntryWebPage downloads the entry web page and applies rewrite rules. // ProcessEntryWebPage downloads the entry web page and applies rewrite rules.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error { func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
startTime := time.Now() startTime := time.Now()
rewrittenEntryURL := rewriteEntryURL(feed, entry) entry.URL = rewriteEntryURL(feed, entry)
requestBuilder := fetcher.NewRequestBuilder() requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithUserAgent(feed.UserAgent, config.Opts.HTTPClientUserAgent()) requestBuilder.WithUserAgent(feed.UserAgent, config.Opts.HTTPClientUserAgent())
@ -161,9 +159,9 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates) requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates)
requestBuilder.DisableHTTP2(feed.DisableHTTP2) requestBuilder.DisableHTTP2(feed.DisableHTTP2)
pageBaseURL, extractedContent, scraperErr := scraper.ScrapeWebsite( webpageBaseURL, extractedContent, scraperErr := scraper.ScrapeWebsite(
requestBuilder, requestBuilder,
rewrittenEntryURL, entry.URL,
feed.ScraperRules, feed.ScraperRules,
) )
@ -186,8 +184,8 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
} }
} }
rewrite.Rewriter(rewrittenEntryURL, entry, entry.Feed.RewriteRules) rewrite.ApplyContentRewriteRules(entry, entry.Feed.RewriteRules)
entry.Content = sanitizer.SanitizeHTML(pageBaseURL, entry.Content, &sanitizer.SanitizerOptions{OpenLinksInNewTab: user.OpenExternalLinksInNewTab}) entry.Content = sanitizer.SanitizeHTML(webpageBaseURL, entry.Content, &sanitizer.SanitizerOptions{OpenLinksInNewTab: user.OpenExternalLinksInNewTab})
return nil return nil
} }

View file

@ -97,9 +97,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) {
} }
} }
// Rewriter modify item contents with a set of rewriting rules. func ApplyContentRewriteRules(entry *model.Entry, customRewriteRules string) {
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) { rulesList := getPredefinedRewriteRules(entry.URL)
rulesList := getPredefinedRewriteRules(entryURL)
if customRewriteRules != "" { if customRewriteRules != "" {
rulesList = customRewriteRules rulesList = customRewriteRules
} }
@ -109,11 +108,11 @@ func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
slog.Debug("Rewrite rules applied", slog.Debug("Rewrite rules applied",
slog.Any("rules", rules), slog.Any("rules", rules),
slog.String("entry_url", entryURL), slog.String("entry_url", entry.URL),
) )
for _, rule := range rules { for _, rule := range rules {
rule.applyRule(entryURL, entry) rule.applyRule(entry.URL, entry)
} }
} }

View file

@ -50,14 +50,16 @@ func TestReplaceTextLinks(t *testing.T) {
func TestRewriteWithNoMatchingRule(t *testing.T) { func TestRewriteWithNoMatchingRule(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `Some text.`, Content: `Some text.`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `Some text.`, Content: `Some text.`,
} }
Rewriter("https://example.org/article", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -68,14 +70,16 @@ func TestRewriteWithYoutubeLink(t *testing.T) {
config.Opts = config.NewOptions() config.Opts = config.NewOptions()
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`, Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `Video Description`, Content: `Video Description`,
} }
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -95,14 +99,16 @@ func TestRewriteWithYoutubeLinkAndCustomEmbedURL(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `<iframe width="650" height="350" frameborder="0" src="https://invidious.custom/embed/1234" allowfullscreen></iframe><br>Video Description`, Content: `<iframe width="650" height="350" frameborder="0" src="https://invidious.custom/embed/1234" allowfullscreen></iframe><br>Video Description`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `Video Description`, Content: `Video Description`,
} }
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -111,14 +117,16 @@ func TestRewriteWithYoutubeLinkAndCustomEmbedURL(t *testing.T) {
func TestRewriteWithInexistingCustomRule(t *testing.T) { func TestRewriteWithInexistingCustomRule(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `Video Description`, Content: `Video Description`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://www.youtube.com/watch?v=1234",
Title: `A title`, Title: `A title`,
Content: `Video Description`, Content: `Video Description`,
} }
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`) ApplyContentRewriteRules(testEntry, `some rule`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -127,14 +135,16 @@ func TestRewriteWithInexistingCustomRule(t *testing.T) {
func TestRewriteWithXkcdLink(t *testing.T) { func TestRewriteWithXkcdLink(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`, Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`, Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
} }
Rewriter("https://xkcd.com/1912/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -143,14 +153,16 @@ func TestRewriteWithXkcdLink(t *testing.T) {
func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) { func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`, Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="&lt;foo&gt;"/><figcaption><p>&lt;foo&gt;</p></figcaption></figure>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`, Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`,
} }
Rewriter("https://xkcd.com/1912/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -159,14 +171,16 @@ func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) { func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`, Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`, Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
} }
Rewriter("https://xkcd.com/1912/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -175,14 +189,16 @@ func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) { func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `test`, Content: `test`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `test`, Content: `test`,
} }
Rewriter("https://xkcd.com/1912/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -191,14 +207,16 @@ func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
func TestRewriteWithXkcdAndNoImage(t *testing.T) { func TestRewriteWithXkcdAndNoImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `test`, Content: `test`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://xkcd.com/1912/",
Title: `A title`, Title: `A title`,
Content: `test`, Content: `test`,
} }
Rewriter("https://xkcd.com/1912/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -207,14 +225,16 @@ func TestRewriteWithXkcdAndNoImage(t *testing.T) {
func TestRewriteMailtoLink(t *testing.T) { func TestRewriteMailtoLink(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://www.qwantz.com/",
Title: `A title`, Title: `A title`,
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`, Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://www.qwantz.com/",
Title: `A title`, Title: `A title`,
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`, Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`,
} }
Rewriter("https://www.qwantz.com/", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -223,14 +243,16 @@ func TestRewriteMailtoLink(t *testing.T) {
func TestRewriteWithPDFLink(t *testing.T) { func TestRewriteWithPDFLink(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/document.pdf",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`, Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/document.pdf",
Title: `A title`, Title: `A title`,
Content: `test`, Content: `test`,
} }
Rewriter("https://example.org/document.pdf", testEntry, ``) ApplyContentRewriteRules(testEntry, ``)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -239,14 +261,16 @@ func TestRewriteWithPDFLink(t *testing.T) {
func TestRewriteWithNoLazyImage(t *testing.T) { func TestRewriteWithNoLazyImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`, Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`, Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -255,14 +279,16 @@ func TestRewriteWithNoLazyImage(t *testing.T) {
func TestRewriteWithLazyImage(t *testing.T) { func TestRewriteWithLazyImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`, Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`, Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -271,14 +297,16 @@ func TestRewriteWithLazyImage(t *testing.T) {
func TestRewriteWithLazyDivImage(t *testing.T) { func TestRewriteWithLazyDivImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`, Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"/></noscript>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`, Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -287,14 +315,16 @@ func TestRewriteWithLazyDivImage(t *testing.T) {
func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) { func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`, Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`, Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -303,14 +333,16 @@ func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
func TestRewriteWithLazySrcset(t *testing.T) { func TestRewriteWithLazySrcset(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`, Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`, Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -319,14 +351,16 @@ func TestRewriteWithLazySrcset(t *testing.T) {
func TestRewriteWithImageAndLazySrcset(t *testing.T) { func TestRewriteWithImageAndLazySrcset(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`, Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`, Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_image") ApplyContentRewriteRules(testEntry, "add_dynamic_image")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -335,14 +369,16 @@ func TestRewriteWithImageAndLazySrcset(t *testing.T) {
func TestRewriteWithNoLazyIframe(t *testing.T) { func TestRewriteWithNoLazyIframe(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe src="https://example.org/embed" allowfullscreen></iframe>`, Content: `<iframe src="https://example.org/embed" allowfullscreen></iframe>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe src="https://example.org/embed" allowfullscreen></iframe>`, Content: `<iframe src="https://example.org/embed" allowfullscreen></iframe>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_iframe") ApplyContentRewriteRules(testEntry, "add_dynamic_iframe")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -351,14 +387,16 @@ func TestRewriteWithNoLazyIframe(t *testing.T) {
func TestRewriteWithLazyIframe(t *testing.T) { func TestRewriteWithLazyIframe(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe data-src="https://example.org/embed" allowfullscreen="" src="https://example.org/embed"></iframe>`, Content: `<iframe data-src="https://example.org/embed" allowfullscreen="" src="https://example.org/embed"></iframe>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe data-src="https://example.org/embed" allowfullscreen></iframe>`, Content: `<iframe data-src="https://example.org/embed" allowfullscreen></iframe>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_iframe") ApplyContentRewriteRules(testEntry, "add_dynamic_iframe")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -367,14 +405,16 @@ func TestRewriteWithLazyIframe(t *testing.T) {
func TestRewriteWithLazyIframeAndSrc(t *testing.T) { func TestRewriteWithLazyIframeAndSrc(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe src="https://example.org/embed" data-src="https://example.org/embed" allowfullscreen=""></iframe>`, Content: `<iframe src="https://example.org/embed" data-src="https://example.org/embed" allowfullscreen=""></iframe>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<iframe src="about:blank" data-src="https://example.org/embed" allowfullscreen></iframe>`, Content: `<iframe src="about:blank" data-src="https://example.org/embed" allowfullscreen></iframe>`,
} }
Rewriter("https://example.org/article", testEntry, "add_dynamic_iframe") ApplyContentRewriteRules(testEntry, "add_dynamic_iframe")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -383,14 +423,16 @@ func TestRewriteWithLazyIframeAndSrc(t *testing.T) {
func TestNewLineRewriteRule(t *testing.T) { func TestNewLineRewriteRule(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `A<br>B<br>C`, Content: `A<br>B<br>C`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: "A\nB\nC", Content: "A\nB\nC",
} }
Rewriter("https://example.org/article", testEntry, "nl2br") ApplyContentRewriteRules(testEntry, "nl2br")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -399,14 +441,16 @@ func TestNewLineRewriteRule(t *testing.T) {
func TestConvertTextLinkRewriteRule(t *testing.T) { func TestConvertTextLinkRewriteRule(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`, Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `Test: http://example.org/a/b`, Content: `Test: http://example.org/a/b`,
} }
Rewriter("https://example.org/article", testEntry, "convert_text_link") ApplyContentRewriteRules(testEntry, "convert_text_link")
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -415,10 +459,12 @@ func TestConvertTextLinkRewriteRule(t *testing.T) {
func TestMediumImage(t *testing.T) { func TestMediumImage(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`, Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: ` Content: `
<figure class="ht hu hv hw hx hy cy cz paragraph-image"> <figure class="ht hu hv hw hx hy cy cz paragraph-image">
@ -440,7 +486,7 @@ func TestMediumImage(t *testing.T) {
</figure> </figure>
`, `,
} }
Rewriter("https://example.org/article", testEntry, "fix_medium_images") ApplyContentRewriteRules(testEntry, "fix_medium_images")
testEntry.Content = strings.TrimSpace(testEntry.Content) testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
@ -450,14 +496,16 @@ func TestMediumImage(t *testing.T) {
func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) { func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`, Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`, Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`,
} }
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images") ApplyContentRewriteRules(testEntry, "use_noscript_figure_images")
testEntry.Content = strings.TrimSpace(testEntry.Content) testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
@ -467,14 +515,16 @@ func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) { func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`, Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`, Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`,
} }
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images") ApplyContentRewriteRules(testEntry, "use_noscript_figure_images")
testEntry.Content = strings.TrimSpace(testEntry.Content) testEntry.Content = strings.TrimSpace(testEntry.Content)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
@ -484,14 +534,16 @@ func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
func TestRewriteReplaceCustom(t *testing.T) { func TestRewriteReplaceCustom(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`, Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`, Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`,
} }
Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`) ApplyContentRewriteRules(testEntry, `replace("article/(.*).svg"|"article/$1.png")`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -500,14 +552,16 @@ func TestRewriteReplaceCustom(t *testing.T) {
func TestRewriteReplaceTitleCustom(t *testing.T) { func TestRewriteReplaceTitleCustom(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `Ouch, a thistle`, Title: `Ouch, a thistle`,
Content: `The replace_title rewrite rule should not modify the content.`, Content: `The replace_title rewrite rule should not modify the content.`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `The replace_title rewrite rule should not modify the content.`, Content: `The replace_title rewrite rule should not modify the content.`,
} }
Rewriter("https://example.org/article", testEntry, `replace_title("(?i)^a\\s*ti"|"Ouch, a this")`) ApplyContentRewriteRules(testEntry, `replace_title("(?i)^a\\s*ti"|"Ouch, a this")`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -516,14 +570,16 @@ func TestRewriteReplaceTitleCustom(t *testing.T) {
func TestRewriteRemoveCustom(t *testing.T) { func TestRewriteRemoveCustom(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`, Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`, Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`,
} }
Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`) ApplyContentRewriteRules(testEntry, `remove(".spam, .ads:not(.keep)")`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -532,14 +588,16 @@ func TestRewriteRemoveCustom(t *testing.T) {
func TestRewriteAddCastopodEpisode(t *testing.T) { func TestRewriteAddCastopodEpisode(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://podcast.demo/@demo/episodes/test",
Title: `A title`, Title: `A title`,
Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`, Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://podcast.demo/@demo/episodes/test",
Title: `A title`, Title: `A title`,
Content: `Episode Description`, Content: `Episode Description`,
} }
Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`) ApplyContentRewriteRules(testEntry, `add_castopod_episode`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -548,14 +606,16 @@ func TestRewriteAddCastopodEpisode(t *testing.T) {
func TestRewriteBase64Decode(t *testing.T) { func TestRewriteBase64Decode(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `This is some base64 encoded content`, Content: `This is some base64 encoded content`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`, Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`,
} }
Rewriter("https://example.org/article", testEntry, `base64_decode`) ApplyContentRewriteRules(testEntry, `base64_decode`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -564,14 +624,16 @@ func TestRewriteBase64Decode(t *testing.T) {
func TestRewriteBase64DecodeInHTML(t *testing.T) { func TestRewriteBase64DecodeInHTML(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`, Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`, Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
} }
Rewriter("https://example.org/article", testEntry, `base64_decode`) ApplyContentRewriteRules(testEntry, `base64_decode`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -580,14 +642,16 @@ func TestRewriteBase64DecodeInHTML(t *testing.T) {
func TestRewriteBase64DecodeArgs(t *testing.T) { func TestRewriteBase64DecodeArgs(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`, Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`, Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
} }
Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`) ApplyContentRewriteRules(testEntry, `base64_decode(".base64")`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -596,14 +660,16 @@ func TestRewriteBase64DecodeArgs(t *testing.T) {
func TestRewriteRemoveTables(t *testing.T) { func TestRewriteRemoveTables(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`, Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`, Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`,
} }
Rewriter("https://example.org/article", testEntry, `remove_tables`) ApplyContentRewriteRules(testEntry, `remove_tables`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -612,14 +678,16 @@ func TestRewriteRemoveTables(t *testing.T) {
func TestRemoveClickbait(t *testing.T) { func TestRemoveClickbait(t *testing.T) {
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `This Is Amazing`, Title: `This Is Amazing`,
Content: `Some description`, Content: `Some description`,
} }
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `THIS IS AMAZING`, Title: `THIS IS AMAZING`,
Content: `Some description`, Content: `Some description`,
} }
Rewriter("https://example.org/article", testEntry, `remove_clickbait`) ApplyContentRewriteRules(testEntry, `remove_clickbait`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -628,6 +696,7 @@ func TestRemoveClickbait(t *testing.T) {
func TestAddHackerNewsLinksUsingHack(t *testing.T) { func TestAddHackerNewsLinksUsingHack(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p> Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p>
<p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a></p> <p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a></p>
@ -636,13 +705,14 @@ func TestAddHackerNewsLinksUsingHack(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p> Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p>
<p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a> <a href="hack://item?id=37620043">Open with HACK</a></p> <p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a> <a href="hack://item?id=37620043">Open with HACK</a></p>
<p>Points: 23</p> <p>Points: 23</p>
<p># Comments: 38</p>`, <p># Comments: 38</p>`,
} }
Rewriter("https://example.org/article", testEntry, `add_hn_links_using_hack`) ApplyContentRewriteRules(testEntry, `add_hn_links_using_hack`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -651,6 +721,7 @@ func TestAddHackerNewsLinksUsingHack(t *testing.T) {
func TestAddHackerNewsLinksUsingOpener(t *testing.T) { func TestAddHackerNewsLinksUsingOpener(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p> Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p>
<p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a></p> <p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a></p>
@ -659,13 +730,14 @@ func TestAddHackerNewsLinksUsingOpener(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p> Content: `<p>Article URL: <a href="https://example.org/url">https://example.org/article</a></p>
<p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a> <a href="opener://x-callback-url/show-options?url=https%3A%2F%2Fnews.ycombinator.com%2Fitem%3Fid%3D37620043">Open with Opener</a></p> <p>Comments URL: <a href="https://news.ycombinator.com/item?id=37620043">https://news.ycombinator.com/item?id=37620043</a> <a href="opener://x-callback-url/show-options?url=https%3A%2F%2Fnews.ycombinator.com%2Fitem%3Fid%3D37620043">Open with Opener</a></p>
<p>Points: 23</p> <p>Points: 23</p>
<p># Comments: 38</p>`, <p># Comments: 38</p>`,
} }
Rewriter("https://example.org/article", testEntry, `add_hn_links_using_opener`) ApplyContentRewriteRules(testEntry, `add_hn_links_using_opener`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -674,6 +746,7 @@ func TestAddHackerNewsLinksUsingOpener(t *testing.T) {
func TestAddImageTitle(t *testing.T) { func TestAddImageTitle(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: ` Content: `
<img src="pif" title="pouf"> <img src="pif" title="pouf">
@ -687,6 +760,7 @@ func TestAddImageTitle(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure><img src="pif" alt=""/><figcaption><p>pouf</p></figcaption></figure> Content: `<figure><img src="pif" alt=""/><figcaption><p>pouf</p></figcaption></figure>
<figure><img src="pif" alt="" onerror="alert(1)" a=""/><figcaption><p>pouf</p></figcaption></figure> <figure><img src="pif" alt="" onerror="alert(1)" a=""/><figcaption><p>pouf</p></figcaption></figure>
@ -697,7 +771,7 @@ func TestAddImageTitle(t *testing.T) {
<figure><img src="pif" alt="pouf"/><figcaption><p>;&amp;quot;onerror=alert(1) a=;&amp;quot;</p></figcaption></figure> <figure><img src="pif" alt="pouf"/><figcaption><p>;&amp;quot;onerror=alert(1) a=;&amp;quot;</p></figcaption></figure>
`, `,
} }
Rewriter("https://example.org/article", testEntry, `add_image_title`) ApplyContentRewriteRules(testEntry, `add_image_title`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -706,6 +780,7 @@ func TestAddImageTitle(t *testing.T) {
func TestFixGhostCard(t *testing.T) { func TestFixGhostCard(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article"> <a class="kg-bookmark-container" href="https://example.org/article">
@ -726,10 +801,11 @@ func TestFixGhostCard(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`, Content: `<a href="https://example.org/article">Example Article - Example</a>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -738,15 +814,17 @@ func TestFixGhostCard(t *testing.T) {
func TestFixGhostCardNoCard(t *testing.T) { func TestFixGhostCardNoCard(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`, Content: `<a href="https://example.org/article">Example Article - Example</a>`,
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`, Content: `<a href="https://example.org/article">Example Article - Example</a>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -755,6 +833,7 @@ func TestFixGhostCardNoCard(t *testing.T) {
func TestFixGhostCardInvalidCard(t *testing.T) { func TestFixGhostCardInvalidCard(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a> <a href="https://example.org/article">This card does not have the required fields</a>
@ -762,12 +841,13 @@ func TestFixGhostCardInvalidCard(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a href="https://example.org/article">This card does not have the required fields</a> <a href="https://example.org/article">This card does not have the required fields</a>
</figure>`, </figure>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -776,6 +856,7 @@ func TestFixGhostCardInvalidCard(t *testing.T) {
func TestFixGhostCardMissingAuthor(t *testing.T) { func TestFixGhostCardMissingAuthor(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article"> <a class="kg-bookmark-container" href="https://example.org/article">
@ -791,10 +872,11 @@ func TestFixGhostCardMissingAuthor(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article">Example Article</a>`, Content: `<a href="https://example.org/article">Example Article</a>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -803,6 +885,7 @@ func TestFixGhostCardMissingAuthor(t *testing.T) {
func TestFixGhostCardDuplicatedAuthor(t *testing.T) { func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article"> <a class="kg-bookmark-container" href="https://example.org/article">
@ -823,10 +906,11 @@ func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article">Example Article - Example</a>`, Content: `<a href="https://example.org/article">Example Article - Example</a>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -835,6 +919,7 @@ func TestFixGhostCardDuplicatedAuthor(t *testing.T) {
func TestFixGhostCardMultiple(t *testing.T) { func TestFixGhostCardMultiple(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1"> <a class="kg-bookmark-container" href="https://example.org/article1">
@ -871,10 +956,11 @@ func TestFixGhostCardMultiple(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<ul><li><a href="https://example.org/article1">Example Article 1 - Example</a></li><li><a href="https://example.org/article2">Example Article 2 - Example</a></li></ul>`, Content: `<ul><li><a href="https://example.org/article1">Example Article 1 - Example</a></li><li><a href="https://example.org/article2">Example Article 2 - Example</a></li></ul>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
@ -883,6 +969,7 @@ func TestFixGhostCardMultiple(t *testing.T) {
func TestFixGhostCardMultipleSplit(t *testing.T) { func TestFixGhostCardMultipleSplit(t *testing.T) {
testEntry := &model.Entry{ testEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<figure class="kg-card kg-bookmark-card"> Content: `<figure class="kg-card kg-bookmark-card">
<a class="kg-bookmark-container" href="https://example.org/article1"> <a class="kg-bookmark-container" href="https://example.org/article1">
@ -920,12 +1007,13 @@ func TestFixGhostCardMultipleSplit(t *testing.T) {
} }
controlEntry := &model.Entry{ controlEntry := &model.Entry{
URL: "https://example.org/article",
Title: `A title`, Title: `A title`,
Content: `<a href="https://example.org/article1">Example Article 1 - Example</a> Content: `<a href="https://example.org/article1">Example Article 1 - Example</a>
<p>This separates the two cards</p> <p>This separates the two cards</p>
<a href="https://example.org/article2">Example Article 2 - Example</a>`, <a href="https://example.org/article2">Example Article 2 - Example</a>`,
} }
Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) ApplyContentRewriteRules(testEntry, `fix_ghost_cards`)
if !reflect.DeepEqual(testEntry, controlEntry) { if !reflect.DeepEqual(testEntry, controlEntry) {
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)