Mirror of https://github.com/miniflux/v2.git (synced 2025-06-27 16:36:00 +00:00)

Add the possibility to define rewrite rules for each feed

Parent: 87ccad5c7f
Commit: 33445e5b68
29 changed files with 214 additions and 72 deletions

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 20:08:14.447304303 -0800 PST m=+0.040286758
+// 2017-12-11 22:04:47.860104328 -0800 PST m=+0.042425898
 
 package locale

@@ -168,12 +168,13 @@ var translations = map[string]string{
 "Fever Username": "Nom d'utilisateur pour l'API de Fever",
 "Fever Password": "Mot de passe pour l'API de Fever",
 "Fetch original content": "Récupérer le contenu original",
-"Scraper Rules": "Règles pour récupérer le contenu original"
+"Scraper Rules": "Règles pour récupérer le contenu original",
+"Rewrite Rules": "Règles de réécriture"
 }
 `,
 }
 
 var translationsChecksums = map[string]string{
 "en_US": "6fe95384260941e8a5a3c695a655a932e0a8a6a572c1e45cb2b1ae8baa01b897",
-"fr_FR": "4426cea875ee2c9acb1a2b0619cb82f3a32f71aabe5d07657eaf2f6b7387c5f9",
+"fr_FR": "0e14d65f38ca5c5e34f1d84f6837ce8a29a4ae5f8836b384bb098222b724cb5b",
 }

@@ -152,5 +152,6 @@
 "Fever Username": "Nom d'utilisateur pour l'API de Fever",
 "Fever Password": "Mot de passe pour l'API de Fever",
 "Fetch original content": "Récupérer le contenu original",
-"Scraper Rules": "Règles pour récupérer le contenu original"
+"Scraper Rules": "Règles pour récupérer le contenu original",
+"Rewrite Rules": "Règles de réécriture"
 }

BIN	miniflux-test	(Executable file)
Binary file not shown.

@@ -23,6 +23,7 @@ type Feed struct {
 	ParsingErrorMsg string `json:"parsing_error_message,omitempty"`
 	ParsingErrorCount int `json:"parsing_error_count,omitempty"`
 	ScraperRules string `json:"scraper_rules"`
+	RewriteRules string `json:"rewrite_rules"`
 	Category *Category `json:"category,omitempty"`
 	Entries Entries `json:"entries,omitempty"`
 	Icon *FeedIcon `json:"icon,omitempty"`

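The new field rides on the existing JSON tags of the feed model. A minimal sketch, not part of this diff: the import path and field name come from the hunk above, the feed value is hypothetical.

package main

import (
	"encoding/json"
	"fmt"

	"github.com/miniflux/miniflux2/model"
)

func main() {
	// Hypothetical feed: the per-feed rewrite rules are serialized as "rewrite_rules".
	feed := model.Feed{RewriteRules: "add_image_title"}
	payload, err := json.Marshal(&feed)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(payload))
}
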
@@ -14,7 +14,6 @@ import (
 	"github.com/miniflux/miniflux2/helper"
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/date"
-	"github.com/miniflux/miniflux2/reader/processor"
 )
 
 type atomFeed struct {

@@ -87,7 +86,7 @@ func (a *atomEntry) Transform() *model.Entry {
 	entry.Date = getDate(a)
 	entry.Author = getAuthor(a.Author)
 	entry.Hash = getHash(a)
-	entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a))
+	entry.Content = getContent(a)
 	entry.Title = strings.TrimSpace(a.Title)
 	entry.Enclosures = getEnclosures(a)

@@ -14,6 +14,7 @@ import (
 	"github.com/miniflux/miniflux2/http"
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/icon"
+	"github.com/miniflux/miniflux2/reader/processor"
 	"github.com/miniflux/miniflux2/storage"
 )

@@ -63,6 +64,9 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed,
 		return nil, err
 	}
 
+	feedProcessor := processor.NewFeedProcessor(subscription)
+	feedProcessor.Process()
+
 	subscription.Category = &model.Category{ID: categoryID}
 	subscription.EtagHeader = response.ETag
 	subscription.LastModifiedHeader = response.LastModified

@@ -136,6 +140,11 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 		return err
 	}
 
+	feedProcessor := processor.NewFeedProcessor(subscription)
+	feedProcessor.WithScraperRules(originalFeed.ScraperRules)
+	feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
+	feedProcessor.Process()
+
 	originalFeed.EtagHeader = response.ETag
 	originalFeed.LastModifiedHeader = response.LastModified

@@ -9,12 +9,10 @@ import (
 	"strings"
 	"time"
 
-	"github.com/miniflux/miniflux2/reader/sanitizer"
-
 	"github.com/miniflux/miniflux2/helper"
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/date"
-	"github.com/miniflux/miniflux2/reader/processor"
+	"github.com/miniflux/miniflux2/reader/sanitizer"
 )
 
 type jsonFeed struct {

@@ -148,7 +146,7 @@ func (j *jsonItem) Transform() *model.Entry {
 	entry.Date = j.GetDate()
 	entry.Author = j.GetAuthor()
 	entry.Hash = j.GetHash()
-	entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent())
+	entry.Content = j.GetContent()
 	entry.Title = strings.TrimSpace(j.GetTitle())
 	entry.Enclosures = j.GetEnclosures()
 	return entry

@@ -148,7 +148,7 @@ func TestParsePodcast(t *testing.T) {
 		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
 	}
 
-	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
+	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
 		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
 	}

@@ -5,12 +5,37 @@
 package processor
 
 import (
+	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/rewrite"
 	"github.com/miniflux/miniflux2/reader/sanitizer"
 )
 
-// ItemContentProcessor executes a set of functions to sanitize and alter item contents.
-func ItemContentProcessor(url, content string) string {
-	content = sanitizer.Sanitize(url, content)
-	return rewrite.Rewriter(url, content)
+// FeedProcessor handles the processing of feed contents.
+type FeedProcessor struct {
+	feed         *model.Feed
+	scraperRules string
+	rewriteRules string
+}
+
+// WithScraperRules adds scraper rules to the processing.
+func (f *FeedProcessor) WithScraperRules(rules string) {
+	f.scraperRules = rules
+}
+
+// WithRewriteRules adds rewrite rules to the processing.
+func (f *FeedProcessor) WithRewriteRules(rules string) {
+	f.rewriteRules = rules
+}
+
+// Process applies rewrite and scraper rules.
+func (f *FeedProcessor) Process() {
+	for _, entry := range f.feed.Entries {
+		entry.Content = sanitizer.Sanitize(entry.URL, entry.Content)
+		entry.Content = rewrite.Rewriter(entry.URL, entry.Content, f.rewriteRules)
+	}
+}
+
+// NewFeedProcessor returns a new FeedProcessor.
+func NewFeedProcessor(feed *model.Feed) *FeedProcessor {
+	return &FeedProcessor{feed: feed}
 }

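A minimal usage sketch of the FeedProcessor added above, mirroring the call sequence used by Handler.RefreshFeed earlier in this diff; the feed value itself is hypothetical and only meant to illustrate the flow.

package main

import (
	"fmt"

	"github.com/miniflux/miniflux2/model"
	"github.com/miniflux/miniflux2/reader/processor"
)

func main() {
	// Hypothetical feed with one entry and a custom rewrite rule.
	feed := &model.Feed{
		RewriteRules: "add_image_title",
		Entries: model.Entries{
			&model.Entry{
				URL:     "https://xkcd.com/1912/",
				Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Hypothetical title text">`,
			},
		},
	}

	// Each entry is sanitized first, then the feed's rewrite rules are applied.
	feedProcessor := processor.NewFeedProcessor(feed)
	feedProcessor.WithRewriteRules(feed.RewriteRules)
	feedProcessor.Process()

	fmt.Println(feed.Entries[0].Content)
}
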
@@ -10,10 +10,8 @@ import (
 	"time"
 
 	"github.com/miniflux/miniflux2/helper"
-	"github.com/miniflux/miniflux2/reader/processor"
-	"github.com/miniflux/miniflux2/reader/sanitizer"
-
 	"github.com/miniflux/miniflux2/model"
+	"github.com/miniflux/miniflux2/reader/sanitizer"
 )
 
 type rdfFeed struct {

@@ -58,7 +56,7 @@ func (r *rdfItem) Transform() *model.Entry {
 	entry.Title = strings.TrimSpace(r.Title)
 	entry.Author = strings.TrimSpace(r.Creator)
 	entry.URL = r.Link
-	entry.Content = processor.ItemContentProcessor(entry.URL, r.Description)
+	entry.Content = r.Description
 	entry.Hash = getHash(r)
 	entry.Date = time.Now()
 	return entry

40	reader/rewrite/rewrite_functions.go	(new normal file)
@@ -0,0 +1,40 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rewrite
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+var (
+	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
+)
+
+func addImageTitle(entryURL, entryContent string) string {
+	doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
+	if err != nil {
+		return entryContent
+	}
+
+	imgTag := doc.Find("img").First()
+	if titleAttr, found := imgTag.Attr("title"); found {
+		return entryContent + `<blockquote cite="` + entryURL + `">` + titleAttr + "</blockquote>"
+	}
+
+	return entryContent
+}
+
+func addYoutubeVideo(entryURL, entryContent string) string {
+	matches := youtubeRegex.FindStringSubmatch(entryURL)
+
+	if len(matches) == 2 {
+		video := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/` + matches[1] + `" allowfullscreen></iframe>`
+		return video + "<p>" + entryContent + "</p>"
+	}
+	return entryContent
+}

@@ -5,44 +5,39 @@
 package rewrite
 
 import (
-	"regexp"
 	"strings"
 
-	"github.com/PuerkitoBio/goquery"
+	"github.com/miniflux/miniflux2/url"
 )
 
-var rewriteRules = []func(string, string) string{
-	func(url, content string) string {
-		re := regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
-		matches := re.FindStringSubmatch(url)
-
-		if len(matches) == 2 {
-			video := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/` + matches[1] + `" allowfullscreen></iframe>`
-			return video + "<p>" + content + "</p>"
-		}
-		return content
-	},
-	func(url, content string) string {
-		if strings.HasPrefix(url, "https://xkcd.com") {
-			doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
-			if err != nil {
-				return content
-			}
-
-			imgTag := doc.Find("img").First()
-			if titleAttr, found := imgTag.Attr("title"); found {
-				return content + `<blockquote cite="` + url + `">` + titleAttr + "</blockquote>"
-			}
-		}
-		return content
-	},
-}
-
 // Rewriter modify item contents with a set of rewriting rules.
-func Rewriter(url, content string) string {
-	for _, rewriteRule := range rewriteRules {
-		content = rewriteRule(url, content)
+func Rewriter(entryURL, entryContent, customRewriteRules string) string {
+	rulesList := getPredefinedRewriteRules(entryURL)
+	if customRewriteRules != "" {
+		rulesList = customRewriteRules
 	}
 
-	return content
+	rules := strings.Split(rulesList, ",")
+	for _, rule := range rules {
+		switch strings.TrimSpace(rule) {
+		case "add_image_title":
+			entryContent = addImageTitle(entryURL, entryContent)
+		case "add_youtube_video":
+			entryContent = addYoutubeVideo(entryURL, entryContent)
+		}
+	}
+
+	return entryContent
+}
+
+func getPredefinedRewriteRules(entryURL string) string {
+	urlDomain := url.Domain(entryURL)
+
+	for domain, rules := range predefinedRules {
+		if strings.Contains(urlDomain, domain) {
+			return rules
+		}
+	}
+
+	return ""
 }

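A small sketch of calling the new three-argument Rewriter directly; it is not part of this diff, and the inputs and expected behaviour are the ones exercised by the tests that follow.

package main

import (
	"fmt"

	"github.com/miniflux/miniflux2/reader/rewrite"
)

func main() {
	// No custom rules: the predefined "add_youtube_video" rule matches youtube.com
	// and prepends an embedded player to the entry content.
	fmt.Println(rewrite.Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", ""))

	// A non-empty custom rule string replaces the predefined rules entirely;
	// an unknown rule name leaves the content untouched.
	fmt.Println(rewrite.Rewriter("https://www.youtube.com/watch?v=1234", "Video Description", "some rule"))
}
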
@@ -7,7 +7,7 @@ package rewrite
 import "testing"
 
 func TestRewriteWithNoMatchingRule(t *testing.T) {
-	output := Rewriter("https://example.org/article", `Some text.`)
+	output := Rewriter("https://example.org/article", `Some text.`, ``)
 	expected := `Some text.`
 
 	if expected != output {

@@ -16,7 +16,7 @@ func TestRewriteWithNoMatchingRule(t *testing.T) {
 }
 
 func TestRewriteWithYoutubeLink(t *testing.T) {
-	output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`)
+	output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, ``)
 	expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><p>Video Description</p>`
 
 	if expected != output {

@@ -24,11 +24,37 @@ func TestRewriteWithYoutubeLink(t *testing.T) {
 	}
 }
 
+func TestRewriteWithInexistingCustomRule(t *testing.T) {
+	output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`, `some rule`)
+	expected := `Video Description`
+	if expected != output {
+		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	}
+}
+
 func TestRewriteWithXkcdLink(t *testing.T) {
 	description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
-	output := Rewriter("https://xkcd.com/1912/", description)
+	output := Rewriter("https://xkcd.com/1912/", description, ``)
 	expected := description + `<blockquote cite="https://xkcd.com/1912/">Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</blockquote>`
 	if expected != output {
 		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
 	}
 }
+
+func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
+	description := "test"
+	output := Rewriter("https://xkcd.com/1912/", description, ``)
+	expected := description
+	if expected != output {
+		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	}
+}
+
+func TestRewriteWithXkcdAndNoImage(t *testing.T) {
+	description := "test"
+	output := Rewriter("https://xkcd.com/1912/", description, ``)
+	expected := description
+
+	if expected != output {
+		t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
+	}
+}

30	reader/rewrite/rules.go	(new normal file)
@@ -0,0 +1,30 @@
+// Copyright 2017 Frédéric Guillot. All rights reserved.
+// Use of this source code is governed by the Apache 2.0
+// license that can be found in the LICENSE file.
+
+package rewrite
+
+// List of predefined rewrite rules (alphabetically sorted)
+// Available rules: "add_image_title", "add_youtube_video"
+// domain => rule name
+var predefinedRules = map[string]string{
+	"abstrusegoose.com": "add_image_title",
+	"amazingsuperpowers.com": "add_image_title",
+	"cowbirdsinlove.com": "add_image_title",
+	"drawingboardcomic.com": "add_image_title",
+	"exocomics.com": "add_image_title",
+	"happletea.com": "add_image_title",
+	"imogenquest.net": "add_image_title",
+	"lukesurl.com": "add_image_title",
+	"mercworks.net": "add_image_title",
+	"mrlovenstein.com": "add_image_title",
+	"nedroid.com": "add_image_title",
+	"oglaf.com": "add_image_title",
+	"optipess.com": "add_image_title",
+	"peebleslab.com": "add_image_title",
+	"sentfromthemoon.com": "add_image_title",
+	"thedoghousediaries.com": "add_image_title",
+	"treelobsters.com": "add_image_title",
+	"youtube.com": "add_youtube_video",
+	"xkcd.com": "add_image_title",
+}

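These predefined rules are keyed by domain and resolved in getPredefinedRewriteRules with url.Domain plus a substring match, so subdomains of the listed sites are covered too. Several rule names can also be combined in a feed's custom rewrite_rules value, since Rewriter splits the list on commas and trims each name. An illustrative sketch, with a hypothetical URL and content:

package main

import (
	"fmt"

	"github.com/miniflux/miniflux2/reader/rewrite"
)

func main() {
	// Hypothetical per-feed value combining two of the rules listed above.
	customRules := "add_image_title, add_youtube_video"

	content := rewrite.Rewriter(
		"https://www.youtube.com/watch?v=42",
		`<img src="https://example.org/comic.png" title="Hypothetical title text">`,
		customRules,
	)
	fmt.Println(content)
}
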
@@ -94,7 +94,7 @@ func TestParseRss2Sample(t *testing.T) {
 		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
 	}
 
-	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
+	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.` {
 		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
 	}
 }

@@ -383,7 +383,7 @@ func TestParseEntryWithContentEncoded(t *testing.T) {
 		t.Error(err)
 	}
 
-	if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
+	if feed.Entries[0].Content != `<p><a href="http://www.example.org/">Example</a>.</p>` {
 		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
 	}
 }

@@ -15,7 +15,6 @@ import (
 	"github.com/miniflux/miniflux2/helper"
 	"github.com/miniflux/miniflux2/model"
 	"github.com/miniflux/miniflux2/reader/date"
-	"github.com/miniflux/miniflux2/reader/processor"
 )
 
 type rssFeed struct {

@@ -211,7 +210,7 @@ func (r *rssItem) Transform() *model.Entry {
 	entry.Date = r.GetDate()
 	entry.Author = r.GetAuthor()
 	entry.Hash = r.GetHash()
-	entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
+	entry.Content = r.GetContent()
 	entry.Title = strings.TrimSpace(r.Title)
 	entry.Enclosures = r.GetEnclosures()

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 18:56:24.36887959 -0800 PST m=+0.010858677
+// 2017-12-11 22:04:47.832384663 -0800 PST m=+0.014706233
 
 package static

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 18:56:24.370410193 -0800 PST m=+0.012389280
+// 2017-12-11 22:04:47.835872498 -0800 PST m=+0.018194068
 
 package static

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 18:56:24.37299237 -0800 PST m=+0.014971457
+// 2017-12-11 22:04:47.840119593 -0800 PST m=+0.022441163
 
 package static

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 18:56:24.386027486 -0800 PST m=+0.028006573
+// 2017-12-11 22:04:47.859021405 -0800 PST m=+0.041342975
 
 package template

@@ -48,6 +48,9 @@
 <label for="form-scraper-rules">{{ t "Scraper Rules" }}</label>
 <input type="text" name="scraper_rules" id="form-scraper-rules" value="{{ .form.ScraperRules }}">
 
+<label for="form-rewrite-rules">{{ t "Rewrite Rules" }}</label>
+<input type="text" name="rewrite_rules" id="form-rewrite-rules" value="{{ .form.RewriteRules }}">
+
 <label for="form-category">{{ t "Category" }}</label>
 <select id="form-category" name="category_id">
 {{ range .categories }}

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 20:08:14.428877093 -0800 PST m=+0.021859548
+// 2017-12-11 22:04:47.843652435 -0800 PST m=+0.025974005
 
 package template

@@ -398,6 +398,9 @@ var templateViewsMap = map[string]string{
 <label for="form-scraper-rules">{{ t "Scraper Rules" }}</label>
 <input type="text" name="scraper_rules" id="form-scraper-rules" value="{{ .form.ScraperRules }}">
 
+<label for="form-rewrite-rules">{{ t "Rewrite Rules" }}</label>
+<input type="text" name="rewrite_rules" id="form-rewrite-rules" value="{{ .form.RewriteRules }}">
+
 <label for="form-category">{{ t "Category" }}</label>
 <select id="form-category" name="category_id">
 {{ range .categories }}

@@ -1184,7 +1187,7 @@ var templateViewsMapChecksums = map[string]string{
 "create_category": "2b82af5d2dcd67898dc5daa57a6461e6ff8121a6089b2a2a1be909f35e4a2275",
 "create_user": "45e226df757126d5fe7c464e295e9a34f07952cfdb71e31e49839850d35af139",
 "edit_category": "cee720faadcec58289b707ad30af623d2ee66c1ce23a732965463250d7ff41c5",
-"edit_feed": "b3c7dd5e93d58e051abcd59da31217d8e9b50587014b895d1b7c9172247b35f8",
+"edit_feed": "e33e64de5e2b9c12580e693d048c2fab907968d4e7cddb2055d0251efc5b75e4",
 "edit_user": "82d9749d76ddbd2352816d813c4b1f6d92f2222de678b4afe5821090246735c7",
 "entry": "ebcf9bb35812dd02759718f7f7411267e6a6c8efd59a9aa0a0e735bcb88efeff",
 "feed_entries": "547c19eb36b20e350ce70ed045173b064cdcd6b114afb241c9f2dda9d88fcc27",

@@ -221,6 +221,7 @@ func (c *Controller) getFeedFormTemplateArgs(ctx *core.Context, user *model.User
 		FeedURL: feed.FeedURL,
 		Title: feed.Title,
 		ScraperRules: feed.ScraperRules,
+		RewriteRules: feed.RewriteRules,
 		CategoryID: feed.Category.ID,
 	}
 } else {

@@ -18,6 +18,7 @@ type FeedForm struct {
 	SiteURL string
 	Title string
 	ScraperRules string
+	RewriteRules string
 	CategoryID int64
 }

@@ -36,6 +37,7 @@ func (f FeedForm) Merge(feed *model.Feed) *model.Feed {
 	feed.SiteURL = f.SiteURL
 	feed.FeedURL = f.FeedURL
 	feed.ScraperRules = f.ScraperRules
+	feed.RewriteRules = f.RewriteRules
 	feed.ParsingErrorCount = 0
 	feed.ParsingErrorMsg = ""
 	return feed

@@ -53,6 +55,7 @@ func NewFeedForm(r *http.Request) *FeedForm {
 		SiteURL: r.FormValue("site_url"),
 		Title: r.FormValue("title"),
 		ScraperRules: r.FormValue("scraper_rules"),
+		RewriteRules: r.FormValue("rewrite_rules"),
 		CategoryID: int64(categoryID),
 	}
 }

1	sql/schema_version_7.sql	(new normal file)
@@ -0,0 +1 @@
+alter table feeds add column rewrite_rules text default '';

@@ -1,5 +1,5 @@
 // Code generated by go generate; DO NOT EDIT.
-// 2017-12-10 20:08:14.411225368 -0800 PST m=+0.004207823
+// 2017-12-11 22:04:47.821813568 -0800 PST m=+0.004135138
 
 package sql

@@ -138,6 +138,8 @@ alter table users add column entry_direction entry_sorting_direction default 'as
 )
 `,
 	"schema_version_6": `alter table feeds add column scraper_rules text default '';
+`,
+	"schema_version_7": `alter table feeds add column rewrite_rules text default '';
 `,
 }

@@ -148,4 +150,5 @@ var SqlMapChecksums = map[string]string{
 	"schema_version_4": "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
 	"schema_version_5": "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
 	"schema_version_6": "9d05b4fb223f0e60efc716add5048b0ca9c37511cf2041721e20505d6d798ce4",
+	"schema_version_7": "33f298c9aa30d6de3ca28e1270df51c2884d7596f1283a75716e2aeb634cd05c",
 }

@@ -152,7 +152,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 		SELECT
 		e.id, e.user_id, e.feed_id, e.hash, e.published_at at time zone '%s', e.title, e.url, e.author, e.content, e.status,
 		f.title as feed_title, f.feed_url, f.site_url, f.checked_at,
-		f.category_id, c.title as category_title, f.scraper_rules,
+		f.category_id, c.title as category_title, f.scraper_rules, f.rewrite_rules,
 		fi.icon_id
 		FROM entries e
 		LEFT JOIN feeds f ON f.id=e.feed_id

@@ -198,6 +198,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 		&entry.Feed.Category.ID,
 		&entry.Feed.Category.Title,
 		&entry.Feed.ScraperRules,
+		&entry.Feed.RewriteRules,
 		&iconID,
 	)

@@ -52,7 +52,8 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
 	feeds := make(model.Feeds, 0)
 	query := `SELECT
 		f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
-		f.user_id, f.checked_at, f.parsing_error_count, f.parsing_error_msg, f.scraper_rules,
+		f.user_id, f.checked_at, f.parsing_error_count, f.parsing_error_msg,
+		f.scraper_rules, f.rewrite_rules,
 		f.category_id, c.title as category_title,
 		fi.icon_id
 		FROM feeds f

@@ -85,6 +86,7 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
 		&feed.ParsingErrorCount,
 		&errorMsg,
 		&feed.ScraperRules,
+		&feed.RewriteRules,
 		&feed.Category.ID,
 		&feed.Category.Title,
 		&iconID,

@@ -123,7 +125,8 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
 	query := `
 		SELECT
 		f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
-		f.user_id, f.checked_at, f.parsing_error_count, f.parsing_error_msg, f.scraper_rules,
+		f.user_id, f.checked_at, f.parsing_error_count, f.parsing_error_msg,
+		f.scraper_rules, f.rewrite_rules,
 		f.category_id, c.title as category_title
 		FROM feeds f
 		LEFT JOIN categories c ON c.id=f.category_id

@@ -141,6 +144,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
 		&feed.ParsingErrorCount,
 		&feed.ParsingErrorMsg,
 		&feed.ScraperRules,
+		&feed.RewriteRules,
 		&feed.Category.ID,
 		&feed.Category.Title,
 	)

@@ -197,8 +201,8 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
 
 	query := `UPDATE feeds SET
 		feed_url=$1, site_url=$2, title=$3, category_id=$4, etag_header=$5, last_modified_header=$6, checked_at=$7,
-		parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10
-		WHERE id=$11 AND user_id=$12`
+		parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10, rewrite_rules=$11
+		WHERE id=$12 AND user_id=$13`
 
 	_, err = s.db.Exec(query,
 		feed.FeedURL,

@@ -211,6 +215,7 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
 		feed.ParsingErrorMsg,
 		feed.ParsingErrorCount,
 		feed.ScraperRules,
+		feed.RewriteRules,
 		feed.ID,
 		feed.UserID,
 	)

@@ -12,7 +12,7 @@ import (
 	"github.com/miniflux/miniflux2/sql"
 )
 
-const schemaVersion = 6
+const schemaVersion = 7
 
 // Migrate run database migrations.
 func (s *Storage) Migrate() {
