diff --git a/.github/workflows/build_binaries.yml b/.github/workflows/build_binaries.yml index 7de6e733..62ff3a4e 100644 --- a/.github/workflows/build_binaries.yml +++ b/.github/workflows/build_binaries.yml @@ -9,13 +9,13 @@ jobs: name: Build runs-on: ubuntu-latest steps: + - name: Checkout + uses: actions/checkout@v4 - name: Set up Golang uses: actions/setup-go@v5 with: go-version: "1.23.x" check-latest: true - - name: Checkout - uses: actions/checkout@v4 - name: Compile binaries env: CGO_ENABLED: 0 diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f9df2dd0..b2eb4d95 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -22,8 +22,6 @@ jobs: strategy: fail-fast: false - matrix: - language: [ 'go', 'javascript' ] steps: - name: Checkout repository diff --git a/.github/workflows/linters.yml b/.github/workflows/linters.yml index 695dea78..c26957ab 100644 --- a/.github/workflows/linters.yml +++ b/.github/workflows/linters.yml @@ -29,7 +29,6 @@ jobs: - uses: actions/setup-go@v5 with: go-version: "1.23.x" - - run: "go vet ./..." - uses: golangci/golangci-lint-action@v6 with: args: > diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ff6d16da..1d4af838 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -17,14 +17,18 @@ jobs: os: [ubuntu-latest, windows-latest, macOS-latest] go-version: ["1.23.x"] steps: + - name: Checkout + uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go-version }} - - name: Checkout - uses: actions/checkout@v4 - - name: Run unit tests + - name: Run unit tests with coverage and race conditions checking + if: matrix.os == 'ubuntu-latest' run: make test + - name: Run unit tests without coverage and race conditions checking + if: matrix.os != 'ubuntu-latest' + run: go test ./... 
integration-tests: name: Integration Tests @@ -40,12 +44,12 @@ jobs: - 5432:5432 options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 steps: + - name: Checkout + uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: "1.23.x" - - name: Checkout - uses: actions/checkout@v4 - name: Install Postgres client run: sudo apt update && sudo apt install -y postgresql-client - name: Run integration tests diff --git a/ChangeLog b/ChangeLog index 3c273451..4a4d699e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,64 @@ +Version 2.2.4 (December 20, 2024) +--------------------------------- + +* test(rewrite): add unit test for referer rewrite function +* refactor(subscription): use `strings.HasSuffix` instead of a regex in `FindSubscriptionsFromYouTubePlaylistPage` +* refactor(sanitizer): use `token.String()` instead of `html.EscapeString(token.Data)` +* refactor(sanitizer): simplify `isValidTag` +* refactor(sanitizer): simplify `hasRequiredAttributes` +* refactor(sanitizer): remove condition because `config.Opts` is guaranteed to never be nil +* refactor(sanitizer): remove a now-useless function after refactoring +* refactor(sanitizer): refactor conditions to highlight their similitude, enabling further refactoring +* refactor(sanitizer): optimize `strip_tags.go` +* refactor(sanitizer): micro-optimizations of `srcset.go` +* refactor(sanitizer): merge two conditions +* refactor(sanitizer): inline a function in `sanitizeAttributes` and fix a bug in it +* refactor(sanitizer): inline a condition in `sanitizeSrcsetAttr` +* refactor(sanitizer): improve `rewriteIframeURL()` +* refactor(sanitizer): Google+ isn't a thing anymore +* refactor(sanitizer): change the scope of a variable +* refactor(rewriter): replace regex with URL parsing for referrer override +* refactor(rewriter): avoid the use of regex in `addDynamicImage` +* refactor(rewrite): remove unused function arguments +* refactor(readability): various 
improvements and optimizations +* refactor(readability): simplify the regexes in `readability.go` +* refactor(processor): use URL parsing instead of a regex +* refactor(processor): improve the `rewrite` URL rule regex +* refactor(locale): delay parsing of translations until they're used +* refactor(js): factorise a line in `app.js` +* refactor(handler): delay `store.UserByID()` as much as possible +* refactor(css): replace `-ms-text-size-adjust` with `text-size-adjust` +* refactor(css): remove `-webkit-clip-path` +* refactor(css): factorise `.pagination-next` and `.pagination-last` together +* refactor: use a better construct than `doc.Find(…).First()` +* refactor: use `min/max` instead of `math.Min/math.Max` +* refactor: refactor `internal/reader/readability/testdata` +* refactor: optimize `sanitizeAttributes` +* refactor: get rid of `numberOfPluralFormsPerLanguage` test-only variable +* fix(storage): replace timezone function call with view +* fix(consistency): align feed modification behavior between API and UI +* fix(ci): fix grammar in pull-request template +* fix: load icon from site URL instead of feed URL +* fix: feed icon from xml ignored during force refresh +* feat(rewrite)!: remove `parse_markdown` rewrite rule +* feat(mediaproxy): update predefined referer spoofing rules for restricted media resources +* feat(locale): update translations to clarify readeck URL instead of readeck API endpoint +* feat(locale): update German translations +* feat(locale): update Chinese translations +* feat(apprise): update `SendNotification` to handle multiple entries and add logging +* feat(apprise): add title in notification request body +* feat: resize favicons before storing them in the database +* feat: optionally fetch watch time from YouTube API instead of website +* feat: only show the commit URL if it's not empty on `/about` +* feat: add predefined scraper rules for `arstechnica.com` +* feat: add date-based entry filtering rules +* chore: remove 
`blog.laravel.com` rewrite rule +* build(deps): bump `library/alpine` in `/packaging/docker/alpine` to `3.21` +* build(deps): bump `golang.org/x/term` from `0.26.0` to `0.27.0` +* build(deps): bump `golang.org/x/net` from `0.31.0` to `0.33.0` +* build(deps): bump `golang.org/x/crypto` from `0.30.0` to `0.31.0` +* build(deps): bump `github.com/tdewolff/minify/v2` from `2.21.1` to `2.21.2` + Version 2.2.3 (November 10, 2024) --------------------------------- diff --git a/go.mod b/go.mod index 8d5ae72c..38341a40 100644 --- a/go.mod +++ b/go.mod @@ -12,11 +12,11 @@ require ( github.com/lib/pq v1.10.9 github.com/prometheus/client_golang v1.20.5 github.com/tdewolff/minify/v2 v2.21.2 - golang.org/x/crypto v0.30.0 - golang.org/x/net v0.32.0 + golang.org/x/crypto v0.31.0 + golang.org/x/image v0.23.0 + golang.org/x/net v0.33.0 golang.org/x/oauth2 v0.24.0 golang.org/x/term v0.27.0 - golang.org/x/text v0.21.0 ) require ( @@ -41,6 +41,7 @@ require ( github.com/tdewolff/parse/v2 v2.7.19 // indirect github.com/x448/float16 v0.8.4 // indirect golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect google.golang.org/protobuf v1.34.2 // indirect ) diff --git a/go.sum b/go.sum index 715e1a58..68a5ed1c 100644 --- a/go.sum +++ b/go.sum @@ -68,8 +68,10 @@ github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3i github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.30.0 h1:RwoQn3GkWiMkzlX562cLB7OxWvjH1L8xutO2WoJcRoY= -golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= 
+golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68= +golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= @@ -77,8 +79,8 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= -golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= diff --git a/internal/cli/cli.go b/internal/cli/cli.go index ca4f47bd..fc074717 100644 --- a/internal/cli/cli.go +++ b/internal/cli/cli.go @@ -13,7 +13,6 @@ import ( "miniflux.app/v2/internal/config" "miniflux.app/v2/internal/database" - "miniflux.app/v2/internal/locale" "miniflux.app/v2/internal/storage" "miniflux.app/v2/internal/ui/static" "miniflux.app/v2/internal/version" @@ -153,10 +152,6 @@ func Parse() { slog.Info("The default value for DATABASE_URL is used") } - if err := locale.LoadCatalogMessages(); err != nil { - printErrorAndExit(fmt.Errorf("unable to load translations: %v", err)) 
- } - if err := static.CalculateBinaryFileChecksums(); err != nil { printErrorAndExit(fmt.Errorf("unable to calculate binary file checksums: %v", err)) } diff --git a/internal/locale/catalog.go b/internal/locale/catalog.go index 61f5f27d..8ecdab74 100644 --- a/internal/locale/catalog.go +++ b/internal/locale/catalog.go @@ -12,17 +12,26 @@ import ( type translationDict map[string]interface{} type catalog map[string]translationDict -var defaultCatalog catalog +var defaultCatalog = make(catalog, len(AvailableLanguages)) //go:embed translations/*.json var translationFiles embed.FS +func GetTranslationDict(language string) (translationDict, error) { + if _, ok := defaultCatalog[language]; !ok { + var err error + if defaultCatalog[language], err = loadTranslationFile(language); err != nil { + return nil, err + } + } + return defaultCatalog[language], nil +} + // LoadCatalogMessages loads and parses all translations encoded in JSON. func LoadCatalogMessages() error { var err error - defaultCatalog = make(catalog, len(AvailableLanguages())) - for language := range AvailableLanguages() { + for language := range AvailableLanguages { defaultCatalog[language], err = loadTranslationFile(language) if err != nil { return err diff --git a/internal/locale/catalog_test.go b/internal/locale/catalog_test.go index 75537911..687b1de2 100644 --- a/internal/locale/catalog_test.go +++ b/internal/locale/catalog_test.go @@ -39,7 +39,7 @@ func TestLoadCatalog(t *testing.T) { } func TestAllKeysHaveValue(t *testing.T) { - for language := range AvailableLanguages() { + for language := range AvailableLanguages { messages, err := loadTranslationFile(language) if err != nil { t.Fatalf(`Unable to load translation messages for language %q`, language) @@ -71,7 +71,7 @@ func TestMissingTranslations(t *testing.T) { t.Fatal(`Unable to parse reference language`) } - for language := range AvailableLanguages() { + for language := range AvailableLanguages { if language == refLang { continue } @@ -110,7 
+110,7 @@ func TestTranslationFilePluralForms(t *testing.T) { "uk_UA": 3, "id_ID": 1, } - for language := range AvailableLanguages() { + for language := range AvailableLanguages { messages, err := loadTranslationFile(language) if err != nil { t.Fatalf(`Unable to load translation messages for language %q`, language) diff --git a/internal/locale/locale.go b/internal/locale/locale.go index a5a1010b..aa6165b8 100644 --- a/internal/locale/locale.go +++ b/internal/locale/locale.go @@ -3,26 +3,24 @@ package locale // import "miniflux.app/v2/internal/locale" -// AvailableLanguages returns the list of available languages. -func AvailableLanguages() map[string]string { - return map[string]string{ - "en_US": "English", - "es_ES": "Español", - "fr_FR": "Français", - "de_DE": "Deutsch", - "pl_PL": "Polski", - "pt_BR": "Português Brasileiro", - "zh_CN": "简体中文", - "zh_TW": "繁體中文", - "nl_NL": "Nederlands", - "ru_RU": "Русский", - "it_IT": "Italiano", - "ja_JP": "日本語", - "tr_TR": "Türkçe", - "el_EL": "Ελληνικά", - "fi_FI": "Suomi", - "hi_IN": "हिन्दी", - "uk_UA": "Українська", - "id_ID": "Bahasa Indonesia", - } +// AvailableLanguages is the list of available languages. 
+var AvailableLanguages = map[string]string{ + "en_US": "English", + "es_ES": "Español", + "fr_FR": "Français", + "de_DE": "Deutsch", + "pl_PL": "Polski", + "pt_BR": "Português Brasileiro", + "zh_CN": "简体中文", + "zh_TW": "繁體中文", + "nl_NL": "Nederlands", + "ru_RU": "Русский", + "it_IT": "Italiano", + "ja_JP": "日本語", + "tr_TR": "Türkçe", + "el_EL": "Ελληνικά", + "fi_FI": "Suomi", + "hi_IN": "हिन्दी", + "uk_UA": "Українська", + "id_ID": "Bahasa Indonesia", } diff --git a/internal/locale/locale_test.go b/internal/locale/locale_test.go index 86b52820..32f6a40f 100644 --- a/internal/locale/locale_test.go +++ b/internal/locale/locale_test.go @@ -6,7 +6,7 @@ package locale // import "miniflux.app/v2/internal/locale" import "testing" func TestAvailableLanguages(t *testing.T) { - results := AvailableLanguages() + results := AvailableLanguages for k, v := range results { if k == "" { t.Errorf(`Empty language key detected`) diff --git a/internal/locale/printer.go b/internal/locale/printer.go index f85960fa..d997c1a7 100644 --- a/internal/locale/printer.go +++ b/internal/locale/printer.go @@ -11,9 +11,11 @@ type Printer struct { } func (p *Printer) Print(key string) string { - if str, ok := defaultCatalog[p.language][key]; ok { - if translation, ok := str.(string); ok { - return translation + if dict, err := GetTranslationDict(p.language); err == nil { + if str, ok := dict[key]; ok { + if translation, ok := str.(string); ok { + return translation + } } } return key @@ -21,16 +23,16 @@ func (p *Printer) Print(key string) string { // Printf is like fmt.Printf, but using language-specific formatting. 
func (p *Printer) Printf(key string, args ...interface{}) string { - var translation string + translation := key - str, found := defaultCatalog[p.language][key] - if !found { - translation = key - } else { - var valid bool - translation, valid = str.(string) - if !valid { - translation = key + if dict, err := GetTranslationDict(p.language); err == nil { + str, found := dict[key] + if found { + var valid bool + translation, valid = str.(string) + if !valid { + translation = key + } } } @@ -39,9 +41,12 @@ func (p *Printer) Printf(key string, args ...interface{}) string { // Plural returns the translation of the given key by using the language plural form. func (p *Printer) Plural(key string, n int, args ...interface{}) string { - choices, found := defaultCatalog[p.language][key] + dict, err := GetTranslationDict(p.language) + if err != nil { + return key + } - if found { + if choices, found := dict[key]; found { var plurals []string switch v := choices.(type) { diff --git a/internal/mediaproxy/rewriter.go b/internal/mediaproxy/rewriter.go index bb5c2b78..39da1e8b 100644 --- a/internal/mediaproxy/rewriter.go +++ b/internal/mediaproxy/rewriter.go @@ -87,7 +87,7 @@ func genericProxyRewriter(router *mux.Router, proxifyFunction urlProxyRewriter, } } - output, err := doc.Find("body").First().Html() + output, err := doc.FindMatcher(goquery.Single("body")).Html() if err != nil { return htmlDocument } diff --git a/internal/model/feed.go b/internal/model/feed.go index 9f1de1eb..1682b111 100644 --- a/internal/model/feed.go +++ b/internal/model/feed.go @@ -123,8 +123,8 @@ func (f *Feed) ScheduleNextCheck(weeklyCount int, refreshDelayInMinutes int) { intervalMinutes = config.Opts.SchedulerEntryFrequencyMaxInterval() } else { intervalMinutes = int(math.Round(float64(7*24*60) / float64(weeklyCount*config.Opts.SchedulerEntryFrequencyFactor()))) - intervalMinutes = int(math.Min(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMaxInterval()))) - intervalMinutes = 
int(math.Max(float64(intervalMinutes), float64(config.Opts.SchedulerEntryFrequencyMinInterval()))) + intervalMinutes = min(intervalMinutes, config.Opts.SchedulerEntryFrequencyMaxInterval()) + intervalMinutes = max(intervalMinutes, config.Opts.SchedulerEntryFrequencyMinInterval()) } } diff --git a/internal/reader/handler/handler.go b/internal/reader/handler/handler.go index 185c57fa..937d7b78 100644 --- a/internal/reader/handler/handler.go +++ b/internal/reader/handler/handler.go @@ -31,11 +31,6 @@ func CreateFeedFromSubscriptionDiscovery(store *storage.Storage, userID int64, f slog.String("feed_url", feedCreationRequest.FeedURL), ) - user, storeErr := store.UserByID(userID) - if storeErr != nil { - return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) - } - if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) { return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found") } @@ -71,7 +66,7 @@ func CreateFeedFromSubscriptionDiscovery(store *storage.Storage, userID int64, f subscription.WithCategoryID(feedCreationRequest.CategoryID) subscription.CheckedNow() - processor.ProcessFeedEntries(store, subscription, user, true) + processor.ProcessFeedEntries(store, subscription, userID, true) if storeErr := store.CreateFeed(subscription); storeErr != nil { return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) @@ -105,11 +100,6 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model slog.String("feed_url", feedCreationRequest.FeedURL), ) - user, storeErr := store.UserByID(userID) - if storeErr != nil { - return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) - } - if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) { return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found") } @@ -170,7 +160,7 @@ func CreateFeed(store *storage.Storage, userID int64, 
feedCreationRequest *model subscription.WithCategoryID(feedCreationRequest.CategoryID) subscription.CheckedNow() - processor.ProcessFeedEntries(store, subscription, user, true) + processor.ProcessFeedEntries(store, subscription, userID, true) if storeErr := store.CreateFeed(subscription); storeErr != nil { return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) @@ -195,11 +185,6 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool slog.Bool("force_refresh", forceRefresh), ) - user, storeErr := store.UserByID(userID) - if storeErr != nil { - return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) - } - originalFeed, storeErr := store.FeedByID(userID, feedID) if storeErr != nil { return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) @@ -256,6 +241,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool if localizedError := responseHandler.LocalizedError(); localizedError != nil { slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error())) + user, storeErr := store.UserByID(userID) + if storeErr != nil { + return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + } originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) store.UpdateFeedError(originalFeed) return localizedError @@ -263,6 +252,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool if store.AnotherFeedURLExists(userID, originalFeed.ID, responseHandler.EffectiveURL()) { localizedError := locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed") + user, storeErr := store.UserByID(userID) + if storeErr != nil { + return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + } originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) 
store.UpdateFeedError(originalFeed) return localizedError @@ -289,6 +282,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool if errors.Is(parseErr, parser.ErrFeedFormatNotDetected) { localizedError = locale.NewLocalizedErrorWrapper(parseErr, "error.feed_format_not_detected", parseErr) } + user, storeErr := store.UserByID(userID) + if storeErr != nil { + return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + } originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) store.UpdateFeedError(originalFeed) @@ -309,13 +306,17 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool ) originalFeed.Entries = updatedFeed.Entries - processor.ProcessFeedEntries(store, originalFeed, user, forceRefresh) + processor.ProcessFeedEntries(store, originalFeed, userID, forceRefresh) // We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh updateExistingEntries := forceRefresh || !originalFeed.Crawler newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries) if storeErr != nil { localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + user, storeErr := store.UserByID(userID) + if storeErr != nil { + return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + } originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) store.UpdateFeedError(originalFeed) return localizedError @@ -359,6 +360,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool if storeErr := store.UpdateFeed(originalFeed); storeErr != nil { localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + user, storeErr := store.UserByID(userID) + if storeErr != nil { + return 
locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr) + } originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language)) store.UpdateFeedError(originalFeed) return localizedError diff --git a/internal/reader/icon/finder.go b/internal/reader/icon/finder.go index 835a3a14..21d179bb 100644 --- a/internal/reader/icon/finder.go +++ b/internal/reader/icon/finder.go @@ -4,12 +4,18 @@ package icon // import "miniflux.app/v2/internal/reader/icon" import ( + "bytes" "encoding/base64" "fmt" + "image" + "image/gif" + "image/jpeg" + "image/png" "io" "log/slog" "net/url" "regexp" + "slices" "strings" "miniflux.app/v2/internal/config" @@ -19,6 +25,7 @@ import ( "miniflux.app/v2/internal/urllib" "github.com/PuerkitoBio/goquery" + "golang.org/x/image/draw" "golang.org/x/net/html/charset" ) @@ -180,9 +187,59 @@ func (f *IconFinder) DownloadIcon(iconURL string) (*model.Icon, error) { Content: responseBody, } + icon = resizeIcon(icon) + return icon, nil } +func resizeIcon(icon *model.Icon) *model.Icon { + r := bytes.NewReader(icon.Content) + + if !slices.Contains([]string{"image/jpeg", "image/png", "image/gif"}, icon.MimeType) { + slog.Info("icon isn't a png/gif/jpeg/ico, can't resize", slog.String("mimetype", icon.MimeType)) + return icon + } + + // Don't resize icons that we can't decode, or that already have the right size. 
+ config, _, err := image.DecodeConfig(r) + if err != nil { + slog.Warn("unable to decode the metadata of the icon", slog.Any("error", err)) + return icon + } + if config.Height <= 32 && config.Width <= 32 { + slog.Debug("icon don't need to be rescaled", slog.Int("height", config.Height), slog.Int("width", config.Width)) + return icon + } + + r.Seek(0, io.SeekStart) + + var src image.Image + switch icon.MimeType { + case "image/jpeg": + src, err = jpeg.Decode(r) + case "image/png": + src, err = png.Decode(r) + case "image/gif": + src, err = gif.Decode(r) + } + if err != nil { + slog.Warn("unable to decode the icon", slog.Any("error", err)) + return icon + } + + dst := image.NewRGBA(image.Rect(0, 0, 32, 32)) + draw.BiLinear.Scale(dst, dst.Rect, src, src.Bounds(), draw.Over, nil) + + var b bytes.Buffer + if err = png.Encode(io.Writer(&b), dst); err != nil { + slog.Warn("unable to encode the new icon", slog.Any("error", err)) + } + + icon.Content = b.Bytes() + icon.MimeType = "image/png" + return icon +} + func findIconURLsFromHTMLDocument(body io.Reader, contentType string) ([]string, error) { queries := []string{ "link[rel='icon' i]", diff --git a/internal/reader/icon/finder_test.go b/internal/reader/icon/finder_test.go index 9bb71126..3a06e35f 100644 --- a/internal/reader/icon/finder_test.go +++ b/internal/reader/icon/finder_test.go @@ -4,8 +4,13 @@ package icon // import "miniflux.app/v2/internal/reader/icon" import ( + "bytes" + "encoding/base64" + "image" "strings" "testing" + + "miniflux.app/v2/internal/model" ) func TestParseImageDataURL(t *testing.T) { @@ -125,3 +130,52 @@ func TestParseDocumentWithWhitespaceIconURL(t *testing.T) { t.Errorf(`Invalid icon URL, got %q`, iconURLs[0]) } } + +func TestResizeIconSmallGif(t *testing.T) { + data, err := base64.StdEncoding.DecodeString("R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==") + if err != nil { + t.Fatal(err) + } + icon := model.Icon{ + Content: data, + MimeType: "image/gif", + } + if 
!bytes.Equal(icon.Content, resizeIcon(&icon).Content) { + t.Fatalf("Converted gif smaller than 16x16") + } +} + +func TestResizeIconPng(t *testing.T) { + data, err := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAACEAAAAhCAYAAABX5MJvAAAALUlEQVR42u3OMQEAAAgDoJnc6BpjDyRgcrcpGwkJCQkJCQkJCQkJCQkJCYmyB7NfUj/Kk4FkAAAAAElFTkSuQmCC") + if err != nil { + t.Fatal(err) + } + icon := model.Icon{ + Content: data, + MimeType: "image/png", + } + resizedIcon := resizeIcon(&icon) + + if bytes.Equal(data, resizedIcon.Content) { + t.Fatalf("Didn't convert png of 33x33") + } + + config, _, err := image.DecodeConfig(bytes.NewReader(resizedIcon.Content)) + if err != nil { + t.Fatalf("Couldn't decode resulting png: %v", err) + } + + if config.Height != 32 || config.Width != 32 { + t.Fatalf("Was expecting an image of 32x32, got %dx%d", config.Width, config.Height) + } +} + +func TestResizeInvalidImage(t *testing.T) { + icon := model.Icon{ + Content: []byte("invalid data"), + MimeType: "image/gif", + } + if !bytes.Equal(icon.Content, resizeIcon(&icon).Content) { + t.Fatalf("Tried to convert an invalid image") + } +} diff --git a/internal/reader/processor/nebula.go b/internal/reader/processor/nebula.go index d0b0b6ef..216e9b34 100644 --- a/internal/reader/processor/nebula.go +++ b/internal/reader/processor/nebula.go @@ -7,7 +7,7 @@ import ( "errors" "fmt" "log/slog" - "regexp" + "net/url" "strconv" "github.com/PuerkitoBio/goquery" @@ -17,14 +17,17 @@ import ( "miniflux.app/v2/internal/reader/fetcher" ) -var nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`) - func shouldFetchNebulaWatchTime(entry *model.Entry) bool { if !config.Opts.FetchNebulaWatchTime() { return false } - matches := nebulaRegex.FindStringSubmatch(entry.URL) - return matches != nil + + u, err := url.Parse(entry.URL) + if err != nil { + return false + } + + return u.Hostname() == "nebula.tv" } func fetchNebulaWatchTime(websiteURL string) (int, error) { @@ -45,7 +48,7 @@ func fetchNebulaWatchTime(websiteURL
string) (int, error) { return 0, docErr } - durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content") + durs, exists := doc.FindMatcher(goquery.Single(`meta[property="video:duration"]`)).Attr("content") // durs contains video watch time in seconds if !exists { return 0, errors.New("duration has not found") diff --git a/internal/reader/processor/odysee.go b/internal/reader/processor/odysee.go index 90733b2f..873ae60c 100644 --- a/internal/reader/processor/odysee.go +++ b/internal/reader/processor/odysee.go @@ -7,7 +7,7 @@ import ( "errors" "fmt" "log/slog" - "regexp" + "net/url" "strconv" "github.com/PuerkitoBio/goquery" @@ -17,14 +17,17 @@ import ( "miniflux.app/v2/internal/reader/fetcher" ) -var odyseeRegex = regexp.MustCompile(`^https://odysee\.com`) - func shouldFetchOdyseeWatchTime(entry *model.Entry) bool { if !config.Opts.FetchOdyseeWatchTime() { return false } - matches := odyseeRegex.FindStringSubmatch(entry.URL) - return matches != nil + + u, err := url.Parse(entry.URL) + if err != nil { + return false + } + + return u.Hostname() == "odysee.com" } func fetchOdyseeWatchTime(websiteURL string) (int, error) { @@ -45,7 +48,7 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) { return 0, docErr } - durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content") + durs, exists := doc.FindMatcher(goquery.Single(`meta[property="og:video:duration"]`)).Attr("content") // durs contains video watch time in seconds if !exists { return 0, errors.New("duration has not found") diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index ceae674c..3c824b66 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -10,6 +10,9 @@ import ( "strings" "time" + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" + "miniflux.app/v2/internal/config" "miniflux.app/v2/internal/metric" "miniflux.app/v2/internal/model" @@ -20,17 
+23,20 @@ import ( "miniflux.app/v2/internal/reader/scraper" "miniflux.app/v2/internal/reader/urlcleaner" "miniflux.app/v2/internal/storage" - - "github.com/tdewolff/minify/v2" - "github.com/tdewolff/minify/v2/html" ) var customReplaceRuleRegex = regexp.MustCompile(`rewrite\("([^"]+)"\|"([^"]+)"\)`) // ProcessFeedEntries downloads original web page for entries and apply filters. -func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) { +func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, forceRefresh bool) { var filteredEntries model.Entries + user, storeErr := store.UserByID(userID) + if storeErr != nil { + slog.Error("Database error", slog.Any("error", storeErr)) + return + } + // Process older entries first for i := len(feed.Entries) - 1; i >= 0; i-- { entry := feed.Entries[i] @@ -135,6 +141,9 @@ func isBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool var match bool switch parts[0] { + case "EntryDate": + datePattern := parts[1] + match = isDateMatchingPattern(entry.Date, datePattern) case "EntryTitle": match, _ = regexp.MatchString(parts[1], entry.Title) case "EntryURL": @@ -205,6 +214,9 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool var match bool switch parts[0] { + case "EntryDate": + datePattern := parts[1] + match = isDateMatchingPattern(entry.Date, datePattern) case "EntryTitle": match, _ = regexp.MatchString(parts[1], entry.Title) case "EntryURL": @@ -456,3 +468,44 @@ func minifyEntryContent(entryContent string) string { return entryContent } + +func isDateMatchingPattern(entryDate time.Time, pattern string) bool { + if pattern == "future" { + return entryDate.After(time.Now()) + } + + parts := strings.SplitN(pattern, ":", 2) + if len(parts) != 2 { + return false + } + + operator := parts[0] + dateStr := parts[1] + + switch operator { + case "before": + targetDate, err := time.Parse("2006-01-02", dateStr) + if err != 
nil { + return false + } + return entryDate.Before(targetDate) + case "after": + targetDate, err := time.Parse("2006-01-02", dateStr) + if err != nil { + return false + } + return entryDate.After(targetDate) + case "between": + dates := strings.Split(dateStr, ",") + if len(dates) != 2 { + return false + } + startDate, err1 := time.Parse("2006-01-02", dates[0]) + endDate, err2 := time.Parse("2006-01-02", dates[1]) + if err1 != nil || err2 != nil { + return false + } + return entryDate.After(startDate) && entryDate.Before(endDate) + } + return false +} diff --git a/internal/reader/processor/processor_test.go b/internal/reader/processor/processor_test.go index 2a594a4a..9e228366 100644 --- a/internal/reader/processor/processor_test.go +++ b/internal/reader/processor/processor_test.go @@ -75,6 +75,12 @@ func TestAllowEntries(t *testing.T) { {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true}, {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true}, {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false}, + {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, true}, + {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Now().Add(-24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, false}, + {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:2024-03-15"}, true}, + {&model.Feed{ID: 1, 
BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:2024-03-15"}, true}, + {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, true}, + {&model.Feed{ID: 1, BlocklistRules: ""}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, false}, } for _, tc := range scenarios { diff --git a/internal/reader/processor/youtube.go b/internal/reader/processor/youtube.go index 2d41e11f..68e72ba6 100644 --- a/internal/reader/processor/youtube.go +++ b/internal/reader/processor/youtube.go @@ -60,7 +60,7 @@ func fetchYouTubeWatchTimeFromWebsite(websiteURL string) (int, error) { return 0, docErr } - durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content") + durs, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content") if !exists { return 0, errors.New("duration has not found") } diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go index 193edf07..46771eeb 100644 --- a/internal/reader/readability/readability.go +++ b/internal/reader/readability/readability.go @@ -4,11 +4,9 @@ package readability // import "miniflux.app/v2/internal/reader/readability" import ( - "bytes" "fmt" "io" "log/slog" - "math" "regexp" "strings" @@ -24,9 +22,7 @@ const ( var ( divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`) - sentenceRegexp = regexp.MustCompile(`\.( |$)`) - blacklistCandidatesRegexp = regexp.MustCompile(`popupbody|-ad|g-plus`) okMaybeItsACandidateRegexp = regexp.MustCompile(`and|article|body|column|main|shadow`) unlikelyCandidatesRegexp = 
regexp.MustCompile(`banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`) @@ -77,16 +73,14 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er return "", "", err } - if hrefValue, exists := document.Find("head base").First().Attr("href"); exists { + if hrefValue, exists := document.FindMatcher(goquery.Single("head base")).Attr("href"); exists { hrefValue = strings.TrimSpace(hrefValue) if urllib.IsAbsoluteURL(hrefValue) { baseURL = hrefValue } } - document.Find("script,style").Each(func(i int, s *goquery.Selection) { - removeNodes(s) - }) + document.Find("script,style").Remove() transformMisusedDivsIntoParagraphs(document) removeUnlikelyCandidates(document) @@ -107,8 +101,9 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er // Now that we have the top candidate, look through its siblings for content that might also be related. // Things like preambles, content split by ads that we removed, etc. func getArticle(topCandidate *candidate, candidates candidateList) string { - output := bytes.NewBufferString("
` + html.EscapeString(titleAttr) + `