From 0c3e251884e83de503aa648e0bcae6ac3cd9266c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Wed, 30 Jul 2025 21:00:57 -0700 Subject: [PATCH] refactor(filter): parse and merge filters only once per refresh --- internal/reader/filter/filter.go | 228 ++-- internal/reader/filter/filter_test.go | 1565 +++++++++++++++++++++--- internal/reader/processor/processor.go | 25 +- 3 files changed, 1539 insertions(+), 279 deletions(-) diff --git a/internal/reader/filter/filter.go b/internal/reader/filter/filter.go index c50225ea..06ef1818 100644 --- a/internal/reader/filter/filter.go +++ b/internal/reader/filter/filter.go @@ -1,5 +1,25 @@ // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +// +// Package filter provides functions to filter entries based on user-defined rules. +// +// There are two types of rules: +// +// Block Rules: Ignore articles that match the regex. +// Keep Rules: Retain only articles that match the regex. +// +// Rules are processed in this order: +// +// 1. User block filter rules +// 2. Feed block filter rules +// 3. User keep filter rules +// 4. Feed keep filter rules +// +// Each rule must be on a separate line. +// Duplicate rules are allowed. For example, having multiple EntryTitle rules is possible. +// The provided regex should use the RE2 syntax. +// The order of the rules matters as the processor stops on the first match for both Block and Keep rules. +// Invalid rules are ignored. package filter // import "miniflux.app/v2/internal/reader/filter" @@ -15,12 +35,71 @@ import ( "miniflux.app/v2/internal/model" ) -type filterActionType string +type filterRule struct { + Type string + Value string +} -const ( - filterActionBlock filterActionType = "block" - filterActionAllow filterActionType = "allow" -) +type filterRules []filterRule + +func ParseRules(userRules, feedRules string) filterRules { + rules := make(filterRules, 0) + for line := range strings.SplitSeq(strings.TrimSpace(userRules), "\n") { + if valid, filterRule := parseRule(line); valid { + rules = append(rules, filterRule) + } + } + for line := range strings.SplitSeq(strings.TrimSpace(feedRules), "\n") { + if valid, filterRule := parseRule(line); valid { + rules = append(rules, filterRule) + } + } + return rules +} + +func parseRule(userDefinedRule string) (bool, filterRule) { + userDefinedRule = strings.TrimSpace(strings.ReplaceAll(userDefinedRule, "\r\n", "")) + parts := strings.SplitN(userDefinedRule, "=", 2) + if len(parts) != 2 { + return false, filterRule{} + } + return true, filterRule{ + Type: strings.TrimSpace(parts[0]), + Value: strings.TrimSpace(parts[1]), + } +} + +func IsBlockedEntry(blockRules filterRules, allowRules filterRules, feed *model.Feed, entry *model.Entry) bool { + if isBlockedGlobally(entry) { + return true + } + + if matchesEntryFilterRules(blockRules, feed, entry) { + return true + } + + if matches, valid := matchesEntryRegexRules(feed.BlocklistRules, feed, entry); valid && matches { + return true + } + + // If allow rules exist, only entries that match them should be retained + if len(allowRules) > 0 { + if !matchesEntryFilterRules(allowRules, feed, entry) { + return true // Block entry if it doesn't match any allow rules + } + return false // Allow entry if it matches allow rules + } + + // If keeplist rules exist, only entries that match them should be retained + if feed.KeeplistRules != "" { + if matches, valid := matchesEntryRegexRules(feed.KeeplistRules, feed, entry); valid && !matches { + return true // Block entry if it doesn't match keeplist rules + } + return false // Allow entry if it matches keeplist rules or rule is invalid (ignored) + } + + return false +} func isBlockedGlobally(entry *model.Entry) bool { if config.Opts == nil { @@ -42,74 +121,20 @@ func isBlockedGlobally(entry *model.Entry) bool { return false } -func IsBlockedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool { - if isBlockedGlobally(entry) { - return true +// matchesEntryRegexRules checks if the entry matches the regex rules defined in the feed or user settings. +// It returns true if the entry matches the regex pattern, and a boolean indicating if the regex is valid. +func matchesEntryRegexRules(regexPattern string, feed *model.Feed, entry *model.Entry) (bool, bool) { + if regexPattern == "" { + return false, true // No pattern means rule is valid but doesn't match } - combinedRules := combineFilterRules(user.BlockFilterEntryRules, feed.BlockFilterEntryRules) - if combinedRules != "" { - if matchesEntryFilterRules(combinedRules, entry, feed, filterActionBlock) { - return true - } - } - - if feed.BlocklistRules == "" { - return false - } - - return matchesEntryRegexRules(feed.BlocklistRules, entry, feed, filterActionBlock) -} - -func IsAllowedEntry(feed *model.Feed, entry *model.Entry, user *model.User) bool { - combinedRules := combineFilterRules(user.KeepFilterEntryRules, feed.KeepFilterEntryRules) - if combinedRules != "" { - return matchesEntryFilterRules(combinedRules, entry, feed, filterActionAllow) - } - - if feed.KeeplistRules == "" { - return true - } - - return matchesEntryRegexRules(feed.KeeplistRules, entry, feed, filterActionAllow) -} - -func combineFilterRules(userRules, feedRules string) string { - var combinedRules strings.Builder - - userRules = strings.TrimSpace(userRules) - feedRules = strings.TrimSpace(feedRules) - - if userRules != "" { - combinedRules.WriteString(userRules) - } - if feedRules != "" { - if combinedRules.Len() > 0 { - combinedRules.WriteString("\n") - } - combinedRules.WriteString(feedRules) - } - return combinedRules.String() -} - -func matchesEntryFilterRules(rules string, entry *model.Entry, feed *model.Feed, filterAction filterActionType) bool { - for rule := range strings.SplitSeq(rules, "\n") { - if matchesRule(rule, entry) { - logFilterAction(entry, feed, rule, filterAction) - return true - } - } - return false -} - -func matchesEntryRegexRules(rules string, entry *model.Entry, feed *model.Feed, filterAction filterActionType) bool { - compiledRegex, err := regexp.Compile(rules) + compiledRegex, err := regexp.Compile(regexPattern) if err != nil { slog.Warn("Failed on regexp compilation", - slog.String("pattern", rules), + slog.String("regex_pattern", regexPattern), slog.Any("error", err), ) - return false + return false, false // Invalid regex pattern } containsMatchingTag := slices.ContainsFunc(entry.Tags, func(tag string) bool { @@ -120,57 +145,62 @@ func matchesEntryRegexRules(rules string, entry *model.Entry, feed *model.Feed, compiledRegex.MatchString(entry.Title) || compiledRegex.MatchString(entry.Author) || containsMatchingTag { - logFilterAction(entry, feed, rules, filterAction) - return true + slog.Debug("Entry matches regex rule", + slog.String("entry_url", entry.URL), + slog.String("entry_title", entry.Title), + slog.String("entry_author", entry.Author), + slog.String("feed_url", feed.FeedURL), + slog.String("regex_pattern", regexPattern), + ) + return true, true // Pattern matches and is valid } + return false, true // Pattern is valid but doesn't match +} + +func matchesEntryFilterRules(rules filterRules, feed *model.Feed, entry *model.Entry) bool { + for _, rule := range rules { + if matchesRule(rule, entry) { + slog.Debug("Entry matches filter rule", + slog.String("entry_url", entry.URL), + slog.String("entry_title", entry.Title), + slog.String("entry_author", entry.Author), + slog.String("feed_url", feed.FeedURL), + slog.String("rule_type", rule.Type), + slog.String("rule_value", rule.Value), + ) + return true + } + } return false } -func matchesRule(rule string, entry *model.Entry) bool { - rule = strings.TrimSpace(strings.ReplaceAll(rule, "\r\n", "")) - parts := strings.SplitN(rule, "=", 2) - if len(parts) != 2 { - return false - } - - ruleType, ruleValue := parts[0], parts[1] - - switch ruleType { +func matchesRule(rule filterRule, entry *model.Entry) bool { + switch rule.Type { case "EntryDate": - return isDateMatchingPattern(ruleValue, entry.Date) + return isDateMatchingPattern(rule.Value, entry.Date) case "EntryTitle": - match, _ := regexp.MatchString(ruleValue, entry.Title) + match, _ := regexp.MatchString(rule.Value, entry.Title) return match case "EntryURL": - match, _ := regexp.MatchString(ruleValue, entry.URL) + match, _ := regexp.MatchString(rule.Value, entry.URL) return match case "EntryCommentsURL": - match, _ := regexp.MatchString(ruleValue, entry.CommentsURL) + match, _ := regexp.MatchString(rule.Value, entry.CommentsURL) return match case "EntryContent": - match, _ := regexp.MatchString(ruleValue, entry.Content) + match, _ := regexp.MatchString(rule.Value, entry.Content) return match case "EntryAuthor": - match, _ := regexp.MatchString(ruleValue, entry.Author) + match, _ := regexp.MatchString(rule.Value, entry.Author) return match case "EntryTag": - return containsRegexPattern(ruleValue, entry.Tags) + return containsRegexPattern(rule.Value, entry.Tags) } return false } -func logFilterAction(entry *model.Entry, feed *model.Feed, filterRule string, filterAction filterActionType) { - slog.Debug("Filtering entry based on rule", - slog.Int64("feed_id", feed.ID), - slog.String("feed_url", feed.FeedURL), - slog.String("entry_url", entry.URL), - slog.String("filter_rule", filterRule), - slog.String("filter_action", string(filterAction)), - ) -} - func isDateMatchingPattern(pattern string, entryDate time.Time) bool { if pattern == "future" { return entryDate.After(time.Now()) @@ -221,9 +251,9 @@ func isDateMatchingPattern(pattern string, entryDate time.Time) bool { return false } -func containsRegexPattern(pattern string, entries []string) bool { - for _, entry := range entries { - if matched, _ := regexp.MatchString(pattern, entry); matched { +func containsRegexPattern(pattern string, items []string) bool { + for _, item := range items { + if matched, _ := regexp.MatchString(pattern, item); matched { return true } } diff --git a/internal/reader/filter/filter_test.go b/internal/reader/filter/filter_test.go index 3e11c676..d485689b 100644 --- a/internal/reader/filter/filter_test.go +++ b/internal/reader/filter/filter_test.go @@ -12,175 +12,406 @@ import ( "miniflux.app/v2/internal/model" ) -func TestBlockingEntries(t *testing.T) { - var scenarios = []struct { - feed *model.Feed - entry *model.Entry - user *model.User - expected bool - }{ - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://example.com"}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "[a-z"}, &model.Entry{URL: "https://example.com"}, &model.User{}, false}, // invalid regex - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{URL: "https://different.com"}, &model.User{}, false}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, &model.User{}, false}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Tags: []string{"example", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Example", Tags: []string{"example", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Example", Tags: []string{"something different", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Tags: []string{"something different", "something else"}}, &model.User{}, false}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Something different"}, &model.User{}, false}, - {&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, &model.User{}, false}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{BlockFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{BlockFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "test"}}, &model.User{BlockFilterEntryRules: "EntryAuthor\nEntryTag=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{BlockFilterEntryRules: "EntryDate=before:2024-03-15"}, true}, - // Test max-age filter - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}, &model.User{BlockFilterEntryRules: "EntryDate=max-age:30d"}, true}, // Entry from Jan 1, 2024 is definitely older than 30 days - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}, &model.User{BlockFilterEntryRules: "EntryDate=max-age:invalid"}, false}, // Invalid duration format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{BlockFilterEntryRules: "UnknownRuleType=test"}, false}, - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{}, true}, - // Test cases for merged user and feed BlockFilterEntryRules - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)website"}, &model.Entry{URL: "https://example.com", Title: "Some Title"}, &model.User{BlockFilterEntryRules: " EntryTitle=(?i)title "}, true}, // User rule matches - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://example.com", Title: "Some Other"}, &model.User{BlockFilterEntryRules: "EntryTitle=(?i)title"}, true}, // Feed rule matches - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://different.com", Title: "Some Other"}, &model.User{BlockFilterEntryRules: "EntryTitle=(?i)title"}, false}, // Neither rule matches - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://example.com", Title: "Some Title"}, &model.User{BlockFilterEntryRules: "EntryTitle=(?i)title"}, true}, // Both rules would match - // Test multiple rules with \r\n separators - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example\r\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, BlockFilterEntryRules: "EntryURL=(?i)example\r\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{}, true}, - } - - for index, tc := range scenarios { - result := IsBlockedEntry(tc.feed, tc.entry, tc.user) - if tc.expected != result { - t.Errorf(`Unexpected result for scenario %d, got %v for entry %q`, index, result, tc.entry.Title) - } +// Test helper functions +func createTestEntry() *model.Entry { + return &model.Entry{ + ID: 1, + Title: "Test Entry Title", + URL: "https://example.com/test-entry", + CommentsURL: "https://example.com/test-entry/comments", + Content: "This is the test entry content", + Author: "Test Author", + Date: time.Now(), + Tags: []string{"golang", "testing", "miniflux"}, } } -func TestAllowEntries(t *testing.T) { - var scenarios = []struct { - feed *model.Feed - entry *model.Entry - user *model.User - expected bool - }{ - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "https://example.com"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "[a-z"}, &model.Entry{Title: "https://example.com"}, &model.User{}, false}, // invalid regex - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "https://different.com"}, &model.User{}, false}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, &model.User{}, false}, - {&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different", Tags: []string{"example", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Example", Tags: []string{"example", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Example", Tags: []string{"something different", "something else"}}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something more", Tags: []string{"something different", "something else"}}, &model.User{}, false}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different", Author: "Something different"}, &model.User{}, false}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://example.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Test"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{CommentsURL: "https://different.com", Content: "Some Example"}, &model.User{KeepFilterEntryRules: "EntryCommentsURL=(?i)example\nEntryContent=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Example", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)example"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "something else"}}, &model.User{KeepFilterEntryRules: "EntryAuthor=(?i)example\nEntryTag=(?i)Test"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Author: "Different", Tags: []string{"example", "some test"}}, &model.User{KeepFilterEntryRules: "EntryAuthor\nEntryTag=(?i)Test"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Now().Add(24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Now().Add(-24 * time.Hour)}, &model.User{KeepFilterEntryRules: "EntryDate=future"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:2024-03-15"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 14, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=before:invalid-date"}, false}, // invalid date format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:2024-03-15"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 16, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=after:invalid-date"}, false}, // invalid date format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 3, 10, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, true}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-01,2024-03-15"}, false}, - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:invalid-date,2024-03-15"}, false}, // invalid date format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-15,invalid-date"}, false}, // invalid date format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=between:2024-03-15"}, false}, // missing second date in range - // Test max-age filter - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=max-age:30d"}, true}, // Entry from Jan 1, 2024 is definitely older than 30 days - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 1, 1, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=max-age:invalid"}, false}, // Invalid duration format - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=abcd"}, false}, // no colon in rule value - {&model.Feed{ID: 1}, &model.Entry{Date: time.Date(2024, 2, 28, 0, 0, 0, 0, time.UTC)}, &model.User{KeepFilterEntryRules: "EntryDate=unknown:2024-03-15"}, false}, // unknown rule type - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{}, true}, - // Test cases for merged user and feed KeepFilterEntryRules - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)website"}, &model.Entry{URL: "https://example.com", Title: "Some Title"}, &model.User{KeepFilterEntryRules: "EntryTitle=(?i)title"}, true}, // User rule matches - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://example.com", Title: "Some Other"}, &model.User{KeepFilterEntryRules: "EntryTitle=(?i)title"}, true}, // Feed rule matches - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://different.com", Title: "Some Other"}, &model.User{KeepFilterEntryRules: "EntryTitle=(?i)title"}, false}, // Neither rule matches - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example"}, &model.Entry{URL: "https://example.com", Title: "Some Title"}, &model.User{KeepFilterEntryRules: "EntryTitle=(?i)title"}, true}, // Both rules would match - // Test multiple rules with \r\n separators - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example\r\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://example.com", Title: "Some Example"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example\r\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://different.com", Title: "Some Test"}, &model.User{}, true}, - {&model.Feed{ID: 1, KeepFilterEntryRules: "EntryURL=(?i)example\r\nEntryTitle=(?i)Test"}, &model.Entry{URL: "https://different.com", Title: "Some Example"}, &model.User{}, false}, - } - - for _, tc := range scenarios { - result := IsAllowedEntry(tc.feed, tc.entry, tc.user) - if tc.expected != result { - t.Errorf(`Unexpected result, got %v for entry %q`, result, tc.entry.Title) - } +func createTestFeed() *model.Feed { + return &model.Feed{ + ID: 1, + FeedURL: "https://example.com/feed.xml", + BlocklistRules: "", + KeeplistRules: "", + BlockFilterEntryRules: "", + KeepFilterEntryRules: "", } } -func TestParseDuration(t *testing.T) { +// Tests for ParseRules function +func TestParseRules(t *testing.T) { tests := []struct { - input string - expected time.Duration - err bool + name string + userRules string + feedRules string + expected int }{ - {"30d", 30 * 24 * time.Hour, false}, - {"1h", time.Hour, false}, - {"2m", 2 * time.Minute, false}, - {"invalid", 0, true}, - {"5x", 0, true}, // Invalid unit + { + name: "empty rules", + userRules: "", + feedRules: "", + expected: 0, + }, + { + name: "valid user rules only", + userRules: "EntryTitle=test\nEntryAuthor=author", + feedRules: "", + expected: 2, + }, + { + name: "valid feed rules only", + userRules: "", + feedRules: "EntryURL=example\nEntryContent=content", + expected: 2, + }, + { + name: "both user and feed rules", + userRules: "EntryTitle=test\nEntryAuthor=author", + feedRules: "EntryURL=example\nEntryContent=content", + expected: 4, + }, + { + name: "mixed valid and invalid rules", + userRules: "EntryTitle=test\ninvalid_rule\nEntryAuthor=author", + feedRules: "EntryURL=example\nanotherInvalid\nEntryContent=content", + expected: 4, + }, + { + name: "rules with carriage returns", + userRules: "EntryTitle=test\r\nEntryAuthor=author\r\n", + feedRules: "", + expected: 2, + }, + { + name: "rules with extra whitespace", + userRules: " EntryTitle = test \n EntryAuthor = author ", + feedRules: "", + expected: 2, + }, } - for _, test := range tests { - result, err := parseDuration(test.input) - if (err != nil) != test.err { - t.Errorf("parseDuration(%q) error = %v, expected error: %v", test.input, err, test.err) - continue - } - if result != test.expected { - t.Errorf("parseDuration(%q) = %v, expected %v", test.input, result, test.expected) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rules := ParseRules(tt.userRules, tt.feedRules) + if len(rules) != tt.expected { + t.Errorf("ParseRules() returned %d rules, expected %d", len(rules), tt.expected) + } + }) } } -func TestMaxAgeFilter(t *testing.T) { - now := time.Now() - oldEntry := &model.Entry{ - Title: "Old Entry", - Date: now.Add(-48 * time.Hour), // 48 hours ago - } - newEntry := &model.Entry{ - Title: "New Entry", - Date: now.Add(-30 * time.Minute), // 30 minutes ago +// Tests for parseRule function +func TestParseRule(t *testing.T) { + tests := []struct { + name string + rule string + valid bool + expected filterRule + }{ + { + name: "valid rule", + rule: "EntryTitle=test", + valid: true, + expected: filterRule{Type: "EntryTitle", Value: "test"}, + }, + { + name: "rule with extra whitespace", + rule: " EntryTitle = test ", + valid: true, + expected: filterRule{Type: "EntryTitle", Value: "test"}, + }, + { + name: "rule with carriage return", + rule: "EntryTitle=test\r\n", + valid: true, + expected: filterRule{Type: "EntryTitle", Value: "test"}, + }, + { + name: "rule with single carriage return", + rule: "EntryTitle=test\r", + valid: true, + expected: filterRule{Type: "EntryTitle", Value: "test"}, + }, + { + name: "invalid rule - no equals", + rule: "EntryTitle", + valid: false, + }, + { + name: "invalid rule - empty", + rule: "", + valid: false, + }, + { + name: "invalid rule - multiple equals", + rule: "EntryTitle=test=value", + valid: true, + expected: filterRule{Type: "EntryTitle", Value: "test=value"}, + }, + { + name: "rule with equals in value", + rule: "EntryContent=x=y", + valid: true, + expected: filterRule{Type: "EntryContent", Value: "x=y"}, + }, } - // Test blocking old entries - feed := &model.Feed{ID: 1} - user := &model.User{BlockFilterEntryRules: "EntryDate=max-age:1d"} - - // Old entry should be blocked (48 hours > 1 day is true) - if !IsBlockedEntry(feed, oldEntry, user) { - t.Error("Expected old entry to be blocked with max-age:1d") - } - - // New entry should not be blocked - if IsBlockedEntry(feed, newEntry, user) { - t.Error("Expected new entry to not be blocked with max-age:1d") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + valid, rule := parseRule(tt.rule) + if valid != tt.valid { + t.Errorf("parseRule() validity = %v, expected %v", valid, tt.valid) + } + if valid && (rule.Type != tt.expected.Type || rule.Value != tt.expected.Value) { + t.Errorf("parseRule() = %+v, expected %+v", rule, tt.expected) + } + }) } } +// Tests for IsBlockedEntry function +func TestIsBlockedEntry(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + tests := []struct { + name string + blockRules filterRules + allowRules filterRules + setup func() + expected bool + }{ + { + name: "no rules - not blocked", + blockRules: filterRules{}, + allowRules: filterRules{}, + setup: func() {}, + expected: false, + }, + { + name: "matching block rule", + blockRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + allowRules: filterRules{}, + setup: func() {}, + expected: true, + }, + { + name: "block rule takes precedence over allow rule", + blockRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + allowRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + setup: func() {}, + expected: true, // Block rules are checked first + }, + { + name: "non-matching block rule", + blockRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + allowRules: filterRules{}, + setup: func() {}, + expected: false, + }, + { + name: "allow rule matches - entry should be allowed", + blockRules: filterRules{}, + allowRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + setup: func() {}, + expected: false, + }, + { + name: "allow rule exists but doesn't match - entry should be blocked", + blockRules: filterRules{}, + allowRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + setup: func() {}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.setup() + result := IsBlockedEntry(tt.blockRules, tt.allowRules, feed, entry) + if result != tt.expected { + t.Errorf("IsBlockedEntry() = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestAllowRulesExclusiveBehavior(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + tests := []struct { + name string + allowRules filterRules + expected bool + description string + }{ + { + name: "no allow rules - entry should pass", + allowRules: filterRules{}, + expected: false, + description: "When no allow rules exist, entry should not be blocked", + }, + { + name: "allow rule matches - entry should pass", + allowRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + expected: false, + description: "When allow rules exist and match, entry should not be blocked", + }, + { + name: "allow rule doesn't match - entry should be blocked", + allowRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + expected: true, + description: "When allow rules exist but don't match, entry should be blocked", + }, + { + name: "multiple allow rules - one matches", + allowRules: filterRules{ + {Type: "EntryTitle", Value: "NonMatching"}, + {Type: "EntryAuthor", Value: "Test"}, + }, + expected: false, + description: "When any allow rule matches, entry should not be blocked", + }, + { + name: "multiple allow rules - none match", + allowRules: filterRules{ + {Type: "EntryTitle", Value: "NonMatching1"}, + {Type: "EntryAuthor", Value: "NonMatching2"}, + }, + expected: true, + description: "When no allow rules match, entry should be blocked", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := IsBlockedEntry(filterRules{}, tt.allowRules, feed, entry) + if result != tt.expected { + t.Errorf("IsBlockedEntry() = %v, expected %v (%s)", result, tt.expected, tt.description) + } + }) + } +} + +func TestAllowRulesWithBlockRulesPrecedence(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + tests := []struct { + name string + blockRules filterRules + allowRules filterRules + expected bool + description string + }{ + { + name: "block rule takes precedence over matching allow rule", + blockRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + allowRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + expected: true, + description: "Block rules should always take precedence, even when allow rules would match", + }, + { + name: "block rule takes precedence, allow rule would fail anyway", + blockRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + allowRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + expected: true, + description: "Block rules should take precedence regardless of allow rule matching", + }, + { + name: "no block rule, allow rule matches", + blockRules: filterRules{}, + allowRules: filterRules{{Type: "EntryTitle", Value: "Test"}}, + expected: false, + description: "When no block rules match and allow rule matches, entry should pass", + }, + { + name: "non-matching block rule, allow rule doesn't match", + blockRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + allowRules: filterRules{{Type: "EntryTitle", Value: "NonMatching"}}, + expected: true, + description: "When block rules don't match but allow rules also don't match, entry should be blocked", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := IsBlockedEntry(tt.blockRules, tt.allowRules, feed, entry) + if result != tt.expected { + t.Errorf("IsBlockedEntry() = %v, expected %v (%s)", result, tt.expected, tt.description) + } + }) + } +} + +func TestKeeplistRulesBehavior(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + tests := []struct { + name string + keeplistRule string + expected bool + description string + }{ + { + name: "no keeplist rules - entry should pass", + keeplistRule: "", + expected: false, + description: "When no keeplist rules exist, entry should not be blocked", + }, + { + name: "keeplist rule matches title - entry should pass", + keeplistRule: "Test.*Title", + expected: false, + description: "When keeplist rule matches entry title, entry should not be blocked", + }, + { + name: "keeplist rule matches URL - entry should pass", + keeplistRule: "example\\.com", + expected: false, + description: "When keeplist rule matches entry URL, entry should not be blocked", + }, + { + name: "keeplist rule matches author - entry should pass", + keeplistRule: "Test.*Author", + expected: false, + description: "When keeplist rule matches entry author, entry should not be blocked", + }, + { + name: "keeplist rule matches tag - entry should pass", + keeplistRule: "golang", + expected: false, + description: "When keeplist rule matches entry tag, entry should not be blocked", + }, + { + name: "keeplist rule doesn't match - entry should be blocked", + keeplistRule: "NonMatchingPattern", + expected: true, + description: "When keeplist rule doesn't match any entry field, entry should be blocked", + }, + { + name: "invalid keeplist regex - entry should pass", + keeplistRule: "[invalid", + expected: false, + description: "When keeplist rule is invalid regex, entry should not be blocked (rule is ignored)", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + feed.KeeplistRules = tt.keeplistRule + feed.BlocklistRules = "" // Ensure no blocklist interference + result := IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if result != tt.expected { + t.Errorf("IsBlockedEntry() with keeplist '%s' = %v, expected %v (%s)", + tt.keeplistRule, result, tt.expected, tt.description) + } + }) + } +} + +// Tests for isBlockedGlobally function func TestIsBlockedGlobally(t *testing.T) { var err error config.Opts, err = config.NewParser().ParseEnvironmentVariables() @@ -188,7 +419,10 @@ func TestIsBlockedGlobally(t *testing.T) { t.Fatalf(`Parsing failure: %v`, err) } - if isBlockedGlobally(&model.Entry{Title: "Test Entry", Date: time.Date(2020, 5, 1, 05, 05, 05, 05, time.UTC)}) { + testEntry := createTestEntry() + testEntry.Date = time.Date(2020, 5, 1, 05, 05, 05, 05, time.UTC) + + if IsBlockedEntry(nil, nil, createTestFeed(), testEntry) { t.Error("Expected no entries to be blocked globally when max-age is not set") } @@ -200,46 +434,1021 @@ func TestIsBlockedGlobally(t *testing.T) { t.Fatalf(`Parsing failure: %v`, err) } - if !isBlockedGlobally(&model.Entry{Title: "Test Entry", Date: time.Date(2020, 5, 1, 05, 05, 05, 05, time.UTC)}) { + if !IsBlockedEntry(nil, nil, createTestFeed(), testEntry) { t.Error("Expected entries to be blocked globally when max-age is set") } - if isBlockedGlobally(&model.Entry{Title: "Test Entry", Date: time.Now().Add(-2 * time.Hour)}) { + testEntry.Date = time.Now().Add(-2 * 24 * time.Hour) + if isBlockedGlobally(testEntry) { t.Error("Expected entries not to be blocked globally when they are within the max-age limit") } } -func TestIsBlockedEntryWithGlobalMaxAge(t *testing.T) { - os.Setenv("FILTER_ENTRY_MAX_AGE_DAYS", "30") - defer os.Clearenv() +// Tests for matchesEntryRegexRules function +func TestMatchesEntryRegexRules(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() - var err error - config.Opts, err = config.NewParser().ParseEnvironmentVariables() - if err != nil { - t.Fatalf(`Parsing failure: %v`, err) + tests := []struct { + name string + regexPattern string + expectedMatch bool + expectedValid bool + description string + }{ + { + name: "empty pattern", + regexPattern: "", + expectedMatch: false, + expectedValid: true, + description: "Empty pattern should be valid but not match", + }, + { + name: "invalid regex", + regexPattern: "[", + expectedMatch: false, + expectedValid: false, + description: "Invalid regex should return false for both match and validity", + }, + { + name: "matches title", + regexPattern: "Test.*Title", + expectedMatch: true, + expectedValid: true, + description: "Valid regex matching title should return true for both", + }, + { + name: "matches URL", + regexPattern: "example\\.com", + expectedMatch: true, + expectedValid: true, + description: "Valid regex matching URL should return true for both", + }, + { + name: "matches author", + regexPattern: "Test.*Author", + expectedMatch: true, + expectedValid: true, + description: "Valid regex matching author should return true for both", + }, + { + name: "matches tag", + regexPattern: "golang", + expectedMatch: true, + expectedValid: true, + description: "Valid regex matching tag should return true for both", + }, + { + name: "no match but valid regex", + regexPattern: "nomatch", + expectedMatch: false, + expectedValid: true, + description: "Valid regex with no match should return false for match, true for validity", + }, + { + name: "invalid regex - unclosed parenthesis", + regexPattern: "(unclosed", + expectedMatch: false, + expectedValid: false, + description: "Invalid regex with unclosed parenthesis should return false for both", + }, + { + name: "invalid regex - invalid quantifier", + regexPattern: "*invalid", + expectedMatch: false, + expectedValid: false, + description: "Invalid regex with wrong quantifier should return false for both", + }, } - entry := &model.Entry{Title: "Test Entry", Date: time.Now().Add(-31 * 24 * time.Hour)} // 31 days old - feed := &model.Feed{ID: 1} - user := &model.User{} - - if !IsBlockedEntry(feed, entry, user) { - t.Error("Expected entry to be blocked due to global max-age rule") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + match, valid := matchesEntryRegexRules(tt.regexPattern, feed, entry) + if match != tt.expectedMatch { + t.Errorf("matchesEntryRegexRules() match = %v, expected %v (%s)", match, tt.expectedMatch, tt.description) + } + if valid != tt.expectedValid { + t.Errorf("matchesEntryRegexRules() valid = %v, expected %v (%s)", valid, tt.expectedValid, tt.description) + } + }) } } -func TestIsBlockedEntryWithDefaultGlobalMaxAge(t *testing.T) { - var err error - config.Opts, err = config.NewParser().ParseEnvironmentVariables() - if err != nil { - t.Fatalf(`Parsing failure: %v`, err) +// Tests for matchesEntryFilterRules function +func TestMatchesEntryFilterRules(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + tests := []struct { + name string + rules filterRules + expected bool + }{ + { + name: "empty rules", + rules: filterRules{}, + expected: false, + }, + { + name: "matching rule", + rules: filterRules{ + {Type: "EntryTitle", Value: "Test"}, + }, + expected: true, + }, + { + name: "non-matching rule", + rules: filterRules{ + {Type: "EntryTitle", Value: "NonMatching"}, + }, + expected: false, + }, + { + name: "multiple rules - one matches", + rules: filterRules{ + {Type: "EntryTitle", Value: "NonMatching"}, + {Type: "EntryAuthor", Value: "Test"}, + }, + expected: true, + }, } - entry := &model.Entry{Title: "Test Entry", Date: time.Now().Add(-31 * 24 * time.Hour)} // 31 days old - feed := &model.Feed{ID: 1} - user := &model.User{} - - if IsBlockedEntry(feed, entry, user) { - t.Error("Expected entry not to be blocked due to default global max-age rule") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := matchesEntryFilterRules(tt.rules, feed, entry) + if result != tt.expected { + t.Errorf("matchesEntryFilterRules() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Tests for matchesRule function +func TestMatchesRule(t *testing.T) { + entry := createTestEntry() + futureEntry := createTestEntry() + futureEntry.Date = time.Now().Add(time.Hour) + + tests := []struct { + name string + rule filterRule + entry *model.Entry + expected bool + }{ + { + name: "EntryTitle match", + rule: filterRule{Type: "EntryTitle", Value: "Test"}, + entry: entry, + expected: true, + }, + { + name: "EntryTitle no match", + rule: filterRule{Type: "EntryTitle", Value: "NoMatch"}, + entry: entry, + expected: false, + }, + { + name: "EntryURL match", + rule: filterRule{Type: "EntryURL", Value: "example\\.com"}, + entry: entry, + expected: true, + }, + { + name: "EntryURL no match", + rule: filterRule{Type: "EntryURL", Value: "nomatch\\.com"}, + entry: entry, + expected: false, + }, + { + name: "EntryCommentsURL match", + rule: filterRule{Type: "EntryCommentsURL", Value: "comments"}, + entry: entry, + expected: true, + }, + { + name: "EntryContent match", + rule: filterRule{Type: "EntryContent", Value: "test.*content"}, + entry: entry, + expected: true, + }, + { + name: "EntryAuthor match", + rule: filterRule{Type: "EntryAuthor", Value: "Test.*Author"}, + entry: entry, + expected: true, + }, + { + name: "EntryTag match", + rule: filterRule{Type: "EntryTag", Value: "golang"}, + entry: entry, + expected: true, + }, + { + name: "EntryTag no match", + rule: filterRule{Type: "EntryTag", Value: "python"}, + entry: entry, + expected: false, + }, + { + name: "EntryDate future", + rule: filterRule{Type: "EntryDate", Value: "future"}, + entry: futureEntry, + expected: true, + }, + { + name: "EntryDate not future", + rule: filterRule{Type: "EntryDate", Value: "future"}, + entry: entry, + expected: false, + }, + { + name: "unknown rule type", + rule: filterRule{Type: "UnknownType", Value: "test"}, + entry: entry, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := matchesRule(tt.rule, tt.entry) + if result != tt.expected { + t.Errorf("matchesRule() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Tests for isDateMatchingPattern function +func TestIsDateMatchingPattern(t *testing.T) { + now := time.Now() + testDate := time.Date(2023, 6, 15, 12, 0, 0, 0, time.UTC) + + tests := []struct { + name string + pattern string + entryDate time.Time + expected bool + }{ + { + name: "future - positive case", + pattern: "future", + entryDate: now.Add(time.Hour), + expected: true, + }, + { + name: "future - negative case", + pattern: "future", + entryDate: now.Add(-time.Hour), + expected: false, + }, + { + name: "before - positive case", + pattern: "before:2023-07-01", + entryDate: testDate, + expected: true, + }, + { + name: "before - negative case", + pattern: "before:2023-06-01", + entryDate: testDate, + expected: false, + }, + { + name: "before - invalid date", + pattern: "before:invalid-date", + entryDate: testDate, + expected: false, + }, + { + name: "after - positive case", + pattern: "after:2023-06-01", + entryDate: testDate, + expected: true, + }, + { + name: "after - negative case", + pattern: "after:2023-07-01", + entryDate: testDate, + expected: false, + }, + { + name: "after - invalid date", + pattern: "after:invalid-date", + entryDate: testDate, + expected: false, + }, + { + name: "between - positive case", + pattern: "between:2023-06-01,2023-07-01", + entryDate: testDate, + expected: true, + }, + { + name: "between - negative case", + pattern: "between:2023-07-01,2023-08-01", + entryDate: testDate, + expected: false, + }, + { + name: "between - invalid format", + pattern: "between:2023-06-01", + entryDate: testDate, + expected: false, + }, + { + name: "between - invalid start date", + pattern: "between:invalid,2023-07-01", + entryDate: testDate, + expected: false, + }, + { + name: "between - invalid end date", + pattern: "between:2023-06-01,invalid", + entryDate: testDate, + expected: false, + }, + { + name: "max-age - positive case", + pattern: "max-age:1d", + entryDate: now.Add(-2 * 24 * time.Hour), + expected: true, + }, + { + name: "max-age - negative case", + pattern: "max-age:3d", + entryDate: now.Add(-2 * 24 * time.Hour), + expected: false, + }, + { + name: "max-age - invalid duration", + pattern: "max-age:invalid", + entryDate: testDate, + expected: false, + }, + { + name: "invalid pattern format", + pattern: "invalid-pattern", + entryDate: testDate, + expected: false, + }, + { + name: "unknown rule type", + pattern: "unknown:value", + entryDate: testDate, + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isDateMatchingPattern(tt.pattern, tt.entryDate) + if result != tt.expected { + t.Errorf("isDateMatchingPattern() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Tests for containsRegexPattern function +func TestContainsRegexPattern(t *testing.T) { + tests := []struct { + name string + pattern string + items []string + expected bool + }{ + { + name: "match found", + pattern: "go.*", + items: []string{"golang", "python", "javascript"}, + expected: true, + }, + { + name: "no match", + pattern: "rust", + items: []string{"golang", "python", "javascript"}, + expected: false, + }, + { + name: "empty items", + pattern: "test", + items: []string{}, + expected: false, + }, + { + name: "invalid regex", + pattern: "[", + items: []string{"test"}, + expected: false, + }, + { + name: "case sensitive match", + pattern: "Go", + items: []string{"golang", "python"}, + expected: false, + }, + { + name: "exact match", + pattern: "^golang$", + items: []string{"golang", "go"}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := containsRegexPattern(tt.pattern, tt.items) + if result != tt.expected { + t.Errorf("containsRegexPattern() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Tests for parseDuration function +func TestParseDuration(t *testing.T) { + tests := []struct { + name string + duration string + expected time.Duration + expectError bool + }{ + { + name: "days - single digit", + duration: "1d", + expected: 24 * time.Hour, + expectError: false, + }, + { + name: "days - multiple digits", + duration: "30d", + expected: 30 * 24 * time.Hour, + expectError: false, + }, + { + name: "days - zero", + duration: "0d", + expected: 0, + expectError: false, + }, + { + name: "days - empty number", + duration: "d", + expected: 0, + expectError: false, + }, + { + name: "days - invalid number", + duration: "invalid_d", + expected: 0, + expectError: true, + }, + { + name: "hours", + duration: "24h", + expected: 24 * time.Hour, + expectError: false, + }, + { + name: "minutes", + duration: "60m", + expected: 60 * time.Minute, + expectError: false, + }, + { + name: "seconds", + duration: "30s", + expected: 30 * time.Second, + expectError: false, + }, + { + name: "milliseconds", + duration: "500ms", + expected: 500 * time.Millisecond, + expectError: false, + }, + { + name: "microseconds", + duration: "1000us", + expected: 1000 * time.Microsecond, + expectError: false, + }, + { + name: "nanoseconds", + duration: "1000ns", + expected: 1000 * time.Nanosecond, + expectError: false, + }, + { + name: "invalid duration", + duration: "invalid", + expected: 0, + expectError: true, + }, + { + name: "empty string", + duration: "", + expected: 0, + expectError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseDuration(tt.duration) + if tt.expectError && err == nil { + t.Errorf("parseDuration() expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("parseDuration() unexpected error: %v", err) + } + if !tt.expectError && result != tt.expected { + t.Errorf("parseDuration() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Additional edge case tests +func TestParseRulesEdgeCases(t *testing.T) { + tests := []struct { + name string + userRules string + feedRules string + expected int + }{ + { + name: "rules with only newlines", + userRules: "\n\n\n", + feedRules: "\n\n", + expected: 0, + }, + { + name: "rules with only whitespace", + userRules: " \n \t \n", + feedRules: "", + expected: 0, + }, + { + name: "rules with equals but empty value", + userRules: "EntryTitle=", + feedRules: "", + expected: 1, + }, + { + name: "rules with equals but empty key", + userRules: "=value", + feedRules: "", + expected: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rules := ParseRules(tt.userRules, tt.feedRules) + if len(rules) != tt.expected { + t.Errorf("ParseRules() returned %d rules, expected %d", len(rules), tt.expected) + } + }) + } +} + +func TestIsBlockedEntryWithRegexRules(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + // Test with blocklist regex rules + feed.BlocklistRules = "Test.*Title" + result := IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if !result { + t.Errorf("IsBlockedEntry() should block entry matching blocklist regex") + } + + // Test with both blocklist and keeplist regex rules - blocklist takes precedence + feed.KeeplistRules = "Test.*Title" + result = IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if !result { + t.Errorf("IsBlockedEntry() should block entry when both blocklist and keeplist match (blocklist takes precedence)") + } + + // Reset blocklist and test with keeplist only + feed.BlocklistRules = "" + feed.KeeplistRules = "Test.*Title" + result = IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if result { + t.Errorf("IsBlockedEntry() should not block entry matching keeplist only") + } + + // Test with keeplist that doesn't match - should block + feed.KeeplistRules = "NonMatchingPattern" + result = IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if !result { + t.Errorf("IsBlockedEntry() should block entry when keeplist doesn't match") + } +} + +func TestMatchesRuleWithInvalidRegex(t *testing.T) { + entry := createTestEntry() + + // Test invalid regex patterns + rule := filterRule{Type: "EntryTitle", Value: "["} + result := matchesRule(rule, entry) + if result { + t.Errorf("matchesRule() should return false for invalid regex") + } +} + +func TestIsDateMatchingPatternEdgeCases(t *testing.T) { + testDate := time.Date(2023, 6, 15, 12, 0, 0, 0, time.UTC) + + // Test edge case: between with boundary dates + result := isDateMatchingPattern("between:2023-06-15,2023-06-15", testDate) + if result { + t.Errorf("isDateMatchingPattern() should return false for date exactly on boundaries") + } + + // Test edge case: max-age with hours + now := time.Now() + oldEntry := now.Add(-25 * time.Hour) + result = isDateMatchingPattern("max-age:24h", oldEntry) + if !result { + t.Errorf("isDateMatchingPattern() should match old entry with max-age in hours") + } +} + +// Additional comprehensive edge case tests +func TestComplexFilterScenarios(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + // Test complex scenario: block filter rules + blocklist regex + allow filter rules + keeplist regex + blockRules := filterRules{{Type: "EntryAuthor", Value: "Test.*Author"}} + allowRules := filterRules{{Type: "EntryTitle", Value: "Test.*Title"}} + feed.BlocklistRules = "golang" + feed.KeeplistRules = "testing" + + // Block filter rules should take precedence + result := IsBlockedEntry(blockRules, allowRules, feed, entry) + if !result { + t.Errorf("Complex scenario: block filter rules should take precedence") + } + + // Remove block filter rules, now blocklist regex should block + result = IsBlockedEntry(filterRules{}, allowRules, feed, entry) + if !result { + t.Errorf("Complex scenario: blocklist regex should block when no filter block rules") + } + + // Remove blocklist regex, allow filter rules should allow (since they match) + feed.BlocklistRules = "" + result = IsBlockedEntry(filterRules{}, allowRules, feed, entry) + if result { + t.Errorf("Complex scenario: allow filter rules should not block when they match") + } + + // Change allow filter rules to non-matching, should block + allowRules = filterRules{{Type: "EntryTitle", Value: "NonMatching"}} + result = IsBlockedEntry(filterRules{}, allowRules, feed, entry) + if !result { + t.Errorf("Complex scenario: non-matching allow filter rules should block") + } + + // Remove allow filter rules, keeplist regex should allow + result = IsBlockedEntry(filterRules{}, filterRules{}, feed, entry) + if result { + t.Errorf("Complex scenario: keeplist regex should not block when it matches") + } +} + +func TestFilterRulesWithSpecialCharacters(t *testing.T) { + entry := &model.Entry{ + Title: "Test [Special] (Characters) & Symbols!", + URL: "https://example.com/test?param=value&other=123", + Content: "Content with tags and $pecial characters", + Author: "Author@domain.com", + Tags: []string{"c++", "c#", ".net"}, + } + + tests := []struct { + name string + rule filterRule + expected bool + }{ + { + name: "brackets in title", + rule: filterRule{Type: "EntryTitle", Value: "\\[Special\\]"}, + expected: true, + }, + { + name: "parentheses in title", + rule: filterRule{Type: "EntryTitle", Value: "\\(Characters\\)"}, + expected: true, + }, + { + name: "URL with query parameters", + rule: filterRule{Type: "EntryURL", Value: "param=value"}, + expected: true, + }, + { + name: "HTML tags in content", + rule: filterRule{Type: "EntryContent", Value: ""}, + expected: true, + }, + { + name: "email pattern in author", + rule: filterRule{Type: "EntryAuthor", Value: "@domain\\.com"}, + expected: true, + }, + { + name: "programming language tags", + rule: filterRule{Type: "EntryTag", Value: "c\\+\\+"}, + expected: true, + }, + { + name: "tags with special chars", + rule: filterRule{Type: "EntryTag", Value: "c#"}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := matchesRule(tt.rule, entry) + if result != tt.expected { + t.Errorf("matchesRule() with special characters = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestEntryWithEmptyFields(t *testing.T) { + entry := &model.Entry{ + Title: "", + URL: "", + CommentsURL: "", + Content: "", + Author: "", + Tags: []string{}, + Date: time.Time{}, // Zero time + } + + tests := []struct { + name string + rule filterRule + expected bool + }{ + { + name: "empty title", + rule: filterRule{Type: "EntryTitle", Value: ".*"}, + expected: true, // Empty string matches .* + }, + { + name: "empty title specific match", + rule: filterRule{Type: "EntryTitle", Value: "^$"}, + expected: true, // Empty string matches ^$ + }, + { + name: "empty URL", + rule: filterRule{Type: "EntryURL", Value: "^$"}, + expected: true, + }, + { + name: "empty tags", + rule: filterRule{Type: "EntryTag", Value: "anything"}, + expected: false, // No tags to match + }, + { + name: "zero time as future", + rule: filterRule{Type: "EntryDate", Value: "future"}, + expected: false, // Zero time is not in future + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := matchesRule(tt.rule, entry) + if result != tt.expected { + t.Errorf("matchesRule() with empty fields = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestBoundaryConditionsForDates(t *testing.T) { + // Test dates at exact boundaries + exactDate := time.Date(2023, 6, 15, 0, 0, 0, 0, time.UTC) + + tests := []struct { + name string + pattern string + entryDate time.Time + expected bool + }{ + { + name: "exact boundary - before same date", + pattern: "before:2023-06-15", + entryDate: exactDate, + expected: false, + }, + { + name: "exact boundary - after same date", + pattern: "after:2023-06-15", + entryDate: exactDate, + expected: false, + }, + { + name: "one second before boundary", + pattern: "before:2023-06-15", + entryDate: exactDate.Add(-time.Second), + expected: true, + }, + { + name: "one second after boundary", + pattern: "after:2023-06-15", + entryDate: exactDate.Add(time.Second), + expected: true, + }, + { + name: "between same dates", + pattern: "between:2023-06-15,2023-06-15", + entryDate: exactDate, + expected: false, // Entry is not between identical dates + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isDateMatchingPattern(tt.pattern, tt.entryDate) + if result != tt.expected { + t.Errorf("isDateMatchingPattern() boundary test = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestRegexErrorHandling(t *testing.T) { + entry := createTestEntry() + feed := createTestFeed() + + // Test invalid regex in various contexts + tests := []struct { + name string + regexPattern string + expected bool + }{ + { + name: "invalid regex - unclosed bracket", + regexPattern: "[abc", + expected: false, + }, + { + name: "invalid regex - unclosed parenthesis", + regexPattern: "(abc", + expected: false, + }, + { + name: "invalid regex - invalid quantifier", + regexPattern: "*abc", + expected: false, + }, + { + name: "valid complex regex", + regexPattern: "^Test.*Entry.*Title$", + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, _ := matchesEntryRegexRules(tt.regexPattern, feed, entry) + if result != tt.expected { + t.Errorf("matchesEntryRegexRules() with invalid regex = %v, expected %v", result, tt.expected) + } + }) + } +} + +func TestParseDurationWithVariousFormats(t *testing.T) { + tests := []struct { + name string + duration string + expected time.Duration + expectError bool + }{ + // Additional duration format tests + { + name: "complex duration - hours and minutes", + duration: "1h30m", + expected: time.Hour + 30*time.Minute, + expectError: false, + }, + { + name: "complex duration - minutes and seconds", + duration: "30m45s", + expected: 30*time.Minute + 45*time.Second, + expectError: false, + }, + { + name: "fractional hours", + duration: "1.5h", + expected: time.Hour + 30*time.Minute, + expectError: false, + }, + { + name: "negative duration", + duration: "-1h", + expected: -time.Hour, + expectError: false, + }, + { + name: "zero duration", + duration: "0", + expected: 0, + expectError: false, + }, + { + name: "large number of days", + duration: "999d", + expected: 999 * 24 * time.Hour, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseDuration(tt.duration) + if tt.expectError && err == nil { + t.Errorf("parseDuration() expected error but got none") + } + if !tt.expectError && err != nil { + t.Errorf("parseDuration() unexpected error: %v", err) + } + if !tt.expectError && result != tt.expected { + t.Errorf("parseDuration() = %v, expected %v", result, tt.expected) + } + }) + } +} + +// Benchmark tests for performance +func BenchmarkParseRules(b *testing.B) { + userRules := `EntryTitle=test1 +EntryAuthor=author1 +EntryURL=example1 +EntryContent=content1 +EntryTag=tag1` + feedRules := `EntryTitle=test2 +EntryAuthor=author2 +EntryURL=example2 +EntryContent=content2 +EntryTag=tag2` + + b.ResetTimer() + for b.Loop() { + ParseRules(userRules, feedRules) + } +} + +func BenchmarkIsBlockedEntry(b *testing.B) { + entry := createTestEntry() + feed := createTestFeed() + blockRules := filterRules{ + {Type: "EntryTitle", Value: "test"}, + {Type: "EntryAuthor", Value: "author"}, + {Type: "EntryURL", Value: "example"}, + } + allowRules := filterRules{ + {Type: "EntryContent", Value: "content"}, + {Type: "EntryTag", Value: "tag"}, + } + + for b.Loop() { + IsBlockedEntry(blockRules, allowRules, feed, entry) + } +} + +func BenchmarkMatchesEntryRegexRules(b *testing.B) { + entry := createTestEntry() + feed := createTestFeed() + regexPattern := "Test.*Title|example\\.com|Test.*Author|golang" + + for b.Loop() { + matchesEntryRegexRules(regexPattern, feed, entry) + } +} + +func BenchmarkIsDateMatchingPattern(b *testing.B) { + entryDate := time.Now().Add(-2 * 24 * time.Hour) + pattern := "max-age:1d" + + for b.Loop() { + isDateMatchingPattern(pattern, entryDate) + } +} + +func BenchmarkParseDuration(b *testing.B) { + for b.Loop() { + parseDuration("30d") } } diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index 966ea972..000ebd12 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -37,7 +37,20 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, parsedFeedURL, _ := url.Parse(feed.FeedURL) parsedSiteURL, _ := url.Parse(feed.SiteURL) - // Process older entries first + blockRules := filter.ParseRules(user.BlockFilterEntryRules, feed.BlockFilterEntryRules) + allowRules := filter.ParseRules(user.KeepFilterEntryRules, feed.KeepFilterEntryRules) + slog.Debug("Filter rules", + slog.String("user_block_filter_rules", user.BlockFilterEntryRules), + slog.String("feed_block_filter_rules", feed.BlockFilterEntryRules), + slog.String("user_keep_filter_rules", user.KeepFilterEntryRules), + slog.String("feed_keep_filter_rules", feed.KeepFilterEntryRules), + slog.Any("block_rules", blockRules), + slog.Any("allow_rules", allowRules), + slog.Int64("user_id", user.ID), + slog.Int64("feed_id", feed.ID), + ) + + // Processing older entries first ensures that their creation timestamp is lower than newer entries. for _, entry := range slices.Backward(feed.Entries) { slog.Debug("Processing entry", slog.Int64("user_id", user.ID), @@ -48,7 +61,15 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, slog.String("feed_url", feed.FeedURL), ) - if filter.IsBlockedEntry(feed, entry, user) || !filter.IsAllowedEntry(feed, entry, user) { + if filter.IsBlockedEntry(blockRules, allowRules, feed, entry) { + slog.Debug("Entry is blocked by filter rules", + slog.Int64("user_id", user.ID), + slog.String("entry_url", entry.URL), + slog.String("entry_hash", entry.Hash), + slog.String("entry_title", entry.Title), + slog.Int64("feed_id", feed.ID), + slog.String("feed_url", feed.FeedURL), + ) continue }