From fe4b00b9f8657c610ca750b0a1709dae6b1ca811 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 18 Jun 2025 22:27:18 +0200 Subject: [PATCH] refactor(processor): extract some functions into an utils.go file --- internal/reader/processor/processor.go | 23 +----- internal/reader/processor/processor_test.go | 2 +- internal/reader/processor/utils.go | 82 +++++++++++++++++++++ internal/reader/processor/youtube.go | 37 ---------- 4 files changed, 85 insertions(+), 59 deletions(-) create mode 100644 internal/reader/processor/utils.go diff --git a/internal/reader/processor/processor.go b/internal/reader/processor/processor.go index af98fd14..5b2e7c86 100644 --- a/internal/reader/processor/processor.go +++ b/internal/reader/processor/processor.go @@ -10,9 +10,6 @@ import ( "slices" "time" - "github.com/tdewolff/minify/v2" - "github.com/tdewolff/minify/v2/html" - "miniflux.app/v2/internal/config" "miniflux.app/v2/internal/metric" "miniflux.app/v2/internal/model" @@ -117,7 +114,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64, ) } else if extractedContent != "" { // We replace the entry content only if the scraper doesn't return any error. - entry.Content = minifyEntryContent(extractedContent) + entry.Content = minifyContent(extractedContent) } } @@ -177,7 +174,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) } if extractedContent != "" { - entry.Content = minifyEntryContent(extractedContent) + entry.Content = minifyContent(extractedContent) if user.ShowReadingTime { entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed) } @@ -230,19 +227,3 @@ func isRecentEntry(entry *model.Entry) bool { } return false } - -func minifyEntryContent(entryContent string) string { - m := minify.New() - - // Options required to avoid breaking the HTML content. 
- m.Add("text/html", &html.Minifier{ - KeepEndTags: true, - KeepQuotes: true, - }) - - if minifiedHTML, err := m.String("text/html", entryContent); err == nil { - entryContent = minifiedHTML - } - - return entryContent -} diff --git a/internal/reader/processor/processor_test.go b/internal/reader/processor/processor_test.go index 9e228366..2d5ac837 100644 --- a/internal/reader/processor/processor_test.go +++ b/internal/reader/processor/processor_test.go @@ -118,7 +118,7 @@ func TestIsRecentEntry(t *testing.T) { func TestMinifyEntryContent(t *testing.T) { input := `

Some text with a link

` expected := `

Some text with a link

` - result := minifyEntryContent(input) + result := minifyContent(input) if expected != result { t.Errorf(`Unexpected result, got %q`, result) } diff --git a/internal/reader/processor/utils.go b/internal/reader/processor/utils.go new file mode 100644 index 00000000..660f116e --- /dev/null +++ b/internal/reader/processor/utils.go @@ -0,0 +1,82 @@ +// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package processor // import "miniflux.app/v2/internal/reader/processor" + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "time" + + "github.com/tdewolff/minify/v2" + "github.com/tdewolff/minify/v2/html" +) + +// TODO: use something less horrible than a regex to parse ISO 8601 durations. + +var ( + iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`) +) + +func parseISO8601(from string) (time.Duration, error) { + var match []string + var d time.Duration + + if iso8601Regex.MatchString(from) { + match = iso8601Regex.FindStringSubmatch(from) + } else { + return 0, errors.New("youtube: could not parse duration string") + } + + for i, name := range iso8601Regex.SubexpNames() { + part := match[i] + if i == 0 || name == "" || part == "" { + continue + } + + val, err := strconv.ParseInt(part, 10, 64) + if err != nil { + return 0, err + } + + switch name { + case "hour": + d += time.Duration(val) * time.Hour + case "minute": + d += time.Duration(val) * time.Minute + case "second": + d += time.Duration(val) * time.Second + default: + return 0, fmt.Errorf("youtube: unknown field %s", name) + } + } + + return d, nil +} + +func minifyContent(content string) string { + m := minify.New() + + // Options required to avoid breaking the HTML content. 
+ m.Add("text/html", &html.Minifier{ + KeepEndTags: true, + KeepQuotes: true, + }) + + if minifiedHTML, err := m.String("text/html", content); err == nil { + content = minifiedHTML + } + + return content +} + +func containsRegexPattern(pattern string, entries []string) bool { + for _, entry := range entries { + if matched, _ := regexp.MatchString(pattern, entry); matched { + return true + } + } + return false +} diff --git a/internal/reader/processor/youtube.go b/internal/reader/processor/youtube.go index 72c9ce15..6f9af960 100644 --- a/internal/reader/processor/youtube.go +++ b/internal/reader/processor/youtube.go @@ -24,7 +24,6 @@ import ( var ( youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`) - iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`) ) func isYouTubeVideoURL(websiteURL string) bool { @@ -160,42 +159,6 @@ func fetchYouTubeWatchTimeFromApiInBulk(videoIDs []string) (map[string]time.Dura return watchTimeMap, nil } -func parseISO8601(from string) (time.Duration, error) { - var match []string - var d time.Duration - - if iso8601Regex.MatchString(from) { - match = iso8601Regex.FindStringSubmatch(from) - } else { - return 0, errors.New("youtube: could not parse duration string") - } - - for i, name := range iso8601Regex.SubexpNames() { - part := match[i] - if i == 0 || name == "" || part == "" { - continue - } - - val, err := strconv.ParseInt(part, 10, 64) - if err != nil { - return 0, err - } - - switch name { - case "hour": - d += time.Duration(val) * time.Hour - case "minute": - d += time.Duration(val) * time.Minute - case "second": - d += time.Duration(val) * time.Second - default: - return 0, fmt.Errorf("youtube: unknown field %s", name) - } - } - - return d, nil -} - type youtubeVideoListResponse struct { Items []struct { ID string `json:"id"`