refactor(processor): extract some functions into a utils.go file

2025-09-30 19:22:11 +00:00 · 2025-06-18 22:27:18 +02:00 · 2025-06-18 22:27:18 +02:00 · fe4b00b9f8
commit fe4b00b9f8
parent 46b159ac58
4 changed files with 85 additions and 59 deletions
--- a/internal/reader/processor/processor.go
+++ b/internal/reader/processor/processor.go
@ -10,9 +10,6 @@ import (
 	"slices"
 	"time"
 	"github.com/tdewolff/minify/v2"
 	"github.com/tdewolff/minify/v2/html"
 	"miniflux.app/v2/internal/config"
 	"miniflux.app/v2/internal/metric"
 	"miniflux.app/v2/internal/model"
@ -117,7 +114,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
 				)
 			} else if extractedContent != "" {
 				// We replace the entry content only if the scraper doesn't return any error.
-				entry.Content = minifyEntryContent(extractedContent)
+				entry.Content = minifyContent(extractedContent)
 			}
 		}
@ -177,7 +174,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
 	}
 	if extractedContent != "" {
-		entry.Content = minifyEntryContent(extractedContent)
+		entry.Content = minifyContent(extractedContent)
 		if user.ShowReadingTime {
 			entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
 		}
@ -230,19 +227,3 @@ func isRecentEntry(entry *model.Entry) bool {
 	}
 	return false
 }
 func minifyEntryContent(entryContent string) string {
 	m := minify.New()
 	// Options required to avoid breaking the HTML content.
 	m.Add("text/html", &html.Minifier{
 		KeepEndTags: true,
 		KeepQuotes:  true,
 	})
 	if minifiedHTML, err := m.String("text/html", entryContent); err == nil {
 		entryContent = minifiedHTML
 	}
 	return entryContent
 }
--- a/internal/reader/processor/processor_test.go
+++ b/internal/reader/processor/processor_test.go
@ -118,7 +118,7 @@ func TestIsRecentEntry(t *testing.T) {
 func TestMinifyEntryContent(t *testing.T) {
 	input := `<p>    Some text with a <a href="http://example.org/"> link   </a>    </p>`
 	expected := `<p>Some text with a <a href="http://example.org/">link</a></p>`
-	result := minifyEntryContent(input)
+	result := minifyContent(input)
 	if expected != result {
 		t.Errorf(`Unexpected result, got %q`, result)
 	}
--- a/internal/reader/processor/utils.go
+++ b/internal/reader/processor/utils.go
@ -0,0 +1,82 @@
 // SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 package processor // import "miniflux.app/v2/internal/reader/processor"
 import (
 	"errors"
 	"fmt"
 	"regexp"
 	"strconv"
 	"time"
 	"github.com/tdewolff/minify/v2"
 	"github.com/tdewolff/minify/v2/html"
 )
 // TODO: use something less horrible than a regex to parse ISO 8601 durations.
 var (
 	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 )
 // parseISO8601 converts an ISO 8601 duration string (for example "PT1H2M3S"
 // or "P1DT2H") into a time.Duration.
 //
 // Weeks, days, hours, minutes and seconds are summed using fixed lengths
 // (1 week = 7 days, 1 day = 24 hours). Years and months are matched by the
 // regex but have no fixed length, so a duration that uses them is rejected
 // with an "unknown field" error. A string that does not match the regex is
 // rejected with a "could not parse duration string" error.
 func parseISO8601(from string) (time.Duration, error) {
 	// FindStringSubmatch returns nil when there is no match, so a single regex
 	// pass both validates the input and captures the fields.
 	match := iso8601Regex.FindStringSubmatch(from)
 	if match == nil {
 		return 0, errors.New("youtube: could not parse duration string")
 	}
 	var d time.Duration
 	for i, name := range iso8601Regex.SubexpNames() {
 		part := match[i]
 		// Skip the whole-match entry, unnamed groups and absent fields.
 		if i == 0 || name == "" || part == "" {
 			continue
 		}
 		val, err := strconv.ParseInt(part, 10, 64)
 		if err != nil {
 			return 0, err
 		}
 		switch name {
 		case "week":
 			d += time.Duration(val) * 7 * 24 * time.Hour
 		case "day":
 			d += time.Duration(val) * 24 * time.Hour
 		case "hour":
 			d += time.Duration(val) * time.Hour
 		case "minute":
 			d += time.Duration(val) * time.Minute
 		case "second":
 			d += time.Duration(val) * time.Second
 		default:
 			// "year" and "month" land here: they cannot be expressed as a
 			// fixed time.Duration.
 			return 0, fmt.Errorf("youtube: unknown field %s", name)
 		}
 	}
 	return d, nil
 }
 func minifyContent(content string) string {
 	m := minify.New()
 	// Options required to avoid breaking the HTML content.
 	m.Add("text/html", &html.Minifier{
 		KeepEndTags: true,
 		KeepQuotes:  true,
 	})
 	if minifiedHTML, err := m.String("text/html", content); err == nil {
 		content = minifiedHTML
 	}
 	return content
 }
 // containsRegexPattern reports whether at least one string in entries matches
 // the regular expression pattern.
 //
 // The pattern is compiled once for the whole call instead of once per entry.
 // An invalid pattern matches nothing, so the function returns false for it.
 func containsRegexPattern(pattern string, entries []string) bool {
 	if len(entries) == 0 {
 		return false
 	}
 	re, err := regexp.Compile(pattern)
 	if err != nil {
 		// Deliberately best-effort: a pattern that does not compile is treated
 		// as "no match" rather than surfaced as an error.
 		return false
 	}
 	for _, entry := range entries {
 		if re.MatchString(entry) {
 			return true
 		}
 	}
 	return false
 }
--- a/internal/reader/processor/youtube.go
+++ b/internal/reader/processor/youtube.go
@ -24,7 +24,6 @@ import (
 var (
 	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
 	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
 )
 func isYouTubeVideoURL(websiteURL string) bool {
@ -160,42 +159,6 @@ func fetchYouTubeWatchTimeFromApiInBulk(videoIDs []string) (map[string]time.Dura
 	return watchTimeMap, nil
 }
 // parseISO8601 converts an ISO 8601 duration string (for example "PT1H2M3S"
 // or "P1DT2H") into a time.Duration.
 //
 // Weeks, days, hours, minutes and seconds are summed using fixed lengths
 // (1 week = 7 days, 1 day = 24 hours). Years and months are matched by the
 // regex but have no fixed length, so a duration that uses them is rejected
 // with an "unknown field" error. A string that does not match the regex is
 // rejected with a "could not parse duration string" error.
 func parseISO8601(from string) (time.Duration, error) {
 	// FindStringSubmatch returns nil when there is no match, so a single regex
 	// pass both validates the input and captures the fields.
 	match := iso8601Regex.FindStringSubmatch(from)
 	if match == nil {
 		return 0, errors.New("youtube: could not parse duration string")
 	}
 	var d time.Duration
 	for i, name := range iso8601Regex.SubexpNames() {
 		part := match[i]
 		// Skip the whole-match entry, unnamed groups and absent fields.
 		if i == 0 || name == "" || part == "" {
 			continue
 		}
 		val, err := strconv.ParseInt(part, 10, 64)
 		if err != nil {
 			return 0, err
 		}
 		switch name {
 		case "week":
 			d += time.Duration(val) * 7 * 24 * time.Hour
 		case "day":
 			d += time.Duration(val) * 24 * time.Hour
 		case "hour":
 			d += time.Duration(val) * time.Hour
 		case "minute":
 			d += time.Duration(val) * time.Minute
 		case "second":
 			d += time.Duration(val) * time.Second
 		default:
 			// "year" and "month" land here: they cannot be expressed as a
 			// fixed time.Duration.
 			return 0, fmt.Errorf("youtube: unknown field %s", name)
 		}
 	}
 	return d, nil
 }
 type youtubeVideoListResponse struct {
 	Items []struct {
 		ID             string `json:"id"`