mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
refactor(processor): extract some functions into a utils.go file
This commit is contained in:
parent
46b159ac58
commit
fe4b00b9f8
4 changed files with 85 additions and 59 deletions
|
@ -10,9 +10,6 @@ import (
|
|||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/tdewolff/minify/v2"
|
||||
"github.com/tdewolff/minify/v2/html"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/metric"
|
||||
"miniflux.app/v2/internal/model"
|
||||
|
@ -117,7 +114,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, userID int64,
|
|||
)
|
||||
} else if extractedContent != "" {
|
||||
// We replace the entry content only if the scraper doesn't return any error.
|
||||
entry.Content = minifyEntryContent(extractedContent)
|
||||
entry.Content = minifyContent(extractedContent)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -177,7 +174,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
|
|||
}
|
||||
|
||||
if extractedContent != "" {
|
||||
entry.Content = minifyEntryContent(extractedContent)
|
||||
entry.Content = minifyContent(extractedContent)
|
||||
if user.ShowReadingTime {
|
||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
|
@ -230,19 +227,3 @@ func isRecentEntry(entry *model.Entry) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func minifyEntryContent(entryContent string) string {
|
||||
m := minify.New()
|
||||
|
||||
// Options required to avoid breaking the HTML content.
|
||||
m.Add("text/html", &html.Minifier{
|
||||
KeepEndTags: true,
|
||||
KeepQuotes: true,
|
||||
})
|
||||
|
||||
if minifiedHTML, err := m.String("text/html", entryContent); err == nil {
|
||||
entryContent = minifiedHTML
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
|
|
@ -118,7 +118,7 @@ func TestIsRecentEntry(t *testing.T) {
|
|||
func TestMinifyEntryContent(t *testing.T) {
|
||||
input := `<p> Some text with a <a href="http://example.org/"> link </a> </p>`
|
||||
expected := `<p>Some text with a <a href="http://example.org/">link</a></p>`
|
||||
result := minifyEntryContent(input)
|
||||
result := minifyContent(input)
|
||||
if expected != result {
|
||||
t.Errorf(`Unexpected result, got %q`, result)
|
||||
}
|
||||
|
|
82
internal/reader/processor/utils.go
Normal file
82
internal/reader/processor/utils.go
Normal file
|
@ -0,0 +1,82 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package processor // import "miniflux.app/v2/internal/reader/processor"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/tdewolff/minify/v2"
|
||||
"github.com/tdewolff/minify/v2/html"
|
||||
)
|
||||
|
||||
// TODO: use something less horrible than a regex to parse ISO 8601 durations.
|
||||
|
||||
var (
	// iso8601Regex matches an ISO 8601 duration such as "PT4M13S" or "P1DT2H".
	// Every component is optional and exposed through a named capture group
	// (year, month, week, day, hour, minute, second).
	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
)

// parseISO8601 converts an ISO 8601 duration string into a time.Duration.
//
// Weeks and days are treated as fixed-length units (7 days and 24 hours
// respectively), so values such as "P1DT2H3M4S" are accepted. Years and
// months have no fixed length and are rejected with an "unknown field" error.
// A string that does not match the expected format, or a component that does
// not fit in an int64, also yields an error.
func parseISO8601(from string) (time.Duration, error) {
	// A single regex pass both validates the input and extracts the groups.
	match := iso8601Regex.FindStringSubmatch(from)
	if match == nil {
		return 0, errors.New("youtube: could not parse duration string")
	}

	var d time.Duration
	for i, name := range iso8601Regex.SubexpNames() {
		part := match[i]
		// Skip the whole-match entry, the unnamed wrapper groups, and any
		// component absent from the input.
		if i == 0 || name == "" || part == "" {
			continue
		}

		val, err := strconv.ParseInt(part, 10, 64)
		if err != nil {
			return 0, err
		}

		switch name {
		case "week":
			d += time.Duration(val) * 7 * 24 * time.Hour
		case "day":
			d += time.Duration(val) * 24 * time.Hour
		case "hour":
			d += time.Duration(val) * time.Hour
		case "minute":
			d += time.Duration(val) * time.Minute
		case "second":
			d += time.Duration(val) * time.Second
		default:
			// Years and months cannot be mapped to a fixed duration.
			return 0, fmt.Errorf("youtube: unknown field %s", name)
		}
	}

	return d, nil
}
|
||||
|
||||
func minifyContent(content string) string {
|
||||
m := minify.New()
|
||||
|
||||
// Options required to avoid breaking the HTML content.
|
||||
m.Add("text/html", &html.Minifier{
|
||||
KeepEndTags: true,
|
||||
KeepQuotes: true,
|
||||
})
|
||||
|
||||
if minifiedHTML, err := m.String("text/html", content); err == nil {
|
||||
content = minifiedHTML
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
||||
|
||||
// containsRegexPattern reports whether at least one element of entries
// matches the regular expression pattern.
//
// The pattern is compiled once up front instead of once per entry. An invalid
// pattern cannot match anything, so false is returned in that case.
func containsRegexPattern(pattern string, entries []string) bool {
	re, err := regexp.Compile(pattern)
	if err != nil {
		// Best effort: a malformed pattern is treated as "no match".
		return false
	}

	for _, entry := range entries {
		if re.MatchString(entry) {
			return true
		}
	}

	return false
}
|
|
@ -24,7 +24,6 @@ import (
|
|||
|
||||
var (
|
||||
// youtubeRegex captures everything that follows "youtube.com/watch?v=" up to the end of the string.
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
||||
// iso8601Regex matches an ISO 8601 duration; each optional component (year, month, week, day, hour, minute, second) is a named capture group.
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
|
||||
)
|
||||
|
||||
func isYouTubeVideoURL(websiteURL string) bool {
|
||||
|
@ -160,42 +159,6 @@ func fetchYouTubeWatchTimeFromApiInBulk(videoIDs []string) (map[string]time.Dura
|
|||
return watchTimeMap, nil
|
||||
}
|
||||
|
||||
func parseISO8601(from string) (time.Duration, error) {
|
||||
var match []string
|
||||
var d time.Duration
|
||||
|
||||
if iso8601Regex.MatchString(from) {
|
||||
match = iso8601Regex.FindStringSubmatch(from)
|
||||
} else {
|
||||
return 0, errors.New("youtube: could not parse duration string")
|
||||
}
|
||||
|
||||
for i, name := range iso8601Regex.SubexpNames() {
|
||||
part := match[i]
|
||||
if i == 0 || name == "" || part == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
val, err := strconv.ParseInt(part, 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "hour":
|
||||
d += time.Duration(val) * time.Hour
|
||||
case "minute":
|
||||
d += time.Duration(val) * time.Minute
|
||||
case "second":
|
||||
d += time.Duration(val) * time.Second
|
||||
default:
|
||||
return 0, fmt.Errorf("youtube: unknown field %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
type youtubeVideoListResponse struct {
|
||||
Items []struct {
|
||||
ID string `json:"id"`
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue