Mirror of https://github.com/miniflux/v2.git, synced 2025-06-27 16:36:00 +00:00
refactor: split processor package into smaller files
Parent: c2ac2bfb83
Commit: cfe410f202
7 changed files with 351 additions and 271 deletions
internal/reader/processor/bilibili.go (new file, 92 lines)
@@ -0,0 +1,92 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
	"encoding/json"
	"fmt"
	"log/slog"
	"regexp"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/fetcher"
)

var (
	bilibiliURLRegex     = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
	bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
)

func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
	if !config.Opts.FetchBilibiliWatchTime() {
		return false
	}
	matches := bilibiliURLRegex.FindStringSubmatch(entry.URL)
	urlMatchesBilibiliPattern := len(matches) == 2
	return urlMatchesBilibiliPattern
}

func extractBilibiliVideoID(websiteURL string) (string, string, error) {
	matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
	if matches == nil {
		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
	}
	if matches[1] != "" {
		return "aid", matches[1], nil
	}
	if matches[2] != "" {
		return "bvid", matches[2], nil
	}
	return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
}

func fetchBilibiliWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
	if extractErr != nil {
		return 0, extractErr
	}
	bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Bilibili API",
			slog.String("website_url", websiteURL),
			slog.String("api_url", bilibiliApiURL),
			slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	var result map[string]interface{}
	doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr := doc.Decode(&result); docErr != nil {
		return 0, fmt.Errorf("failed to decode API response: %v", docErr)
	}

	if code, ok := result["code"].(float64); !ok || code != 0 {
		return 0, fmt.Errorf("API returned error code: %v", result["code"])
	}

	data, ok := result["data"].(map[string]interface{})
	if !ok {
		return 0, fmt.Errorf("data field not found or not an object")
	}

	duration, ok := data["duration"].(float64)
	if !ok {
		return 0, fmt.Errorf("duration not found or not a number")
	}

	intDuration := int(duration)
	durationMin := intDuration / 60
	if intDuration%60 != 0 {
		durationMin++
	}
	return durationMin, nil
}

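The aid/bvid branching in extractBilibiliVideoID above is easiest to see with concrete URLs: legacy av links carry a numeric ID, while newer BV links carry an alphanumeric one, and the function maps them to the aid and bvid query parameters respectively. Below is a hypothetical table-driven test sketch, not part of this commit; it assumes placement in internal/reader/processor next to bilibili.go, and the example URLs and the TestExtractBilibiliVideoID name are illustrative only.

package processor // import "miniflux.app/v2/internal/reader/processor"

import "testing"

func TestExtractBilibiliVideoID(t *testing.T) {
	scenarios := []struct {
		url     string
		idType  string
		videoID string
	}{
		// Legacy numeric identifier: queried through the "aid" parameter.
		{"https://www.bilibili.com/video/av170001", "aid", "170001"},
		// Newer BV identifier: queried through the "bvid" parameter (the "BV" prefix is stripped by the regex).
		{"https://www.bilibili.com/video/BV1xx411c7mD", "bvid", "1xx411c7mD"},
	}

	for _, tc := range scenarios {
		idType, videoID, err := extractBilibiliVideoID(tc.url)
		if err != nil {
			t.Fatalf("unexpected error for %q: %v", tc.url, err)
		}
		if idType != tc.idType || videoID != tc.videoID {
			t.Errorf("got (%s, %s) for %q, want (%s, %s)", idType, videoID, tc.url, tc.idType, tc.videoID)
		}
	}

	// A URL without a /video/ segment should be rejected.
	if _, _, err := extractBilibiliVideoID("https://www.bilibili.com/"); err == nil {
		t.Error("expected an error for a URL without a video ID")
	}
}
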
internal/reader/processor/nebula.go (new file, 60 lines)
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
	"errors"
	"fmt"
	"log/slog"
	"regexp"
	"strconv"

	"github.com/PuerkitoBio/goquery"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/fetcher"
)

var nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)

func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
	if !config.Opts.FetchNebulaWatchTime() {
		return false
	}
	matches := nebulaRegex.FindStringSubmatch(entry.URL)
	return matches != nil
}

func fetchNebulaWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
	// durs contains video watch time in seconds
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
	}

	return int(dur / 60), nil
}

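For context, fetchNebulaWatchTime depends on the page exposing a video:duration meta tag whose content attribute holds the length in seconds, and the final int(dur / 60) truncates any partial minute (unlike the Bilibili path above, which rounds up). The standalone sketch below, which is not part of this commit and uses a made-up HTML snippet, isolates that extraction step with the same goquery calls.

package main

import (
	"fmt"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
)

func main() {
	// Made-up page head carrying the meta tag that fetchNebulaWatchTime looks for.
	page := `<html><head><meta property="video:duration" content="754"></head><body></body></html>`

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(page))
	if err != nil {
		panic(err)
	}

	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
	if !exists {
		panic("duration meta tag not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		panic(err)
	}

	// 754 seconds -> 12 minutes; the remaining 34 seconds are dropped.
	fmt.Println(int(dur / 60))
}
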
internal/reader/processor/odysee.go (new file, 60 lines)
@@ -0,0 +1,60 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
	"errors"
	"fmt"
	"log/slog"
	"regexp"
	"strconv"

	"github.com/PuerkitoBio/goquery"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/fetcher"
)

var odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)

func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
	if !config.Opts.FetchOdyseeWatchTime() {
		return false
	}
	matches := odyseeRegex.FindStringSubmatch(entry.URL)
	return matches != nil
}

func fetchOdyseeWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
	// durs contains video watch time in seconds
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
	}

	return int(dur / 60), nil
}

internal/reader/processor/processor.go (modified)

@@ -4,13 +4,9 @@
 package processor
 
 import (
-	"encoding/json"
-	"errors"
-	"fmt"
 	"log/slog"
 	"regexp"
 	"slices"
-	"strconv"
 	"strings"
 	"time"
 
@@ -25,20 +21,11 @@ import (
 	"miniflux.app/v2/internal/reader/urlcleaner"
 	"miniflux.app/v2/internal/storage"
 
-	"github.com/PuerkitoBio/goquery"
 	"github.com/tdewolff/minify/v2"
 	"github.com/tdewolff/minify/v2/html"
 )
 
-var (
-	youtubeRegex           = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
-	nebulaRegex            = regexp.MustCompile(`^https://nebula\.tv`)
-	odyseeRegex            = regexp.MustCompile(`^https://odysee\.com`)
-	bilibiliURLRegex       = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
-	bilibiliVideoIdRegex   = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
-	iso8601Regex           = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
-	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
-)
+var customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
 
 // ProcessFeedEntries downloads original web page for entries and apply filters.
 func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
@@ -446,234 +433,6 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
 	}
 }
 
-func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
-	if !config.Opts.FetchYouTubeWatchTime() {
-		return false
-	}
-	matches := youtubeRegex.FindStringSubmatch(entry.URL)
-	urlMatchesYouTubePattern := len(matches) == 2
-	return urlMatchesYouTubePattern
-}
-
-func shouldFetchNebulaWatchTime(entry *model.Entry) bool {
-	if !config.Opts.FetchNebulaWatchTime() {
-		return false
-	}
-	matches := nebulaRegex.FindStringSubmatch(entry.URL)
-	return matches != nil
-}
-
-func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
-	if !config.Opts.FetchOdyseeWatchTime() {
-		return false
-	}
-	matches := odyseeRegex.FindStringSubmatch(entry.URL)
-	return matches != nil
-}
-
-func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
-	if !config.Opts.FetchBilibiliWatchTime() {
-		return false
-	}
-	matches := bilibiliURLRegex.FindStringSubmatch(entry.URL)
-	urlMatchesBilibiliPattern := len(matches) == 2
-	return urlMatchesBilibiliPattern
-}
-
-func fetchYouTubeWatchTime(websiteURL string) (int, error) {
-	requestBuilder := fetcher.NewRequestBuilder()
-	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
-	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
-
-	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
-	defer responseHandler.Close()
-
-	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
-		slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
-		return 0, localizedError.Error()
-	}
-
-	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
-	if docErr != nil {
-		return 0, docErr
-	}
-
-	durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
-	if !exists {
-		return 0, errors.New("duration has not found")
-	}
-
-	dur, err := parseISO8601(durs)
-	if err != nil {
-		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
-	}
-
-	return int(dur.Minutes()), nil
-}
-
-func fetchNebulaWatchTime(websiteURL string) (int, error) {
-	requestBuilder := fetcher.NewRequestBuilder()
-	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
-	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
-
-	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
-	defer responseHandler.Close()
-
-	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
-		slog.Warn("Unable to fetch Nebula watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
-		return 0, localizedError.Error()
-	}
-
-	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
-	if docErr != nil {
-		return 0, docErr
-	}
-
-	durs, exists := doc.Find(`meta[property="video:duration"]`).First().Attr("content")
-	// durs contains video watch time in seconds
-	if !exists {
-		return 0, errors.New("duration has not found")
-	}
-
-	dur, err := strconv.ParseInt(durs, 10, 64)
-	if err != nil {
-		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
-	}
-
-	return int(dur / 60), nil
-}
-
-func fetchOdyseeWatchTime(websiteURL string) (int, error) {
-	requestBuilder := fetcher.NewRequestBuilder()
-	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
-	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
-
-	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
-	defer responseHandler.Close()
-
-	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
-		slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
-		return 0, localizedError.Error()
-	}
-
-	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
-	if docErr != nil {
-		return 0, docErr
-	}
-
-	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
-	// durs contains video watch time in seconds
-	if !exists {
-		return 0, errors.New("duration has not found")
-	}
-
-	dur, err := strconv.ParseInt(durs, 10, 64)
-	if err != nil {
-		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
-	}
-
-	return int(dur / 60), nil
-}
-
-func extractBilibiliVideoID(websiteURL string) (string, string, error) {
-	matches := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
-	if matches == nil {
-		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
-	}
-	if matches[1] != "" {
-		return "aid", matches[1], nil
-	}
-	if matches[2] != "" {
-		return "bvid", matches[2], nil
-	}
-	return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
-}
-
-func fetchBilibiliWatchTime(websiteURL string) (int, error) {
-	requestBuilder := fetcher.NewRequestBuilder()
-	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
-	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
-
-	idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
-	if extractErr != nil {
-		return 0, extractErr
-	}
-	bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)
-
-	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
-	defer responseHandler.Close()
-
-	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
-		slog.Warn("Unable to fetch Bilibili API",
-			slog.String("website_url", bilibiliApiURL),
-			slog.Any("error", localizedError.Error()))
-		return 0, localizedError.Error()
-	}
-
-	var result map[string]interface{}
-	doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
-	if docErr := doc.Decode(&result); docErr != nil {
-		return 0, fmt.Errorf("failed to decode API response: %v", docErr)
-	}
-
-	if code, ok := result["code"].(float64); !ok || code != 0 {
-		return 0, fmt.Errorf("API returned error code: %v", result["code"])
-	}
-
-	data, ok := result["data"].(map[string]interface{})
-	if !ok {
-		return 0, fmt.Errorf("data field not found or not an object")
-	}
-
-	duration, ok := data["duration"].(float64)
-	if !ok {
-		return 0, fmt.Errorf("duration not found or not a number")
-	}
-
-	intDuration := int(duration)
-	durationMin := intDuration / 60
-	if intDuration%60 != 0 {
-		durationMin++
-	}
-	return durationMin, nil
-}
-
-// parseISO8601 parses an ISO 8601 duration string.
-func parseISO8601(from string) (time.Duration, error) {
-	var match []string
-	var d time.Duration
-
-	if iso8601Regex.MatchString(from) {
-		match = iso8601Regex.FindStringSubmatch(from)
-	} else {
-		return 0, errors.New("could not parse duration string")
-	}
-
-	for i, name := range iso8601Regex.SubexpNames() {
-		part := match[i]
-		if i == 0 || name == "" || part == "" {
-			continue
-		}
-
-		val, err := strconv.ParseInt(part, 10, 64)
-		if err != nil {
-			return 0, err
-		}
-
-		switch name {
-		case "hour":
-			d += (time.Duration(val) * time.Hour)
-		case "minute":
-			d += (time.Duration(val) * time.Minute)
-		case "second":
-			d += (time.Duration(val) * time.Second)
-		default:
-			return 0, fmt.Errorf("unknown field %s", name)
-		}
-	}
-
-	return d, nil
-}
-
 func isRecentEntry(entry *model.Entry) bool {
 	if config.Opts.FilterEntryMaxAgeDays() == 0 || entry.Date.After(time.Now().AddDate(0, 0, -config.Opts.FilterEntryMaxAgeDays())) {
 		return true

internal/reader/processor/processor_test.go (modified)

@@ -85,35 +85,6 @@ func TestAllowEntries(t *testing.T) {
 	}
 }
 
-func TestParseISO8601(t *testing.T) {
-	var scenarios = []struct {
-		duration string
-		expected time.Duration
-	}{
-		// Live streams and radio.
-		{"PT0M0S", 0},
-		// https://www.youtube.com/watch?v=HLrqNhgdiC0
-		{"PT6M20S", (6 * time.Minute) + (20 * time.Second)},
-		// https://www.youtube.com/watch?v=LZa5KKfqHtA
-		{"PT5M41S", (5 * time.Minute) + (41 * time.Second)},
-		// https://www.youtube.com/watch?v=yIxEEgEuhT4
-		{"PT51M52S", (51 * time.Minute) + (52 * time.Second)},
-		// https://www.youtube.com/watch?v=bpHf1XcoiFs
-		{"PT80M42S", (1 * time.Hour) + (20 * time.Minute) + (42 * time.Second)},
-	}
-
-	for _, tc := range scenarios {
-		result, err := parseISO8601(tc.duration)
-		if err != nil {
-			t.Errorf("Got an error when parsing %q: %v", tc.duration, err)
-		}
-
-		if tc.expected != result {
-			t.Errorf(`Unexpected result, got %v for duration %q`, result, tc.duration)
-		}
-	}
-}
-
 func TestIsRecentEntry(t *testing.T) {
 	parser := config.NewParser()
 	var err error

internal/reader/processor/youtube.go (new file, 100 lines)
@@ -0,0 +1,100 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor

import (
	"errors"
	"fmt"
	"log/slog"
	"regexp"
	"strconv"
	"time"

	"github.com/PuerkitoBio/goquery"

	"miniflux.app/v2/internal/config"
	"miniflux.app/v2/internal/model"
	"miniflux.app/v2/internal/reader/fetcher"
)

var (
	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
)

func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
	if !config.Opts.FetchYouTubeWatchTime() {
		return false
	}
	matches := youtubeRegex.FindStringSubmatch(entry.URL)
	urlMatchesYouTubePattern := len(matches) == 2
	return urlMatchesYouTubePattern
}

func fetchYouTubeWatchTime(websiteURL string) (int, error) {
	requestBuilder := fetcher.NewRequestBuilder()
	requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
	requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

	responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return 0, localizedError.Error()
	}

	doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
	if docErr != nil {
		return 0, docErr
	}

	durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration has not found")
	}

	dur, err := parseISO8601(durs)
	if err != nil {
		return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
	}

	return int(dur.Minutes()), nil
}

func parseISO8601(from string) (time.Duration, error) {
	var match []string
	var d time.Duration

	if iso8601Regex.MatchString(from) {
		match = iso8601Regex.FindStringSubmatch(from)
	} else {
		return 0, errors.New("could not parse duration string")
	}

	for i, name := range iso8601Regex.SubexpNames() {
		part := match[i]
		if i == 0 || name == "" || part == "" {
			continue
		}

		val, err := strconv.ParseInt(part, 10, 64)
		if err != nil {
			return 0, err
		}

		switch name {
		case "hour":
			d += (time.Duration(val) * time.Hour)
		case "minute":
			d += (time.Duration(val) * time.Minute)
		case "second":
			d += (time.Duration(val) * time.Second)
		default:
			return 0, fmt.Errorf("unknown field %s", name)
		}
	}

	return d, nil
}

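One property of parseISO8601 worth noting: the switch statement only recognizes the hour, minute, and second fields, so a duration that carries calendar components (years, months, weeks, or days) matches the regex but is rejected with an "unknown field" error. A hypothetical extra test (not part of this commit, assuming it sits in the processor package next to youtube.go) pins down this behaviour:

package processor // import "miniflux.app/v2/internal/reader/processor"

import "testing"

func TestParseISO8601UnsupportedFields(t *testing.T) {
	// Calendar components match iso8601Regex but hit the default branch of the switch.
	for _, duration := range []string{"P1Y", "P2W", "P1DT1H"} {
		if _, err := parseISO8601(duration); err == nil {
			t.Errorf("expected an error for %q because only H/M/S components are handled", duration)
		}
	}

	// Strings that do not match the pattern at all are rejected up front.
	if _, err := parseISO8601("not-a-duration"); err == nil {
		t.Error("expected an error for a string that does not match the ISO 8601 pattern")
	}
}
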
internal/reader/processor/youtube_test.go (new file, 38 lines)
@@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package processor // import "miniflux.app/v2/internal/reader/processor"

import (
	"testing"
	"time"
)

func TestParseISO8601(t *testing.T) {
	var scenarios = []struct {
		duration string
		expected time.Duration
	}{
		// Live streams and radio.
		{"PT0M0S", 0},
		// https://www.youtube.com/watch?v=HLrqNhgdiC0
		{"PT6M20S", (6 * time.Minute) + (20 * time.Second)},
		// https://www.youtube.com/watch?v=LZa5KKfqHtA
		{"PT5M41S", (5 * time.Minute) + (41 * time.Second)},
		// https://www.youtube.com/watch?v=yIxEEgEuhT4
		{"PT51M52S", (51 * time.Minute) + (52 * time.Second)},
		// https://www.youtube.com/watch?v=bpHf1XcoiFs
		{"PT80M42S", (1 * time.Hour) + (20 * time.Minute) + (42 * time.Second)},
	}

	for _, tc := range scenarios {
		result, err := parseISO8601(tc.duration)
		if err != nil {
			t.Errorf("Got an error when parsing %q: %v", tc.duration, err)
		}

		if tc.expected != result {
			t.Errorf(`Unexpected result, got %v for duration %q`, result, tc.duration)
		}
	}
}