mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
As youtubeVideoID is assigned to getVideoIDFromYouTubeURL(entry.URL), there is no need to call the latter again when we can simply use youtubeVideoID instead.
169 lines
4.9 KiB
Go
169 lines
4.9 KiB
Go
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
package processor // import "miniflux.app/v2/internal/reader/processor"
|
|
|
|
import (
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"net/url"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"miniflux.app/v2/internal/config"
|
|
"miniflux.app/v2/internal/model"
|
|
"miniflux.app/v2/internal/proxyrotator"
|
|
"miniflux.app/v2/internal/reader/fetcher"
|
|
)
|
|
|
|
// youtubeRegex matches text containing "youtube.com/watch?v=" and captures
// whatever follows "v=" in its single sub-match.
var youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
|
|
|
func isYouTubeVideoURL(websiteURL string) bool {
|
|
return len(youtubeRegex.FindStringSubmatch(websiteURL)) == 2
|
|
}
|
|
|
|
// getVideoIDFromYouTubeURL returns the value of the "v" query parameter of
// websiteURL. It returns an empty string when the URL cannot be parsed or
// carries no such parameter.
func getVideoIDFromYouTubeURL(websiteURL string) string {
	if u, parseErr := url.Parse(websiteURL); parseErr == nil {
		return u.Query().Get("v")
	}
	return ""
}
|
|
|
|
func shouldFetchYouTubeWatchTimeForSingleEntry(entry *model.Entry) bool {
|
|
return config.Opts.FetchYouTubeWatchTime() && config.Opts.YouTubeApiKey() == "" && isYouTubeVideoURL(entry.URL)
|
|
}
|
|
|
|
func shouldFetchYouTubeWatchTimeInBulk() bool {
|
|
return config.Opts.FetchYouTubeWatchTime() && config.Opts.YouTubeApiKey() != ""
|
|
}
|
|
|
|
func fetchYouTubeWatchTimeForSingleEntry(websiteURL string) (int, error) {
|
|
slog.Debug("Fetching YouTube watch time for a single entry", slog.String("website_url", websiteURL))
|
|
|
|
requestBuilder := fetcher.NewRequestBuilder()
|
|
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
requestBuilder.WithProxyRotator(proxyrotator.ProxyRotatorInstance)
|
|
|
|
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
|
defer responseHandler.Close()
|
|
|
|
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
|
return 0, localizedError.Error()
|
|
}
|
|
|
|
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
|
if docErr != nil {
|
|
return 0, docErr
|
|
}
|
|
|
|
htmlDuration, exists := doc.FindMatcher(goquery.Single(`meta[itemprop="duration"]`)).Attr("content")
|
|
if !exists {
|
|
return 0, errors.New("youtube: duration has not found")
|
|
}
|
|
|
|
parsedDuration, err := parseISO8601(htmlDuration)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("youtube: unable to parse duration %s: %v", htmlDuration, err)
|
|
}
|
|
|
|
return int(parsedDuration.Minutes()), nil
|
|
}
|
|
|
|
func fetchYouTubeWatchTimeInBulk(entries []*model.Entry) {
|
|
var videosEntriesMapping = make(map[string]*model.Entry)
|
|
var videoIDs []string
|
|
|
|
for _, entry := range entries {
|
|
if !isYouTubeVideoURL(entry.URL) {
|
|
continue
|
|
}
|
|
|
|
youtubeVideoID := getVideoIDFromYouTubeURL(entry.URL)
|
|
if youtubeVideoID == "" {
|
|
continue
|
|
}
|
|
|
|
videosEntriesMapping[youtubeVideoID] = entry
|
|
videoIDs = append(videoIDs, youtubeVideoID)
|
|
}
|
|
|
|
if len(videoIDs) == 0 {
|
|
return
|
|
}
|
|
|
|
watchTimeMap, err := fetchYouTubeWatchTimeFromApiInBulk(videoIDs)
|
|
if err != nil {
|
|
slog.Warn("Unable to fetch YouTube watch time in bulk", slog.Any("error", err))
|
|
return
|
|
}
|
|
|
|
for videoID, watchTime := range watchTimeMap {
|
|
if entry, ok := videosEntriesMapping[videoID]; ok {
|
|
entry.ReadingTime = int(watchTime.Minutes())
|
|
}
|
|
}
|
|
}
|
|
|
|
func fetchYouTubeWatchTimeFromApiInBulk(videoIDs []string) (map[string]time.Duration, error) {
|
|
slog.Debug("Fetching YouTube watch time in bulk", slog.Any("video_ids", videoIDs))
|
|
|
|
apiQuery := url.Values{}
|
|
apiQuery.Set("id", strings.Join(videoIDs, ","))
|
|
apiQuery.Set("key", config.Opts.YouTubeApiKey())
|
|
apiQuery.Set("part", "contentDetails")
|
|
|
|
apiURL := url.URL{
|
|
Scheme: "https",
|
|
Host: "www.googleapis.com",
|
|
Path: "youtube/v3/videos",
|
|
RawQuery: apiQuery.Encode(),
|
|
}
|
|
|
|
requestBuilder := fetcher.NewRequestBuilder()
|
|
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
|
requestBuilder.WithProxyRotator(proxyrotator.ProxyRotatorInstance)
|
|
|
|
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(apiURL.String()))
|
|
defer responseHandler.Close()
|
|
|
|
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
|
slog.Warn("Unable to fetch contentDetails from YouTube API", slog.Any("error", localizedError.Error()))
|
|
return nil, localizedError.Error()
|
|
}
|
|
|
|
var videos youtubeVideoListResponse
|
|
if err := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())).Decode(&videos); err != nil {
|
|
return nil, fmt.Errorf("youtube: unable to decode JSON: %v", err)
|
|
}
|
|
|
|
watchTimeMap := make(map[string]time.Duration)
|
|
for _, video := range videos.Items {
|
|
duration, err := parseISO8601(video.ContentDetails.Duration)
|
|
if err != nil {
|
|
slog.Warn("Unable to parse ISO8601 duration", slog.Any("error", err))
|
|
continue
|
|
}
|
|
watchTimeMap[video.ID] = duration
|
|
}
|
|
return watchTimeMap, nil
|
|
}
|
|
|
|
// youtubeVideoContentDetails is the "contentDetails" object of a video item.
type youtubeVideoContentDetails struct {
	// Duration is the raw "duration" string as delivered in the JSON payload.
	Duration string `json:"duration"`
}

// youtubeVideoItem is one element of the "items" array.
type youtubeVideoItem struct {
	ID             string                     `json:"id"`
	ContentDetails youtubeVideoContentDetails `json:"contentDetails"`
}

// youtubeVideoListResponse holds the fields decoded from a video list JSON
// response: the "items" array with each item's "id" and
// "contentDetails.duration".
type youtubeVideoListResponse struct {
	Items []youtubeVideoItem `json:"items"`
}
|