1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

feat: use Bilibili API instead of web scraping to get video watch time

This commit is contained in:
Qeynos 2024-09-23 09:05:43 +08:00 committed by GitHub
parent c326d5574b
commit c2ac2bfb83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -4,6 +4,7 @@
package processor package processor
import ( import (
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"log/slog" "log/slog"
@ -33,8 +34,8 @@ var (
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`) youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`) nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`) odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
bilibiliRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`) bilibiliURLRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
timelengthRegex = regexp.MustCompile(`"timelength":\s*(\d+)`) bilibiliVideoIdRegex = regexp.MustCompile(`/video/(?:av(\d+)|BV([a-zA-Z0-9]+))`)
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`) iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`) customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
) )
@ -474,7 +475,7 @@ func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
if !config.Opts.FetchBilibiliWatchTime() { if !config.Opts.FetchBilibiliWatchTime() {
return false return false
} }
matches := bilibiliRegex.FindStringSubmatch(entry.URL) matches := bilibiliURLRegex.FindStringSubmatch(entry.URL)
urlMatchesBilibiliPattern := len(matches) == 2 urlMatchesBilibiliPattern := len(matches) == 2
return urlMatchesBilibiliPattern return urlMatchesBilibiliPattern
} }
@ -574,41 +575,66 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
return int(dur / 60), nil return int(dur / 60), nil
} }
// extractBilibiliVideoID locates the video identifier embedded in a Bilibili
// video URL by applying bilibiliVideoIdRegex to it.
//
// On success it returns the identifier kind followed by the captured
// identifier: "aid" when the first capture group matched, "bvid" when the
// second one did. An error is returned when the regex does not match the URL
// at all, or when it matches but both capture groups are empty.
func extractBilibiliVideoID(websiteURL string) (string, string, error) {
	groups := bilibiliVideoIdRegex.FindStringSubmatch(websiteURL)
	if groups == nil {
		return "", "", fmt.Errorf("no video ID found in URL: %s", websiteURL)
	}

	// The two capture groups are alternatives, so at most one is populated.
	firstGroup, secondGroup := groups[1], groups[2]
	switch {
	case firstGroup != "":
		return "aid", firstGroup, nil
	case secondGroup != "":
		return "bvid", secondGroup, nil
	default:
		return "", "", fmt.Errorf("unexpected regex match result for URL: %s", websiteURL)
	}
}
func fetchBilibiliWatchTime(websiteURL string) (int, error) { func fetchBilibiliWatchTime(websiteURL string) (int, error) {
requestBuilder := fetcher.NewRequestBuilder() requestBuilder := fetcher.NewRequestBuilder()
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout()) requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
requestBuilder.WithProxy(config.Opts.HTTPClientProxy()) requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL)) idType, videoID, extractErr := extractBilibiliVideoID(websiteURL)
if extractErr != nil {
return 0, extractErr
}
bilibiliApiURL := fmt.Sprintf("https://api.bilibili.com/x/web-interface/view?%s=%s", idType, videoID)
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(bilibiliApiURL))
defer responseHandler.Close() defer responseHandler.Close()
if localizedError := responseHandler.LocalizedError(); localizedError != nil { if localizedError := responseHandler.LocalizedError(); localizedError != nil {
slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error())) slog.Warn("Unable to fetch Bilibili API",
slog.String("website_url", bilibiliApiURL),
slog.Any("error", localizedError.Error()))
return 0, localizedError.Error() return 0, localizedError.Error()
} }
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize())) var result map[string]interface{}
if docErr != nil { doc := json.NewDecoder(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
return 0, docErr if docErr := doc.Decode(&result); docErr != nil {
return 0, fmt.Errorf("failed to decode API response: %v", docErr)
} }
timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text()) if code, ok := result["code"].(float64); !ok || code != 0 {
if len(timelengthMatches) < 2 { return 0, fmt.Errorf("API returned error code: %v", result["code"])
return 0, errors.New("duration has not found")
} }
durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64) data, ok := result["data"].(map[string]interface{})
if err != nil { if !ok {
return 0, fmt.Errorf("unable to parse duration %s: %v", timelengthMatches[1], err) return 0, fmt.Errorf("data field not found or not an object")
} }
durationSec := durationMs / 1000 duration, ok := data["duration"].(float64)
durationMin := durationSec / 60 if !ok {
if durationSec%60 != 0 { return 0, fmt.Errorf("duration not found or not a number")
}
intDuration := int(duration)
durationMin := intDuration / 60
if intDuration%60 != 0 {
durationMin++ durationMin++
} }
return durationMin, nil
return int(durationMin), nil
} }
// parseISO8601 parses an ISO 8601 duration string. // parseISO8601 parses an ISO 8601 duration string.