mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
feat: add FETCH_BILIBILI_WATCH_TIME
config option
This commit is contained in:
parent
569529d73b
commit
bcbf9f4025
5 changed files with 103 additions and 0 deletions
|
@ -2044,6 +2044,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestFetchBilibiliWatchTime(t *testing.T) {
|
||||||
|
os.Clearenv()
|
||||||
|
os.Setenv("FETCH_BILIBILI_WATCH_TIME", "1")
|
||||||
|
|
||||||
|
parser := NewParser()
|
||||||
|
opts, err := parser.ParseEnvironmentVariables()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(`Parsing failure: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := true
|
||||||
|
result := opts.FetchBilibiliWatchTime()
|
||||||
|
|
||||||
|
if result != expected {
|
||||||
|
t.Fatalf(`Unexpected FETCH_BILIBILI_WATCH_TIME value, got %v instead of %v`, result, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestFetchNebulaWatchTime(t *testing.T) {
|
func TestFetchNebulaWatchTime(t *testing.T) {
|
||||||
os.Clearenv()
|
os.Clearenv()
|
||||||
os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")
|
os.Setenv("FETCH_NEBULA_WATCH_TIME", "1")
|
||||||
|
|
|
@ -56,6 +56,7 @@ const (
|
||||||
defaultMediaResourceTypes = "image"
|
defaultMediaResourceTypes = "image"
|
||||||
defaultMediaProxyURL = ""
|
defaultMediaProxyURL = ""
|
||||||
defaultFilterEntryMaxAgeDays = 0
|
defaultFilterEntryMaxAgeDays = 0
|
||||||
|
defaultFetchBilibiliWatchTime = false
|
||||||
defaultFetchNebulaWatchTime = false
|
defaultFetchNebulaWatchTime = false
|
||||||
defaultFetchOdyseeWatchTime = false
|
defaultFetchOdyseeWatchTime = false
|
||||||
defaultFetchYouTubeWatchTime = false
|
defaultFetchYouTubeWatchTime = false
|
||||||
|
@ -141,6 +142,7 @@ type Options struct {
|
||||||
mediaProxyMode string
|
mediaProxyMode string
|
||||||
mediaProxyResourceTypes []string
|
mediaProxyResourceTypes []string
|
||||||
mediaProxyCustomURL string
|
mediaProxyCustomURL string
|
||||||
|
fetchBilibiliWatchTime bool
|
||||||
fetchNebulaWatchTime bool
|
fetchNebulaWatchTime bool
|
||||||
fetchOdyseeWatchTime bool
|
fetchOdyseeWatchTime bool
|
||||||
fetchYouTubeWatchTime bool
|
fetchYouTubeWatchTime bool
|
||||||
|
@ -218,6 +220,7 @@ func NewOptions() *Options {
|
||||||
mediaProxyResourceTypes: []string{defaultMediaResourceTypes},
|
mediaProxyResourceTypes: []string{defaultMediaResourceTypes},
|
||||||
mediaProxyCustomURL: defaultMediaProxyURL,
|
mediaProxyCustomURL: defaultMediaProxyURL,
|
||||||
filterEntryMaxAgeDays: defaultFilterEntryMaxAgeDays,
|
filterEntryMaxAgeDays: defaultFilterEntryMaxAgeDays,
|
||||||
|
fetchBilibiliWatchTime: defaultFetchBilibiliWatchTime,
|
||||||
fetchNebulaWatchTime: defaultFetchNebulaWatchTime,
|
fetchNebulaWatchTime: defaultFetchNebulaWatchTime,
|
||||||
fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
|
fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
|
||||||
fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
|
fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
|
||||||
|
@ -501,6 +504,12 @@ func (o *Options) FetchOdyseeWatchTime() bool {
|
||||||
return o.fetchOdyseeWatchTime
|
return o.fetchOdyseeWatchTime
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FetchBilibiliWatchTime returns true if the Bilibili video duration
|
||||||
|
// should be fetched and used as a reading time.
|
||||||
|
func (o *Options) FetchBilibiliWatchTime() bool {
|
||||||
|
return o.fetchBilibiliWatchTime
|
||||||
|
}
|
||||||
|
|
||||||
// MediaProxyMode returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
|
// MediaProxyMode returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
|
||||||
func (o *Options) MediaProxyMode() string {
|
func (o *Options) MediaProxyMode() string {
|
||||||
return o.mediaProxyMode
|
return o.mediaProxyMode
|
||||||
|
@ -658,6 +667,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
|
||||||
"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
|
"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
|
||||||
"FETCH_NEBULA_WATCH_TIME": o.fetchNebulaWatchTime,
|
"FETCH_NEBULA_WATCH_TIME": o.fetchNebulaWatchTime,
|
||||||
"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
|
"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
|
||||||
|
"FETCH_BILIBILI_WATCH_TIME": o.fetchBilibiliWatchTime,
|
||||||
"HTTPS": o.HTTPS,
|
"HTTPS": o.HTTPS,
|
||||||
"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
|
"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
|
||||||
"HTTP_CLIENT_PROXY": o.httpClientProxy,
|
"HTTP_CLIENT_PROXY": o.httpClientProxy,
|
||||||
|
|
|
@ -259,6 +259,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
|
||||||
p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
|
p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
|
||||||
case "METRICS_PASSWORD_FILE":
|
case "METRICS_PASSWORD_FILE":
|
||||||
p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
|
p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
|
||||||
|
case "FETCH_BILIBILI_WATCH_TIME":
|
||||||
|
p.opts.fetchBilibiliWatchTime = parseBool(value, defaultFetchBilibiliWatchTime)
|
||||||
case "FETCH_NEBULA_WATCH_TIME":
|
case "FETCH_NEBULA_WATCH_TIME":
|
||||||
p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
|
p.opts.fetchNebulaWatchTime = parseBool(value, defaultFetchNebulaWatchTime)
|
||||||
case "FETCH_ODYSEE_WATCH_TIME":
|
case "FETCH_ODYSEE_WATCH_TIME":
|
||||||
|
|
|
@ -33,6 +33,8 @@ var (
|
||||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)$`)
|
||||||
nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
|
nebulaRegex = regexp.MustCompile(`^https://nebula\.tv`)
|
||||||
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
|
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
|
||||||
|
bilibiliRegex = regexp.MustCompile(`bilibili\.com/video/(.*)$`)
|
||||||
|
timelengthRegex = regexp.MustCompile(`"timelength":\s*(\d+)`)
|
||||||
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
|
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
|
||||||
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
|
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
|
||||||
)
|
)
|
||||||
|
@ -418,6 +420,25 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if shouldFetchBilibiliWatchTime(entry) {
|
||||||
|
if entryIsNew {
|
||||||
|
watchTime, err := fetchBilibiliWatchTime(entry.URL)
|
||||||
|
if err != nil {
|
||||||
|
slog.Warn("Unable to fetch Bilibili watch time",
|
||||||
|
slog.Int64("user_id", user.ID),
|
||||||
|
slog.Int64("entry_id", entry.ID),
|
||||||
|
slog.String("entry_url", entry.URL),
|
||||||
|
slog.Int64("feed_id", feed.ID),
|
||||||
|
slog.String("feed_url", feed.FeedURL),
|
||||||
|
slog.Any("error", err),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
entry.ReadingTime = watchTime
|
||||||
|
} else {
|
||||||
|
entry.ReadingTime = store.GetReadTime(feed.ID, entry.Hash)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Handle YT error case and non-YT entries.
|
// Handle YT error case and non-YT entries.
|
||||||
if entry.ReadingTime == 0 {
|
if entry.ReadingTime == 0 {
|
||||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||||
|
@ -449,6 +470,15 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
|
||||||
return matches != nil
|
return matches != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func shouldFetchBilibiliWatchTime(entry *model.Entry) bool {
|
||||||
|
if !config.Opts.FetchBilibiliWatchTime() {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
matches := bilibiliRegex.FindStringSubmatch(entry.URL)
|
||||||
|
urlMatchesBilibiliPattern := len(matches) == 2
|
||||||
|
return urlMatchesBilibiliPattern
|
||||||
|
}
|
||||||
|
|
||||||
func fetchYouTubeWatchTime(websiteURL string) (int, error) {
|
func fetchYouTubeWatchTime(websiteURL string) (int, error) {
|
||||||
requestBuilder := fetcher.NewRequestBuilder()
|
requestBuilder := fetcher.NewRequestBuilder()
|
||||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||||
|
@ -544,6 +574,43 @@ func fetchOdyseeWatchTime(websiteURL string) (int, error) {
|
||||||
return int(dur / 60), nil
|
return int(dur / 60), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func fetchBilibiliWatchTime(websiteURL string) (int, error) {
|
||||||
|
requestBuilder := fetcher.NewRequestBuilder()
|
||||||
|
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||||
|
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||||
|
|
||||||
|
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||||
|
defer responseHandler.Close()
|
||||||
|
|
||||||
|
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||||
|
slog.Warn("Unable to fetch Bilibili page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||||
|
return 0, localizedError.Error()
|
||||||
|
}
|
||||||
|
|
||||||
|
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||||
|
if docErr != nil {
|
||||||
|
return 0, docErr
|
||||||
|
}
|
||||||
|
|
||||||
|
timelengthMatches := timelengthRegex.FindStringSubmatch(doc.Text())
|
||||||
|
if len(timelengthMatches) < 2 {
|
||||||
|
return 0, errors.New("duration has not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
durationMs, err := strconv.ParseInt(timelengthMatches[1], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return 0, fmt.Errorf("unable to parse duration %s: %v", timelengthMatches[1], err)
|
||||||
|
}
|
||||||
|
|
||||||
|
durationSec := durationMs / 1000
|
||||||
|
durationMin := durationSec / 60
|
||||||
|
if durationSec%60 != 0 {
|
||||||
|
durationMin++
|
||||||
|
}
|
||||||
|
|
||||||
|
return int(durationMin), nil
|
||||||
|
}
|
||||||
|
|
||||||
// parseISO8601 parses an ISO 8601 duration string.
|
// parseISO8601 parses an ISO 8601 duration string.
|
||||||
func parseISO8601(from string) (time.Duration, error) {
|
func parseISO8601(from string) (time.Duration, error) {
|
||||||
var match []string
|
var match []string
|
||||||
|
|
|
@ -244,6 +244,12 @@ Set the value to 1 to disable the internal scheduler service\&.
|
||||||
.br
|
.br
|
||||||
Default is false (The internal scheduler service is enabled)\&.
|
Default is false (The internal scheduler service is enabled)\&.
|
||||||
.TP
|
.TP
|
||||||
|
.B FETCH_BILIBILI_WATCH_TIME
|
||||||
|
Set the value to 1 to scrape video duration from Bilibili website and
|
||||||
|
use it as a reading time\&.
|
||||||
|
.br
|
||||||
|
Disabled by default\&.
|
||||||
|
.TP
|
||||||
.B FETCH_NEBULA_WATCH_TIME
|
.B FETCH_NEBULA_WATCH_TIME
|
||||||
Set the value to 1 to scrape video duration from Nebula website and
|
Set the value to 1 to scrape video duration from Nebula website and
|
||||||
use it as a reading time\&.
|
use it as a reading time\&.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue