mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Replace github.com/rylans/getlang with github.com/abadojack/whatlanggo
github.com/rylans/getlang doesn't seem to be updated anymore
This commit is contained in:
parent
09e9b0361d
commit
7b541af253
5 changed files with 103 additions and 27 deletions
|
@ -7,25 +7,22 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/metric"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/readingtime"
|
||||
"miniflux.app/v2/internal/reader/rewrite"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/reader/scraper"
|
||||
"miniflux.app/v2/internal/storage"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -174,7 +171,7 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
|
|||
|
||||
if content != "" {
|
||||
entry.Content = content
|
||||
entry.ReadingTime = calculateReadingTime(content, user)
|
||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
|
||||
rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
|
||||
|
@ -252,7 +249,7 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
|
|||
}
|
||||
// Handle YT error case and non-YT entries.
|
||||
if entry.ReadingTime == 0 {
|
||||
entry.ReadingTime = calculateReadingTime(entry.Content, user)
|
||||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -360,18 +357,3 @@ func parseISO8601(from string) (time.Duration, error) {
|
|||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func calculateReadingTime(content string, user *model.User) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(user.CJKReadingSpeed)))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(user.DefaultReadingSpeed)))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
||||
|
|
31
internal/reader/readingtime/readingtime.go
Normal file
31
internal/reader/readingtime/readingtime.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
// Package readingtime provides a function to estimate the reading time of an article.
|
||||
package readingtime
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
"github.com/abadojack/whatlanggo"
|
||||
)
|
||||
|
||||
// EstimateReadingTime returns the estimated reading time of an article in minute.
|
||||
func EstimateReadingTime(content string, defaultReadingSpeed, cjkReadingSpeed int) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
langInfo := whatlanggo.Detect(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if langInfo.IsReliable() && (langInfo.Lang == whatlanggo.Jpn || langInfo.Lang == whatlanggo.Cmn || langInfo.Lang == whatlanggo.Kor) {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(cjkReadingSpeed)))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(defaultReadingSpeed)))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
61
internal/reader/readingtime/readingtime_test.go
Normal file
61
internal/reader/readingtime/readingtime_test.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package readingtime
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestEstimateReadingTimeInEnglish(t *testing.T) {
|
||||
sampleText := `
|
||||
In turpis lacus, sollicitudin non accumsan sed, suscipit eget magna. Morbi id
|
||||
neque enim. Aenean ac lacus consectetur, accumsan elit ac, suscipit dui. Donec
|
||||
congue mi et nisl bibendum, venenatis fringilla orci tristique. Nullam ullamcorper
|
||||
cursus justo, ac iaculis ante euismod a. Fusce dapibus lacus arcu, consectetur
|
||||
porttitor odio finibus ac. Integer dictum faucibus egestas. Etiam magna diam, placerat
|
||||
sed velit vitae, lobortis accumsan nisi. Sed viverra dui in odio commodo dapibus.
|
||||
Sed pulvinar metus finibus, hendrerit diam eu, faucibus lectus. Mauris est tellus,
|
||||
convallis et velit sit amet, convallis sagittis nunc. Quisque at ex leo. Donec eget leo
|
||||
vel nibh porta molestie. Aenean pellentesque purus non laoreet aliquam.
|
||||
|
||||
In feugiat eget arcu nec sodales. Nunc rutrum felis in tellus venenatis, sit
|
||||
amet tincidunt augue varius. Nunc nec dignissim quam. In euismod gravida rhoncus.
|
||||
Vivamus eget nibh sed diam malesuada facilisis. Donec ac convallis elit. Fusce
|
||||
fermentum tincidunt est. Nunc viverra, eros in gravida convallis, ex augue vehicula
|
||||
magna, sed tincidunt metus sem et mauris. In pretium purus odio, a auctor tellus
|
||||
ornare vel. Donec ac dolor pulvinar, placerat elit eget, ultrices nisi. Donec
|
||||
tincidunt magna eget pretium sodales. In urna lorem, consectetur in fringilla eget,
|
||||
rutrum et erat. Proin fringilla, lectus eget commodo consequat, est massa lacinia
|
||||
lorem, ut ultricies nunc erat id sapien.
|
||||
|
||||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce fermentum id
|
||||
sem sed commodo. Ut eget mauris eu lectus mollis aliquam. Fusce convallis, quam
|
||||
vel volutpat aliquet, nunc sem rhoncus magna, a iaculis enim ex nec neque.
|
||||
Suspendisse vel imperdiet leo. Quisque ultrices semper commodo. Pellentesque nec libero et
|
||||
mauris gravida porta vitae id nunc. Fusce sed sem sed augue gravida ultricies at nec
|
||||
turpis. Sed semper eu urna sit amet malesuada. Suspendisse blandit condimentum elit,
|
||||
in scelerisque tellus convallis eu. Nunc eleifend sem et mauris vestibulum
|
||||
mattis. Praesent ultricies pellentesque eros non posuere.
|
||||
`
|
||||
|
||||
readingTime := EstimateReadingTime(sampleText, 200, 500)
|
||||
if readingTime != 2 {
|
||||
t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEstimateReadingTimeInChinese(t *testing.T) {
|
||||
sampleText := `
|
||||
労問委格名町違載式新青脂通由。割止書円画民京般著治登門画拡下。有国同観教田美森素説砂者徴多。上治速相支存色分繰年活元事集遣逆山。身消年森発世財間世変悲原記潟旅好手真今。現通浪口特愛始信川節身方一表著購。郁不使権草定内防並要更一条露加。載交源図訴際属年券重供健三洗。事北残却女鮎朝分要廷込宣政愛無投事。
|
||||
|
||||
問警技亮参沼洗請米物模人。誰探重午局新戦報投性病庭。典向載問千著書故表視新権最石車音端乏大。白僚三掲局係仕表広無旧見要最裁。額寄済生年余講前本次載隊劇。権成観始応泉早高拓了経地本稼室目犯井出。暮載必広傷内校岡公南散広転行別釈。康運行関本掲隠泉傷退報告。独変年換差取予口男旅挑講禁姿。出芳工類胸管払時済潟髪内豊。
|
||||
|
||||
康浴部問玲玉追球化就店岡問画路投。施先太業阪能敏所陸不供探掲方用。手右演社援発示竹育対橋除際愛功旬転好使公。利時改本項輸属嘆員複携者地剤。天政朝戸祝言月接住世黙極者議編連。囲淑覧重弾必治物健賄開頂外称豊開名銀戸院。政稿調励廃演手生告題営味董演何南峰貨。学横公得行提大品回猿齢利込家前役把煎。天代者内身慢作業署間地日。
|
||||
|
||||
中個興本広坂態掲神中能等無滞長対。号処月画界意気様党目購栃欠歌暮。一耳供意盛四俊健必財下画例本判著堺要北王。宮大攻人水一備治首闘振円分建前趣校。目少供午見掲岡安画入情薦続土世始。診読格七久改急目斉実配正。性止月模多様更社発掲雪奇芸量全兵経負。予転済反問止下生買再無旅的。模治明以共会必華浅知館版領送。
|
||||
`
|
||||
|
||||
readingTime := EstimateReadingTime(sampleText, 200, 500)
|
||||
if readingTime != 2 {
|
||||
t.Errorf(`Wrong reading time, got %d instead of 2`, readingTime)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue