1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00
This commit is contained in:
Julien Voisin 2025-06-27 16:26:52 +02:00 committed by GitHub
commit 87202e9267
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -103,19 +103,19 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
func getArticle(topCandidate *candidate, candidates candidateList) string { func getArticle(topCandidate *candidate, candidates candidateList) string {
var output strings.Builder var output strings.Builder
output.WriteString("<div>") output.WriteString("<div>")
siblingScoreThreshold := max(10, topCandidate.score*.2) siblingScoreThreshold := max(10, topCandidate.score/5)
topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) { topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) {
append := false append := false
tag := "div"
node := s.Get(0) node := s.Get(0)
if node == topCandidate.Node() { if node == topCandidate.Node() {
append = true append = true
} else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold { } else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold {
append = true append = true
} } else if s.Is("p") {
tag = node.Data
if s.Is("p") {
linkDensity := getLinkDensity(s) linkDensity := getLinkDensity(s)
content := s.Text() content := s.Text()
contentLength := len(content) contentLength := len(content)
@ -125,18 +125,15 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
append = true append = true
} }
} else { } else {
if linkDensity == 0 && containsSentence(content) { if linkDensity == 0 {
append = true if containsSentence(content) {
append = true
}
} }
} }
} }
if append { if append {
tag := "div"
if s.Is("p") {
tag = node.Data
}
html, _ := s.Html() html, _ := s.Html()
output.WriteString("<" + tag + ">" + html + "</" + tag + ">") output.WriteString("<" + tag + ">" + html + "</" + tag + ">")
} }