mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
refactor(readability): simplify getArticle a bit
- Use a proper division instead of multiplying by a float.
- Extract a condition into the parent scope.
- Use an else-if construct instead of a simple if.
This commit is contained in:
parent
fcf86e33b9
commit
2d0af094b7
1 changed file with 8 additions and 11 deletions
|
@ -103,19 +103,19 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er
|
|||
func getArticle(topCandidate *candidate, candidates candidateList) string {
|
||||
var output strings.Builder
|
||||
output.WriteString("<div>")
|
||||
siblingScoreThreshold := max(10, topCandidate.score*.2)
|
||||
siblingScoreThreshold := max(10, topCandidate.score/5)
|
||||
|
||||
topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) {
|
||||
append := false
|
||||
tag := "div"
|
||||
node := s.Get(0)
|
||||
|
||||
if node == topCandidate.Node() {
|
||||
append = true
|
||||
} else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold {
|
||||
append = true
|
||||
}
|
||||
|
||||
if s.Is("p") {
|
||||
} else if s.Is("p") {
|
||||
tag = node.Data
|
||||
linkDensity := getLinkDensity(s)
|
||||
content := s.Text()
|
||||
contentLength := len(content)
|
||||
|
@ -125,18 +125,15 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
|
|||
append = true
|
||||
}
|
||||
} else {
|
||||
if linkDensity == 0 && containsSentence(content) {
|
||||
if linkDensity == 0 {
|
||||
if containsSentence(content) {
|
||||
append = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if append {
|
||||
tag := "div"
|
||||
if s.Is("p") {
|
||||
tag = node.Data
|
||||
}
|
||||
|
||||
html, _ := s.Html()
|
||||
output.WriteString("<" + tag + ">" + html + "</" + tag + ">")
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue