mirror of
https://github.com/miniflux/v2.git
synced 2025-08-06 17:41:00 +00:00
refactor(readability): make use of getSelectionLength
This commit is contained in:
parent
cb617ff6e0
commit
766d4ab834
1 changed file with 2 additions and 22 deletions
|
@@ -241,7 +241,6 @@ func getTopCandidate(document *goquery.Document, candidates candidateList) *cand
|
||||||
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
||||||
// Then add their score to their parent node.
|
// Then add their score to their parent node.
|
||||||
// A score is determined by things like number of commas, class names, etc.
|
// A score is determined by things like number of commas, class names, etc.
|
||||||
// Maybe eventually link density.
|
|
||||||
func getCandidates(document *goquery.Document) candidateList {
|
func getCandidates(document *goquery.Document) candidateList {
|
||||||
candidates := make(candidateList)
|
candidates := make(candidateList)
|
||||||
|
|
||||||
|
@@ -324,31 +323,12 @@ func scoreNode(s *goquery.Selection) *candidate {
|
||||||
// Get the density of links as a percentage of the content
|
// Get the density of links as a percentage of the content
|
||||||
// This is the amount of text that is inside a link divided by the total text in the node.
|
// This is the amount of text that is inside a link divided by the total text in the node.
|
||||||
func getLinkDensity(s *goquery.Selection) float32 {
|
func getLinkDensity(s *goquery.Selection) float32 {
|
||||||
var getLengthOfTextContent func(*html.Node) int
|
sum := getSelectionLength(s)
|
||||||
getLengthOfTextContent = func(n *html.Node) int {
|
|
||||||
total := 0
|
|
||||||
if n.Type == html.TextNode {
|
|
||||||
total += len(n.Data)
|
|
||||||
}
|
|
||||||
if n.FirstChild != nil {
|
|
||||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
||||||
total += getLengthOfTextContent(c)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
sum := 0
|
|
||||||
for _, n := range s.Nodes {
|
|
||||||
sum += getLengthOfTextContent(n)
|
|
||||||
}
|
|
||||||
|
|
||||||
if sum == 0 {
|
if sum == 0 {
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: use something better than materializing the HTML.
|
linkLength := getSelectionLength(s.Find("a"))
|
||||||
linkLength := len(s.Find("a").Text())
|
|
||||||
|
|
||||||
return float32(linkLength) / float32(sum)
|
return float32(linkLength) / float32(sum)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue