diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go
index bdc85fe2..42560886 100644
--- a/internal/reader/readability/readability.go
+++ b/internal/reader/readability/readability.go
@@ -241,7 +241,6 @@ func getTopCandidate(document *goquery.Document, candidates candidateList) *cand
 // Loop through all paragraphs, and assign a score to them based on how content-y they look.
 // Then add their score to their parent node.
 // A score is determined by things like number of commas, class names, etc.
-// Maybe eventually link density.
 func getCandidates(document *goquery.Document) candidateList {
 	candidates := make(candidateList)
 
@@ -324,31 +323,12 @@ func scoreNode(s *goquery.Selection) *candidate {
 // Get the density of links as a percentage of the content
 // This is the amount of text that is inside a link divided by the total text in the node.
 func getLinkDensity(s *goquery.Selection) float32 {
-	var getLengthOfTextContent func(*html.Node) int
-	getLengthOfTextContent = func(n *html.Node) int {
-		total := 0
-		if n.Type == html.TextNode {
-			total += len(n.Data)
-		}
-		if n.FirstChild != nil {
-			for c := n.FirstChild; c != nil; c = c.NextSibling {
-				total += getLengthOfTextContent(c)
-			}
-		}
-		return total
-	}
-
-	sum := 0
-	for _, n := range s.Nodes {
-		sum += getLengthOfTextContent(n)
-	}
-
+	sum := getSelectionLength(s)
 	if sum == 0 {
 		return 0
 	}
 
-	// TODO: use something better than materializing the HTML.
-	linkLength := len(s.Find("a").Text())
+	linkLength := getSelectionLength(s.Find("a"))
 
 	return float32(linkLength) / float32(sum)
 }