From 2d0af094b7d6a85f3ad0efe2ad1f8825ced9541b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Fri, 27 Jun 2025 16:25:23 +0200 Subject: [PATCH] refactor(readability): simplify getArticle a bit - Use a proper division instead of multiplying by a float. - Extract a condition into the parent scope. - Use an else-if construct instead of a simple if. --- internal/reader/readability/readability.go | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go index bad0424c..d8d2b016 100644 --- a/internal/reader/readability/readability.go +++ b/internal/reader/readability/readability.go @@ -103,19 +103,19 @@ func ExtractContent(page io.Reader) (baseURL string, extractedContent string, er func getArticle(topCandidate *candidate, candidates candidateList) string { var output strings.Builder output.WriteString("<div>") - siblingScoreThreshold := max(10, topCandidate.score*.2) + siblingScoreThreshold := max(10, topCandidate.score/5) topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) { append := false + tag := "div" node := s.Get(0) if node == topCandidate.Node() { append = true } else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold { append = true - } - - if s.Is("p") { + } else if s.Is("p") { + tag = node.Data linkDensity := getLinkDensity(s) content := s.Text() contentLength := len(content) @@ -125,18 +125,15 @@ func getArticle(topCandidate *candidate, candidates candidateList) string { append = true } } else { - if linkDensity == 0 && containsSentence(content) { - append = true + if linkDensity == 0 { + if containsSentence(content) { + append = true + } } } } if append { - tag := "div" - if s.Is("p") { - tag = node.Data - } - html, _ := s.Html() output.WriteString("<" + tag + ">" + html + "</" + tag + ">") }