1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-06 17:41:00 +00:00

perf(readability): simplify removeUnlikelyCandidates

- Use an iterator instead of generating a whole slice when iterating on the selection.
- Using an iterator allows the use of a for-loop construct instead of a lambda,
  which is a bit clearer.
- Do the filtering in Find()'s selector instead of in the loop. This doesn't
  matter much now that we're using an iterator, but it makes the code a bit
  more obvious/simpler, and likely reduces the number of iterations a bit.
This commit is contained in:
jvoisin 2025-07-10 17:21:16 +02:00 committed by Frédéric Guillot
parent 7912b9b8fb
commit 1de9cf4241

View file

@ -208,14 +208,18 @@ func shouldRemoveCandidate(str string) bool {
}
func removeUnlikelyCandidates(document *goquery.Document) {
document.Find("*").Each(func(i int, s *goquery.Selection) {
if s.Length() == 0 || s.Get(0).Data == "html" || s.Get(0).Data == "body" {
return
// Only select tags with either a class or an id attribute,
// and never the html nor body tags, as we don't want to ever remove them.
selector := "[class]:not(body,html)" + "," + "[id]:not(body,html)"
for _, s := range document.Find(selector).EachIter() {
if s.Length() == 0 {
continue
}
// Don't remove elements within code blocks (pre or code tags)
if s.Closest("pre, code").Length() > 0 {
return
if s.Closest("pre,code").Length() > 0 {
continue
}
if class, ok := s.Attr("class"); ok && shouldRemoveCandidate(class) {
@ -223,7 +227,7 @@ func removeUnlikelyCandidates(document *goquery.Document) {
} else if id, ok := s.Attr("id"); ok && shouldRemoveCandidate(id) {
s.Remove()
}
})
}
}
func getTopCandidate(document *goquery.Document, candidates candidateList) *candidate {