mirror of
https://github.com/miniflux/v2.git
synced 2025-08-11 17:51:01 +00:00
feat(readability): avoid removing elements with content class
This commit is contained in:
parent
54abd0a736
commit
66b269e6cd
1 changed files with 9 additions and 9 deletions
|
@@ -18,9 +18,9 @@ import (
|
||||||
const defaultTagsToScore = "section,h2,h3,h4,h5,h6,p,td,pre,div"
|
const defaultTagsToScore = "section,h2,h3,h4,h5,h6,p,td,pre,div"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
strongCandidates = [...]string{"popupbody", "-ad", "g-plus"}
|
strongCandidatesToRemove = [...]string{"popupbody", "-ad", "g-plus"}
|
||||||
maybeCandidate = [...]string{"and", "article", "body", "column", "main", "shadow"}
|
maybeCandidateToRemove = [...]string{"and", "article", "body", "column", "main", "shadow", "content"}
|
||||||
unlikelyCandidate = [...]string{"banner", "breadcrumbs", "combx", "comment", "community", "cover-wrap", "disqus", "extra", "foot", "header", "legends", "menu", "modal", "related", "remark", "replies", "rss", "shoutbox", "sidebar", "skyscraper", "social", "sponsor", "supplemental", "ad-break", "agegate", "pagination", "pager", "popup", "yom-remote"}
|
unlikelyCandidateToRemove = [...]string{"banner", "breadcrumbs", "combx", "comment", "community", "cover-wrap", "disqus", "extra", "foot", "header", "legends", "menu", "modal", "related", "remark", "replies", "rss", "shoutbox", "sidebar", "skyscraper", "social", "sponsor", "supplemental", "ad-break", "agegate", "pagination", "pager", "popup", "yom-remote"}
|
||||||
|
|
||||||
positiveKeywords = [...]string{"article", "blog", "body", "content", "entry", "h-entry", "hentry", "main", "page", "pagination", "post", "story", "text"}
|
positiveKeywords = [...]string{"article", "blog", "body", "content", "entry", "h-entry", "hentry", "main", "page", "pagination", "post", "story", "text"}
|
||||||
negativeKeywords = [...]string{"author", "banner", "byline", "com-", "combx", "comment", "contact", "dateline", "foot", "hid", "masthead", "media", "meta", "modal", "outbrain", "promo", "related", "scroll", "share", "shopping", "shoutbox", "sidebar", "skyscraper", "sponsor", "tags", "tool", "widget", "writtenby"}
|
negativeKeywords = [...]string{"author", "banner", "byline", "com-", "combx", "comment", "contact", "dateline", "foot", "hid", "masthead", "media", "meta", "modal", "outbrain", "promo", "related", "scroll", "share", "shopping", "shoutbox", "sidebar", "skyscraper", "sponsor", "tags", "tool", "widget", "writtenby"}
|
||||||
|
@@ -185,17 +185,17 @@ func shouldRemoveCandidate(str string) bool {
|
||||||
str = strings.ToLower(str)
|
str = strings.ToLower(str)
|
||||||
|
|
||||||
// Those candidates have no false-positives, no need to check against `maybeCandidate`
|
// Those candidates have no false-positives, no need to check against `maybeCandidate`
|
||||||
for _, strongCandidate := range strongCandidates {
|
for _, strongCandidateToRemove := range strongCandidatesToRemove {
|
||||||
if strings.Contains(str, strongCandidate) {
|
if strings.Contains(str, strongCandidateToRemove) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, unlikelyCandidate := range unlikelyCandidate {
|
for _, unlikelyCandidateToRemove := range unlikelyCandidateToRemove {
|
||||||
if strings.Contains(str, unlikelyCandidate) {
|
if strings.Contains(str, unlikelyCandidateToRemove) {
|
||||||
// Do we have a false positive?
|
// Do we have a false positive?
|
||||||
for _, maybe := range maybeCandidate {
|
for _, maybeCandidateToRemove := range maybeCandidateToRemove {
|
||||||
if strings.Contains(str, maybe) {
|
if strings.Contains(str, maybeCandidateToRemove) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue