mirror of
https://github.com/miniflux/v2.git
synced 2025-08-06 17:41:00 +00:00
test(readability): increase test coverage
This commit is contained in:
parent
99c5bcdb01
commit
6eeccae7cd
2 changed files with 1209 additions and 46 deletions
|
@ -27,8 +27,8 @@ var (
|
||||||
maybeCandidate = [...]string{"and", "article", "body", "column", "main", "shadow"}
|
maybeCandidate = [...]string{"and", "article", "body", "column", "main", "shadow"}
|
||||||
unlikelyCandidate = [...]string{"banner", "breadcrumbs", "combx", "comment", "community", "cover-wrap", "disqus", "extra", "foot", "header", "legends", "menu", "modal", "related", "remark", "replies", "rss", "shoutbox", "sidebar", "skyscraper", "social", "sponsor", "supplemental", "ad-break", "agegate", "pagination", "pager", "popup", "yom-remote"}
|
unlikelyCandidate = [...]string{"banner", "breadcrumbs", "combx", "comment", "community", "cover-wrap", "disqus", "extra", "foot", "header", "legends", "menu", "modal", "related", "remark", "replies", "rss", "shoutbox", "sidebar", "skyscraper", "social", "sponsor", "supplemental", "ad-break", "agegate", "pagination", "pager", "popup", "yom-remote"}
|
||||||
|
|
||||||
positive = [...]string{"article", "blog", "body", "content", "entry", "h-entry", "hentry", "main", "page", "pagination", "post", "story", "text"}
|
positiveKeywords = [...]string{"article", "blog", "body", "content", "entry", "h-entry", "hentry", "main", "page", "pagination", "post", "story", "text"}
|
||||||
negative = [...]string{"author", "banner", "byline", "com-", "combx", "comment", "contact", "dateline", "foot", "hid", "masthead", "media", "meta", "modal", "outbrain", "promo", "related", "scroll", "share", "shopping", "shoutbox", "sidebar", "skyscraper", "sponsor", "tags", "tool", "widget", "writtenby"}
|
negativeKeywords = [...]string{"author", "banner", "byline", "com-", "combx", "comment", "contact", "dateline", "foot", "hid", "masthead", "media", "meta", "modal", "outbrain", "promo", "related", "scroll", "share", "shopping", "shoutbox", "sidebar", "skyscraper", "sponsor", "tags", "tool", "widget", "writtenby"}
|
||||||
)
|
)
|
||||||
|
|
||||||
type candidate struct {
|
type candidate struct {
|
||||||
|
@ -37,23 +37,31 @@ type candidate struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *candidate) Node() *html.Node {
|
func (c *candidate) Node() *html.Node {
|
||||||
|
if c.selection.Length() == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
return c.selection.Get(0)
|
return c.selection.Get(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *candidate) String() string {
|
func (c *candidate) String() string {
|
||||||
|
node := c.Node()
|
||||||
|
if node == nil {
|
||||||
|
return fmt.Sprintf("empty => %f", c.score)
|
||||||
|
}
|
||||||
|
|
||||||
id, _ := c.selection.Attr("id")
|
id, _ := c.selection.Attr("id")
|
||||||
class, _ := c.selection.Attr("class")
|
class, _ := c.selection.Attr("class")
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case id != "" && class != "":
|
case id != "" && class != "":
|
||||||
return fmt.Sprintf("%s#%s.%s => %f", c.Node().DataAtom, id, class, c.score)
|
return fmt.Sprintf("%s#%s.%s => %f", node.DataAtom, id, class, c.score)
|
||||||
case id != "":
|
case id != "":
|
||||||
return fmt.Sprintf("%s#%s => %f", c.Node().DataAtom, id, c.score)
|
return fmt.Sprintf("%s#%s => %f", node.DataAtom, id, c.score)
|
||||||
case class != "":
|
case class != "":
|
||||||
return fmt.Sprintf("%s.%s => %f", c.Node().DataAtom, class, c.score)
|
return fmt.Sprintf("%s.%s => %f", node.DataAtom, class, c.score)
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Sprintf("%s => %f", c.Node().DataAtom, c.score)
|
return fmt.Sprintf("%s => %f", node.DataAtom, c.score)
|
||||||
}
|
}
|
||||||
|
|
||||||
type candidateList map[*html.Node]*candidate
|
type candidateList map[*html.Node]*candidate
|
||||||
|
@ -111,7 +119,8 @@ func getArticle(topCandidate *candidate, candidates candidateList) string {
|
||||||
tag := "div"
|
tag := "div"
|
||||||
node := s.Get(0)
|
node := s.Get(0)
|
||||||
|
|
||||||
if node == topCandidate.Node() {
|
topNode := topCandidate.Node()
|
||||||
|
if topNode != nil && node == topNode {
|
||||||
append = true
|
append = true
|
||||||
} else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold {
|
} else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold {
|
||||||
append = true
|
append = true
|
||||||
|
@ -147,14 +156,14 @@ func shouldRemoveCandidate(str string) bool {
|
||||||
str = strings.ToLower(str)
|
str = strings.ToLower(str)
|
||||||
|
|
||||||
// Those candidates have no false-positives, no need to check against `maybeCandidate`
|
// Those candidates have no false-positives, no need to check against `maybeCandidate`
|
||||||
for _, strong := range strongCandidates {
|
for _, strongCandidate := range strongCandidates {
|
||||||
if strings.Contains(str, strong) {
|
if strings.Contains(str, strongCandidate) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, unlikely := range unlikelyCandidate {
|
for _, unlikelyCandidate := range unlikelyCandidate {
|
||||||
if strings.Contains(str, unlikely) {
|
if strings.Contains(str, unlikelyCandidate) {
|
||||||
// Do we have a false positive?
|
// Do we have a false positive?
|
||||||
for _, maybe := range maybeCandidate {
|
for _, maybe := range maybeCandidate {
|
||||||
if strings.Contains(str, maybe) {
|
if strings.Contains(str, maybe) {
|
||||||
|
@ -268,6 +277,11 @@ func getCandidates(document *goquery.Document) candidateList {
|
||||||
func scoreNode(s *goquery.Selection) *candidate {
|
func scoreNode(s *goquery.Selection) *candidate {
|
||||||
c := &candidate{selection: s, score: 0}
|
c := &candidate{selection: s, score: 0}
|
||||||
|
|
||||||
|
// Check if selection is empty to avoid panic
|
||||||
|
if s.Length() == 0 {
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
switch s.Get(0).DataAtom.String() {
|
switch s.Get(0).DataAtom.String() {
|
||||||
case "div":
|
case "div":
|
||||||
c.score += 5
|
c.score += 5
|
||||||
|
@ -314,13 +328,13 @@ func getClassWeight(s *goquery.Selection) float32 {
|
||||||
|
|
||||||
func getWeight(s string) int {
|
func getWeight(s string) int {
|
||||||
s = strings.ToLower(s)
|
s = strings.ToLower(s)
|
||||||
for _, pos := range negative {
|
for _, keyword := range negativeKeywords {
|
||||||
if strings.Contains(s, pos) {
|
if strings.Contains(s, keyword) {
|
||||||
return -25
|
return -25
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for _, pos := range positive {
|
for _, keyword := range positiveKeywords {
|
||||||
if strings.Contains(s, pos) {
|
if strings.Contains(s, keyword) {
|
||||||
return +25
|
return +25
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue