mirror of
https://github.com/miniflux/v2.git
synced 2025-08-06 17:41:00 +00:00
perf(readability): significantly improve transformMisusedDivsIntoParagraphs
This commit is contained in:
parent
2f7b2e7375
commit
89c32d518d
1 changed file with 16 additions and 2 deletions
|
@@ -361,10 +361,24 @@ func getWeight(s string) int {
 
 func transformMisusedDivsIntoParagraphs(document *goquery.Document) {
 	document.Find("div").Each(func(i int, s *goquery.Selection) {
-		html, _ := s.Html()
-		if !divToPElementsRegexp.MatchString(html) {
+		nodes := s.Children().Nodes
+
+		if len(nodes) == 0 {
 			node := s.Get(0)
 			node.Data = "p"
+			return
+		}
+
+		for _, node := range nodes {
+			switch node.Data {
+			case "a", "blockquote", "div", "dl",
+				"img", "ol", "p", "pre",
+				"table", "ul":
+				return
+			default:
+				node := s.Get(0)
+				node.Data = "p"
+			}
 		}
 	})
 }
Loading…
Add table
Add a link
Reference in a new issue