diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go
index 42560886..83d227aa 100644
--- a/internal/reader/readability/readability.go
+++ b/internal/reader/readability/readability.go
@@ -7,7 +7,6 @@ import (
"fmt"
"io"
"log/slog"
- "regexp"
"strings"
"miniflux.app/v2/internal/urllib"
@@ -16,13 +15,9 @@ import (
"golang.org/x/net/html"
)
-const (
- defaultTagsToScore = "section,h2,h3,h4,h5,h6,p,td,pre,div"
-)
+const defaultTagsToScore = "section,h2,h3,h4,h5,h6,p,td,pre,div"
var (
- divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)[ />]`)
-
strongCandidates = [...]string{"popupbody", "-ad", "g-plus"}
maybeCandidate = [...]string{"and", "article", "body", "column", "main", "shadow"}
unlikelyCandidate = [...]string{"banner", "breadcrumbs", "combx", "comment", "community", "cover-wrap", "disqus", "extra", "foot", "header", "legends", "menu", "modal", "related", "remark", "replies", "rss", "shoutbox", "sidebar", "skyscraper", "social", "sponsor", "supplemental", "ad-break", "agegate", "pagination", "pager", "popup", "yom-remote"}
diff --git a/internal/reader/readability/readability_test.go b/internal/reader/readability/readability_test.go
index 14e099cf..6c111f08 100644
--- a/internal/reader/readability/readability_test.go
+++ b/internal/reader/readability/readability_test.go
@@ -1880,152 +1880,6 @@ func TestTransformMisusedDivsIntoParagraphs(t *testing.T) {
}
}
-func TestTransformMisusedDivsIntoParagraphsRegexPattern(t *testing.T) {
- // Test the regex pattern directly to ensure it matches the expected elements
- testCases := []struct {
- name string
- html string
- shouldMatch bool
- description string
- }{
- {
- name: "anchor tag",
- html: `link`,
- shouldMatch: true,
- description: "should match anchor tags",
- },
- {
- name: "blockquote tag",
- html: `
quote
`,
- shouldMatch: true,
- description: "should match blockquote tags",
- },
- {
- name: "dl tag",
- html: `- term
`,
- shouldMatch: true,
- description: "should match dl tags",
- },
- {
- name: "div tag",
- html: `content
`,
- shouldMatch: true,
- description: "should match div tags",
- },
- {
- name: "img tag",
- html: `
`,
- shouldMatch: true,
- description: "should match img tags",
- },
- {
- name: "ol tag",
- html: `- item
`,
- shouldMatch: true,
- description: "should match ol tags",
- },
- {
- name: "p tag",
- html: `paragraph
`,
- shouldMatch: true,
- description: "should match p tags",
- },
- {
- name: "pre tag",
- html: `code
`,
- shouldMatch: true,
- description: "should match pre tags",
- },
- {
- name: "table tag",
- html: ``,
- shouldMatch: true,
- description: "should match table tags",
- },
- {
- name: "ul tag",
- html: ``,
- shouldMatch: true,
- description: "should match ul tags",
- },
- {
- name: "self-closing anchor",
- html: ``,
- shouldMatch: true,
- description: "should match self-closing anchor tags",
- },
- {
- name: "tag with attributes",
- html: `text`,
- shouldMatch: true,
- description: "should match tags with attributes",
- },
- {
- name: "uppercase tags",
- html: `link`,
- shouldMatch: true,
- description: "should be case insensitive",
- },
- {
- name: "mixed case tags",
- html: `
`,
- shouldMatch: true,
- description: "should match mixed case tags",
- },
- {
- name: "span tag",
- html: `text`,
- shouldMatch: false,
- description: "should NOT match span tags",
- },
- {
- name: "em tag",
- html: `emphasis`,
- shouldMatch: false,
- description: "should NOT match em tags",
- },
- {
- name: "strong tag",
- html: `bold`,
- shouldMatch: false,
- description: "should NOT match strong tags",
- },
- {
- name: "i tag",
- html: `italic`,
- shouldMatch: false,
- description: "should NOT match i tags",
- },
- {
- name: "b tag",
- html: `bold`,
- shouldMatch: false,
- description: "should NOT match b tags",
- },
- {
- name: "plain text",
- html: `just plain text`,
- shouldMatch: false,
- description: "should NOT match plain text",
- },
- {
- name: "empty string",
- html: ``,
- shouldMatch: false,
- description: "should NOT match empty string",
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- result := divToPElementsRegexp.MatchString(tc.html)
- if result != tc.shouldMatch {
- t.Errorf("%s\nHTML: %s\nExpected match: %v, Got: %v", tc.description, tc.html, tc.shouldMatch, result)
- }
- })
- }
-}
-
func TestTransformMisusedDivsIntoParagraphsEdgeCases(t *testing.T) {
t.Run("document with no divs", func(t *testing.T) {
html := `No divs here
Just other elements`