From 8efac6000e6de9b23657dd0bff73b73b1a70122b Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 12 Jun 2025 13:30:59 +0200 Subject: [PATCH] perf(readability): minor regex improvement - Improve the check for tags by matching a tag only if its name is followed by a space, a slash, or a closing angle bracket - Use a non-capturing group instead of a capturing one --- internal/reader/readability/readability.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go index 46771eeb..f3d2e976 100644 --- a/internal/reader/readability/readability.go +++ b/internal/reader/readability/readability.go @@ -21,7 +21,7 @@ const ( ) var ( - divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`) + divToPElementsRegexp = regexp.MustCompile(`(?i)<(?:a|blockquote|dl|div|img|ol|p|pre|table|ul)[ />]`) okMaybeItsACandidateRegexp = regexp.MustCompile(`and|article|body|column|main|shadow`) unlikelyCandidatesRegexp = regexp.MustCompile(`banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`)