From 61d3210ccaeed73b1357e48bbd5abb4d4b2fd621 Mon Sep 17 00:00:00 2001
From: jvoisin
Date: Tue, 17 Jun 2025 16:19:12 +0200
Subject: [PATCH] perf(media): minor regex simplification

The previous regex used the [ABC..D]*[ABC] pattern, resulting in a lot of
backtracking. The new regex stops matching at the first whitespace character
or at the end of a line (and is meant to drop a trailing `.`, should one be
present).

The backtracking was taking around 50% of the CPU time spent in atom.Parse.
---
 internal/reader/media/media.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/internal/reader/media/media.go b/internal/reader/media/media.go
index 736fc06c..8c78d7a5 100644
--- a/internal/reader/media/media.go
+++ b/internal/reader/media/media.go
@@ -9,7 +9,7 @@ import (
 	"strings"
 )
 
-var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
+var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?://[^\s]+)[.]?(?:\s|$)`)
 
 // Specs: https://www.rssboard.org/media-rss
 type MediaItemElement struct {