Deduplicate feed URLs when parsing HTML document during discovery process

Fixes #2232
2025-09-15 18:57:04 +00:00 · 2023-12-01 13:35:24 -08:00 · 2023-12-01 13:35:24 -08:00 · 5de0714256
commit 5de0714256
parent bfa83cbf99
2 changed files with 41 additions and 2 deletions
--- a/internal/reader/subscription/finder_test.go
+++ b/internal/reader/subscription/finder_test.go
@@ -249,6 +249,40 @@ func TestParseWebPageWithMultipleFeeds(t *testing.T) {
 	}
 }

+// TestParseWebPageWithDuplicatedFeeds checks that two <link> elements sharing
+// the same feed URL produce a single subscription carrying the first title.
+func TestParseWebPageWithDuplicatedFeeds(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed A">
+			<link href="http://example.org/feed.xml" rel="alternate" type="application/rss+xml" title="Feed B">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := NewSubscriptionFinder(nil).FindSubscriptionsFromWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatalf(`Incorrect number of subscriptions returned: got %d, want 1`, len(subscriptions))
+	}
+
+	if subscriptions[0].Title != "Feed A" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+	if subscriptions[0].URL != "http://example.org/feed.xml" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+	if subscriptions[0].Type != "rss" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
 func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
 	htmlPage := `
 	<!doctype html>