From 6570dcb70c9c6302ffb7187ed2fc3e09f42d196f Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 28 Aug 2025 17:41:23 +0200 Subject: [PATCH] refactor(subscriptions): simplify YouTube subscriptions finder The functions findSubscriptionsFromYouTubeChannelPage and findSubscriptionsFromYouTubePlaylistPage share a lot of code, so it makes sense to merge them. Moreover, this allows the provided URL to be parsed only once instead of twice, making things a bit faster. --- internal/reader/subscription/finder.go | 53 ++++-------- internal/reader/subscription/finder_test.go | 93 +-------------------- 2 files changed, 16 insertions(+), 130 deletions(-) diff --git a/internal/reader/subscription/finder.go b/internal/reader/subscription/finder.go index c6c53cc2..8e16be64 100644 --- a/internal/reader/subscription/finder.go +++ b/internal/reader/subscription/finder.go @@ -70,24 +70,15 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string, } // Step 2) Check if the website URL is a YouTube channel. - slog.Debug("Try to detect feeds from YouTube channel page", slog.String("website_url", websiteURL)) - if subscriptions, localizedError := f.findSubscriptionsFromYouTubeChannelPage(websiteURL); localizedError != nil { + slog.Debug("Try to detect feeds for a YouTube page", slog.String("website_url", websiteURL)) + if subscriptions, localizedError := f.findSubscriptionsFromYouTube(websiteURL); localizedError != nil { return nil, localizedError } else if len(subscriptions) > 0 { slog.Debug("Subscriptions found from YouTube channel page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions)) return subscriptions, nil } - // Step 3) Check if the website URL is a YouTube playlist. 
- slog.Debug("Try to detect feeds from YouTube playlist page", slog.String("website_url", websiteURL)) - if subscriptions, localizedError := f.findSubscriptionsFromYouTubePlaylistPage(websiteURL); localizedError != nil { - return nil, localizedError - } else if len(subscriptions) > 0 { - slog.Debug("Subscriptions found from YouTube playlist page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions)) - return subscriptions, nil - } - - // Step 4) Parse web page to find feeds from HTML meta tags. + // Step 3) Parse web page to find feeds from HTML meta tags. slog.Debug("Try to detect feeds from HTML meta tags", slog.String("website_url", websiteURL), slog.String("content_type", responseHandler.ContentType()), @@ -99,7 +90,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string, return subscriptions, nil } - // Step 5) Check if the website URL can use RSS-Bridge. + // Step 4) Check if the website URL can use RSS-Bridge. if rssBridgeURL != "" { slog.Debug("Try to detect feeds with RSS-Bridge", slog.String("website_url", websiteURL)) if subscriptions, localizedError := f.findSubscriptionsFromRSSBridge(websiteURL, rssBridgeURL, rssBridgeToken); localizedError != nil { @@ -110,7 +101,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string, } } - // Step 6) Check if the website has a known feed URL. + // Step 5) Check if the website has a known feed URL. 
slog.Debug("Try to detect feeds from well-known URLs", slog.String("website_url", websiteURL)) if subscriptions, localizedError := f.findSubscriptionsFromWellKnownURLs(websiteURL); localizedError != nil { return nil, localizedError @@ -283,40 +274,24 @@ func (f *subscriptionFinder) findSubscriptionsFromRSSBridge(websiteURL, rssBridg return subscriptions, nil } -func (f *subscriptionFinder) findSubscriptionsFromYouTubeChannelPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) { - decodedUrl, err := url.Parse(websiteURL) +func (f *subscriptionFinder) findSubscriptionsFromYouTube(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) { + u, err := url.Parse(websiteURL) if err != nil { return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err) } - if !strings.HasSuffix(decodedUrl.Host, "youtube.com") { - slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL)) + if !strings.HasSuffix(u.Host, "youtube.com") { + slog.Debug("This website isn't on the youtube domain.", slog.String("website_url", websiteURL)) return nil, nil } - - if _, channelID, found := strings.Cut(decodedUrl.Path, "channel/"); found { + if _, channelID, found := strings.Cut(u.Path, "channel/"); found { feedURL := "https://www.youtube.com/feeds/videos.xml?channel_id=" + channelID - return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil + return Subscriptions{NewSubscription(u.String(), feedURL, parser.FormatAtom)}, nil } - - return nil, nil -} - -func (f *subscriptionFinder) findSubscriptionsFromYouTubePlaylistPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) { - decodedUrl, err := url.Parse(websiteURL) - if err != nil { - return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err) - } - - if !strings.HasSuffix(decodedUrl.Host, "youtube.com") { - slog.Debug("This website is not a YouTube page, the regex doesn't match", 
slog.String("website_url", websiteURL)) - return nil, nil - } - - if (strings.HasPrefix(decodedUrl.Path, "/watch") && decodedUrl.Query().Has("list")) || strings.HasPrefix(decodedUrl.Path, "/playlist") { - playlistID := decodedUrl.Query().Get("list") + if (strings.HasPrefix(u.Path, "/watch") && u.Query().Has("list")) || strings.HasPrefix(u.Path, "/playlist") { + playlistID := u.Query().Get("list") feedURL := "https://www.youtube.com/feeds/videos.xml?playlist_id=" + playlistID - return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil + return Subscriptions{NewSubscription(u.String(), feedURL, parser.FormatAtom)}, nil } return nil, nil diff --git a/internal/reader/subscription/finder_test.go b/internal/reader/subscription/finder_test.go index 21cc6a7c..bc83645d 100644 --- a/internal/reader/subscription/finder_test.go +++ b/internal/reader/subscription/finder_test.go @@ -34,7 +34,7 @@ func TestFindYoutubePlaylistFeed(t *testing.T) { // Channel URL { websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw", - feedURL: "", + feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw", }, // Channel URL with name { @@ -70,7 +70,7 @@ func TestFindYoutubePlaylistFeed(t *testing.T) { } for _, scenario := range scenarios { - subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubePlaylistPage(scenario.websiteURL) + subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL) if scenario.discoveryError { if localizedError == nil { t.Fatalf(`Parsing an invalid URL should return an error`) @@ -97,95 +97,6 @@ func TestFindYoutubePlaylistFeed(t *testing.T) { } } -func TestFindYoutubeChannelFeed(t *testing.T) { - type testResult struct { - websiteURL string - feedURL string - discoveryError bool - } - - scenarios := []testResult{ - // Video URL - { - websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", - feedURL: "", - }, - // 
Video URL with position argument - { - websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1", - feedURL: "", - }, - // Video URL with position argument - { - websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ", - feedURL: "", - }, - // Channel URL - { - websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw", - feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw", - }, - // Channel URL with name - { - websiteURL: "https://www.youtube.com/@ABCDEFG", - feedURL: "", - }, - // Playlist URL - { - websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR", - feedURL: "", - }, - // Playlist URL with video ID - { - websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM", - feedURL: "", - }, - // Playlist URL with video ID and index argument - { - websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4", - feedURL: "", - }, - // Non-Youtube URL - { - websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw", - feedURL: "", - }, - // Invalid URL - { - websiteURL: "https://example|org/", - feedURL: "", - discoveryError: true, - }, - } - - for _, scenario := range scenarios { - subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubeChannelPage(scenario.websiteURL) - if scenario.discoveryError { - if localizedError == nil { - t.Fatalf(`Parsing an invalid URL should return an error`) - } - } - - if scenario.feedURL == "" { - if len(subscriptions) > 0 { - t.Fatalf(`Parsing a non-channel URL should not return any subscription: %q`, scenario.websiteURL) - } - } else { - if localizedError != nil { - t.Fatalf(`Parsing a correctly formatted YouTube channel page should not return any error: %v`, localizedError) - } - - if len(subscriptions) != 1 { - t.Fatalf(`Incorrect number of subscriptions returned`) - } - - if subscriptions[0].URL != scenario.feedURL { 
- t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL) - } - } - } -} - func TestParseWebPageWithRssFeed(t *testing.T) { htmlPage := `