mirror of
https://github.com/miniflux/v2.git
synced 2025-09-15 18:57:04 +00:00
refactor(subscriptions): simplify YouTube subscriptions finder
The functions findSubscriptionsFromYouTubeChannelPage and findSubscriptionsFromYouTubePlaylistPage share a lot of code, so it makes sense to merge them. Moreover, merging them means the provided URL is parsed only once instead of twice, making things a bit faster.
This commit is contained in:
parent
e8f5c2446c
commit
6570dcb70c
2 changed files with 16 additions and 130 deletions
|
@ -70,24 +70,15 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 2) Check if the website URL is a YouTube channel.
|
// Step 2) Check if the website URL is a YouTube channel.
|
||||||
slog.Debug("Try to detect feeds from YouTube channel page", slog.String("website_url", websiteURL))
|
slog.Debug("Try to detect feeds for a YouTube page", slog.String("website_url", websiteURL))
|
||||||
if subscriptions, localizedError := f.findSubscriptionsFromYouTubeChannelPage(websiteURL); localizedError != nil {
|
if subscriptions, localizedError := f.findSubscriptionsFromYouTube(websiteURL); localizedError != nil {
|
||||||
return nil, localizedError
|
return nil, localizedError
|
||||||
} else if len(subscriptions) > 0 {
|
} else if len(subscriptions) > 0 {
|
||||||
slog.Debug("Subscriptions found from YouTube channel page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
slog.Debug("Subscriptions found from YouTube channel page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
||||||
return subscriptions, nil
|
return subscriptions, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 3) Check if the website URL is a YouTube playlist.
|
// Step 3) Parse web page to find feeds from HTML meta tags.
|
||||||
slog.Debug("Try to detect feeds from YouTube playlist page", slog.String("website_url", websiteURL))
|
|
||||||
if subscriptions, localizedError := f.findSubscriptionsFromYouTubePlaylistPage(websiteURL); localizedError != nil {
|
|
||||||
return nil, localizedError
|
|
||||||
} else if len(subscriptions) > 0 {
|
|
||||||
slog.Debug("Subscriptions found from YouTube playlist page", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
|
||||||
return subscriptions, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Step 4) Parse web page to find feeds from HTML meta tags.
|
|
||||||
slog.Debug("Try to detect feeds from HTML meta tags",
|
slog.Debug("Try to detect feeds from HTML meta tags",
|
||||||
slog.String("website_url", websiteURL),
|
slog.String("website_url", websiteURL),
|
||||||
slog.String("content_type", responseHandler.ContentType()),
|
slog.String("content_type", responseHandler.ContentType()),
|
||||||
|
@ -99,7 +90,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
|
||||||
return subscriptions, nil
|
return subscriptions, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 5) Check if the website URL can use RSS-Bridge.
|
// Step 4) Check if the website URL can use RSS-Bridge.
|
||||||
if rssBridgeURL != "" {
|
if rssBridgeURL != "" {
|
||||||
slog.Debug("Try to detect feeds with RSS-Bridge", slog.String("website_url", websiteURL))
|
slog.Debug("Try to detect feeds with RSS-Bridge", slog.String("website_url", websiteURL))
|
||||||
if subscriptions, localizedError := f.findSubscriptionsFromRSSBridge(websiteURL, rssBridgeURL, rssBridgeToken); localizedError != nil {
|
if subscriptions, localizedError := f.findSubscriptionsFromRSSBridge(websiteURL, rssBridgeURL, rssBridgeToken); localizedError != nil {
|
||||||
|
@ -110,7 +101,7 @@ func (f *subscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 6) Check if the website has a known feed URL.
|
// Step 5) Check if the website has a known feed URL.
|
||||||
slog.Debug("Try to detect feeds from well-known URLs", slog.String("website_url", websiteURL))
|
slog.Debug("Try to detect feeds from well-known URLs", slog.String("website_url", websiteURL))
|
||||||
if subscriptions, localizedError := f.findSubscriptionsFromWellKnownURLs(websiteURL); localizedError != nil {
|
if subscriptions, localizedError := f.findSubscriptionsFromWellKnownURLs(websiteURL); localizedError != nil {
|
||||||
return nil, localizedError
|
return nil, localizedError
|
||||||
|
@ -283,40 +274,24 @@ func (f *subscriptionFinder) findSubscriptionsFromRSSBridge(websiteURL, rssBridg
|
||||||
return subscriptions, nil
|
return subscriptions, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *subscriptionFinder) findSubscriptionsFromYouTubeChannelPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
func (f *subscriptionFinder) findSubscriptionsFromYouTube(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||||
decodedUrl, err := url.Parse(websiteURL)
|
u, err := url.Parse(websiteURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
|
return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
|
if !strings.HasSuffix(u.Host, "youtube.com") {
|
||||||
slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
|
slog.Debug("This website isn't on the youtube domain.", slog.String("website_url", websiteURL))
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
if _, channelID, found := strings.Cut(u.Path, "channel/"); found {
|
||||||
if _, channelID, found := strings.Cut(decodedUrl.Path, "channel/"); found {
|
|
||||||
feedURL := "https://www.youtube.com/feeds/videos.xml?channel_id=" + channelID
|
feedURL := "https://www.youtube.com/feeds/videos.xml?channel_id=" + channelID
|
||||||
return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil
|
return Subscriptions{NewSubscription(u.String(), feedURL, parser.FormatAtom)}, nil
|
||||||
}
|
}
|
||||||
|
if (strings.HasPrefix(u.Path, "/watch") && u.Query().Has("list")) || strings.HasPrefix(u.Path, "/playlist") {
|
||||||
return nil, nil
|
playlistID := u.Query().Get("list")
|
||||||
}
|
|
||||||
|
|
||||||
func (f *subscriptionFinder) findSubscriptionsFromYouTubePlaylistPage(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
|
||||||
decodedUrl, err := url.Parse(websiteURL)
|
|
||||||
if err != nil {
|
|
||||||
return nil, locale.NewLocalizedErrorWrapper(err, "error.invalid_site_url", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !strings.HasSuffix(decodedUrl.Host, "youtube.com") {
|
|
||||||
slog.Debug("This website is not a YouTube page, the regex doesn't match", slog.String("website_url", websiteURL))
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strings.HasPrefix(decodedUrl.Path, "/watch") && decodedUrl.Query().Has("list")) || strings.HasPrefix(decodedUrl.Path, "/playlist") {
|
|
||||||
playlistID := decodedUrl.Query().Get("list")
|
|
||||||
feedURL := "https://www.youtube.com/feeds/videos.xml?playlist_id=" + playlistID
|
feedURL := "https://www.youtube.com/feeds/videos.xml?playlist_id=" + playlistID
|
||||||
return Subscriptions{NewSubscription(websiteURL, feedURL, parser.FormatAtom)}, nil
|
return Subscriptions{NewSubscription(u.String(), feedURL, parser.FormatAtom)}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
|
|
@ -34,7 +34,7 @@ func TestFindYoutubePlaylistFeed(t *testing.T) {
|
||||||
// Channel URL
|
// Channel URL
|
||||||
{
|
{
|
||||||
websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
|
websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
|
||||||
feedURL: "",
|
feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
|
||||||
},
|
},
|
||||||
// Channel URL with name
|
// Channel URL with name
|
||||||
{
|
{
|
||||||
|
@ -70,7 +70,7 @@ func TestFindYoutubePlaylistFeed(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, scenario := range scenarios {
|
for _, scenario := range scenarios {
|
||||||
subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubePlaylistPage(scenario.websiteURL)
|
subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTube(scenario.websiteURL)
|
||||||
if scenario.discoveryError {
|
if scenario.discoveryError {
|
||||||
if localizedError == nil {
|
if localizedError == nil {
|
||||||
t.Fatalf(`Parsing an invalid URL should return an error`)
|
t.Fatalf(`Parsing an invalid URL should return an error`)
|
||||||
|
@ -97,95 +97,6 @@ func TestFindYoutubePlaylistFeed(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestFindYoutubeChannelFeed(t *testing.T) {
|
|
||||||
type testResult struct {
|
|
||||||
websiteURL string
|
|
||||||
feedURL string
|
|
||||||
discoveryError bool
|
|
||||||
}
|
|
||||||
|
|
||||||
scenarios := []testResult{
|
|
||||||
// Video URL
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Video URL with position argument
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Video URL with position argument
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/watch?t=1&v=dQw4w9WgXcQ",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Channel URL
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
|
|
||||||
feedURL: "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
|
|
||||||
},
|
|
||||||
// Channel URL with name
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/@ABCDEFG",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Playlist URL
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/playlist?list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Playlist URL with video ID
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLOOwEPgFWm_N42HlCLhqyJ0ZBWr5K1QDM",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Playlist URL with video ID and index argument
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.youtube.com/watch?v=6IutBmRJNLk&list=PLOOwEPgFWm_NHcQd9aCi5JXWASHO_n5uR&index=4",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Non-Youtube URL
|
|
||||||
{
|
|
||||||
websiteURL: "https://www.example.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
|
|
||||||
feedURL: "",
|
|
||||||
},
|
|
||||||
// Invalid URL
|
|
||||||
{
|
|
||||||
websiteURL: "https://example|org/",
|
|
||||||
feedURL: "",
|
|
||||||
discoveryError: true,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, scenario := range scenarios {
|
|
||||||
subscriptions, localizedError := NewSubscriptionFinder(nil).findSubscriptionsFromYouTubeChannelPage(scenario.websiteURL)
|
|
||||||
if scenario.discoveryError {
|
|
||||||
if localizedError == nil {
|
|
||||||
t.Fatalf(`Parsing an invalid URL should return an error`)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if scenario.feedURL == "" {
|
|
||||||
if len(subscriptions) > 0 {
|
|
||||||
t.Fatalf(`Parsing a non-channel URL should not return any subscription: %q`, scenario.websiteURL)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if localizedError != nil {
|
|
||||||
t.Fatalf(`Parsing a correctly formatted YouTube channel page should not return any error: %v`, localizedError)
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(subscriptions) != 1 {
|
|
||||||
t.Fatalf(`Incorrect number of subscriptions returned`)
|
|
||||||
}
|
|
||||||
|
|
||||||
if subscriptions[0].URL != scenario.feedURL {
|
|
||||||
t.Errorf(`Unexpected Feed, got %s, instead of %s`, subscriptions[0].URL, scenario.feedURL)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseWebPageWithRssFeed(t *testing.T) {
|
func TestParseWebPageWithRssFeed(t *testing.T) {
|
||||||
htmlPage := `
|
htmlPage := `
|
||||||
<!doctype html>
|
<!doctype html>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue