Move internal packages to an internal folder

For reference: https://go.dev/doc/go1.4#internalpackages
2025-09-15 18:57:04 +00:00 · 2023-08-10 19:46:45 -07:00 · 2023-08-10 19:46:45 -07:00 · 168a870c02
commit 168a870c02
parent c234903255
433 changed files with 1121 additions and 1123 deletions
--- a/internal/reader/subscription/finder.go
+++ b/internal/reader/subscription/finder.go
@ -0,0 +1,197 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package subscription // import "miniflux.app/v2/internal/reader/subscription"
+
+import (
+	"fmt"
+	"io"
+	"regexp"
+	"strings"
+
+	"miniflux.app/v2/internal/config"
+	"miniflux.app/v2/internal/errors"
+	"miniflux.app/v2/internal/http/client"
+	"miniflux.app/v2/internal/reader/browser"
+	"miniflux.app/v2/internal/reader/parser"
+	"miniflux.app/v2/internal/url"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// Package-level values shared by the subscription finder:
+//   - errUnreadableDoc is the message format handed to errors.NewLocalizedError
+//     by parseWebPage when the HTML document cannot be parsed; %v receives the
+//     underlying error.
+//   - youtubeChannelRegex matches "youtube.com/channel/" and captures everything
+//     that follows it (the capture is greedy and is not limited to the ID).
+//   - youtubeVideoRegex matches "youtube.com/watch?v=" and captures everything
+//     that follows it.
+var (
+	errUnreadableDoc    = "Unable to analyze this page: %v"
+	youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
+	youtubeVideoRegex   = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
+)
+
+// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
+func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *errors.LocalizedError) {
+	websiteURL = findYoutubeChannelFeed(websiteURL)
+	websiteURL = parseYoutubeVideoPage(websiteURL)
+
+	clt := client.NewClientWithConfig(websiteURL, config.Opts)
+	clt.WithCredentials(username, password)
+	clt.WithUserAgent(userAgent)
+	clt.WithCookie(cookie)
+	clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
+
+	if fetchViaProxy {
+		clt.WithProxy()
+	}
+
+	response, err := browser.Exec(clt)
+	if err != nil {
+		return nil, err
+	}
+
+	body := response.BodyAsString()
+	if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
+		var subscriptions Subscriptions
+		subscriptions = append(subscriptions, &Subscription{
+			Title: response.EffectiveURL,
+			URL:   response.EffectiveURL,
+			Type:  format,
+		})
+
+		return subscriptions, nil
+	}
+
+	subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
+	if err != nil || subscriptions != nil {
+		return subscriptions, err
+	}
+
+	return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
+}
+
+// parseWebPage extracts the feeds advertised by an HTML document through
+// <link> elements whose type attribute is one of the known feed MIME types.
+//
+// websiteURL is the base used to resolve relative href values; data is the
+// HTML document. Links without an href, with an empty href, or whose href
+// cannot be resolved are skipped. When a link has no title, the resolved feed
+// URL is used as the title.
+//
+// Results are grouped by feed type in a fixed order (rss, atom, json) so the
+// returned list is the same on every call for the same document. The result is
+// nil when no feed link is found. A localized error is returned only when the
+// document itself cannot be read.
+func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
+	// A slice, not a map: map iteration order is randomized, which made the
+	// order of the returned subscriptions differ from one call to the next.
+	feedLinks := []struct{ selector, kind string }{
+		{"link[type='application/rss+xml']", "rss"},
+		{"link[type='application/atom+xml']", "atom"},
+		{"link[type='application/json']", "json"},
+		{"link[type='application/feed+json']", "json"},
+	}
+
+	doc, err := goquery.NewDocumentFromReader(data)
+	if err != nil {
+		return nil, errors.NewLocalizedError(errUnreadableDoc, err)
+	}
+
+	var subscriptions Subscriptions
+	for _, feedLink := range feedLinks {
+		kind := feedLink.kind
+		doc.Find(feedLink.selector).Each(func(_ int, s *goquery.Selection) {
+			href, _ := s.Attr("href")
+			if href == "" {
+				return
+			}
+
+			feedURL, resolveErr := url.AbsoluteURL(websiteURL, href)
+			if resolveErr != nil || feedURL == "" {
+				// An unresolvable link is not a usable subscription.
+				return
+			}
+
+			title, _ := s.Attr("title")
+			if title == "" {
+				title = feedURL
+			}
+
+			subscriptions = append(subscriptions, &Subscription{
+				Title: title,
+				URL:   feedURL,
+				Type:  kind,
+			})
+		})
+	}
+
+	return subscriptions, nil
+}
+
+// findYoutubeChannelFeed rewrites a YouTube channel URL into the URL of the
+// channel's videos.xml feed. Any other URL is returned unchanged.
+//
+// Only the path segment that directly follows "/channel/" is used as the
+// channel ID: a trailing sub-path, query string or fragment (for example
+// ".../channel/ID/videos?view=0") is dropped so it does not end up inside the
+// channel_id parameter. A URL with an empty channel ID is returned unchanged.
+func findYoutubeChannelFeed(websiteURL string) string {
+	matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
+	if len(matches) != 2 {
+		return websiteURL
+	}
+
+	// The regex capture is greedy, so cut it at the first URL delimiter.
+	channelID := matches[1]
+	if end := strings.IndexAny(channelID, "/?#"); end >= 0 {
+		channelID = channelID[:end]
+	}
+	if channelID == "" {
+		return websiteURL
+	}
+
+	return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
+}
+
+// parseYoutubeVideoPage rewrites a YouTube video URL into the feed URL of the
+// channel that published the video.
+//
+// The video page is downloaded and the channel ID is read from the content
+// attribute of its first meta[itemprop="channelId"] element. The input URL is
+// returned unchanged when it is not a video URL, when the download or the HTML
+// parsing fails, or when the element has no content attribute.
+func parseYoutubeVideoPage(websiteURL string) string {
+	if !youtubeVideoRegex.MatchString(websiteURL) {
+		return websiteURL
+	}
+
+	response, fetchErr := browser.Exec(client.NewClientWithConfig(websiteURL, config.Opts))
+	if fetchErr != nil {
+		return websiteURL
+	}
+
+	page, parseErr := goquery.NewDocumentFromReader(response.Body)
+	if parseErr != nil {
+		return websiteURL
+	}
+
+	channelMeta := page.Find(`meta[itemprop="channelId"]`).First()
+	channelID, found := channelMeta.Attr("content")
+	if !found {
+		return websiteURL
+	}
+
+	return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
+}
+
+// tryWellKnownUrls probes a fixed list of conventional feed locations
+// (atom.xml, feed.xml, feed/, rss.xml, rss/) and returns one subscription for
+// each location that answers with HTTP status 200.
+//
+// The locations are tried relative to the root of websiteURL and, when it
+// differs from the root, relative to the directory of websiteURL (such as
+// 'example.com/blog/'). Candidates are probed in a fixed order so the returned
+// list is the same on every call for the same site. Request failures are not
+// reported: the candidate is simply skipped, and the returned error is always
+// nil. The result is nil when no candidate matches.
+func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
+	// A slice, not a map: map iteration order is randomized, which made the
+	// probing order and the order of the results differ between calls.
+	knownURLs := []struct{ path, kind string }{
+		{"atom.xml", "atom"},
+		{"feed.xml", "atom"},
+		{"feed/", "atom"},
+		{"rss.xml", "rss"},
+		{"rss/", "rss"},
+	}
+
+	// Look for knownURLs in the root.
+	websiteURLRoot := url.RootURL(websiteURL)
+	baseURLs := []string{websiteURLRoot}
+
+	// Look for knownURLs in the current subdirectory, such as 'example.com/blog/'.
+	// If the subdirectory cannot be resolved, only the root is probed.
+	if subdirectoryURL, err := url.AbsoluteURL(websiteURL, "./"); err == nil && subdirectoryURL != websiteURLRoot {
+		baseURLs = append(baseURLs, subdirectoryURL)
+	}
+
+	var subscriptions Subscriptions
+	for _, baseURL := range baseURLs {
+		for _, known := range knownURLs {
+			fullURL, err := url.AbsoluteURL(baseURL, known.path)
+			if err != nil || fullURL == "" {
+				continue
+			}
+
+			clt := client.NewClientWithConfig(fullURL, config.Opts)
+			clt.WithCredentials(username, password)
+			clt.WithUserAgent(userAgent)
+			clt.WithCookie(cookie)
+
+			// Some websites redirect unknown URLs to the home page.
+			// As a result, every known URL would end up in the subscription list.
+			// We don't want the user to choose between invalid feed URLs.
+			clt.WithoutRedirects()
+
+			response, err := clt.Get()
+			if err != nil || response == nil || response.StatusCode != 200 {
+				continue
+			}
+
+			subscriptions = append(subscriptions, &Subscription{
+				Title: fullURL,
+				URL:   fullURL,
+				Type:  known.kind,
+			})
+		}
+	}
+
+	return subscriptions, nil
+}
--- a/internal/reader/subscription/finder_test.go
+++ b/internal/reader/subscription/finder_test.go
@ -0,0 +1,285 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package subscription
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestFindYoutubeChannelFeed checks that a YouTube channel URL is rewritten to
+// its videos.xml feed URL and that an unrelated URL is returned unchanged.
+func TestFindYoutubeChannelFeed(t *testing.T) {
+	testCases := []struct{ websiteURL, want string }{
+		{"https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw", "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw"},
+		{"http://example.org/feed", "http://example.org/feed"},
+	}
+
+	for _, tc := range testCases {
+		if got := findYoutubeChannelFeed(tc.websiteURL); got != tc.want {
+			t.Errorf(`Unexpected Feed, got %s, instead of %s`, got, tc.want)
+		}
+	}
+}
+
+func TestParseWebPageWithRssFeed(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/rss" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "rss" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithAtomFeed(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/atom.xml" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "atom" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithJSONFeed(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "Some Title" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithEmptyTitle(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="/feed.json" rel="alternate" type="application/feed+json">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 1 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+
+	if subscriptions[0].Title != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
+	}
+
+	if subscriptions[0].URL != "http://example.org/feed.json" {
+		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
+	}
+
+	if subscriptions[0].Type != "json" {
+		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
+	}
+}
+
+func TestParseWebPageWithMultipleFeeds(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
+			<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 2 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}
+
+func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link href rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 0 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}
+
+func TestParseWebPageWithNoHref(t *testing.T) {
+	htmlPage := `
+	<!doctype html>
+	<html>
+		<head>
+			<link rel="alternate" type="application/feed+json" title="Some Title">
+		</head>
+		<body>
+		</body>
+	</html>`
+
+	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
+	if err != nil {
+		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
+	}
+
+	if len(subscriptions) != 0 {
+		t.Fatal(`Incorrect number of subscriptions returned`)
+	}
+}
--- a/internal/reader/subscription/subscription.go
+++ b/internal/reader/subscription/subscription.go
@ -0,0 +1,20 @@
+// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package subscription // import "miniflux.app/v2/internal/reader/subscription"
+
+import "fmt"
+
+// Subscription represents a feed subscription.
+//
+// Title is the label of the feed, URL is the address of the feed and Type is
+// the feed format. The three fields are serialized to JSON under the keys
+// "title", "url" and "type".
+type Subscription struct {
+	Title string `json:"title"`
+	URL   string `json:"url"`
+	Type  string `json:"type"`
+}
+
+func (s Subscription) String() string {
+	return fmt.Sprintf(`Title="%s", URL="%s", Type="%s"`, s.Title, s.URL, s.Type)
+}
+
+// Subscriptions represents a list of subscriptions, stored as pointers to
+// Subscription values.
+type Subscriptions []*Subscription