1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-09-15 18:57:04 +00:00

Move internal packages to an internal folder

For reference: https://go.dev/doc/go1.4#internalpackages
This commit is contained in:
Frédéric Guillot 2023-08-10 19:46:45 -07:00
parent c234903255
commit 168a870c02
433 changed files with 1121 additions and 1123 deletions

View file

@ -0,0 +1,197 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package subscription // import "miniflux.app/v2/internal/reader/subscription"
import (
"fmt"
"io"
"regexp"
"strings"
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/errors"
"miniflux.app/v2/internal/http/client"
"miniflux.app/v2/internal/reader/browser"
"miniflux.app/v2/internal/reader/parser"
"miniflux.app/v2/internal/url"
"github.com/PuerkitoBio/goquery"
)
// errUnreadableDoc is the message format handed to errors.NewLocalizedError
// when a downloaded page cannot be parsed as an HTML document.
var errUnreadableDoc = "Unable to analyze this page: %v"

// Patterns used to recognise YouTube URLs. They are compiled once at package
// initialisation so that lookups never pay the compilation cost.
var (
	// youtubeChannelRegex captures everything that follows "youtube.com/channel/".
	youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)

	// youtubeVideoRegex captures everything that follows "youtube.com/watch?v=".
	youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
)
// FindSubscriptions downloads the given URL and tries to discover one or more
// feed subscriptions from it.
//
// Discovery proceeds in three stages, stopping at the first that succeeds:
//  1. if the downloaded body is itself a recognised feed, that URL is returned;
//  2. otherwise the body is parsed as HTML and feed <link> elements are collected;
//  3. otherwise a list of well-known feed paths is probed.
//
// YouTube channel and video URLs are rewritten to the channel feed URL before
// anything is downloaded.
func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *errors.LocalizedError) {
	websiteURL = parseYoutubeVideoPage(findYoutubeChannelFeed(websiteURL))

	httpClient := client.NewClientWithConfig(websiteURL, config.Opts)
	httpClient.WithCredentials(username, password)
	httpClient.WithUserAgent(userAgent)
	httpClient.WithCookie(cookie)
	httpClient.AllowSelfSignedCertificates = allowSelfSignedCertificates
	if fetchViaProxy {
		httpClient.WithProxy()
	}

	response, fetchErr := browser.Exec(httpClient)
	if fetchErr != nil {
		return nil, fetchErr
	}

	content := response.BodyAsString()

	// Stage 1: the URL points directly at a feed.
	feedFormat := parser.DetectFeedFormat(content)
	if feedFormat != parser.FormatUnknown {
		direct := &Subscription{
			Title: response.EffectiveURL,
			URL:   response.EffectiveURL,
			Type:  feedFormat,
		}
		return Subscriptions{direct}, nil
	}

	// Stage 2: look for feeds advertised by the HTML page.
	discovered, parseErr := parseWebPage(response.EffectiveURL, strings.NewReader(content))
	if parseErr != nil || discovered != nil {
		return discovered, parseErr
	}

	// Stage 3: fall back to probing conventional feed locations.
	return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
}
// parseWebPage reads an HTML document from data and returns one subscription
// for every <link> element whose type attribute advertises an RSS, Atom or
// JSON feed.
//
// websiteURL is the base used to resolve relative href values. Links without
// an href, with an empty href, or whose href cannot be resolved are skipped.
// When a link has no title, its resolved URL is used as the title.
//
// Results are grouped by feed type in a fixed order (RSS, Atom, JSON) so that
// the returned list is identical from one call to the next. A nil list is
// returned when the page advertises no feed. An error is returned only when
// the document cannot be read.
func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
	// An ordered slice is used instead of a map: ranging over a map visits
	// entries in random order, which would shuffle the result between calls.
	queries := []struct {
		selector string
		kind     string
	}{
		{"link[type='application/rss+xml']", "rss"},
		{"link[type='application/atom+xml']", "atom"},
		{"link[type='application/json']", "json"},
		{"link[type='application/feed+json']", "json"},
	}

	doc, err := goquery.NewDocumentFromReader(data)
	if err != nil {
		return nil, errors.NewLocalizedError(errUnreadableDoc, err)
	}

	var subscriptions Subscriptions
	for _, query := range queries {
		kind := query.kind
		doc.Find(query.selector).Each(func(i int, s *goquery.Selection) {
			feedURL, exists := s.Attr("href")
			if !exists || feedURL == "" {
				return
			}

			// A link that cannot be turned into an absolute URL is unusable.
			absoluteURL, resolveErr := url.AbsoluteURL(websiteURL, feedURL)
			if resolveErr != nil || absoluteURL == "" {
				return
			}

			subscription := &Subscription{URL: absoluteURL, Type: kind}
			if title, exists := s.Attr("title"); exists {
				subscription.Title = title
			}
			if subscription.Title == "" {
				subscription.Title = subscription.URL
			}

			subscriptions = append(subscriptions, subscription)
		})
	}

	return subscriptions, nil
}
// findYoutubeChannelFeed rewrites a YouTube channel URL into the URL of that
// channel's video feed. Any other URL is returned unchanged.
//
// Only the channel identifier itself is kept: whatever follows it in the URL
// (a sub-page such as "/videos", a query string or a fragment) is discarded,
// because it would otherwise end up inside the channel_id parameter and
// produce an invalid feed URL. If no identifier is left, the original URL is
// returned.
func findYoutubeChannelFeed(websiteURL string) string {
	matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
	if len(matches) != 2 {
		return websiteURL
	}

	// The pattern captures the whole remainder of the URL, so cut it at the
	// first character that cannot belong to the identifier path segment.
	channelID := matches[1]
	if end := strings.IndexAny(channelID, "/?#&"); end >= 0 {
		channelID = channelID[:end]
	}
	if channelID == "" {
		return websiteURL
	}

	return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
}
// parseYoutubeVideoPage turns a YouTube video URL into the feed URL of the
// channel that published the video.
//
// The video page is downloaded and the channel identifier is read from the
// content attribute of its first meta[itemprop="channelId"] element. The
// input URL is returned unchanged when it is not a video URL, when the page
// cannot be downloaded or parsed, or when the attribute is absent.
func parseYoutubeVideoPage(websiteURL string) string {
	if !youtubeVideoRegex.MatchString(websiteURL) {
		return websiteURL
	}

	page, fetchErr := browser.Exec(client.NewClientWithConfig(websiteURL, config.Opts))
	if fetchErr != nil {
		return websiteURL
	}

	document, parseErr := goquery.NewDocumentFromReader(page.Body)
	if parseErr != nil {
		return websiteURL
	}

	channelMeta := document.Find(`meta[itemprop="channelId"]`).First()
	channelID, found := channelMeta.Attr("content")
	if !found {
		return websiteURL
	}

	return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
}
// tryWellKnownUrls probes a fixed list of conventional feed locations and
// returns a subscription for every one that answers with HTTP status 200.
//
// The locations are tried relative to the root of the website first and then,
// when it differs from the root, relative to the directory of websiteURL
// (for example 'example.com/blog/'). Locations are probed in a fixed order so
// that the returned list is identical from one call to the next. Requests
// that fail are skipped, and the returned error is always nil.
//
// NOTE(review): the proxy and self-signed certificate settings available to
// FindSubscriptions are not part of this signature, so they are not applied
// to the probe requests.
func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
	// An ordered slice is used instead of a map: ranging over a map visits
	// entries in random order, which would shuffle both the requests and the
	// result between calls.
	knownURLs := []struct {
		path string
		kind string
	}{
		{"atom.xml", "atom"},
		{"feed.xml", "atom"},
		{"feed/", "atom"},
		{"rss.xml", "rss"},
		{"rss/", "rss"},
	}

	// Look for knownURLs in the root.
	websiteURLRoot := url.RootURL(websiteURL)
	baseURLs := []string{websiteURLRoot}

	// Look for knownURLs in the current subdirectory, such as 'example.com/blog/'.
	// A directory URL that cannot be resolved is left out rather than probed.
	if currentDirURL, err := url.AbsoluteURL(websiteURL, "./"); err == nil && currentDirURL != websiteURLRoot {
		baseURLs = append(baseURLs, currentDirURL)
	}

	var subscriptions Subscriptions
	for _, baseURL := range baseURLs {
		for _, known := range knownURLs {
			fullURL, err := url.AbsoluteURL(baseURL, known.path)
			if err != nil {
				continue
			}

			clt := client.NewClientWithConfig(fullURL, config.Opts)
			clt.WithCredentials(username, password)
			clt.WithUserAgent(userAgent)
			clt.WithCookie(cookie)

			// Some websites redirect unknown URLs to the home page. Following
			// those redirects would make every known URL look valid, and the
			// user would be asked to choose between invalid feed URLs.
			clt.WithoutRedirects()

			response, err := clt.Get()
			if err != nil || response == nil || response.StatusCode != 200 {
				continue
			}

			subscriptions = append(subscriptions, &Subscription{
				Title: fullURL,
				URL:   fullURL,
				Type:  known.kind,
			})
		}
	}

	return subscriptions, nil
}

View file

@ -0,0 +1,285 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package subscription
import (
"strings"
"testing"
)
// TestFindYoutubeChannelFeed checks that a YouTube channel URL is rewritten
// to its feed URL and that an unrelated URL is returned untouched.
func TestFindYoutubeChannelFeed(t *testing.T) {
	cases := []struct {
		input string
		want  string
	}{
		{
			input: "https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw",
			want:  "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
		},
		{
			input: "http://example.org/feed",
			want:  "http://example.org/feed",
		},
	}

	for _, tc := range cases {
		if got := findYoutubeChannelFeed(tc.input); got != tc.want {
			t.Errorf(`Unexpected Feed, got %s, instead of %s`, got, tc.want)
		}
	}
}
// TestParseWebPageWithRssFeed checks that an RSS <link> element is reported
// with its title, URL and type.
func TestParseWebPageWithRssFeed(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/rss" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "rss" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithAtomFeed checks that an Atom <link> element is reported
// with its title, URL and type.
func TestParseWebPageWithAtomFeed(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/atom.xml" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "atom" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithJSONFeed checks that a <link> element using the
// "application/feed+json" type is reported as a JSON feed.
func TestParseWebPageWithJSONFeed(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "json" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithOldJSONFeedMimeType checks that a <link> element using
// the older "application/json" type is still reported as a JSON feed.
func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "json" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithRelativeFeedURL checks that a relative href is resolved
// against the URL of the page.
func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "json" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithEmptyTitle checks that the resolved feed URL is used as
// the title when the <link> element has no title attribute.
func TestParseWebPageWithEmptyTitle(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="/feed.json" rel="alternate" type="application/feed+json">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if feed.Title != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription title: %q`, feed.Title)
	}
	if feed.URL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feed.URL)
	}
	if feed.Type != "json" {
		t.Errorf(`Incorrect subscription type: %q`, feed.Type)
	}
}
// TestParseWebPageWithMultipleFeeds checks that every feed advertised by the
// page is reported.
func TestParseWebPageWithMultipleFeeds(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if count := len(found); count != 2 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}
// TestParseWebPageWithEmptyFeedURL checks that a <link> element whose href
// attribute is present but empty is ignored.
func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if count := len(found); count != 0 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}
// TestParseWebPageWithNoHref checks that a <link> element without any href
// attribute is ignored.
func TestParseWebPageWithNoHref(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	if count := len(found); count != 0 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}

View file

@ -0,0 +1,20 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package subscription // import "miniflux.app/v2/internal/reader/subscription"
import "fmt"
// Subscription describes a single feed discovered for a website.
type Subscription struct {
	// Title is the label of the feed; serialised as "title".
	Title string `json:"title"`
	// URL is the address of the feed; serialised as "url".
	URL string `json:"url"`
	// Type is the feed format; serialised as "type".
	Type string `json:"type"`
}

// String renders the subscription on one line as
// Title="…", URL="…", Type="…".
func (s Subscription) String() string {
	const layout = `Title="%s", URL="%s", Type="%s"`
	return fmt.Sprintf(layout, s.Title, s.URL, s.Type)
}

// Subscriptions is a list of pointers to Subscription values.
type Subscriptions []*Subscription