mirror of
https://github.com/miniflux/v2.git
synced 2025-09-15 18:57:04 +00:00
Move internal packages to an internal folder
For reference: https://go.dev/doc/go1.4#internalpackages
This commit is contained in:
parent
c234903255
commit
168a870c02
433 changed files with 1121 additions and 1123 deletions
197
internal/reader/subscription/finder.go
Normal file
197
internal/reader/subscription/finder.go
Normal file
|
@ -0,0 +1,197 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/parser"
|
||||
"miniflux.app/v2/internal/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
var (
|
||||
errUnreadableDoc = "Unable to analyze this page: %v"
|
||||
youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
|
||||
youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
)
|
||||
|
||||
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
||||
func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *errors.LocalizedError) {
|
||||
websiteURL = findYoutubeChannelFeed(websiteURL)
|
||||
websiteURL = parseYoutubeVideoPage(websiteURL)
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
|
||||
if fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := browser.Exec(clt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body := response.BodyAsString()
|
||||
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
|
||||
var subscriptions Subscriptions
|
||||
subscriptions = append(subscriptions, &Subscription{
|
||||
Title: response.EffectiveURL,
|
||||
URL: response.EffectiveURL,
|
||||
Type: format,
|
||||
})
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
|
||||
if err != nil || subscriptions != nil {
|
||||
return subscriptions, err
|
||||
}
|
||||
|
||||
return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
|
||||
}
|
||||
|
||||
func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
|
||||
var subscriptions Subscriptions
|
||||
queries := map[string]string{
|
||||
"link[type='application/rss+xml']": "rss",
|
||||
"link[type='application/atom+xml']": "atom",
|
||||
"link[type='application/json']": "json",
|
||||
"link[type='application/feed+json']": "json",
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errUnreadableDoc, err)
|
||||
}
|
||||
|
||||
for query, kind := range queries {
|
||||
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
|
||||
if title, exists := s.Attr("title"); exists {
|
||||
subscription.Title = title
|
||||
}
|
||||
|
||||
if feedURL, exists := s.Attr("href"); exists {
|
||||
if feedURL != "" {
|
||||
subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
|
||||
}
|
||||
}
|
||||
|
||||
if subscription.Title == "" {
|
||||
subscription.Title = subscription.URL
|
||||
}
|
||||
|
||||
if subscription.URL != "" {
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
func findYoutubeChannelFeed(websiteURL string) string {
|
||||
matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, matches[1])
|
||||
}
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
func parseYoutubeVideoPage(websiteURL string) string {
|
||||
if !youtubeVideoRegex.MatchString(websiteURL) {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
if docErr != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
if channelID, exists := doc.Find(`meta[itemprop="channelId"]`).First().Attr("content"); exists {
|
||||
return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
|
||||
}
|
||||
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
|
||||
var subscriptions Subscriptions
|
||||
knownURLs := map[string]string{
|
||||
"atom.xml": "atom",
|
||||
"feed.xml": "atom",
|
||||
"feed/": "atom",
|
||||
"rss.xml": "rss",
|
||||
"rss/": "rss",
|
||||
}
|
||||
|
||||
websiteURLRoot := url.RootURL(websiteURL)
|
||||
baseURLs := []string{
|
||||
// Look for knownURLs in the root.
|
||||
websiteURLRoot,
|
||||
}
|
||||
// Look for knownURLs in current subdirectory, such as 'example.com/blog/'.
|
||||
websiteURL, _ = url.AbsoluteURL(websiteURL, "./")
|
||||
if websiteURL != websiteURLRoot {
|
||||
baseURLs = append(baseURLs, websiteURL)
|
||||
}
|
||||
|
||||
for _, baseURL := range baseURLs {
|
||||
for knownURL, kind := range knownURLs {
|
||||
fullURL, err := url.AbsoluteURL(baseURL, knownURL)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
clt := client.NewClientWithConfig(fullURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
|
||||
// Some websites redirects unknown URLs to the home page.
|
||||
// As result, the list of known URLs is returned to the subscription list.
|
||||
// We don't want the user to choose between invalid feed URLs.
|
||||
clt.WithoutRedirects()
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if response != nil && response.StatusCode == 200 {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
subscription.Title = fullURL
|
||||
subscription.URL = fullURL
|
||||
if subscription.URL != "" {
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
285
internal/reader/subscription/finder_test.go
Normal file
285
internal/reader/subscription/finder_test.go
Normal file
|
@ -0,0 +1,285 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFindYoutubeChannelFeed(t *testing.T) {
|
||||
scenarios := map[string]string{
|
||||
"https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw": "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
|
||||
"http://example.org/feed": "http://example.org/feed",
|
||||
}
|
||||
|
||||
for websiteURL, expectedFeedURL := range scenarios {
|
||||
result := findYoutubeChannelFeed(websiteURL)
|
||||
if result != expectedFeedURL {
|
||||
t.Errorf(`Unexpected Feed, got %s, instead of %s`, result, expectedFeedURL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithRssFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/rss" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "rss" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithAtomFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/atom.xml" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "atom" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithJSONFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithEmptyTitle(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="/feed.json" rel="alternate" type="application/feed+json">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithMultipleFeeds(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 2 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 0 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithNoHref(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 0 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
20
internal/reader/subscription/subscription.go
Normal file
20
internal/reader/subscription/subscription.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Subscription represents a feed subscription.
|
||||
type Subscription struct {
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
||||
func (s Subscription) String() string {
|
||||
return fmt.Sprintf(`Title="%s", URL="%s", Type="%s"`, s.Title, s.URL, s.Type)
|
||||
}
|
||||
|
||||
// Subscriptions represents a list of subscription.
|
||||
type Subscriptions []*Subscription
|
Loading…
Add table
Add a link
Reference in a new issue