mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Merge 689132b78c
into b583de88f3
This commit is contained in:
commit
e4215906a9
2 changed files with 122 additions and 8 deletions
|
@ -5,6 +5,7 @@ package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"encoding/xml"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
@ -124,6 +125,14 @@ func (f *SubscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string)
|
||||||
slog.Debug("Subscriptions found with well-known URLs", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
slog.Debug("Subscriptions found with well-known URLs", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
||||||
return subscriptions, nil
|
return subscriptions, nil
|
||||||
}
|
}
|
||||||
|
// Step 7) Check if the website has feeds in its sitemap.
|
||||||
|
slog.Debug("Try to detect feeds from sitemap", slog.String("website_url", websiteURL))
|
||||||
|
if subscriptions, localizedError := f.FindSubscriptionsFromSitemap(websiteURL); localizedError != nil {
|
||||||
|
return nil, localizedError
|
||||||
|
} else if len(subscriptions) > 0 {
|
||||||
|
slog.Debug("Subscriptions found with sitemap", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
|
||||||
|
return subscriptions, nil
|
||||||
|
}
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
@ -189,14 +198,16 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
|
||||||
|
|
||||||
func (f *SubscriptionFinder) FindSubscriptionsFromWellKnownURLs(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
func (f *SubscriptionFinder) FindSubscriptionsFromWellKnownURLs(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||||
knownURLs := map[string]string{
|
knownURLs := map[string]string{
|
||||||
"atom.xml": parser.FormatAtom,
|
"atom.xml": parser.FormatAtom,
|
||||||
"feed.xml": parser.FormatAtom,
|
"feed.xml": parser.FormatAtom,
|
||||||
"feed/": parser.FormatAtom,
|
"feed": parser.FormatAtom,
|
||||||
"rss.xml": parser.FormatRSS,
|
"rss.xml": parser.FormatRSS,
|
||||||
"rss/": parser.FormatRSS,
|
"rss": parser.FormatRSS,
|
||||||
"index.rss": parser.FormatRSS,
|
"index.rss": parser.FormatRSS,
|
||||||
"index.xml": parser.FormatRSS,
|
"index.xml": parser.FormatRSS,
|
||||||
"feed.atom": parser.FormatAtom,
|
"feed.atom": parser.FormatAtom,
|
||||||
|
"atom": parser.FormatAtom,
|
||||||
|
"index.atom": parser.FormatAtom,
|
||||||
}
|
}
|
||||||
|
|
||||||
websiteURLRoot := urllib.RootURL(websiteURL)
|
websiteURLRoot := urllib.RootURL(websiteURL)
|
||||||
|
@ -324,3 +335,66 @@ func (f *SubscriptionFinder) FindSubscriptionsFromYouTubePlaylistPage(websiteURL
|
||||||
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f *SubscriptionFinder) FindSubscriptionsFromSitemap(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||||
|
websiteURLRoot := urllib.RootURL(websiteURL)
|
||||||
|
|
||||||
|
responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(websiteURLRoot + "/sitemap.xml"))
|
||||||
|
defer responseHandler.Close()
|
||||||
|
|
||||||
|
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||||
|
slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||||
|
return nil, localizedError
|
||||||
|
}
|
||||||
|
|
||||||
|
responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
|
||||||
|
if localizedError != nil {
|
||||||
|
slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||||
|
return nil, localizedError
|
||||||
|
}
|
||||||
|
return findSubscriptionsFromDownloadedSitemap(bytes.NewReader(responseBody))
|
||||||
|
}
|
||||||
|
|
||||||
|
func findSubscriptionsFromDownloadedSitemap(body io.Reader) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||||
|
var subscriptions Subscriptions
|
||||||
|
loc := struct {
|
||||||
|
Content string `xml:",chardata"`
|
||||||
|
}{}
|
||||||
|
|
||||||
|
decoder := xml.NewDecoder(body)
|
||||||
|
for {
|
||||||
|
t, _ := decoder.Token()
|
||||||
|
if t == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
switch se := t.(type) {
|
||||||
|
case xml.StartElement:
|
||||||
|
if se.Name.Local != "loc" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := decoder.DecodeElement(&loc, &se); err != nil {
|
||||||
|
slog.Warn("Unable to decode loc", slog.Any("error", err))
|
||||||
|
}
|
||||||
|
feedUrl := loc.Content
|
||||||
|
switch {
|
||||||
|
case strings.Contains(feedUrl, ".xml"),
|
||||||
|
strings.Contains(feedUrl, "rss"):
|
||||||
|
subscriptions = append(subscriptions, &Subscription{
|
||||||
|
Type: parser.FormatRSS,
|
||||||
|
Title: feedUrl,
|
||||||
|
URL: feedUrl,
|
||||||
|
})
|
||||||
|
case strings.Contains(feedUrl, "feed"),
|
||||||
|
strings.Contains(feedUrl, "atom"):
|
||||||
|
subscriptions = append(subscriptions, &Subscription{
|
||||||
|
Type: parser.FormatAtom,
|
||||||
|
Title: feedUrl,
|
||||||
|
URL: feedUrl,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return subscriptions, nil
|
||||||
|
}
|
||||||
|
|
|
@ -481,3 +481,43 @@ func TestParseWebPageWithNoHref(t *testing.T) {
|
||||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseSiteMap(t *testing.T) {
|
||||||
|
htmlPage := `
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
<url>
|
||||||
|
<loc>http://www.example.com/</loc>
|
||||||
|
<lastmod>2005-01-01</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.8</priority>
|
||||||
|
</url>
|
||||||
|
<url>
|
||||||
|
<loc>http://www.example.com/feed/myfeed</loc>
|
||||||
|
<lastmod>2005-01-01</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.8</priority>
|
||||||
|
</url>
|
||||||
|
<url>
|
||||||
|
<loc>http://www.example.com/myfeed.xml</loc>
|
||||||
|
<lastmod>2005-01-01</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.8</priority>
|
||||||
|
</url>
|
||||||
|
<url>
|
||||||
|
<loc>http://www.example.com/atom_feed.xml</loc>
|
||||||
|
<lastmod>2005-01-01</lastmod>
|
||||||
|
<changefreq>monthly</changefreq>
|
||||||
|
<priority>0.8</priority>
|
||||||
|
</url>
|
||||||
|
</urlset> `
|
||||||
|
|
||||||
|
subscriptions, err := findSubscriptionsFromDownloadedSitemap(strings.NewReader(htmlPage))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(subscriptions) != 3 {
|
||||||
|
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue