Mirror of https://github.com/miniflux/v2.git (synced 2025-06-27 16:36:00 +00:00)

Commit e4215906a9: Merge 689132b78c into b583de88f3

2 changed files with 122 additions and 8 deletions
@@ -5,6 +5,7 @@ package subscription // import "miniflux.app/v2/internal/reader/subscription"

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
	"log/slog"
@@ -124,6 +125,14 @@ func (f *SubscriptionFinder) FindSubscriptions(websiteURL, rssBridgeURL string)
		slog.Debug("Subscriptions found with well-known URLs", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
		return subscriptions, nil
	}
	// Step 7) Check if the website has feeds in its sitemap.
	slog.Debug("Try to detect feeds from sitemap", slog.String("website_url", websiteURL))
	if subscriptions, localizedError := f.FindSubscriptionsFromSitemap(websiteURL); localizedError != nil {
		return nil, localizedError
	} else if len(subscriptions) > 0 {
		slog.Debug("Subscriptions found with sitemap", slog.String("website_url", websiteURL), slog.Any("subscriptions", subscriptions))
		return subscriptions, nil
	}

	return nil, nil
}
@@ -191,12 +200,14 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWellKnownURLs(websiteURL strin
	knownURLs := map[string]string{
		"atom.xml":   parser.FormatAtom,
		"feed.xml":   parser.FormatAtom,
		"feed/":      parser.FormatAtom,
		"feed":       parser.FormatAtom,
		"rss.xml":    parser.FormatRSS,
		"rss/":       parser.FormatRSS,
		"rss":        parser.FormatRSS,
		"index.rss":  parser.FormatRSS,
		"index.xml":  parser.FormatRSS,
		"feed.atom":  parser.FormatAtom,
		"atom":       parser.FormatAtom,
		"index.atom": parser.FormatAtom,
	}

	websiteURLRoot := urllib.RootURL(websiteURL)
@@ -324,3 +335,66 @@ func (f *SubscriptionFinder) FindSubscriptionsFromYouTubePlaylistPage(websiteURL

	return nil, nil
}

func (f *SubscriptionFinder) FindSubscriptionsFromSitemap(websiteURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
	websiteURLRoot := urllib.RootURL(websiteURL)

	responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(websiteURLRoot + "/sitemap.xml"))
	defer responseHandler.Close()

	if localizedError := responseHandler.LocalizedError(); localizedError != nil {
		slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return nil, localizedError
	}

	responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
	if localizedError != nil {
		slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
		return nil, localizedError
	}
	return findSubscriptionsFromDownloadedSitemap(bytes.NewReader(responseBody))
}

func findSubscriptionsFromDownloadedSitemap(body io.Reader) (Subscriptions, *locale.LocalizedErrorWrapper) {
	var subscriptions Subscriptions
	loc := struct {
		Content string `xml:",chardata"`
	}{}

	decoder := xml.NewDecoder(body)
	for {
		t, _ := decoder.Token()
		if t == nil {
			break
		}
		switch se := t.(type) {
		case xml.StartElement:
			if se.Name.Local != "loc" {
				continue
			}

			if err := decoder.DecodeElement(&loc, &se); err != nil {
				slog.Warn("Unable to decode loc", slog.Any("error", err))
			}
			feedUrl := loc.Content
			switch {
			case strings.Contains(feedUrl, ".xml"),
				strings.Contains(feedUrl, "rss"):
				subscriptions = append(subscriptions, &Subscription{
					Type:  parser.FormatRSS,
					Title: feedUrl,
					URL:   feedUrl,
				})
			case strings.Contains(feedUrl, "feed"),
				strings.Contains(feedUrl, "atom"):
				subscriptions = append(subscriptions, &Subscription{
					Type:  parser.FormatAtom,
					Title: feedUrl,
					URL:   feedUrl,
				})
			}
		}
	}

	return subscriptions, nil
}
@@ -481,3 +481,43 @@ func TestParseWebPageWithNoHref(t *testing.T) {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}

func TestParseSiteMap(t *testing.T) {
	htmlPage := `
	<?xml version="1.0" encoding="UTF-8"?>
	<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
	<url>
		<loc>http://www.example.com/</loc>
		<lastmod>2005-01-01</lastmod>
		<changefreq>monthly</changefreq>
		<priority>0.8</priority>
	</url>
	<url>
		<loc>http://www.example.com/feed/myfeed</loc>
		<lastmod>2005-01-01</lastmod>
		<changefreq>monthly</changefreq>
		<priority>0.8</priority>
	</url>
	<url>
		<loc>http://www.example.com/myfeed.xml</loc>
		<lastmod>2005-01-01</lastmod>
		<changefreq>monthly</changefreq>
		<priority>0.8</priority>
	</url>
	<url>
		<loc>http://www.example.com/atom_feed.xml</loc>
		<lastmod>2005-01-01</lastmod>
		<changefreq>monthly</changefreq>
		<priority>0.8</priority>
	</url>
	</urlset> `

	subscriptions, err := findSubscriptionsFromDownloadedSitemap(strings.NewReader(htmlPage))
	if err != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
	}

	if len(subscriptions) != 3 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}
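For reference, the three subscriptions the test expects follow from the substring matching in findSubscriptionsFromDownloadedSitemap: a <loc> URL containing ".xml" or "rss" is recorded as an RSS subscription, otherwise one containing "feed" or "atom" is recorded as Atom, and anything else is skipped. The standalone sketch below is not part of the commit; the classifySitemapURL helper is a hypothetical name and the literal "rss"/"atom" return values are stand-ins for parser.FormatRSS and parser.FormatAtom, used only to make the case precedence visible (for example, "atom_feed.xml" matches the ".xml" case first and is therefore classified as RSS).

package main

import (
	"fmt"
	"strings"
)

// classifySitemapURL is a hypothetical stand-in that mirrors the switch in
// findSubscriptionsFromDownloadedSitemap: the ".xml"/"rss" case is checked
// before the "feed"/"atom" case, and unmatched URLs are skipped.
func classifySitemapURL(feedURL string) string {
	switch {
	case strings.Contains(feedURL, ".xml"), strings.Contains(feedURL, "rss"):
		return "rss" // stand-in for parser.FormatRSS
	case strings.Contains(feedURL, "feed"), strings.Contains(feedURL, "atom"):
		return "atom" // stand-in for parser.FormatAtom
	default:
		return "" // not treated as a feed URL
	}
}

func main() {
	for _, u := range []string{
		"http://www.example.com/",              // no match: skipped
		"http://www.example.com/feed/myfeed",   // "feed" -> atom
		"http://www.example.com/myfeed.xml",    // ".xml" -> rss
		"http://www.example.com/atom_feed.xml", // ".xml" wins over "atom" -> rss
	} {
		fmt.Println(u, "=>", classifySitemapURL(u))
	}
}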