1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

Ensure enclosure URLs are always absolute

This commit is contained in:
Frédéric Guillot 2024-03-19 21:24:30 -07:00
parent 4be993e055
commit 08640b27d5
6 changed files with 448 additions and 129 deletions

View file

@ -72,7 +72,7 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
entry := model.NewEntry()
entry.Date = findEntryDate(&item)
entry.Content = findEntryContent(&item)
entry.Enclosures = findEntryEnclosures(&item)
entry.Enclosures = findEntryEnclosures(&item, feed.SiteURL)
// Populate the entry URL.
entryURL := findEntryURL(&item)
@ -245,18 +245,30 @@ func findEntryAuthor(rssItem *RSSItem) string {
return strings.TrimSpace(sanitizer.StripTags(author))
}
func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
func findEntryEnclosures(rssItem *RSSItem, siteURL string) model.EnclosureList {
enclosures := make(model.EnclosureList, 0)
duplicates := make(map[string]bool)
for _, mediaThumbnail := range rssItem.AllMediaThumbnails() {
if _, found := duplicates[mediaThumbnail.URL]; !found {
duplicates[mediaThumbnail.URL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaThumbnail.URL,
MimeType: mediaThumbnail.MimeType(),
Size: mediaThumbnail.Size(),
})
mediaURL := strings.TrimSpace(mediaThumbnail.URL)
if mediaURL == "" {
continue
}
if _, found := duplicates[mediaURL]; !found {
if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
slog.Debug("Unable to build absolute URL for media thumbnail",
slog.String("url", mediaThumbnail.URL),
slog.String("site_url", siteURL),
slog.Any("error", err),
)
} else {
duplicates[mediaAbsoluteURL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaAbsoluteURL,
MimeType: mediaThumbnail.MimeType(),
Size: mediaThumbnail.Size(),
})
}
}
}
@ -265,15 +277,20 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
if rssItem.FeedBurnerEnclosureLink != "" {
filename := path.Base(rssItem.FeedBurnerEnclosureLink)
if strings.Contains(enclosureURL, filename) {
if strings.HasSuffix(enclosureURL, filename) {
enclosureURL = rssItem.FeedBurnerEnclosureLink
}
}
enclosureURL = strings.TrimSpace(enclosureURL)
if enclosureURL == "" {
continue
}
if absoluteEnclosureURL, err := urllib.AbsoluteURL(siteURL, enclosureURL); err == nil {
enclosureURL = absoluteEnclosureURL
}
if _, found := duplicates[enclosureURL]; !found {
duplicates[enclosureURL] = true
@ -286,24 +303,50 @@ func findEntryEnclosures(rssItem *RSSItem) model.EnclosureList {
}
for _, mediaContent := range rssItem.AllMediaContents() {
if _, found := duplicates[mediaContent.URL]; !found {
duplicates[mediaContent.URL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaContent.URL,
MimeType: mediaContent.MimeType(),
Size: mediaContent.Size(),
})
mediaURL := strings.TrimSpace(mediaContent.URL)
if mediaURL == "" {
continue
}
if _, found := duplicates[mediaURL]; !found {
mediaURL := strings.TrimSpace(mediaContent.URL)
if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
slog.Debug("Unable to build absolute URL for media content",
slog.String("url", mediaContent.URL),
slog.String("site_url", siteURL),
slog.Any("error", err),
)
} else {
duplicates[mediaAbsoluteURL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaAbsoluteURL,
MimeType: mediaContent.MimeType(),
Size: mediaContent.Size(),
})
}
}
}
for _, mediaPeerLink := range rssItem.AllMediaPeerLinks() {
if _, found := duplicates[mediaPeerLink.URL]; !found {
duplicates[mediaPeerLink.URL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaPeerLink.URL,
MimeType: mediaPeerLink.MimeType(),
Size: mediaPeerLink.Size(),
})
mediaURL := strings.TrimSpace(mediaPeerLink.URL)
if mediaURL == "" {
continue
}
if _, found := duplicates[mediaURL]; !found {
mediaURL := strings.TrimSpace(mediaPeerLink.URL)
if mediaAbsoluteURL, err := urllib.AbsoluteURL(siteURL, mediaURL); err != nil {
slog.Debug("Unable to build absolute URL for media peer link",
slog.String("url", mediaPeerLink.URL),
slog.String("site_url", siteURL),
slog.Any("error", err),
)
} else {
duplicates[mediaAbsoluteURL] = true
enclosures = append(enclosures, &model.Enclosure{
URL: mediaAbsoluteURL,
MimeType: mediaPeerLink.MimeType(),
Size: mediaPeerLink.Size(),
})
}
}
}

View file

@ -1016,15 +1016,11 @@ func TestParseEntryWithEnclosures(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
@ -1065,15 +1061,11 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 2 {
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
@ -1093,6 +1085,39 @@ func TestParseEntryWithIncorrectEnclosureLength(t *testing.T) {
}
}
func TestParseEntryWithDuplicatedEnclosureURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>My Podcast Feed</title>
<link>http://example.org</link>
<item>
<title>Podcasting with RSS</title>
<link>http://www.example.org/entries/1</link>
<enclosure url="http://www.example.org/myaudiofile.mp3" type="audio/mpeg" />
<enclosure url=" http://www.example.org/myaudiofile.mp3 " type="audio/mpeg" />
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries) != 1 {
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
}
}
func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
@ -1106,7 +1131,7 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
<description>An overview of RSS podcasting</description>
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
<guid isPermaLink="true">http://www.example.org/entries/1</guid>
<enclosure url="" length="0"/>
<enclosure url=" " length="0"/>
</item>
</channel>
</rss>`
@ -1117,15 +1142,47 @@ func TestParseEntryWithEmptyEnclosureURL(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 0 {
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
}
func TestParseEntryWithRelativeEnclosureURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
<channel>
<title>My Podcast Feed</title>
<link>http://example.org</link>
<author>some.email@example.org</author>
<item>
<title>Podcasting with RSS</title>
<link>http://www.example.org/entries/1</link>
<description>An overview of RSS podcasting</description>
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
<guid isPermaLink="true">http://www.example.org/entries/1</guid>
<enclosure url=" /files/file.mp3 "/>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries) != 1 {
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://example.org/files/file.mp3" {
t.Errorf("Incorrect enclosure URL, got: %q", feed.Entries[0].Enclosures[0].URL)
}
}
@ -1154,15 +1211,11 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
@ -1178,6 +1231,42 @@ func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
}
}
func TestParseEntryWithFeedBurnerEnclosuresAndRelativeURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
<enclosure
url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
length="76192460"
type="audio/mpeg" />
<feedburner:origEnclosureLink>/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
}
}
func TestParseEntryWithRelativeURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">
@ -1389,7 +1478,7 @@ func TestParseEntryWithMediaGroup(t *testing.T) {
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<link>https://example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
@ -1400,7 +1489,9 @@ func TestParseEntryWithMediaGroup(t *testing.T) {
<media:content type="application/x-bittorrent" url="https://example.org/file2.torrent" isDefault="true"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file3.torrent"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file5.torrent" fileSize="42"></media:content>
<media:content type="application/x-bittorrent" url="https://example.org/file4.torrent"></media:content>
<media:content type="application/x-bittorrent" url=" file5.torrent " fileSize="42"></media:content>
<media:content type="application/x-bittorrent" url=" " fileSize="42"></media:content>
<media:rating>nonadult</media:rating>
</media:group>
<media:thumbnail url="https://example.org/image.jpg" height="122" width="223"></media:thumbnail>
@ -1453,15 +1544,19 @@ func TestParseEntryWithMediaContent(t *testing.T) {
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<link>https://example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
<media:thumbnail url="https://example.org/thumbnail.jpg" />
<media:thumbnail url="https://example.org/thumbnail.jpg" />
<media:thumbnail url=" thumbnail.jpg " />
<media:thumbnail url=" " />
<media:content url="https://example.org/media1.jpg" medium="image">
<media:title type="html">Some Title for Media 1</media:title>
</media:content>
<media:content url="https://example.org/media2.jpg" medium="image" />
<media:content url=" /media2.jpg " medium="image" />
<media:content url=" " medium="image" />
</item>
</channel>
</rss>`
@ -1472,9 +1567,9 @@ func TestParseEntryWithMediaContent(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 3 {
if len(feed.Entries[0].Enclosures) != 4 {
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
@ -1483,6 +1578,7 @@ func TestParseEntryWithMediaContent(t *testing.T) {
mimeType string
size int64
}{
{"https://example.org/thumbnail.jpg", "image/*", 0},
{"https://example.org/thumbnail.jpg", "image/*", 0},
{"https://example.org/media1.jpg", "image/*", 0},
{"https://example.org/media2.jpg", "image/*", 0},
@ -1508,11 +1604,14 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<title>My Example Feed</title>
<link>http://example.org</link>
<link>https://website.example.org</link>
<item>
<title>Example Item</title>
<link>http://www.example.org/entries/1</link>
<media:peerLink type="application/x-bittorrent" href="http://www.example.org/file.torrent" />
<media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" />
<media:peerLink type="application/x-bittorrent" href="https://www.example.org/file.torrent" />
<media:peerLink type="application/x-bittorrent" href=" file2.torrent " />
<media:peerLink type="application/x-bittorrent" href=" " />
</item>
</channel>
</rss>`
@ -1523,10 +1622,10 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
}
if len(feed.Entries) != 1 {
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
}
if len(feed.Entries[0].Enclosures) != 1 {
if len(feed.Entries[0].Enclosures) != 2 {
t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
}
@ -1535,7 +1634,8 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) {
mimeType string
size int64
}{
{"http://www.example.org/file.torrent", "application/x-bittorrent", 0},
{"https://www.example.org/file.torrent", "application/x-bittorrent", 0},
{"https://website.example.org/file2.torrent", "application/x-bittorrent", 0},
}
for index, enclosure := range feed.Entries[0].Enclosures {