mirror of
https://github.com/miniflux/v2.git
synced 2025-08-26 18:21:01 +00:00
Refactor Atom parser to use an adapter
This commit is contained in:
parent
2ba893bc79
commit
dd4fb660c1
11 changed files with 795 additions and 500 deletions
210
internal/reader/atom/atom_10_adapter.go
Normal file
210
internal/reader/atom/atom_10_adapter.go
Normal file
|
@ -0,0 +1,210 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
)
|
||||
|
||||
type Atom10Adapter struct {
|
||||
atomFeed *Atom10Feed
|
||||
}
|
||||
|
||||
func NewAtom10Adapter(atomFeed *Atom10Feed) *Atom10Adapter {
|
||||
return &Atom10Adapter{atomFeed}
|
||||
}
|
||||
|
||||
func (a *Atom10Adapter) BuildFeed(baseURL string) *model.Feed {
|
||||
feed := new(model.Feed)
|
||||
|
||||
// Populate the feed URL.
|
||||
feedURL := a.atomFeed.Links.firstLinkWithRelation("self")
|
||||
if feedURL != "" {
|
||||
if absoluteFeedURL, err := urllib.AbsoluteURL(baseURL, feedURL); err == nil {
|
||||
feed.FeedURL = absoluteFeedURL
|
||||
}
|
||||
} else {
|
||||
feed.FeedURL = baseURL
|
||||
}
|
||||
|
||||
// Populate the site URL.
|
||||
siteURL := a.atomFeed.Links.OriginalLink()
|
||||
if siteURL != "" {
|
||||
if absoluteSiteURL, err := urllib.AbsoluteURL(baseURL, siteURL); err == nil {
|
||||
feed.SiteURL = absoluteSiteURL
|
||||
}
|
||||
} else {
|
||||
feed.SiteURL = baseURL
|
||||
}
|
||||
|
||||
// Populate the feed title.
|
||||
feed.Title = a.atomFeed.Title.Body()
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
// Populate the feed icon.
|
||||
if a.atomFeed.Icon != "" {
|
||||
if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Icon); err == nil {
|
||||
feed.IconURL = absoluteIconURL
|
||||
}
|
||||
} else if a.atomFeed.Logo != "" {
|
||||
if absoluteLogoURL, err := urllib.AbsoluteURL(feed.SiteURL, a.atomFeed.Logo); err == nil {
|
||||
feed.IconURL = absoluteLogoURL
|
||||
}
|
||||
}
|
||||
|
||||
for _, atomEntry := range a.atomFeed.Entries {
|
||||
entry := model.NewEntry()
|
||||
|
||||
// Populate the entry URL.
|
||||
entry.URL = atomEntry.Links.OriginalLink()
|
||||
if entry.URL != "" {
|
||||
if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL); err == nil {
|
||||
entry.URL = absoluteEntryURL
|
||||
}
|
||||
}
|
||||
|
||||
// Populate the entry content.
|
||||
entry.Content = atomEntry.Content.Body()
|
||||
if entry.Content == "" {
|
||||
entry.Content = atomEntry.Summary.Body()
|
||||
}
|
||||
if entry.Content == "" {
|
||||
entry.Content = atomEntry.FirstMediaDescription()
|
||||
}
|
||||
|
||||
// Populate the entry title.
|
||||
entry.Title = atomEntry.Title.Title()
|
||||
if entry.Title == "" {
|
||||
entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
|
||||
}
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
// Populate the entry author.
|
||||
authors := atomEntry.Authors.PersonNames()
|
||||
if len(authors) == 0 {
|
||||
authors = append(authors, a.atomFeed.Authors.PersonNames()...)
|
||||
}
|
||||
authors = slices.Compact(authors)
|
||||
sort.Strings(authors)
|
||||
entry.Author = strings.Join(authors, ", ")
|
||||
|
||||
// Populate the entry date.
|
||||
for _, value := range []string{atomEntry.Published, atomEntry.Updated} {
|
||||
if parsedDate, err := date.Parse(value); err != nil {
|
||||
slog.Debug("Unable to parse date from Atom 1.0 feed",
|
||||
slog.String("date", value),
|
||||
slog.String("url", entry.URL),
|
||||
slog.Any("error", err),
|
||||
)
|
||||
} else {
|
||||
entry.Date = parsedDate
|
||||
break
|
||||
}
|
||||
}
|
||||
if entry.Date.IsZero() {
|
||||
entry.Date = time.Now()
|
||||
}
|
||||
|
||||
// Populate categories.
|
||||
categories := atomEntry.Categories.CategoryNames()
|
||||
if len(categories) == 0 {
|
||||
categories = append(categories, a.atomFeed.Categories.CategoryNames()...)
|
||||
}
|
||||
if len(categories) > 0 {
|
||||
categories = slices.Compact(categories)
|
||||
sort.Strings(categories)
|
||||
entry.Tags = categories
|
||||
}
|
||||
|
||||
// Populate the commentsURL if defined.
|
||||
// See https://tools.ietf.org/html/rfc4685#section-4
|
||||
// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
|
||||
// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
|
||||
commentsURL := atomEntry.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
|
||||
if urllib.IsAbsoluteURL(commentsURL) {
|
||||
entry.CommentsURL = commentsURL
|
||||
}
|
||||
|
||||
// Generate the entry hash.
|
||||
for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} {
|
||||
if value != "" {
|
||||
entry.Hash = crypto.Hash(value)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Populate the entry enclosures.
|
||||
uniqueEnclosuresMap := make(map[string]bool)
|
||||
|
||||
for _, mediaThumbnail := range atomEntry.AllMediaThumbnails() {
|
||||
if _, found := uniqueEnclosuresMap[mediaThumbnail.URL]; !found {
|
||||
uniqueEnclosuresMap[mediaThumbnail.URL] = true
|
||||
entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
|
||||
URL: mediaThumbnail.URL,
|
||||
MimeType: mediaThumbnail.MimeType(),
|
||||
Size: mediaThumbnail.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, link := range atomEntry.Links {
|
||||
if strings.EqualFold(link.Rel, "enclosure") {
|
||||
if link.Href == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, found := uniqueEnclosuresMap[link.Href]; !found {
|
||||
uniqueEnclosuresMap[link.Href] = true
|
||||
length, _ := strconv.ParseInt(link.Length, 10, 0)
|
||||
entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
|
||||
URL: link.Href,
|
||||
MimeType: link.Type,
|
||||
Size: length,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaContent := range atomEntry.AllMediaContents() {
|
||||
if _, found := uniqueEnclosuresMap[mediaContent.URL]; !found {
|
||||
uniqueEnclosuresMap[mediaContent.URL] = true
|
||||
entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
|
||||
URL: mediaContent.URL,
|
||||
MimeType: mediaContent.MimeType(),
|
||||
Size: mediaContent.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaPeerLink := range atomEntry.AllMediaPeerLinks() {
|
||||
if _, found := uniqueEnclosuresMap[mediaPeerLink.URL]; !found {
|
||||
uniqueEnclosuresMap[mediaPeerLink.URL] = true
|
||||
entry.Enclosures = append(entry.Enclosures, &model.Enclosure{
|
||||
URL: mediaPeerLink.URL,
|
||||
MimeType: mediaPeerLink.MimeType(),
|
||||
Size: mediaPeerLink.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue