mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Refactor JSON Feed parser to use an adapter
This commit is contained in:
parent
6bc4b35e38
commit
8429c6b0ab
4 changed files with 487 additions and 253 deletions
|
@ -3,207 +3,141 @@
|
|||
|
||||
package json // import "miniflux.app/v2/internal/reader/json"
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
// JSON Feed specs:
|
||||
// https://www.jsonfeed.org/version/1.1/
|
||||
// https://www.jsonfeed.org/version/1/
|
||||
type JSONFeed struct {
|
||||
// Version is the URL of the version of the format the feed uses.
|
||||
// This should appear at the very top, though we recognize that not all JSON generators allow for ordering.
|
||||
Version string `json:"version"`
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
)
|
||||
// Title is the name of the feed, which will often correspond to the name of the website.
|
||||
Title string `json:"title"`
|
||||
|
||||
type jsonFeed struct {
|
||||
Version string `json:"version"`
|
||||
Title string `json:"title"`
|
||||
SiteURL string `json:"home_page_url"`
|
||||
IconURL string `json:"icon"`
|
||||
FaviconURL string `json:"favicon"`
|
||||
FeedURL string `json:"feed_url"`
|
||||
Authors []jsonAuthor `json:"authors"`
|
||||
Author jsonAuthor `json:"author"`
|
||||
Items []jsonItem `json:"items"`
|
||||
// HomePageURL is the URL of the resource that the feed describes.
|
||||
// This resource may or may not actually be a “home” page, but it should be an HTML page.
|
||||
HomePageURL string `json:"home_page_url"`
|
||||
|
||||
// FeedURL is the URL of the feed, and serves as the unique identifier for the feed.
|
||||
FeedURL string `json:"feed_url"`
|
||||
|
||||
// Description provides more detail, beyond the title, on what the feed is about.
|
||||
Description string `json:"description"`
|
||||
|
||||
// IconURL is the URL of an image for the feed suitable to be used in a timeline, much the way an avatar might be used.
|
||||
IconURL string `json:"icon"`
|
||||
|
||||
// FaviconURL is the URL of an image for the feed suitable to be used in a source list. It should be square and relatively small.
|
||||
FaviconURL string `json:"favicon"`
|
||||
|
||||
// Authors specifies one or more feed authors. The author object has several members.
|
||||
Authors []JSONAuthor `json:"authors"` // JSON Feed v1.1
|
||||
|
||||
// Author specifies the feed author. The author object has several members.
|
||||
// JSON Feed v1 (deprecated)
|
||||
Author JSONAuthor `json:"author"`
|
||||
|
||||
// Language is the primary language for the feed in the format specified in RFC 5646.
|
||||
// The value is usually a 2-letter language tag from ISO 639-1, optionally followed by a region tag. (Examples: en or en-US.)
|
||||
Language string `json:"language"`
|
||||
|
||||
// Expired is a boolean value that specifies whether or not the feed is finished.
|
||||
Expired bool `json:"expired"`
|
||||
|
||||
// Items is an array, each representing an individual item in the feed.
|
||||
Items []JSONItem `json:"items"`
|
||||
|
||||
// Hubs describes endpoints that can be used to subscribe to real-time notifications from the publisher of this feed.
|
||||
Hubs []JSONHub `json:"hubs"`
|
||||
}
|
||||
|
||||
type jsonAuthor struct {
|
||||
type JSONAuthor struct {
|
||||
// Author's name.
|
||||
Name string `json:"name"`
|
||||
URL string `json:"url"`
|
||||
|
||||
// Author's website URL (Blog or micro-blog).
|
||||
WebsiteURL string `json:"url"`
|
||||
|
||||
// Author's avatar URL.
|
||||
AvatarURL string `json:"avatar"`
|
||||
}
|
||||
|
||||
type jsonItem struct {
|
||||
ID string `json:"id"`
|
||||
URL string `json:"url"`
|
||||
Title string `json:"title"`
|
||||
Summary string `json:"summary"`
|
||||
Text string `json:"content_text"`
|
||||
HTML string `json:"content_html"`
|
||||
DatePublished string `json:"date_published"`
|
||||
DateModified string `json:"date_modified"`
|
||||
Authors []jsonAuthor `json:"authors"`
|
||||
Author jsonAuthor `json:"author"`
|
||||
Attachments []jsonAttachment `json:"attachments"`
|
||||
Tags []string `json:"tags"`
|
||||
type JSONHub struct {
|
||||
// Type defines the protocol used to talk with the hub: "rssCloud" or "WebSub".
|
||||
Type string `json:"type"`
|
||||
|
||||
// URL is the location of the hub.
|
||||
URL string `json:"url"`
|
||||
}
|
||||
|
||||
type jsonAttachment struct {
|
||||
URL string `json:"url"`
|
||||
type JSONItem struct {
|
||||
// Unique identifier for the item.
|
||||
// Ideally, the id is the full URL of the resource described by the item, since URLs make great unique identifiers.
|
||||
ID string `json:"id"`
|
||||
|
||||
// URL of the resource described by the item.
|
||||
URL string `json:"url"`
|
||||
|
||||
// ExternalURL is the URL of a page elsewhere.
|
||||
// This is especially useful for linkblogs.
|
||||
// If url links to where you’re talking about a thing, then external_url links to the thing you’re talking about.
|
||||
ExternalURL string `json:"external_url"`
|
||||
|
||||
// Title of the item (optional).
|
||||
// Microblog items in particular may omit titles.
|
||||
Title string `json:"title"`
|
||||
|
||||
// ContentHTML is the HTML body of the item.
|
||||
ContentHTML string `json:"content_html"`
|
||||
|
||||
// ContentText is the text body of the item.
|
||||
ContentText string `json:"content_text"`
|
||||
|
||||
// Summary is a plain text sentence or two describing the item.
|
||||
Summary string `json:"summary"`
|
||||
|
||||
// ImageURL is the URL of the main image for the item.
|
||||
ImageURL string `json:"image"`
|
||||
|
||||
// BannerImageURL is the URL of an image to use as a banner.
|
||||
BannerImageURL string `json:"banner_image"`
|
||||
|
||||
// DatePublished is the date the item was published.
|
||||
DatePublished string `json:"date_published"`
|
||||
|
||||
// DateModified is the date the item was modified.
|
||||
DateModified string `json:"date_modified"`
|
||||
|
||||
// Language is the language of the item.
|
||||
Language string `json:"language"`
|
||||
|
||||
// Authors is an array of JSONAuthor.
|
||||
Authors []JSONAuthor `json:"authors"`
|
||||
|
||||
// Author is a JSONAuthor.
|
||||
// JSON Feed v1 (deprecated)
|
||||
Author JSONAuthor `json:"author"`
|
||||
|
||||
// Tags is an array of strings.
|
||||
Tags []string `json:"tags"`
|
||||
|
||||
// Attachments is an array of JSONAttachment.
|
||||
Attachments []JSONAttachment `json:"attachments"`
|
||||
}
|
||||
|
||||
type JSONAttachment struct {
|
||||
// URL of the attachment.
|
||||
URL string `json:"url"`
|
||||
|
||||
// MIME type of the attachment.
|
||||
MimeType string `json:"mime_type"`
|
||||
Title string `json:"title"`
|
||||
Size int64 `json:"size_in_bytes"`
|
||||
Duration int `json:"duration_in_seconds"`
|
||||
}
|
||||
|
||||
func (j *jsonFeed) GetAuthor() string {
|
||||
if len(j.Authors) > 0 {
|
||||
return (getAuthor(j.Authors[0]))
|
||||
}
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *jsonFeed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
|
||||
feed := new(model.Feed)
|
||||
|
||||
feed.FeedURL, err = urllib.AbsoluteURL(baseURL, j.FeedURL)
|
||||
if err != nil {
|
||||
feed.FeedURL = j.FeedURL
|
||||
}
|
||||
|
||||
feed.SiteURL, err = urllib.AbsoluteURL(baseURL, j.SiteURL)
|
||||
if err != nil {
|
||||
feed.SiteURL = j.SiteURL
|
||||
}
|
||||
|
||||
feed.IconURL = strings.TrimSpace(j.IconURL)
|
||||
|
||||
if feed.IconURL == "" {
|
||||
feed.IconURL = strings.TrimSpace(j.FaviconURL)
|
||||
}
|
||||
|
||||
feed.Title = strings.TrimSpace(j.Title)
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, item := range j.Items {
|
||||
entry := item.Transform()
|
||||
entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
|
||||
if err == nil {
|
||||
entry.URL = entryURL
|
||||
}
|
||||
|
||||
if entry.Author == "" {
|
||||
entry.Author = j.GetAuthor()
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetDate() time.Time {
|
||||
for _, value := range []string{j.DatePublished, j.DateModified} {
|
||||
if value != "" {
|
||||
d, err := date.Parse(value)
|
||||
if err != nil {
|
||||
slog.Debug("Unable to parse date from JSON feed",
|
||||
slog.String("date", value),
|
||||
slog.String("url", j.URL),
|
||||
slog.Any("error", err),
|
||||
)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetAuthor() string {
|
||||
if len(j.Authors) > 0 {
|
||||
return getAuthor(j.Authors[0])
|
||||
}
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetHash() string {
|
||||
for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetTitle() string {
|
||||
if j.Title != "" {
|
||||
return j.Title
|
||||
}
|
||||
|
||||
for _, value := range []string{j.Summary, j.Text, j.HTML} {
|
||||
if value != "" {
|
||||
return sanitizer.TruncateHTML(value, 100)
|
||||
}
|
||||
}
|
||||
|
||||
return j.URL
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetContent() string {
|
||||
for _, value := range []string{j.HTML, j.Text, j.Summary} {
|
||||
if value != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, attachment := range j.Attachments {
|
||||
if attachment.URL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: attachment.URL,
|
||||
MimeType: attachment.MimeType,
|
||||
Size: attachment.Size,
|
||||
})
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (j *jsonItem) Transform() *model.Entry {
|
||||
entry := model.NewEntry()
|
||||
entry.URL = j.URL
|
||||
entry.Date = j.GetDate()
|
||||
entry.Author = j.GetAuthor()
|
||||
entry.Hash = j.GetHash()
|
||||
entry.Content = j.GetContent()
|
||||
entry.Title = strings.TrimSpace(j.GetTitle())
|
||||
entry.Enclosures = j.GetEnclosures()
|
||||
if len(j.Tags) > 0 {
|
||||
entry.Tags = j.Tags
|
||||
}
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
func getAuthor(author jsonAuthor) string {
|
||||
if author.Name != "" {
|
||||
return strings.TrimSpace(author.Name)
|
||||
}
|
||||
|
||||
return ""
|
||||
|
||||
// Title of the attachment.
|
||||
Title string `json:"title"`
|
||||
|
||||
// Size of the attachment in bytes.
|
||||
Size int64 `json:"size_in_bytes"`
|
||||
|
||||
// Duration of the attachment in seconds.
|
||||
Duration int `json:"duration_in_seconds"`
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue