1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-16 18:01:37 +00:00

Move feed parsers packages in reader package

This commit is contained in:
Frédéric Guillot 2017-11-20 19:17:04 -08:00
parent c26787f476
commit d5838b6734
14 changed files with 7 additions and 7 deletions

171
reader/json/json.go Normal file
View file

@ -0,0 +1,171 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package json
import (
"log"
"strings"
"time"
"github.com/miniflux/miniflux2/helper"
"github.com/miniflux/miniflux2/model"
"github.com/miniflux/miniflux2/reader/date"
"github.com/miniflux/miniflux2/reader/processor"
"github.com/miniflux/miniflux2/reader/sanitizer"
)
// jsonFeed holds the top-level fields of a JSON Feed document that this
// package reads. Each field is bound to its JSON key through the struct tag.
type jsonFeed struct {
	// Version is the value of the "version" key.
	Version string `json:"version"`
	// Title is the feed title ("title").
	Title string `json:"title"`
	// SiteURL is read from "home_page_url".
	SiteURL string `json:"home_page_url"`
	// FeedURL is read from "feed_url".
	FeedURL string `json:"feed_url"`
	// Author is the feed-level author object ("author").
	Author jsonAuthor `json:"author"`
	// Items is the list of feed items ("items").
	Items []jsonItem `json:"items"`
}
// jsonAuthor is the "author" object found at feed level and item level.
type jsonAuthor struct {
	// Name is read from the "name" key.
	Name string `json:"name"`
	// URL is read from the "url" key.
	URL string `json:"url"`
}
// jsonItem is a single element of the feed's "items" array. Each field is
// bound to its JSON key through the struct tag.
type jsonItem struct {
	// ID is read from "id".
	ID string `json:"id"`
	// URL is read from "url".
	URL string `json:"url"`
	// Title is read from "title".
	Title string `json:"title"`
	// Summary is read from "summary".
	Summary string `json:"summary"`
	// Text is read from "content_text".
	Text string `json:"content_text"`
	// HTML is read from "content_html".
	HTML string `json:"content_html"`
	// DatePublished is the raw, unparsed "date_published" string.
	DatePublished string `json:"date_published"`
	// DateModified is the raw, unparsed "date_modified" string.
	DateModified string `json:"date_modified"`
	// Author is the item-level author object ("author").
	Author jsonAuthor `json:"author"`
	// Attachments is read from "attachments".
	Attachments []jsonAttachment `json:"attachments"`
}
// jsonAttachment is one element of an item's "attachments" array.
type jsonAttachment struct {
	// URL is read from "url".
	URL string `json:"url"`
	// MimeType is read from "mime_type".
	MimeType string `json:"mime_type"`
	// Title is read from "title".
	Title string `json:"title"`
	// Size is read from "size_in_bytes".
	Size int `json:"size_in_bytes"`
	// Duration is read from "duration_in_seconds".
	Duration int `json:"duration_in_seconds"`
}
// GetAuthor returns the name of the feed-level author, as resolved by
// getAuthor.
func (j *jsonFeed) GetAuthor() string {
	name := getAuthor(j.Author)
	return name
}
// Transform converts the decoded JSON Feed into a model.Feed.
//
// The feed title has its tags stripped and falls back to the site URL when
// it is empty. Every item is converted with jsonItem.Transform; items that
// carry no author of their own inherit the feed-level author.
func (j *jsonFeed) Transform() *model.Feed {
	feed := new(model.Feed)
	feed.FeedURL = j.FeedURL
	feed.SiteURL = j.SiteURL
	feed.Title = sanitizer.StripTags(j.Title)
	if feed.Title == "" {
		feed.Title = feed.SiteURL
	}

	// The fallback author is the same for every item, so resolve it once.
	// It comes from the feed document just like the item author, so it gets
	// the same tag stripping that jsonItem.Transform applies.
	feedAuthor := sanitizer.StripTags(j.GetAuthor())

	for i := range j.Items {
		entry := j.Items[i].Transform()
		if entry.Author == "" {
			entry.Author = feedAuthor
		}
		feed.Entries = append(feed.Entries, entry)
	}

	return feed
}
// GetDate returns the item date.
//
// The publication date is tried first, then the modification date. The first
// non-empty value that date.Parse accepts is returned. Values that fail to
// parse are logged and skipped. When no candidate yields a date, the current
// time is returned.
func (j *jsonItem) GetDate() time.Time {
	for _, value := range []string{j.DatePublished, j.DateModified} {
		if value == "" {
			continue
		}

		d, err := date.Parse(value)
		if err != nil {
			// Log the bad value but keep looking: a malformed
			// date_published must not hide a usable date_modified.
			log.Println(err)
			continue
		}

		return d
	}

	return time.Now()
}
// GetAuthor returns the name of the item-level author, as resolved by
// getAuthor.
func (j *jsonItem) GetAuthor() string {
	name := getAuthor(j.Author)
	return name
}
// GetHash returns helper.Hash of the first non-empty value among the item
// ID, the item URL, and the concatenation of text, HTML and summary. It
// returns an empty string when all of them are empty.
func (j *jsonItem) GetHash() string {
	switch {
	case j.ID != "":
		return helper.Hash(j.ID)
	case j.URL != "":
		return helper.Hash(j.URL)
	}

	if body := j.Text + j.HTML + j.Summary; body != "" {
		return helper.Hash(body)
	}

	return ""
}
// GetTitle returns the first non-empty value among title, summary, text
// content and HTML content, passed through truncate. When all of them are
// empty the item URL is returned unchanged.
func (j *jsonItem) GetTitle() string {
	switch {
	case j.Title != "":
		return truncate(j.Title)
	case j.Summary != "":
		return truncate(j.Summary)
	case j.Text != "":
		return truncate(j.Text)
	case j.HTML != "":
		return truncate(j.HTML)
	default:
		return j.URL
	}
}
// GetContent returns the item body, preferring HTML content, then text
// content, then the summary. It returns an empty string when none is set.
func (j *jsonItem) GetContent() string {
	switch {
	case j.HTML != "":
		return j.HTML
	case j.Text != "":
		return j.Text
	default:
		// An empty summary yields "", the same result as having no content.
		return j.Summary
	}
}
// GetEnclosures converts the item attachments into a model.EnclosureList,
// copying the URL, MIME type and size of each attachment. The returned list
// is non-nil even when the item has no attachments.
func (j *jsonItem) GetEnclosures() model.EnclosureList {
	// The final length is known up front, so size the backing array once.
	enclosures := make(model.EnclosureList, 0, len(j.Attachments))

	// Index instead of ranging by value to avoid copying each attachment.
	for i := range j.Attachments {
		attachment := &j.Attachments[i]
		enclosures = append(enclosures, &model.Enclosure{
			URL:      attachment.URL,
			MimeType: attachment.MimeType,
			Size:     attachment.Size,
		})
	}

	return enclosures
}
// Transform converts the item into a model.Entry.
//
// The author and title have their tags stripped, the title is additionally
// trimmed of surrounding spaces, newlines and tabs, and the content is run
// through processor.ItemContentProcessor together with the item URL.
func (j *jsonItem) Transform() *model.Entry {
	title := strings.Trim(j.GetTitle(), " \n\t")

	return &model.Entry{
		URL:        j.URL,
		Date:       j.GetDate(),
		Author:     sanitizer.StripTags(j.GetAuthor()),
		Hash:       j.GetHash(),
		Content:    processor.ItemContentProcessor(j.URL, j.GetContent()),
		Title:      sanitizer.StripTags(title),
		Enclosures: j.GetEnclosures(),
	}
}
// getAuthor returns the author's name, which is the empty string when the
// author has no name.
func getAuthor(author jsonAuthor) string {
	return author.Name
}
// truncate shortens str to at most 100 characters (runes) and appends "..."
// when something was cut off. Strings of 100 characters or fewer are
// returned unchanged.
//
// The cut is always made on a rune boundary so a multi-byte UTF-8 sequence
// is never split into invalid text.
func truncate(str string) string {
	const limit = 100

	// A string with at most limit bytes cannot hold more than limit runes.
	if len(str) <= limit {
		return str
	}

	count := 0
	// Ranging over a string yields the byte offset of each rune start.
	for offset := range str {
		if count == limit {
			// offset is where rune number limit+1 begins: cut right before it.
			return str[:offset] + "..."
		}
		count++
	}

	return str
}

24
reader/json/parser.go Normal file
View file

@ -0,0 +1,24 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package json
import (
"encoding/json"
"io"
"github.com/miniflux/miniflux2/errors"
"github.com/miniflux/miniflux2/model"
)
// Parse returns a normalized feed struct from a JSON feed.
//
// The document is decoded from data and converted with jsonFeed.Transform.
// A decoding failure is reported as a localized error and no feed is
// returned.
func Parse(data io.Reader) (*model.Feed, error) {
	feed := new(jsonFeed)

	// Decode into the struct itself, not into a pointer to the pointer:
	// with a **jsonFeed target a JSON "null" document resets feed to nil and
	// the Transform call below panics.
	if err := json.NewDecoder(data).Decode(feed); err != nil {
		return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err)
	}

	return feed.Transform(), nil
}

359
reader/json/parser_test.go Normal file
View file

@ -0,0 +1,359 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"strings"
"testing"
"time"
"github.com/miniflux/miniflux2/errors"
)
// TestParseJsonFeed parses a minimal two-item feed and checks the feed
// metadata plus the hash, URL, title and content of both entries.
func TestParseJsonFeed(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"id": "2",
"content_text": "This is a second item.",
"url": "https://example.org/second-item"
},
{
"id": "1",
"content_html": "<p>Hello, world!</p>",
"url": "https://example.org/initial-post"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if feed.Title != "My Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "https://example.org/feed.json" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if len(feed.Entries) != 2 {
		// The entries are indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "https://example.org/second-item" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Title != "This is a second item." {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
	}

	if feed.Entries[0].Content != "This is a second item." {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}

	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
	}

	if feed.Entries[1].URL != "https://example.org/initial-post" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
	}

	if feed.Entries[1].Title != "Hello, world!" {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
	}

	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
	}
}
// TestParsePodcast parses a podcast feed whose single item carries HTML
// content, a publication date and one audio attachment, and checks the
// resulting entry including its enclosure.
func TestParsePodcast(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
"title": "The Record",
"home_page_url": "http://therecord.co/",
"feed_url": "http://therecord.co/feed.json",
"items": [
{
"id": "http://therecord.co/chris-parrish",
"title": "Special #1 - Chris Parrish",
"url": "http://therecord.co/chris-parrish",
"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chriss new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chriss new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
"date_published": "2014-05-09T14:04:00-07:00",
"attachments": [
{
"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
"mime_type": "audio/x-m4a",
"size_in_bytes": 89970236,
"duration_in_seconds": 6629
}
]
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if feed.Title != "The Record" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "http://therecord.co/feed.json" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://therecord.co/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
	}

	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chriss new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
	}

	// The fixture's date uses a -07:00 offset. A fixed zone with that offset
	// names the same instant without needing a time zone database on the
	// host; Equal compares instants, not locations.
	location := time.FixedZone("PDT", -7*60*60)
	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
	}

	if len(feed.Entries[0].Enclosures) != 1 {
		// The enclosure is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
	}

	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
	}

	if feed.Entries[0].Enclosures[0].Size != 89970236 {
		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
	}
}
// TestParseAuthor checks that an item without its own author inherits the
// feed-level author name.
func TestParseAuthor(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
"title": "Brent Simmonss Microblog",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"author": {
"name": "Brent Simmons",
"url": "http://example.org/",
"avatar": "https://example.org/avatar.png"
},
"items": [
{
"id": "2347259",
"url": "https://example.org/2347259",
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
"date_published": "2016-02-09T14:22:00-07:00"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Author != "Brent Simmons" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
// TestParseFeedWithoutTitle checks that a feed without a "title" key gets
// its home page URL as title.
func TestParseFeedWithoutTitle(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"id": "2347259",
"url": "https://example.org/2347259",
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
"date_published": "2016-02-09T14:22:00-07:00"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if feed.Title != "https://example.org/" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}
// TestParseFeedItemWithInvalidDate checks that an item with an unparsable
// publication date is still returned and gets a date that is not in the
// future.
func TestParseFeedItemWithInvalidDate(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"id": "2347259",
"url": "https://example.org/2347259",
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
"date_published": "Tomorrow"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// The fallback date is taken from the clock during Parse. On a coarse
	// clock it can equal the reading taken here, so only reject dates that
	// are strictly in the future.
	if feed.Entries[0].Date.After(time.Now()) {
		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
	}
}
// TestParseFeedItemWithoutID checks the hash of an item that has neither an
// ID nor a URL, only text content.
func TestParseFeedItemWithoutID(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"content_text": "Some text."
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}
}
// TestParseFeedItemWithoutTitle checks that an item carrying only a URL
// gets that URL as its title.
func TestParseFeedItemWithoutTitle(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"url": "https://example.org/item"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "https://example.org/item" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
// TestParseTruncateItemTitle checks that a 200-character item title comes
// back with a length of 103.
func TestParseTruncateItemTitle(t *testing.T) {
	data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json",
"items": [
{
"title": "` + strings.Repeat("a", 200) + `"
}
]
}`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// feed is nil on error; stop before it is dereferenced.
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		// The entry is indexed below; stop before going out of range.
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if len(feed.Entries[0].Title) != 103 {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
// TestParseInvalidJSON checks that input which is not JSON makes Parse
// return an error, and that this error is a LocalizedError.
func TestParseInvalidJSON(t *testing.T) {
	_, err := Parse(bytes.NewBufferString(`garbage`))
	if err == nil {
		t.Error("Parse should returns an error")
	}

	_, isLocalized := err.(errors.LocalizedError)
	if !isLocalized {
		t.Error("The error returned must be a LocalizedError")
	}
}