Mirror of https://github.com/miniflux/v2.git
Move feed parsers packages in reader package
parent c26787f476
commit d5838b6734
14 changed files with 7 additions and 7 deletions
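The rename boils down to dropping the feed/ segment from the parser import paths. The import block below is a sketch of the before and after, based on the reader/feed/parser.go hunk later in this diff (old paths shown as comments for reference only):

import (
    // old locations (before this commit):
    //   github.com/miniflux/miniflux2/reader/feed/atom
    //   github.com/miniflux/miniflux2/reader/feed/json
    //   github.com/miniflux/miniflux2/reader/feed/rdf
    //   github.com/miniflux/miniflux2/reader/feed/rss
    // new locations (after this commit):
    "github.com/miniflux/miniflux2/reader/atom"
    "github.com/miniflux/miniflux2/reader/json"
    "github.com/miniflux/miniflux2/reader/rdf"
    "github.com/miniflux/miniflux2/reader/rss"
)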
@ -1,193 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package atom

import (
    "encoding/xml"
    "log"
    "strconv"
    "strings"
    "time"

    "github.com/miniflux/miniflux2/helper"
    "github.com/miniflux/miniflux2/model"
    "github.com/miniflux/miniflux2/reader/feed/date"
    "github.com/miniflux/miniflux2/reader/processor"
    "github.com/miniflux/miniflux2/reader/sanitizer"
)

type atomFeed struct {
    XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"`
    ID      string      `xml:"id"`
    Title   string      `xml:"title"`
    Author  atomAuthor  `xml:"author"`
    Links   []atomLink  `xml:"link"`
    Entries []atomEntry `xml:"entry"`
}

type atomEntry struct {
    ID         string         `xml:"id"`
    Title      string         `xml:"title"`
    Updated    string         `xml:"updated"`
    Links      []atomLink     `xml:"link"`
    Summary    string         `xml:"summary"`
    Content    atomContent    `xml:"content"`
    MediaGroup atomMediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
    Author     atomAuthor     `xml:"author"`
}

type atomAuthor struct {
    Name  string `xml:"name"`
    Email string `xml:"email"`
}

type atomLink struct {
    URL    string `xml:"href,attr"`
    Type   string `xml:"type,attr"`
    Rel    string `xml:"rel,attr"`
    Length string `xml:"length,attr"`
}

type atomContent struct {
    Type string `xml:"type,attr"`
    Data string `xml:",chardata"`
    XML  string `xml:",innerxml"`
}

type atomMediaGroup struct {
    Description string `xml:"http://search.yahoo.com/mrss/ description"`
}

func (a *atomFeed) Transform() *model.Feed {
    feed := new(model.Feed)
    feed.FeedURL = getRelationURL(a.Links, "self")
    feed.SiteURL = getURL(a.Links)
    feed.Title = sanitizer.StripTags(a.Title)

    if feed.Title == "" {
        feed.Title = feed.SiteURL
    }

    for _, entry := range a.Entries {
        item := entry.Transform()
        if item.Author == "" {
            item.Author = getAuthor(a.Author)
        }

        feed.Entries = append(feed.Entries, item)
    }

    return feed
}

func (a *atomEntry) Transform() *model.Entry {
    entry := new(model.Entry)
    entry.URL = getURL(a.Links)
    entry.Date = getDate(a)
    entry.Author = sanitizer.StripTags(getAuthor(a.Author))
    entry.Hash = getHash(a)
    entry.Content = processor.ItemContentProcessor(entry.URL, getContent(a))
    entry.Title = sanitizer.StripTags(strings.Trim(a.Title, " \n\t"))
    entry.Enclosures = getEnclosures(a)

    if entry.Title == "" {
        entry.Title = entry.URL
    }

    return entry
}

func getURL(links []atomLink) string {
    for _, link := range links {
        if strings.ToLower(link.Rel) == "alternate" {
            return link.URL
        }

        if link.Rel == "" && link.Type == "" {
            return link.URL
        }
    }

    return ""
}

func getRelationURL(links []atomLink, relation string) string {
    for _, link := range links {
        if strings.ToLower(link.Rel) == relation {
            return link.URL
        }
    }

    return ""
}

func getDate(a *atomEntry) time.Time {
    if a.Updated != "" {
        result, err := date.Parse(a.Updated)
        if err != nil {
            log.Println(err)
            return time.Now()
        }

        return result
    }

    return time.Now()
}

func getContent(a *atomEntry) string {
    if a.Content.Type == "html" || a.Content.Type == "text" {
        return a.Content.Data
    }

    if a.Content.Type == "xhtml" {
        return a.Content.XML
    }

    if a.Summary != "" {
        return a.Summary
    }

    if a.MediaGroup.Description != "" {
        return a.MediaGroup.Description
    }

    return ""
}

func getHash(a *atomEntry) string {
    for _, value := range []string{a.ID, getURL(a.Links)} {
        if value != "" {
            return helper.Hash(value)
        }
    }

    return ""
}

func getEnclosures(a *atomEntry) model.EnclosureList {
    enclosures := make(model.EnclosureList, 0)

    for _, link := range a.Links {
        if strings.ToLower(link.Rel) == "enclosure" {
            length, _ := strconv.Atoi(link.Length)
            enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
        }
    }

    return enclosures
}

func getAuthor(author atomAuthor) string {
    if author.Name != "" {
        return author.Name
    }

    if author.Email != "" {
        return author.Email
    }

    return ""
}
@ -1,29 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package atom

import (
    "encoding/xml"
    "io"

    "github.com/miniflux/miniflux2/errors"
    "github.com/miniflux/miniflux2/model"

    "golang.org/x/net/html/charset"
)

// Parse returns a normalized feed struct from an Atom feed.
func Parse(data io.Reader) (*model.Feed, error) {
    atomFeed := new(atomFeed)
    decoder := xml.NewDecoder(data)
    decoder.CharsetReader = charset.NewReaderLabel

    err := decoder.Decode(atomFeed)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse Atom feed: %v.", err)
    }

    return atomFeed.Transform(), nil
}
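The moved atom package keeps the same public surface: a single Parse(io.Reader) (*model.Feed, error) entry point, exercised by the tests below. A minimal, illustrative caller against the new import path (the main wrapper and sample XML are not part of this commit):

package main

import (
    "bytes"
    "fmt"

    "github.com/miniflux/miniflux2/reader/atom"
)

func main() {
    data := `<feed xmlns="http://www.w3.org/2005/Atom"><title>Example Feed</title></feed>`

    // Parse accepts any io.Reader and returns a normalized *model.Feed.
    feed, err := atom.Parse(bytes.NewBufferString(data))
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Println(feed.Title) // "Example Feed"
}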
@ -1,333 +0,0 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package atom
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
)
|
||||
|
||||
func TestParseAtomSample(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "Some text." {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "John Doe" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="alternate" type="text/html" href="https://example.org/"/>
|
||||
<link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect feed title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link rel="alternate" type="text/html" href="https://example.org/"/>
|
||||
<link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/feed" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title>
|
||||
Some Title
|
||||
</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Some Title" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAuthorName(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<author>
|
||||
<name>Me</name>
|
||||
<email>me@localhost</email>
|
||||
</author>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Me" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutAuthorName(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<author>
|
||||
<name/>
|
||||
<email>me@localhost</email>
|
||||
</author>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "me@localhost" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>http://www.example.org/myfeed</id>
|
||||
<title>My Podcast Feed</title>
|
||||
<updated>2005-07-15T12:00:00Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<link href="http://example.org" />
|
||||
<link rel="self" href="http://example.org/myfeed" />
|
||||
<entry>
|
||||
<id>http://www.example.org/entries/1</id>
|
||||
<title>Atom 1.0</title>
|
||||
<updated>2005-07-15T12:00:00Z</updated>
|
||||
<link href="http://www.example.org/entries/1" />
|
||||
<summary>An overview of Atom 1.0</summary>
|
||||
<link rel="enclosure"
|
||||
type="audio/mpeg"
|
||||
title="MP3"
|
||||
href="http://www.example.org/myaudiofile.mp3"
|
||||
length="1234" />
|
||||
<link rel="enclosure"
|
||||
type="application/x-bittorrent"
|
||||
title="BitTorrent"
|
||||
href="http://www.example.org/myaudiofile.torrent"
|
||||
length="4567" />
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">
|
||||
<h1>Show Notes</h1>
|
||||
<ul>
|
||||
<li>00:01:00 -- Introduction</li>
|
||||
<li>00:15:00 -- Talking about Atom 1.0</li>
|
||||
<li>00:30:00 -- Wrapping up</li>
|
||||
</ul>
|
||||
</div>
|
||||
</content>
|
||||
</entry>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 2 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 1234 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].URL != "http://www.example.org/myaudiofile.torrent" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[1].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].MimeType != "application/x-bittorrent" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[1].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].Size != 4567 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXml(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should returns an error")
|
||||
}
|
||||
|
||||
if _, ok := err.(errors.LocalizedError); !ok {
|
||||
t.Error("The error returned must be a LocalizedError")
|
||||
}
|
||||
}
|
|
@ -1,203 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package date

import (
    "fmt"
    "strings"
    "time"
)

// DateFormats taken from github.com/mjibson/goread
var dateFormats = []string{
    time.RFC822,  // RSS
    time.RFC822Z, // RSS
    time.RFC3339, // Atom
    time.UnixDate,
    time.RubyDate,
    time.RFC850,
    time.RFC1123Z,
    time.RFC1123,
    time.ANSIC,
    "Mon, January 2 2006 15:04:05 -0700",
    "Mon, January 02, 2006, 15:04:05 MST",
    "Mon, January 02, 2006 15:04:05 MST",
    "Mon, Jan 2, 2006 15:04 MST",
    "Mon, Jan 2 2006 15:04 MST",
    "Mon, Jan 2, 2006 15:04:05 MST",
    "Mon, Jan 2 2006 15:04:05 -700",
    "Mon, Jan 2 2006 15:04:05 -0700",
    "Mon Jan 2 15:04 2006",
    "Mon Jan 2 15:04:05 2006 MST",
    "Mon Jan 02, 2006 3:04 pm",
    "Mon, Jan 02,2006 15:04:05 MST",
    "Mon Jan 02 2006 15:04:05 -0700",
    "Monday, January 2, 2006 15:04:05 MST",
    "Monday, January 2, 2006 03:04 PM",
    "Monday, January 2, 2006",
    "Monday, January 02, 2006",
    "Monday, 2 January 2006 15:04:05 MST",
    "Monday, 2 January 2006 15:04:05 -0700",
    "Monday, 2 Jan 2006 15:04:05 MST",
    "Monday, 2 Jan 2006 15:04:05 -0700",
    "Monday, 02 January 2006 15:04:05 MST",
    "Monday, 02 January 2006 15:04:05 -0700",
    "Monday, 02 January 2006 15:04:05",
    "Mon, 2 January 2006 15:04 MST",
    "Mon, 2 January 2006, 15:04 -0700",
    "Mon, 2 January 2006, 15:04:05 MST",
    "Mon, 2 January 2006 15:04:05 MST",
    "Mon, 2 January 2006 15:04:05 -0700",
    "Mon, 2 January 2006",
    "Mon, 2 Jan 2006 3:04:05 PM -0700",
    "Mon, 2 Jan 2006 15:4:5 MST",
    "Mon, 2 Jan 2006 15:4:5 -0700 GMT",
    "Mon, 2, Jan 2006 15:4",
    "Mon, 2 Jan 2006 15:04 MST",
    "Mon, 2 Jan 2006, 15:04 -0700",
    "Mon, 2 Jan 2006 15:04 -0700",
    "Mon, 2 Jan 2006 15:04:05 UT",
    "Mon, 2 Jan 2006 15:04:05MST",
    "Mon, 2 Jan 2006 15:04:05 MST",
    "Mon 2 Jan 2006 15:04:05 MST",
    "mon,2 Jan 2006 15:04:05 MST",
    "Mon, 2 Jan 2006 15:04:05 -0700 MST",
    "Mon, 2 Jan 2006 15:04:05-0700",
    "Mon, 2 Jan 2006 15:04:05 -0700",
    "Mon, 2 Jan 2006 15:04:05",
    "Mon, 2 Jan 2006 15:04",
    "Mon,2 Jan 2006",
    "Mon, 2 Jan 2006",
    "Mon, 2 Jan 15:04:05 MST",
    "Mon, 2 Jan 06 15:04:05 MST",
    "Mon, 2 Jan 06 15:04:05 -0700",
    "Mon, 2006-01-02 15:04",
    "Mon,02 January 2006 14:04:05 MST",
    "Mon, 02 January 2006",
    "Mon, 02 Jan 2006 3:04:05 PM MST",
    "Mon, 02 Jan 2006 15 -0700",
    "Mon,02 Jan 2006 15:04 MST",
    "Mon, 02 Jan 2006 15:04 MST",
    "Mon, 02 Jan 2006 15:04 -0700",
    "Mon, 02 Jan 2006 15:04:05 Z",
    "Mon, 02 Jan 2006 15:04:05 UT",
    "Mon, 02 Jan 2006 15:04:05 MST-07:00",
    "Mon, 02 Jan 2006 15:04:05 MST -0700",
    "Mon, 02 Jan 2006, 15:04:05 MST",
    "Mon, 02 Jan 2006 15:04:05MST",
    "Mon, 02 Jan 2006 15:04:05 MST",
    "Mon , 02 Jan 2006 15:04:05 MST",
    "Mon, 02 Jan 2006 15:04:05 GMT-0700",
    "Mon,02 Jan 2006 15:04:05 -0700",
    "Mon, 02 Jan 2006 15:04:05 -0700",
    "Mon, 02 Jan 2006 15:04:05 -07:00",
    "Mon, 02 Jan 2006 15:04:05 --0700",
    "Mon 02 Jan 2006 15:04:05 -0700",
    "Mon, 02 Jan 2006 15:04:05 -07",
    "Mon, 02 Jan 2006 15:04:05 00",
    "Mon, 02 Jan 2006 15:04:05",
    "Mon, 02 Jan 2006",
    "Mon, 02 Jan 06 15:04:05 MST",
    "January 2, 2006 3:04 PM",
    "January 2, 2006, 3:04 p.m.",
    "January 2, 2006 15:04:05 MST",
    "January 2, 2006 15:04:05",
    "January 2, 2006 03:04 PM",
    "January 2, 2006",
    "January 02, 2006 15:04:05 MST",
    "January 02, 2006 15:04",
    "January 02, 2006 03:04 PM",
    "January 02, 2006",
    "Jan 2, 2006 3:04:05 PM MST",
    "Jan 2, 2006 3:04:05 PM",
    "Jan 2, 2006 15:04:05 MST",
    "Jan 2, 2006",
    "Jan 02 2006 03:04:05PM",
    "Jan 02, 2006",
    "6/1/2 15:04",
    "6-1-2 15:04",
    "2 January 2006 15:04:05 MST",
    "2 January 2006 15:04:05 -0700",
    "2 January 2006",
    "2 Jan 2006 15:04:05 Z",
    "2 Jan 2006 15:04:05 MST",
    "2 Jan 2006 15:04:05 -0700",
    "2 Jan 2006",
    "2.1.2006 15:04:05",
    "2/1/2006",
    "2-1-2006",
    "2006 January 02",
    "2006-1-2T15:04:05Z",
    "2006-1-2 15:04:05",
    "2006-1-2",
    "2006-1-02T15:04:05Z",
    "2006-01-02T15:04Z",
    "2006-01-02T15:04-07:00",
    "2006-01-02T15:04:05Z",
    "2006-01-02T15:04:05-07:00:00",
    "2006-01-02T15:04:05:-0700",
    "2006-01-02T15:04:05-0700",
    "2006-01-02T15:04:05-07:00",
    "2006-01-02T15:04:05 -0700",
    "2006-01-02T15:04:05:00",
    "2006-01-02T15:04:05",
    "2006-01-02 at 15:04:05",
    "2006-01-02 15:04:05Z",
    "2006-01-02 15:04:05 MST",
    "2006-01-02 15:04:05-0700",
    "2006-01-02 15:04:05-07:00",
    "2006-01-02 15:04:05 -0700",
    "2006-01-02 15:04",
    "2006-01-02 00:00:00.0 15:04:05.0 -0700",
    "2006/01/02",
    "2006-01-02",
    "15:04 02.01.2006 -0700",
    "1/2/2006 3:04 PM MST",
    "1/2/2006 3:04:05 PM MST",
    "1/2/2006 3:04:05 PM",
    "1/2/2006 15:04:05 MST",
    "1/2/2006",
    "06/1/2 15:04",
    "06-1-2 15:04",
    "02 Monday, Jan 2006 15:04",
    "02 Jan 2006 15:04 MST",
    "02 Jan 2006 15:04:05 UT",
    "02 Jan 2006 15:04:05 MST",
    "02 Jan 2006 15:04:05 -0700",
    "02 Jan 2006 15:04:05",
    "02 Jan 2006",
    "02/01/2006 15:04 MST",
    "02-01-2006 15:04:05 MST",
    "02.01.2006 15:04:05",
    "02/01/2006 15:04:05",
    "02.01.2006 15:04",
    "02/01/2006 - 15:04",
    "02.01.2006 -0700",
    "02/01/2006",
    "02-01-2006",
    "01/02/2006 3:04 PM",
    "01/02/2006 15:04:05 MST",
    "01/02/2006 - 15:04",
    "01/02/2006",
    "01-02-2006",
}

// Parse parses a given date string using a large
// list of commonly found feed date formats.
func Parse(ds string) (t time.Time, err error) {
    d := strings.TrimSpace(ds)
    if d == "" {
        return t, fmt.Errorf("Date string is empty")
    }

    for _, f := range dateFormats {
        if t, err = time.Parse(f, d); err == nil {
            return
        }
    }

    err = fmt.Errorf("Failed to parse date: %s", ds)
    return
}
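Because Parse simply walks dateFormats in order and returns on the first layout that matches, using it is a one-liner. A minimal, illustrative sketch follows; the import path shown is the pre-move one used by the old atom.go above, and presumably loses the feed/ segment after this commit as well:

package main

import (
    "fmt"

    "github.com/miniflux/miniflux2/reader/feed/date"
)

func main() {
    // "Tue, 03 Jun 2003 09:39:21 GMT" matches the time.RFC1123 layout in dateFormats.
    parsed, err := date.Parse("Tue, 03 Jun 2003 09:39:21 GMT")
    if err != nil {
        fmt.Println(err)
        return
    }

    fmt.Println(parsed.UTC()) // 2003-06-03 09:39:21 +0000 UTC
}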
@ -1,171 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package json

import (
    "log"
    "strings"
    "time"

    "github.com/miniflux/miniflux2/helper"
    "github.com/miniflux/miniflux2/model"
    "github.com/miniflux/miniflux2/reader/feed/date"
    "github.com/miniflux/miniflux2/reader/processor"
    "github.com/miniflux/miniflux2/reader/sanitizer"
)

type jsonFeed struct {
    Version string     `json:"version"`
    Title   string     `json:"title"`
    SiteURL string     `json:"home_page_url"`
    FeedURL string     `json:"feed_url"`
    Author  jsonAuthor `json:"author"`
    Items   []jsonItem `json:"items"`
}

type jsonAuthor struct {
    Name string `json:"name"`
    URL  string `json:"url"`
}

type jsonItem struct {
    ID            string           `json:"id"`
    URL           string           `json:"url"`
    Title         string           `json:"title"`
    Summary       string           `json:"summary"`
    Text          string           `json:"content_text"`
    HTML          string           `json:"content_html"`
    DatePublished string           `json:"date_published"`
    DateModified  string           `json:"date_modified"`
    Author        jsonAuthor       `json:"author"`
    Attachments   []jsonAttachment `json:"attachments"`
}

type jsonAttachment struct {
    URL      string `json:"url"`
    MimeType string `json:"mime_type"`
    Title    string `json:"title"`
    Size     int    `json:"size_in_bytes"`
    Duration int    `json:"duration_in_seconds"`
}

func (j *jsonFeed) GetAuthor() string {
    return getAuthor(j.Author)
}

func (j *jsonFeed) Transform() *model.Feed {
    feed := new(model.Feed)
    feed.FeedURL = j.FeedURL
    feed.SiteURL = j.SiteURL
    feed.Title = sanitizer.StripTags(j.Title)

    if feed.Title == "" {
        feed.Title = feed.SiteURL
    }

    for _, item := range j.Items {
        entry := item.Transform()
        if entry.Author == "" {
            entry.Author = j.GetAuthor()
        }

        feed.Entries = append(feed.Entries, entry)
    }

    return feed
}

func (j *jsonItem) GetDate() time.Time {
    for _, value := range []string{j.DatePublished, j.DateModified} {
        if value != "" {
            d, err := date.Parse(value)
            if err != nil {
                log.Println(err)
                return time.Now()
            }

            return d
        }
    }

    return time.Now()
}

func (j *jsonItem) GetAuthor() string {
    return getAuthor(j.Author)
}

func (j *jsonItem) GetHash() string {
    for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} {
        if value != "" {
            return helper.Hash(value)
        }
    }

    return ""
}

func (j *jsonItem) GetTitle() string {
    for _, value := range []string{j.Title, j.Summary, j.Text, j.HTML} {
        if value != "" {
            return truncate(value)
        }
    }

    return j.URL
}

func (j *jsonItem) GetContent() string {
    for _, value := range []string{j.HTML, j.Text, j.Summary} {
        if value != "" {
            return value
        }
    }

    return ""
}

func (j *jsonItem) GetEnclosures() model.EnclosureList {
    enclosures := make(model.EnclosureList, 0)

    for _, attachment := range j.Attachments {
        enclosures = append(enclosures, &model.Enclosure{
            URL:      attachment.URL,
            MimeType: attachment.MimeType,
            Size:     attachment.Size,
        })
    }

    return enclosures
}

func (j *jsonItem) Transform() *model.Entry {
    entry := new(model.Entry)
    entry.URL = j.URL
    entry.Date = j.GetDate()
    entry.Author = sanitizer.StripTags(j.GetAuthor())
    entry.Hash = j.GetHash()
    entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent())
    entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t"))
    entry.Enclosures = j.GetEnclosures()
    return entry
}

func getAuthor(author jsonAuthor) string {
    if author.Name != "" {
        return author.Name
    }

    return ""
}

func truncate(str string) string {
    max := 100
    if len(str) > max {
        return str[:max] + "..."
    }

    return str
}
@ -1,24 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package json

import (
    "encoding/json"
    "io"

    "github.com/miniflux/miniflux2/errors"
    "github.com/miniflux/miniflux2/model"
)

// Parse returns a normalized feed struct from a JSON feed.
func Parse(data io.Reader) (*model.Feed, error) {
    feed := new(jsonFeed)
    decoder := json.NewDecoder(data)
    if err := decoder.Decode(&feed); err != nil {
        return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %v", err)
    }

    return feed.Transform(), nil
}
@ -1,359 +0,0 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
)
|
||||
|
||||
func TestParseJsonFeed(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "My Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/feed.json" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/second-item" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is a second item." {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "This is a second item." {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "https://example.org/initial-post" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Title != "Hello, world!" {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Content != "<p>Hello, world!</p>" {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePodcast(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
|
||||
"title": "The Record",
|
||||
"home_page_url": "http://therecord.co/",
|
||||
"feed_url": "http://therecord.co/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "http://therecord.co/chris-parrish",
|
||||
"title": "Special #1 - Chris Parrish",
|
||||
"url": "http://therecord.co/chris-parrish",
|
||||
"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
|
||||
"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
|
||||
"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
|
||||
"date_published": "2014-05-09T14:04:00-07:00",
|
||||
"attachments": [
|
||||
{
|
||||
"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
|
||||
"mime_type": "audio/x-m4a",
|
||||
"size_in_bytes": 89970236,
|
||||
"duration_in_seconds": 6629
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "The Record" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://therecord.co/feed.json" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://therecord.co/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
|
||||
t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
location, _ := time.LoadLocation("America/Vancouver")
|
||||
if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 89970236 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAuthor(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
|
||||
"title": "Brent Simmons’s Microblog",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"author": {
|
||||
"name": "Brent Simmons",
|
||||
"url": "http://example.org/",
|
||||
"avatar": "https://example.org/avatar.png"
|
||||
},
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Brent Simmons" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithInvalidDate(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "Tomorrow"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if !feed.Entries[0].Date.Before(time.Now()) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutID(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_text": "Some text."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"url": "https://example.org/item"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTruncateItemTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"title": "` + strings.Repeat("a", 200) + `"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Title) != 103 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidJSON(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should returns an error")
|
||||
}
|
||||
|
||||
if _, ok := err.(errors.LocalizedError); !ok {
|
||||
t.Error("The error returned must be a LocalizedError")
|
||||
}
|
||||
}
|
|
@ -14,10 +14,10 @@ import (
 	"github.com/miniflux/miniflux2/helper"
 	"github.com/miniflux/miniflux2/model"
-	"github.com/miniflux/miniflux2/reader/feed/atom"
-	"github.com/miniflux/miniflux2/reader/feed/json"
-	"github.com/miniflux/miniflux2/reader/feed/rdf"
-	"github.com/miniflux/miniflux2/reader/feed/rss"
+	"github.com/miniflux/miniflux2/reader/atom"
+	"github.com/miniflux/miniflux2/reader/json"
+	"github.com/miniflux/miniflux2/reader/rdf"
+	"github.com/miniflux/miniflux2/reader/rss"
 
 	"golang.org/x/net/html/charset"
 )
@ -1,28 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package rdf

import (
    "encoding/xml"
    "io"

    "github.com/miniflux/miniflux2/errors"
    "github.com/miniflux/miniflux2/model"
    "golang.org/x/net/html/charset"
)

// Parse returns a normalized feed struct from an RDF feed.
func Parse(data io.Reader) (*model.Feed, error) {
    feed := new(rdfFeed)
    decoder := xml.NewDecoder(data)
    decoder.CharsetReader = charset.NewReaderLabel

    err := decoder.Decode(feed)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse RDF feed: %v.", err)
    }

    return feed.Transform(), nil
}
@ -1,307 +0,0 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rdf
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
)
|
||||
|
||||
func TestParseRDFSample(t *testing.T) {
|
||||
data := `
|
||||
<?xml version="1.0"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://www.xml.com/xml/news.rss">
|
||||
<title>XML.com</title>
|
||||
<link>http://xml.com/pub</link>
|
||||
<description>
|
||||
XML.com features a rich mix of information and services
|
||||
for the XML community.
|
||||
</description>
|
||||
|
||||
<image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
|
||||
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
|
||||
<rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
|
||||
<textinput rdf:resource="http://search.xml.com" />
|
||||
|
||||
</channel>
|
||||
|
||||
<image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
|
||||
<title>XML.com</title>
|
||||
<link>http://www.xml.com</link>
|
||||
<url>http://xml.com/universal/images/xml_tiny.gif</url>
|
||||
</image>
|
||||
|
||||
<item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
|
||||
<title>Processing Inclusions with XSLT</title>
|
||||
<link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
|
||||
<description>
|
||||
Processing document inclusions with general XML tools can be
|
||||
problematic. This article proposes a way of preserving inclusion
|
||||
information through SAX-based processing.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
|
||||
<title>Putting RDF to Work</title>
|
||||
<link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
|
||||
<description>
|
||||
Tool and API support for the Resource Description Framework
|
||||
is slowly coming of age. Edd Dumbill takes a look at RDFDB,
|
||||
one of the most exciting new RDF toolkits.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<textinput rdf:about="http://search.xml.com">
|
||||
<title>Search XML.com</title>
|
||||
<description>Search XML.com's XML collection</description>
|
||||
<name>s</name>
|
||||
<link>http://search.xml.com</link>
|
||||
</textinput>
|
||||
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "XML.com" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://xml.com/pub" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Title != "Putting RDF to Work" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDFSampleWithDublinCore(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||
xmlns:co="http://purl.org/rss/1.0/modules/company/"
|
||||
xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||
<title>Meerkat</title>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
<description>Meerkat: An Open Wire Service</description>
|
||||
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||
<dc:date>2000-01-01T12:00+00:00</dc:date>
|
||||
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||
<sy:updateFrequency>2</sy:updateFrequency>
|
||||
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
||||
|
||||
<image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
|
||||
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
|
||||
<textinput rdf:resource="http://meerkat.oreillynet.com" />
|
||||
|
||||
</channel>
|
||||
|
||||
<image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
|
||||
<title>Meerkat Powered!</title>
|
||||
<url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
</image>
|
||||
|
||||
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||
<title>XML: A Disruptive Technology</title>
|
||||
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||
<dc:description>
|
||||
XML is placing increasingly heavy loads on the existing technical
|
||||
infrastructure of the Internet.
|
||||
</dc:description>
|
||||
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||
<dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
|
||||
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||
<dc:subject>XML</dc:subject>
|
||||
<co:name>XML.com</co:name>
|
||||
<co:market>NASDAQ</co:market>
|
||||
<co:symbol>XML</co:symbol>
|
||||
</item>
|
||||
|
||||
<textinput rdf:about="http://meerkat.oreillynet.com">
|
||||
<title>Search Meerkat</title>
|
||||
<description>Search Meerkat's RSS Database...</description>
|
||||
<name>s</name>
|
||||
<link>http://meerkat.oreillynet.com/</link>
|
||||
<ti:function>search</ti:function>
|
||||
<ti:inputType>regex</ti:inputType>
|
||||
</textinput>
|
||||
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Meerkat" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://meerkat.oreillynet.com" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "XML: A Disruptive Technology" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||
<title>Meerkat</title>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||
</channel>
|
||||
|
||||
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||
<title>XML: A Disruptive Technology</title>
|
||||
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||
<dc:description>
|
||||
XML is placing increasingly heavy loads on the existing technical
|
||||
infrastructure of the Internet.
|
||||
</dc:description>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseItemWithoutLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||
<title>Meerkat</title>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
</channel>
|
||||
|
||||
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||
<title>Title</title>
|
||||
<description>Test</description>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
|
||||
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXml(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should returns an error")
|
||||
}
|
||||
|
||||
if _, ok := err.(errors.LocalizedError); !ok {
|
||||
t.Error("The error returned must be a LocalizedError")
|
||||
}
|
||||
}
|
|
@ -1,71 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package rdf

import (
    "encoding/xml"

    "github.com/miniflux/miniflux2/helper"
    "github.com/miniflux/miniflux2/reader/processor"
    "github.com/miniflux/miniflux2/reader/sanitizer"

    "github.com/miniflux/miniflux2/model"
)

type rdfFeed struct {
    XMLName xml.Name  `xml:"RDF"`
    Title   string    `xml:"channel>title"`
    Link    string    `xml:"channel>link"`
    Creator string    `xml:"channel>creator"`
    Items   []rdfItem `xml:"item"`
}

func (r *rdfFeed) Transform() *model.Feed {
    feed := new(model.Feed)
    feed.Title = sanitizer.StripTags(r.Title)
    feed.SiteURL = r.Link

    for _, item := range r.Items {
        entry := item.Transform()

        if entry.Author == "" && r.Creator != "" {
            entry.Author = sanitizer.StripTags(r.Creator)
        }

        if entry.URL == "" {
            entry.URL = feed.SiteURL
        }

        feed.Entries = append(feed.Entries, entry)
    }

    return feed
}

type rdfItem struct {
    Title       string `xml:"title"`
    Link        string `xml:"link"`
    Description string `xml:"description"`
    Creator     string `xml:"creator"`
}

func (r *rdfItem) Transform() *model.Entry {
    entry := new(model.Entry)
    entry.Title = sanitizer.StripTags(r.Title)
    entry.Author = sanitizer.StripTags(r.Creator)
    entry.URL = r.Link
    entry.Content = processor.ItemContentProcessor(entry.URL, r.Description)
    entry.Hash = getHash(r)
    return entry
}

func getHash(r *rdfItem) string {
    value := r.Link
    if value == "" {
        value = r.Title + r.Description
    }

    return helper.Hash(value)
}
@@ -1,29 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package rss

import (
	"encoding/xml"
	"io"

	"github.com/miniflux/miniflux2/errors"
	"github.com/miniflux/miniflux2/model"

	"golang.org/x/net/html/charset"
)

// Parse returns a normalized feed struct from a RSS feed.
func Parse(data io.Reader) (*model.Feed, error) {
	feed := new(rssFeed)
	decoder := xml.NewDecoder(data)
	decoder.CharsetReader = charset.NewReaderLabel

	err := decoder.Decode(feed)
	if err != nil {
		return nil, errors.NewLocalizedError("Unable to parse RSS feed: %v.", err)
	}

	return feed.Transform(), nil
}
@@ -1,550 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package rss

import (
	"bytes"
	"testing"
	"time"

	"github.com/miniflux/miniflux2/errors"
)

func TestParseRss2Sample(t *testing.T) {
	data := `
	<?xml version="1.0"?>
	<rss version="2.0">
	<channel>
		<title>Liftoff News</title>
		<link>http://liftoff.msfc.nasa.gov/</link>
		<description>Liftoff to Space Exploration.</description>
		<language>en-us</language>
		<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
		<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
		<docs>http://blogs.law.harvard.edu/tech/rss</docs>
		<generator>Weblog Editor 2.0</generator>
		<managingEditor>editor@example.com</managingEditor>
		<webMaster>webmaster@example.com</webMaster>
		<item>
			<title>Star City</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
			<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
			<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
		</item>
		<item>
			<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
			<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
		</item>
		<item>
			<title>The Engine That Does More</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
			<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
			<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
		</item>
		<item>
			<title>Astronauts' Dirty Laundry</title>
			<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
			<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
			<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
			<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Title != "Liftoff News" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if len(feed.Entries) != 4 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
	if !feed.Entries[0].Date.Equal(expectedDate) {
		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
	}

	if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Title != "Star City" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}

	if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}
}

func TestParseFeedWithoutTitle(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0">
	<channel>
		<link>https://example.org/</link>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Title != "https://example.org/" {
		t.Errorf("Incorrect feed title, got: %s", feed.Title)
	}
}

func TestParseEntryWithoutTitle(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0">
	<channel>
		<link>https://example.org/</link>
		<item>
			<link>https://example.org/item</link>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Title != "https://example.org/item" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}

func TestParseEntryWithoutLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0">
	<channel>
		<link>https://example.org/</link>
		<item>
			<guid isPermaLink="false">1234</guid>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].URL != "https://example.org/" {
		t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Hash != "03ac674216f3e15c761ee1a5e255f067953623c8b388b4459e13f978d7c846f4" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}
}

func TestParseEntryWithAtomLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<link>https://example.org/</link>
		<item>
			<title>Test</title>
			<atom:link href="https://example.org/item" />
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].URL != "https://example.org/item" {
		t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
	}
}

func TestParseEntryWithMultipleAtomLinks(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
	<channel>
		<link>https://example.org/</link>
		<item>
			<title>Test</title>
			<atom:link rel="payment" href="https://example.org/a" />
			<atom:link rel="http://foobar.tld" href="https://example.org/b" />
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].URL != "https://example.org/b" {
		t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
	}
}

func TestParseFeedURLWithAtomLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
	<channel>
		<title>Example</title>
		<link>https://example.org/</link>
		<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.FeedURL != "https://example.org/rss" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}
}

func TestParseEntryWithAtomAuthor(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
	<channel>
		<title>Example</title>
		<link>https://example.org/</link>
		<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
		<item>
			<title>Test</title>
			<link>https://example.org/item</link>
			<author xmlns:author="http://www.w3.org/2005/Atom">
				<name>Foo Bar</name>
				<title>Vice President</title>
				<department/>
				<company>FooBar Inc.</company>
			</author>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Author != "Foo Bar" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}

func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
	<channel>
		<title>Example</title>
		<link>https://example.org/</link>
		<item>
			<title>Test</title>
			<link>https://example.org/item</link>
			<dc:creator>Me (me@example.com)</dc:creator>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Author != "Me (me@example.com)" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}

func TestParseEntryWithItunesAuthor(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
	<channel>
		<title>Example</title>
		<link>https://example.org/</link>
		<item>
			<title>Test</title>
			<link>https://example.org/item</link>
			<itunes:author>Someone</itunes:author>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Author != "Someone" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}

func TestParseFeedWithItunesAuthor(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
	<channel>
		<title>Example</title>
		<link>https://example.org/</link>
		<itunes:author>Someone</itunes:author>
		<item>
			<title>Test</title>
			<link>https://example.org/item</link>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Author != "Someone" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}

func TestParseEntryWithDublinCoreDate(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
	<channel>
		<title>Example</title>
		<link>http://example.org/</link>
		<item>
			<title>Item 1</title>
			<link>http://example.org/item1</link>
			<description>Description.</description>
			<guid isPermaLink="false">UUID</guid>
			<dc:date>2002-09-29T23:40:06-05:00</dc:date>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	location, _ := time.LoadLocation("EST")
	expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
	if !feed.Entries[0].Date.Equal(expectedDate) {
		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
	}
}

func TestParseEntryWithContentEncoded(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
	<channel>
		<title>Example</title>
		<link>http://example.org/</link>
		<item>
			<title>Item 1</title>
			<link>http://example.org/item1</link>
			<description>Description.</description>
			<guid isPermaLink="false">UUID</guid>
			<content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}
}

func TestParseEntryWithFeedBurnerLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
	<channel>
		<title>Example</title>
		<link>http://example.org/</link>
		<item>
			<title>Item 1</title>
			<link>http://example.org/item1</link>
			<feedburner:origLink>http://example.org/original</feedburner:origLink>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].URL != "http://example.org/original" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
	}
}

func TestParseEntryTitleWithWhitespaces(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0">
	<channel>
		<title>Example</title>
		<link>http://example.org</link>
		<item>
			<title>
				Some Title
			</title>
			<link>http://www.example.org/entries/1</link>
			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if feed.Entries[0].Title != "Some Title" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}

func TestParseEntryWithEnclosures(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0">
	<channel>
		<title>My Podcast Feed</title>
		<link>http://example.org</link>
		<author>some.email@example.org</author>
		<item>
			<title>Podcasting with RSS</title>
			<link>http://www.example.org/entries/1</link>
			<description>An overview of RSS podcasting</description>
			<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
			<guid isPermaLink="true">http://www.example.org/entries/1</guid>
			<enclosure url="http://www.example.org/myaudiofile.mp3"
				length="12345"
				type="audio/mpeg" />
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if len(feed.Entries[0].Enclosures) != 1 {
		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
	}

	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
	}

	if feed.Entries[0].Enclosures[0].Size != 12345 {
		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
	}
}

func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
	<channel>
		<title>My Example Feed</title>
		<link>http://example.org</link>
		<author>some.email@example.org</author>
		<item>
			<title>Example Item</title>
			<link>http://www.example.org/entries/1</link>
			<enclosure
				url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
				length="76192460"
				type="audio/mpeg" />
			<feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
		</item>
	</channel>
	</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Error(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].URL != "http://www.example.org/entries/1" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if len(feed.Entries[0].Enclosures) != 1 {
		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
	}

	if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
	}

	if feed.Entries[0].Enclosures[0].Size != 76192460 {
		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
	}
}

func TestParseInvalidXml(t *testing.T) {
	data := `garbage`
	_, err := Parse(bytes.NewBufferString(data))
	if err == nil {
		t.Error("Parse should returns an error")
	}

	if _, ok := err.(errors.LocalizedError); !ok {
		t.Error("The error returned must be a LocalizedError")
	}
}
@@ -1,235 +0,0 @@
// Copyright 2017 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

package rss

import (
	"encoding/xml"
	"log"
	"path"
	"strconv"
	"strings"
	"time"

	"github.com/miniflux/miniflux2/helper"
	"github.com/miniflux/miniflux2/model"
	"github.com/miniflux/miniflux2/reader/feed/date"
	"github.com/miniflux/miniflux2/reader/processor"
	"github.com/miniflux/miniflux2/reader/sanitizer"
)

type rssFeed struct {
	XMLName xml.Name `xml:"rss"`
	Version string `xml:"version,attr"`
	Title string `xml:"channel>title"`
	Links []rssLink `xml:"channel>link"`
	Language string `xml:"channel>language"`
	Description string `xml:"channel>description"`
	PubDate string `xml:"channel>pubDate"`
	ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
	Items []rssItem `xml:"channel>item"`
}

type rssLink struct {
	XMLName xml.Name
	Data string `xml:",chardata"`
	Href string `xml:"href,attr"`
	Rel string `xml:"rel,attr"`
}

type rssItem struct {
	GUID string `xml:"guid"`
	Title string `xml:"title"`
	Links []rssLink `xml:"link"`
	OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
	Description string `xml:"description"`
	Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
	PubDate string `xml:"pubDate"`
	Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
	Authors []rssAuthor `xml:"author"`
	Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
	Enclosures []rssEnclosure `xml:"enclosure"`
	OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
}

type rssAuthor struct {
	XMLName xml.Name
	Data string `xml:",chardata"`
	Name string `xml:"name"`
}

type rssEnclosure struct {
	URL string `xml:"url,attr"`
	Type string `xml:"type,attr"`
	Length string `xml:"length,attr"`
}

func (r *rssFeed) GetSiteURL() string {
	for _, element := range r.Links {
		if element.XMLName.Space == "" {
			return element.Data
		}
	}

	return ""
}

func (r *rssFeed) GetFeedURL() string {
	for _, element := range r.Links {
		if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
			return element.Href
		}
	}

	return ""
}

func (r *rssFeed) Transform() *model.Feed {
	feed := new(model.Feed)
	feed.SiteURL = r.GetSiteURL()
	feed.FeedURL = r.GetFeedURL()
	feed.Title = sanitizer.StripTags(r.Title)

	if feed.Title == "" {
		feed.Title = feed.SiteURL
	}

	for _, item := range r.Items {
		entry := item.Transform()

		if entry.Author == "" && r.ItunesAuthor != "" {
			entry.Author = r.ItunesAuthor
		}
		entry.Author = sanitizer.StripTags(entry.Author)

		if entry.URL == "" {
			entry.URL = feed.SiteURL
		}

		feed.Entries = append(feed.Entries, entry)
	}

	return feed
}

func (r *rssItem) GetDate() time.Time {
	value := r.PubDate
	if r.Date != "" {
		value = r.Date
	}

	if value != "" {
		result, err := date.Parse(value)
		if err != nil {
			log.Println(err)
			return time.Now()
		}

		return result
	}

	return time.Now()
}

func (r *rssItem) GetAuthor() string {
	for _, element := range r.Authors {
		if element.Name != "" {
			return element.Name
		}

		if element.Data != "" {
			return element.Data
		}
	}

	return r.Creator
}

func (r *rssItem) GetHash() string {
	for _, value := range []string{r.GUID, r.GetURL()} {
		if value != "" {
			return helper.Hash(value)
		}
	}

	return ""
}

func (r *rssItem) GetContent() string {
	if r.Content != "" {
		return r.Content
	}

	return r.Description
}

func (r *rssItem) GetURL() string {
	if r.OriginalLink != "" {
		return r.OriginalLink
	}

	for _, link := range r.Links {
		if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
			return link.Href
		}

		if link.Data != "" {
			return link.Data
		}
	}

	return ""
}

func (r *rssItem) GetEnclosures() model.EnclosureList {
	enclosures := make(model.EnclosureList, 0)

	for _, enclosure := range r.Enclosures {
		length, _ := strconv.Atoi(enclosure.Length)
		enclosureURL := enclosure.URL

		if r.OrigEnclosureLink != "" {
			filename := path.Base(r.OrigEnclosureLink)
			if strings.Contains(enclosureURL, filename) {
				enclosureURL = r.OrigEnclosureLink
			}
		}

		enclosures = append(enclosures, &model.Enclosure{
			URL: enclosureURL,
			MimeType: enclosure.Type,
			Size: length,
		})
	}

	return enclosures
}

func (r *rssItem) Transform() *model.Entry {
	entry := new(model.Entry)
	entry.URL = r.GetURL()
	entry.Date = r.GetDate()
	entry.Author = r.GetAuthor()
	entry.Hash = r.GetHash()
	entry.Content = processor.ItemContentProcessor(entry.URL, r.GetContent())
	entry.Title = sanitizer.StripTags(strings.Trim(r.Title, " \n\t"))
	entry.Enclosures = r.GetEnclosures()

	if entry.Title == "" {
		entry.Title = entry.URL
	}

	return entry
}

func isValidLinkRelation(rel string) bool {
	switch rel {
	case "", "alternate", "enclosure", "related", "self", "via":
		return true
	default:
		if strings.HasPrefix(rel, "http") {
			return true
		}
		return false
	}
}