mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
First commit
This commit is contained in:
commit
8ffb773f43
2121 changed files with 1118910 additions and 0 deletions
214
reader/feed/atom/atom.go
Normal file
214
reader/feed/atom/atom.go
Normal file
|
@ -0,0 +1,214 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package atom
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/feed/date"
|
||||
"github.com/miniflux/miniflux2/reader/processor"
|
||||
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||
"log"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// AtomFeed is the decoding target for the root <feed> element in the
// http://www.w3.org/2005/Atom namespace.
type AtomFeed struct {
	XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"`
	ID      string      `xml:"id"`
	Title   string      `xml:"title"`
	Author  Author      `xml:"author"`
	Links   []Link      `xml:"link"`
	Entries []AtomEntry `xml:"entry"`
}

// AtomEntry is the decoding target for a single <entry> element.
type AtomEntry struct {
	ID      string `xml:"id"`
	Title   string `xml:"title"`
	Updated string `xml:"updated"`
	Links   []Link `xml:"link"`
	Summary string `xml:"summary"`
	Content Content `xml:"content"`
	// MediaGroup is read from the <group> element of the
	// http://search.yahoo.com/mrss/ namespace.
	MediaGroup MediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
	Author     Author     `xml:"author"`
}

// Author holds the <name> and <email> children of an <author> element.
type Author struct {
	Name  string `xml:"name"`
	Email string `xml:"email"`
}

// Link holds the attributes of a <link> element.
type Link struct {
	Url    string `xml:"href,attr"`
	Type   string `xml:"type,attr"`
	Rel    string `xml:"rel,attr"`
	Length string `xml:"length,attr"` // kept as text; converted by the caller
}

// Content holds a <content> element in two forms: its character data and
// its raw inner XML.
type Content struct {
	Type string `xml:"type,attr"`
	Data string `xml:",chardata"`
	Xml  string `xml:",innerxml"`
}

// MediaGroup holds the <description> child of a Media RSS <group> element.
type MediaGroup struct {
	Description string `xml:"http://search.yahoo.com/mrss/ description"`
}
|
||||
|
||||
func (a *AtomFeed) getSiteURL() string {
|
||||
for _, link := range a.Links {
|
||||
if strings.ToLower(link.Rel) == "alternate" {
|
||||
return link.Url
|
||||
}
|
||||
|
||||
if link.Rel == "" && link.Type == "" {
|
||||
return link.Url
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (a *AtomFeed) getFeedURL() string {
|
||||
for _, link := range a.Links {
|
||||
if strings.ToLower(link.Rel) == "self" {
|
||||
return link.Url
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (a *AtomFeed) Transform() *model.Feed {
|
||||
feed := new(model.Feed)
|
||||
feed.FeedURL = a.getFeedURL()
|
||||
feed.SiteURL = a.getSiteURL()
|
||||
feed.Title = sanitizer.StripTags(a.Title)
|
||||
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, entry := range a.Entries {
|
||||
item := entry.Transform()
|
||||
if item.Author == "" {
|
||||
item.Author = a.GetAuthor()
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, item)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
func (a *AtomFeed) GetAuthor() string {
|
||||
return getAuthor(a.Author)
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetDate() time.Time {
|
||||
if e.Updated != "" {
|
||||
result, err := date.Parse(e.Updated)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetURL() string {
|
||||
for _, link := range e.Links {
|
||||
if strings.ToLower(link.Rel) == "alternate" {
|
||||
return link.Url
|
||||
}
|
||||
|
||||
if link.Rel == "" && link.Type == "" {
|
||||
return link.Url
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetAuthor() string {
|
||||
return getAuthor(e.Author)
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetHash() string {
|
||||
for _, value := range []string{e.ID, e.GetURL()} {
|
||||
if value != "" {
|
||||
return helper.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetContent() string {
|
||||
if e.Content.Type == "html" || e.Content.Type == "text" {
|
||||
return e.Content.Data
|
||||
}
|
||||
|
||||
if e.Content.Type == "xhtml" {
|
||||
return e.Content.Xml
|
||||
}
|
||||
|
||||
if e.Summary != "" {
|
||||
return e.Summary
|
||||
}
|
||||
|
||||
if e.MediaGroup.Description != "" {
|
||||
return e.MediaGroup.Description
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (e *AtomEntry) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, link := range e.Links {
|
||||
if strings.ToLower(link.Rel) == "enclosure" {
|
||||
length, _ := strconv.Atoi(link.Length)
|
||||
enclosures = append(enclosures, &model.Enclosure{URL: link.Url, MimeType: link.Type, Size: length})
|
||||
}
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (e *AtomEntry) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = e.GetURL()
|
||||
entry.Date = e.GetDate()
|
||||
entry.Author = sanitizer.StripTags(e.GetAuthor())
|
||||
entry.Hash = e.GetHash()
|
||||
entry.Content = processor.ItemContentProcessor(entry.URL, e.GetContent())
|
||||
entry.Title = sanitizer.StripTags(strings.Trim(e.Title, " \n\t"))
|
||||
entry.Enclosures = e.GetEnclosures()
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
func getAuthor(author Author) string {
|
||||
if author.Name != "" {
|
||||
return author.Name
|
||||
}
|
||||
|
||||
if author.Email != "" {
|
||||
return author.Email
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
28
reader/feed/atom/parser.go
Normal file
28
reader/feed/atom/parser.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package atom
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct.
|
||||
func Parse(data io.Reader) (*model.Feed, error) {
|
||||
atomFeed := new(AtomFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
err := decoder.Decode(atomFeed)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to parse Atom feed: %v\n", err)
|
||||
}
|
||||
|
||||
return atomFeed.Transform(), nil
|
||||
}
|
319
reader/feed/atom/parser_test.go
Normal file
319
reader/feed/atom/parser_test.go
Normal file
|
@ -0,0 +1,319 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package atom
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseAtomSample(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 18, 30, 2, 0, time.UTC)) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "3841e5cf232f5111fc5841e9eba5f4b26d95e7d7124902e0f7272729d65601a6" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/2003/12/13/atom03" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Atom-Powered Robots Run Amok" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "Some text." {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "John Doe" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<link rel="alternate" type="text/html" href="https://example.org/"/>
|
||||
<link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect feed title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "http://example.org/2003/12/13/atom03" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link rel="alternate" type="text/html" href="https://example.org/"/>
|
||||
<link rel="self" type="application/atom+xml" href="https://example.org/feed"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/feed" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<title>
|
||||
Some Title
|
||||
</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Some Title" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAuthorName(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<author>
|
||||
<name>Me</name>
|
||||
<email>me@localhost</email>
|
||||
</author>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Me" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutAuthorName(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
|
||||
<entry>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<author>
|
||||
<name/>
|
||||
<email>me@localhost</email>
|
||||
</author>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "me@localhost" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<id>http://www.example.org/myfeed</id>
|
||||
<title>My Podcast Feed</title>
|
||||
<updated>2005-07-15T12:00:00Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<link href="http://example.org" />
|
||||
<link rel="self" href="http://example.org/myfeed" />
|
||||
<entry>
|
||||
<id>http://www.example.org/entries/1</id>
|
||||
<title>Atom 1.0</title>
|
||||
<updated>2005-07-15T12:00:00Z</updated>
|
||||
<link href="http://www.example.org/entries/1" />
|
||||
<summary>An overview of Atom 1.0</summary>
|
||||
<link rel="enclosure"
|
||||
type="audio/mpeg"
|
||||
title="MP3"
|
||||
href="http://www.example.org/myaudiofile.mp3"
|
||||
length="1234" />
|
||||
<link rel="enclosure"
|
||||
type="application/x-bittorrent"
|
||||
title="BitTorrent"
|
||||
href="http://www.example.org/myaudiofile.torrent"
|
||||
length="4567" />
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml">
|
||||
<h1>Show Notes</h1>
|
||||
<ul>
|
||||
<li>00:01:00 -- Introduction</li>
|
||||
<li>00:15:00 -- Talking about Atom 1.0</li>
|
||||
<li>00:30:00 -- Wrapping up</li>
|
||||
</ul>
|
||||
</div>
|
||||
</content>
|
||||
</entry>
|
||||
</feed>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 2 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 1234 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].URL != "http://www.example.org/myaudiofile.torrent" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[1].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].MimeType != "application/x-bittorrent" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[1].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[1].Size != 4567 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[1].Size)
|
||||
}
|
||||
}
|
203
reader/feed/date/parser.go
Normal file
203
reader/feed/date/parser.go
Normal file
|
@ -0,0 +1,203 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package date
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// dateFormats lists the time layouts tried, in order, when parsing a feed
// date. Taken from github.com/mjibson/goread.
//
// Order matters: the first layout that parses the input wins.
var dateFormats = []string{
	time.RFC822,  // RSS
	time.RFC822Z, // RSS
	time.RFC3339, // Atom
	time.UnixDate,
	time.RubyDate,
	time.RFC850,
	time.RFC1123Z,
	time.RFC1123,
	time.ANSIC,
	"Mon, January 2 2006 15:04:05 -0700",
	"Mon, January 02, 2006, 15:04:05 MST",
	"Mon, January 02, 2006 15:04:05 MST",
	"Mon, Jan 2, 2006 15:04 MST",
	"Mon, Jan 2 2006 15:04 MST",
	"Mon, Jan 2, 2006 15:04:05 MST",
	"Mon, Jan 2 2006 15:04:05 -700",
	"Mon, Jan 2 2006 15:04:05 -0700",
	"Mon Jan 2 15:04 2006",
	"Mon Jan 2 15:04:05 2006 MST",
	"Mon Jan 02, 2006 3:04 pm",
	"Mon, Jan 02,2006 15:04:05 MST",
	"Mon Jan 02 2006 15:04:05 -0700",
	"Monday, January 2, 2006 15:04:05 MST",
	"Monday, January 2, 2006 03:04 PM",
	"Monday, January 2, 2006",
	"Monday, January 02, 2006",
	"Monday, 2 January 2006 15:04:05 MST",
	"Monday, 2 January 2006 15:04:05 -0700",
	"Monday, 2 Jan 2006 15:04:05 MST",
	"Monday, 2 Jan 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05 MST",
	"Monday, 02 January 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05",
	"Mon, 2 January 2006 15:04 MST",
	"Mon, 2 January 2006, 15:04 -0700",
	"Mon, 2 January 2006, 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 -0700",
	"Mon, 2 January 2006",
	"Mon, 2 Jan 2006 3:04:05 PM -0700",
	"Mon, 2 Jan 2006 15:4:5 MST",
	"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
	"Mon, 2, Jan 2006 15:4",
	"Mon, 2 Jan 2006 15:04 MST",
	"Mon, 2 Jan 2006, 15:04 -0700",
	"Mon, 2 Jan 2006 15:04 -0700",
	"Mon, 2 Jan 2006 15:04:05 UT",
	"Mon, 2 Jan 2006 15:04:05MST",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon 2 Jan 2006 15:04:05 MST",
	"mon,2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 -0700 MST",
	"Mon, 2 Jan 2006 15:04:05-0700",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05",
	"Mon, 2 Jan 2006 15:04",
	"Mon,2 Jan 2006",
	"Mon, 2 Jan 2006",
	"Mon, 2 Jan 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 -0700",
	"Mon, 2006-01-02 15:04",
	// The hour must be written as the reference value 15; the previous
	// "14" made the "1" parse as a month and the layout never matched.
	"Mon,02 January 2006 15:04:05 MST",
	"Mon, 02 January 2006",
	"Mon, 02 Jan 2006 3:04:05 PM MST",
	"Mon, 02 Jan 2006 15 -0700",
	"Mon,02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 -0700",
	"Mon, 02 Jan 2006 15:04:05 Z",
	"Mon, 02 Jan 2006 15:04:05 UT",
	"Mon, 02 Jan 2006 15:04:05 MST-07:00",
	"Mon, 02 Jan 2006 15:04:05 MST -0700",
	"Mon, 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon , 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 GMT-0700",
	"Mon,02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -07:00",
	"Mon, 02 Jan 2006 15:04:05 --0700",
	"Mon 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -07",
	"Mon, 02 Jan 2006 15:04:05 00",
	"Mon, 02 Jan 2006 15:04:05",
	"Mon, 02 Jan 2006",
	"Mon, 02 Jan 06 15:04:05 MST",
	"January 2, 2006 3:04 PM",
	"January 2, 2006, 3:04 p.m.",
	"January 2, 2006 15:04:05 MST",
	"January 2, 2006 15:04:05",
	"January 2, 2006 03:04 PM",
	"January 2, 2006",
	"January 02, 2006 15:04:05 MST",
	"January 02, 2006 15:04",
	"January 02, 2006 03:04 PM",
	"January 02, 2006",
	"Jan 2, 2006 3:04:05 PM MST",
	"Jan 2, 2006 3:04:05 PM",
	"Jan 2, 2006 15:04:05 MST",
	"Jan 2, 2006",
	"Jan 02 2006 03:04:05PM",
	"Jan 02, 2006",
	"6/1/2 15:04",
	"6-1-2 15:04",
	"2 January 2006 15:04:05 MST",
	"2 January 2006 15:04:05 -0700",
	"2 January 2006",
	"2 Jan 2006 15:04:05 Z",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006",
	"2.1.2006 15:04:05",
	"2/1/2006",
	"2-1-2006",
	"2006 January 02",
	"2006-1-2T15:04:05Z",
	"2006-1-2 15:04:05",
	"2006-1-2",
	"2006-1-02T15:04:05Z",
	"2006-01-02T15:04Z",
	"2006-01-02T15:04-07:00",
	"2006-01-02T15:04:05Z",
	"2006-01-02T15:04:05-07:00:00",
	"2006-01-02T15:04:05:-0700",
	"2006-01-02T15:04:05-0700",
	"2006-01-02T15:04:05-07:00",
	"2006-01-02T15:04:05 -0700",
	"2006-01-02T15:04:05:00",
	"2006-01-02T15:04:05",
	"2006-01-02 at 15:04:05",
	"2006-01-02 15:04:05Z",
	"2006-01-02 15:04:05 MST",
	"2006-01-02 15:04:05-0700",
	"2006-01-02 15:04:05-07:00",
	"2006-01-02 15:04:05 -0700",
	"2006-01-02 15:04",
	"2006-01-02 00:00:00.0 15:04:05.0 -0700",
	"2006/01/02",
	"2006-01-02",
	"15:04 02.01.2006 -0700",
	"1/2/2006 3:04 PM MST",
	"1/2/2006 3:04:05 PM MST",
	"1/2/2006 3:04:05 PM",
	"1/2/2006 15:04:05 MST",
	"1/2/2006",
	"06/1/2 15:04",
	"06-1-2 15:04",
	"02 Monday, Jan 2006 15:04",
	"02 Jan 2006 15:04 MST",
	"02 Jan 2006 15:04:05 UT",
	"02 Jan 2006 15:04:05 MST",
	"02 Jan 2006 15:04:05 -0700",
	"02 Jan 2006 15:04:05",
	"02 Jan 2006",
	"02/01/2006 15:04 MST",
	"02-01-2006 15:04:05 MST",
	"02.01.2006 15:04:05",
	"02/01/2006 15:04:05",
	"02.01.2006 15:04",
	"02/01/2006 - 15:04",
	"02.01.2006 -0700",
	"02/01/2006",
	"02-01-2006",
	"01/02/2006 3:04 PM",
	"01/02/2006 15:04:05 MST",
	"01/02/2006 - 15:04",
	"01/02/2006",
	"01-02-2006",
}
|
||||
|
||||
// Parse parses a given date string using a large
|
||||
// list of commonly found feed date formats.
|
||||
func Parse(ds string) (t time.Time, err error) {
|
||||
d := strings.TrimSpace(ds)
|
||||
if d == "" {
|
||||
return t, fmt.Errorf("Date string is empty")
|
||||
}
|
||||
|
||||
for _, f := range dateFormats {
|
||||
if t, err = time.Parse(f, d); err == nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err = fmt.Errorf("Failed to parse date: %s", ds)
|
||||
return
|
||||
}
|
152
reader/feed/handler.go
Normal file
152
reader/feed/handler.go
Normal file
|
@ -0,0 +1,152 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package feed
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/http"
|
||||
"github.com/miniflux/miniflux2/reader/icon"
|
||||
"github.com/miniflux/miniflux2/storage"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Message templates handed to errors.NewLocalizedError by the feed handler.
var (
	// errRequestFailed takes the underlying request error.
	errRequestFailed = "Unable to execute request: %v"

	// errServerFailure takes the HTTP status code of the response.
	errServerFailure = "Unable to fetch feed (statusCode=%d)."

	// errDuplicate takes the feed URL that is already stored.
	errDuplicate = "This feed already exists (%s)."

	// errNotFound takes the ID of the missing feed.
	errNotFound = "Feed %d not found"
)
|
||||
|
||||
// Handler contains all the logic to create and refresh feeds.
|
||||
type Handler struct {
|
||||
store *storage.Storage
|
||||
}
|
||||
|
||||
// CreateFeed fetch, parse and store a new feed.
|
||||
func (h *Handler) CreateFeed(userID, categoryID int64, url string) (*model.Feed, error) {
|
||||
defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
|
||||
|
||||
client := http.NewHttpClient(url)
|
||||
response, err := client.Get()
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errRequestFailed, err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
||||
}
|
||||
|
||||
if h.store.FeedURLExists(userID, response.EffectiveURL) {
|
||||
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
||||
}
|
||||
|
||||
subscription, err := parseFeed(response.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
subscription.Category = &model.Category{ID: categoryID}
|
||||
subscription.EtagHeader = response.ETag
|
||||
subscription.LastModifiedHeader = response.LastModified
|
||||
subscription.FeedURL = response.EffectiveURL
|
||||
subscription.UserID = userID
|
||||
|
||||
err = h.store.CreateFeed(subscription)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Println("[Handler:CreateFeed] Feed saved with ID:", subscription.ID)
|
||||
|
||||
icon, err := icon.FindIcon(subscription.SiteURL)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
} else if icon == nil {
|
||||
log.Printf("No icon found for feedID=%d\n", subscription.ID)
|
||||
} else {
|
||||
h.store.CreateFeedIcon(subscription, icon)
|
||||
}
|
||||
|
||||
return subscription, nil
|
||||
}
|
||||
|
||||
// RefreshFeed fetch and update a feed if necessary.
|
||||
func (h *Handler) RefreshFeed(userID, feedID int64) error {
|
||||
defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:RefreshFeed] feedID=%d", feedID))
|
||||
|
||||
originalFeed, err := h.store.GetFeedById(userID, feedID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if originalFeed == nil {
|
||||
return errors.NewLocalizedError(errNotFound, feedID)
|
||||
}
|
||||
|
||||
client := http.NewHttpClientWithCacheHeaders(originalFeed.FeedURL, originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
|
||||
response, err := client.Get()
|
||||
if err != nil {
|
||||
customErr := errors.NewLocalizedError(errRequestFailed, err)
|
||||
originalFeed.ParsingErrorCount++
|
||||
originalFeed.ParsingErrorMsg = customErr.Error()
|
||||
h.store.UpdateFeed(originalFeed)
|
||||
return customErr
|
||||
}
|
||||
|
||||
originalFeed.CheckedAt = time.Now()
|
||||
|
||||
if response.HasServerFailure() {
|
||||
err := errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
||||
originalFeed.ParsingErrorCount++
|
||||
originalFeed.ParsingErrorMsg = err.Error()
|
||||
h.store.UpdateFeed(originalFeed)
|
||||
return err
|
||||
}
|
||||
|
||||
if response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
||||
log.Printf("[Handler:RefreshFeed] Feed #%d has been modified\n", feedID)
|
||||
|
||||
subscription, err := parseFeed(response.Body)
|
||||
if err != nil {
|
||||
originalFeed.ParsingErrorCount++
|
||||
originalFeed.ParsingErrorMsg = err.Error()
|
||||
h.store.UpdateFeed(originalFeed)
|
||||
return err
|
||||
}
|
||||
|
||||
originalFeed.EtagHeader = response.ETag
|
||||
originalFeed.LastModifiedHeader = response.LastModified
|
||||
|
||||
if err := h.store.UpdateEntries(originalFeed.UserID, originalFeed.ID, subscription.Entries); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !h.store.HasIcon(originalFeed.ID) {
|
||||
log.Println("[Handler:RefreshFeed] Looking for feed icon")
|
||||
icon, err := icon.FindIcon(originalFeed.SiteURL)
|
||||
if err != nil {
|
||||
log.Println("[Handler:RefreshFeed]", err)
|
||||
} else {
|
||||
h.store.CreateFeedIcon(originalFeed, icon)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Printf("[Handler:RefreshFeed] Feed #%d not modified\n", feedID)
|
||||
}
|
||||
|
||||
originalFeed.ParsingErrorCount = 0
|
||||
originalFeed.ParsingErrorMsg = ""
|
||||
|
||||
return h.store.UpdateFeed(originalFeed)
|
||||
}
|
||||
|
||||
// NewFeedHandler returns a feed handler.
|
||||
func NewFeedHandler(store *storage.Storage) *Handler {
|
||||
return &Handler{store: store}
|
||||
}
|
170
reader/feed/json/json.go
Normal file
170
reader/feed/json/json.go
Normal file
|
@ -0,0 +1,170 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/feed/date"
|
||||
"github.com/miniflux/miniflux2/reader/processor"
|
||||
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// JsonFeed is the decoding target for the top-level JSON Feed object.
type JsonFeed struct {
	Version string     `json:"version"`
	Title   string     `json:"title"`
	SiteURL string     `json:"home_page_url"`
	FeedURL string     `json:"feed_url"`
	Author  JsonAuthor `json:"author"`
	Items   []JsonItem `json:"items"`
}

// JsonAuthor is the decoding target for an "author" object.
type JsonAuthor struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

// JsonItem is the decoding target for one element of the "items" array.
type JsonItem struct {
	ID            string           `json:"id"`
	URL           string           `json:"url"`
	Title         string           `json:"title"`
	Summary       string           `json:"summary"`
	Text          string           `json:"content_text"`
	Html          string           `json:"content_html"`
	DatePublished string           `json:"date_published"` // kept as text; parsed by GetDate
	DateModified  string           `json:"date_modified"`  // kept as text; parsed by GetDate
	Author        JsonAuthor       `json:"author"`
	Attachments   []JsonAttachment `json:"attachments"`
}

// JsonAttachment is the decoding target for one element of an item's
// "attachments" array.
type JsonAttachment struct {
	URL      string `json:"url"`
	MimeType string `json:"mime_type"`
	Title    string `json:"title"`
	Size     int    `json:"size_in_bytes"`
	Duration int    `json:"duration_in_seconds"`
}
|
||||
|
||||
func (j *JsonFeed) GetAuthor() string {
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *JsonFeed) Transform() *model.Feed {
|
||||
feed := new(model.Feed)
|
||||
feed.FeedURL = j.FeedURL
|
||||
feed.SiteURL = j.SiteURL
|
||||
feed.Title = sanitizer.StripTags(j.Title)
|
||||
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, item := range j.Items {
|
||||
entry := item.Transform()
|
||||
if entry.Author == "" {
|
||||
entry.Author = j.GetAuthor()
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetDate() time.Time {
|
||||
for _, value := range []string{j.DatePublished, j.DateModified} {
|
||||
if value != "" {
|
||||
d, err := date.Parse(value)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetAuthor() string {
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetHash() string {
|
||||
for _, value := range []string{j.ID, j.URL, j.Text + j.Html + j.Summary} {
|
||||
if value != "" {
|
||||
return helper.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetTitle() string {
|
||||
for _, value := range []string{j.Title, j.Summary, j.Text, j.Html} {
|
||||
if value != "" {
|
||||
return truncate(value)
|
||||
}
|
||||
}
|
||||
|
||||
return j.URL
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetContent() string {
|
||||
for _, value := range []string{j.Html, j.Text, j.Summary} {
|
||||
if value != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *JsonItem) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, attachment := range j.Attachments {
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: attachment.URL,
|
||||
MimeType: attachment.MimeType,
|
||||
Size: attachment.Size,
|
||||
})
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (j *JsonItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = j.URL
|
||||
entry.Date = j.GetDate()
|
||||
entry.Author = sanitizer.StripTags(j.GetAuthor())
|
||||
entry.Hash = j.GetHash()
|
||||
entry.Content = processor.ItemContentProcessor(entry.URL, j.GetContent())
|
||||
entry.Title = sanitizer.StripTags(strings.Trim(j.GetTitle(), " \n\t"))
|
||||
entry.Enclosures = j.GetEnclosures()
|
||||
return entry
|
||||
}
|
||||
|
||||
func getAuthor(author JsonAuthor) string {
|
||||
if author.Name != "" {
|
||||
return author.Name
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// truncate shortens str to at most 100 characters and appends "..." when
// anything was cut off. Strings of 100 characters or fewer are returned
// unchanged.
//
// The limit is counted in runes and the cut is always made on a rune
// boundary, so a multi-byte UTF-8 character is never split in half.
func truncate(str string) string {
	const maxRunes = 100

	seen := 0
	// Ranging over a string yields the byte offset at which each rune starts.
	for offset := range str {
		if seen == maxRunes {
			// offset is the start of rune number maxRunes+1: cut right before it.
			return str[:offset] + "..."
		}
		seen++
	}

	return str
}
|
23
reader/feed/json/parser.go
Normal file
23
reader/feed/json/parser.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"io"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct.
|
||||
func Parse(data io.Reader) (*model.Feed, error) {
|
||||
jsonFeed := new(JsonFeed)
|
||||
decoder := json.NewDecoder(data)
|
||||
if err := decoder.Decode(&jsonFeed); err != nil {
|
||||
return nil, fmt.Errorf("Unable to parse JSON Feed: %v", err)
|
||||
}
|
||||
|
||||
return jsonFeed.Transform(), nil
|
||||
}
|
345
reader/feed/json/parser_test.go
Normal file
345
reader/feed/json/parser_test.go
Normal file
|
@ -0,0 +1,345 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package json
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseJsonFeed(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "My Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/feed.json" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/second-item" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is a second item." {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != "This is a second item." {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "https://example.org/initial-post" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Title != "Hello, world!" {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Content != "<p>Hello, world!</p>" {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParsePodcast(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
|
||||
"title": "The Record",
|
||||
"home_page_url": "http://therecord.co/",
|
||||
"feed_url": "http://therecord.co/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "http://therecord.co/chris-parrish",
|
||||
"title": "Special #1 - Chris Parrish",
|
||||
"url": "http://therecord.co/chris-parrish",
|
||||
"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
|
||||
"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
|
||||
"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
|
||||
"date_published": "2014-05-09T14:04:00-07:00",
|
||||
"attachments": [
|
||||
{
|
||||
"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
|
||||
"mime_type": "audio/x-m4a",
|
||||
"size_in_bytes": 89970236,
|
||||
"duration_in_seconds": 6629
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "The Record" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://therecord.co/feed.json" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://therecord.co/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
|
||||
t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
|
||||
t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
location, _ := time.LoadLocation("America/Vancouver")
|
||||
if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 89970236 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAuthor(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
|
||||
"title": "Brent Simmons’s Microblog",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"author": {
|
||||
"name": "Brent Simmons",
|
||||
"url": "http://example.org/",
|
||||
"avatar": "https://example.org/avatar.png"
|
||||
},
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Brent Simmons" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithInvalidDate(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "Tomorrow"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if !feed.Entries[0].Date.Before(time.Now()) {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutID(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_text": "Some text."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"url": "https://example.org/item"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTruncateItemTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"title": "` + strings.Repeat("a", 200) + `"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Title) != 103 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
82
reader/feed/parser.go
Normal file
82
reader/feed/parser.go
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package feed
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/feed/atom"
|
||||
"github.com/miniflux/miniflux2/reader/feed/json"
|
||||
"github.com/miniflux/miniflux2/reader/feed/rss"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Feed format identifiers returned by DetectFeedFormat.
const (
	// FormatRss identifies a document whose XML contains an <rss> element.
	FormatRss = "rss"
	// FormatAtom identifies a document whose XML contains a <feed> element.
	FormatAtom = "atom"
	// FormatJson identifies a document that starts with "{".
	FormatJson = "json"
	// FormatUnknown is returned when no other format matched.
	FormatUnknown = "unknown"
)
|
||||
|
||||
func DetectFeedFormat(data io.Reader) string {
|
||||
defer helper.ExecutionTime(time.Now(), "[Feed:DetectFeedFormat]")
|
||||
|
||||
var buffer bytes.Buffer
|
||||
tee := io.TeeReader(data, &buffer)
|
||||
|
||||
decoder := xml.NewDecoder(tee)
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
for {
|
||||
token, _ := decoder.Token()
|
||||
if token == nil {
|
||||
break
|
||||
}
|
||||
|
||||
if element, ok := token.(xml.StartElement); ok {
|
||||
switch element.Name.Local {
|
||||
case "rss":
|
||||
return FormatRss
|
||||
case "feed":
|
||||
return FormatAtom
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(strings.TrimSpace(buffer.String()), "{") {
|
||||
return FormatJson
|
||||
}
|
||||
|
||||
return FormatUnknown
|
||||
}
|
||||
|
||||
func parseFeed(data io.Reader) (*model.Feed, error) {
|
||||
defer helper.ExecutionTime(time.Now(), "[Feed:ParseFeed]")
|
||||
|
||||
var buffer bytes.Buffer
|
||||
io.Copy(&buffer, data)
|
||||
|
||||
reader := bytes.NewReader(buffer.Bytes())
|
||||
format := DetectFeedFormat(reader)
|
||||
reader.Seek(0, io.SeekStart)
|
||||
|
||||
switch format {
|
||||
case FormatAtom:
|
||||
return atom.Parse(reader)
|
||||
case FormatRss:
|
||||
return rss.Parse(reader)
|
||||
case FormatJson:
|
||||
return json.Parse(reader)
|
||||
default:
|
||||
return nil, errors.New("Unsupported feed format")
|
||||
}
|
||||
}
|
169
reader/feed/parser_test.go
Normal file
169
reader/feed/parser_test.go
Normal file
|
@ -0,0 +1,169 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package feed
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectRSS(t *testing.T) {
|
||||
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
|
||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||
|
||||
if format != FormatRss {
|
||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatRss)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectAtom(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatAtom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectAtomWithISOCharset(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatAtom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSON(t *testing.T) {
|
||||
data := `
|
||||
{
|
||||
"version" : "https://jsonfeed.org/version/1",
|
||||
"title" : "Example"
|
||||
}
|
||||
`
|
||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||
|
||||
if format != FormatJson {
|
||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatJson)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectUnknown(t *testing.T) {
|
||||
data := `
|
||||
<!DOCTYPE html> <html> </html>
|
||||
`
|
||||
format := DetectFeedFormat(bytes.NewBufferString(data))
|
||||
|
||||
if format != FormatUnknown {
|
||||
t.Errorf("Wrong format detected: %s instead of %s", format, FormatUnknown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseAtom(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := parseFeed(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRss(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<item>
|
||||
<title>Star City</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := parseFeed(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Liftoff News" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseJson(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := parseFeed(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "My Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseUnknownFeed(t *testing.T) {
|
||||
data := `
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>Title of document</title>
|
||||
</head>
|
||||
<body>
|
||||
some content
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
_, err := parseFeed(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
}
|
28
reader/feed/rss/parser.go
Normal file
28
reader/feed/rss/parser.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct.
|
||||
func Parse(data io.Reader) (*model.Feed, error) {
|
||||
rssFeed := new(RssFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
err := decoder.Decode(rssFeed)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to parse RSS feed: %v", err)
|
||||
}
|
||||
|
||||
return rssFeed.Transform(), nil
|
||||
}
|
466
reader/feed/rss/parser_test.go
Normal file
466
reader/feed/rss/parser_test.go
Normal file
|
@ -0,0 +1,466 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseRss2Sample(t *testing.T) {
|
||||
data := `
|
||||
<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<description>Liftoff to Space Exploration.</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
|
||||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<generator>Weblog Editor 2.0</generator>
|
||||
<managingEditor>editor@example.com</managingEditor>
|
||||
<webMaster>webmaster@example.com</webMaster>
|
||||
<item>
|
||||
<title>Star City</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
<item>
|
||||
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st.</description>
|
||||
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>The Engine That Does More</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
|
||||
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
|
||||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Astronauts' Dirty Laundry</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
|
||||
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
|
||||
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Liftoff News" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://liftoff.msfc.nasa.gov/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 4 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
expectedDate := time.Date(2003, time.June, 3, 9, 39, 21, 0, time.UTC)
|
||||
if !feed.Entries[0].Date.Equal(expectedDate) {
|
||||
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "5b2b4ac2fe1786ddf0fd2da2f1b07f64e691264f41f2db3ea360f31bb6d9152b" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Star City" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Star City</a>.` {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect feed title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithoutTitle(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<link>https://example.org/item</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedURLWithAtomLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/rss" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithAtomAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<author xmlns:author="http://www.w3.org/2005/Atom">
|
||||
<name>Foo Bar</name>
|
||||
<title>Vice President</title>
|
||||
<department/>
|
||||
<company>FooBar Inc.</company>
|
||||
</author>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Foo Bar" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithDublinCoreAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<dc:creator>Me (me@example.com)</dc:creator>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Me (me@example.com)" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithItunesAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
<itunes:author>Someone</itunes:author>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Someone" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithItunesAuthor(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>https://example.org/</link>
|
||||
<itunes:author>Someone</itunes:author>
|
||||
<item>
|
||||
<title>Test</title>
|
||||
<link>https://example.org/item</link>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Someone" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithDublinCoreDate(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<description>Description.</description>
|
||||
<guid isPermaLink="false">UUID</guid>
|
||||
<dc:date>2002-09-29T23:40:06-05:00</dc:date>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
location, _ := time.LoadLocation("EST")
|
||||
expectedDate := time.Date(2002, time.September, 29, 23, 40, 06, 0, location)
|
||||
if !feed.Entries[0].Date.Equal(expectedDate) {
|
||||
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithContentEncoded(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<description>Description.</description>
|
||||
<guid isPermaLink="false">UUID</guid>
|
||||
<content:encoded><![CDATA[<p><a href="http://www.example.org/">Example</a>.</p>]]></content:encoded>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Content != `<p><a href="http://www.example.org/" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">Example</a>.</p>` {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithFeedBurnerLink(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org/</link>
|
||||
<item>
|
||||
<title>Item 1</title>
|
||||
<link>http://example.org/item1</link>
|
||||
<feedburner:origLink>http://example.org/original</feedburner:origLink>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/original" {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryTitleWithWhitespaces(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Example</title>
|
||||
<link>http://example.org</link>
|
||||
<item>
|
||||
<title>
|
||||
Some Title
|
||||
</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "Some Title" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>My Podcast Feed</title>
|
||||
<link>http://example.org</link>
|
||||
<author>some.email@example.org</author>
|
||||
<item>
|
||||
<title>Podcasting with RSS</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<description>An overview of RSS podcasting</description>
|
||||
<pubDate>Fri, 15 Jul 2005 00:00:00 -0500</pubDate>
|
||||
<guid isPermaLink="true">http://www.example.org/entries/1</guid>
|
||||
<enclosure url="http://www.example.org/myaudiofile.mp3"
|
||||
length="12345"
|
||||
type="audio/mpeg" />
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 12345 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEntryWithFeedBurnerEnclosures(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rss version="2.0" xmlns:feedburner="http://rssnamespace.org/feedburner/ext/1.0">
|
||||
<channel>
|
||||
<title>My Example Feed</title>
|
||||
<link>http://example.org</link>
|
||||
<author>some.email@example.org</author>
|
||||
<item>
|
||||
<title>Example Item</title>
|
||||
<link>http://www.example.org/entries/1</link>
|
||||
<enclosure
|
||||
url="http://feedproxy.google.com/~r/example/~5/lpMyFSCvubs/File.mp3"
|
||||
length="76192460"
|
||||
type="audio/mpeg" />
|
||||
<feedburner:origEnclosureLink>http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3</feedburner:origEnclosureLink>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://www.example.org/entries/1" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Enclosures) != 1 {
|
||||
t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].URL != "http://example.org/67ca416c-f22a-4228-a681-68fc9998ec10/File.mp3" {
|
||||
t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" {
|
||||
t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Enclosures[0].Size != 76192460 {
|
||||
t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
|
||||
}
|
||||
}
|
207
reader/feed/rss/rss.go
Normal file
207
reader/feed/rss/rss.go
Normal file
|
@ -0,0 +1,207 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rss
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/feed/date"
|
||||
"github.com/miniflux/miniflux2/reader/processor"
|
||||
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||
"log"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RssLink is one <link> element of an RSS channel. The element's qualified
// name is kept in XMLName, its text in Data and its href attribute in Href.
type RssLink struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Href    string `xml:"href,attr"`
}
|
||||
|
||||
type RssFeed struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Title string `xml:"channel>title"`
|
||||
Links []RssLink `xml:"channel>link"`
|
||||
Language string `xml:"channel>language"`
|
||||
Description string `xml:"channel>description"`
|
||||
PubDate string `xml:"channel>pubDate"`
|
||||
ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
|
||||
Items []RssItem `xml:"channel>item"`
|
||||
}
|
||||
|
||||
type RssItem struct {
|
||||
Guid string `xml:"guid"`
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
OriginalLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
|
||||
Description string `xml:"description"`
|
||||
Content string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
||||
PubDate string `xml:"pubDate"`
|
||||
Date string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
||||
Authors []RssAuthor `xml:"author"`
|
||||
Creator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
||||
Enclosures []RssEnclosure `xml:"enclosure"`
|
||||
OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
|
||||
}
|
||||
|
||||
// RssAuthor is one <author> element of an item. Data holds the element's own
// text and Name the text of a nested <name> child, when there is one.
type RssAuthor struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Name    string `xml:"name"`
}
|
||||
|
||||
// RssEnclosure holds the url, type and length attributes of an <enclosure>
// element. Length is kept as the raw attribute text.
type RssEnclosure struct {
	Url    string `xml:"url,attr"`
	Type   string `xml:"type,attr"`
	Length string `xml:"length,attr"`
}
|
||||
|
||||
func (r *RssFeed) GetSiteURL() string {
|
||||
for _, elem := range r.Links {
|
||||
if elem.XMLName.Space == "" {
|
||||
return elem.Data
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *RssFeed) GetFeedURL() string {
|
||||
for _, elem := range r.Links {
|
||||
if elem.XMLName.Space == "http://www.w3.org/2005/Atom" {
|
||||
return elem.Href
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *RssFeed) Transform() *model.Feed {
|
||||
feed := new(model.Feed)
|
||||
feed.SiteURL = r.GetSiteURL()
|
||||
feed.FeedURL = r.GetFeedURL()
|
||||
feed.Title = sanitizer.StripTags(r.Title)
|
||||
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, item := range r.Items {
|
||||
entry := item.Transform()
|
||||
|
||||
if entry.Author == "" && r.ItunesAuthor != "" {
|
||||
entry.Author = r.ItunesAuthor
|
||||
}
|
||||
entry.Author = sanitizer.StripTags(entry.Author)
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
func (i *RssItem) GetDate() time.Time {
|
||||
value := i.PubDate
|
||||
if i.Date != "" {
|
||||
value = i.Date
|
||||
}
|
||||
|
||||
if value != "" {
|
||||
result, err := date.Parse(value)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (i *RssItem) GetAuthor() string {
|
||||
for _, element := range i.Authors {
|
||||
if element.Name != "" {
|
||||
return element.Name
|
||||
}
|
||||
|
||||
if element.Data != "" {
|
||||
return element.Data
|
||||
}
|
||||
}
|
||||
|
||||
return i.Creator
|
||||
}
|
||||
|
||||
func (i *RssItem) GetHash() string {
|
||||
for _, value := range []string{i.Guid, i.Link} {
|
||||
if value != "" {
|
||||
return helper.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (i *RssItem) GetContent() string {
|
||||
if i.Content != "" {
|
||||
return i.Content
|
||||
}
|
||||
|
||||
return i.Description
|
||||
}
|
||||
|
||||
func (i *RssItem) GetURL() string {
|
||||
if i.OriginalLink != "" {
|
||||
return i.OriginalLink
|
||||
}
|
||||
|
||||
return i.Link
|
||||
}
|
||||
|
||||
func (i *RssItem) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, enclosure := range i.Enclosures {
|
||||
length, _ := strconv.Atoi(enclosure.Length)
|
||||
enclosureURL := enclosure.Url
|
||||
|
||||
if i.OrigEnclosureLink != "" {
|
||||
filename := path.Base(i.OrigEnclosureLink)
|
||||
if strings.Contains(enclosureURL, filename) {
|
||||
enclosureURL = i.OrigEnclosureLink
|
||||
}
|
||||
}
|
||||
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: enclosureURL,
|
||||
MimeType: enclosure.Type,
|
||||
Size: length,
|
||||
})
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (i *RssItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = i.GetURL()
|
||||
entry.Date = i.GetDate()
|
||||
entry.Author = i.GetAuthor()
|
||||
entry.Hash = i.GetHash()
|
||||
entry.Content = processor.ItemContentProcessor(entry.URL, i.GetContent())
|
||||
entry.Title = sanitizer.StripTags(strings.Trim(i.Title, " \n\t"))
|
||||
entry.Enclosures = i.GetEnclosures()
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
return entry
|
||||
}
|
95
reader/http/client.go
Normal file
95
reader/http/client.go
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
// HTTP_USER_AGENT is the value sent in the User-Agent header of requests.
const HTTP_USER_AGENT = "Miniflux <https://miniflux.net/>"
|
||||
|
||||
// HttpClient describes a single GET request: the URL to fetch, the optional
// cache validators to send with it, and whether TLS certificate verification
// is skipped.
type HttpClient struct {
	// url is the address to request.
	url string
	// etagHeader, when set, is sent as If-None-Match.
	etagHeader string
	// lastModifiedHeader, when set, is sent as If-Modified-Since.
	lastModifiedHeader string
	// Insecure disables TLS certificate verification when true.
	Insecure bool
}
|
||||
|
||||
func (h *HttpClient) Get() (*ServerResponse, error) {
|
||||
defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[HttpClient:Get] url=%s", h.url))
|
||||
u, _ := url.Parse(h.url)
|
||||
|
||||
req := &http.Request{
|
||||
URL: u,
|
||||
Method: "GET",
|
||||
Header: h.buildHeaders(),
|
||||
}
|
||||
|
||||
client := h.buildClient()
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
response := &ServerResponse{
|
||||
Body: resp.Body,
|
||||
StatusCode: resp.StatusCode,
|
||||
EffectiveURL: resp.Request.URL.String(),
|
||||
LastModified: resp.Header.Get("Last-Modified"),
|
||||
ETag: resp.Header.Get("ETag"),
|
||||
ContentType: resp.Header.Get("Content-Type"),
|
||||
}
|
||||
|
||||
log.Println("[HttpClient:Get]",
|
||||
"OriginalURL:", h.url,
|
||||
"StatusCode:", response.StatusCode,
|
||||
"ETag:", response.ETag,
|
||||
"LastModified:", response.LastModified,
|
||||
"EffectiveURL:", response.EffectiveURL,
|
||||
)
|
||||
|
||||
return response, err
|
||||
}
|
||||
|
||||
func (h *HttpClient) buildClient() http.Client {
|
||||
if h.Insecure {
|
||||
transport := &http.Transport{
|
||||
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
|
||||
}
|
||||
|
||||
return http.Client{Transport: transport}
|
||||
}
|
||||
|
||||
return http.Client{}
|
||||
}
|
||||
|
||||
func (h *HttpClient) buildHeaders() http.Header {
|
||||
headers := make(http.Header)
|
||||
headers.Add("User-Agent", HTTP_USER_AGENT)
|
||||
|
||||
if h.etagHeader != "" {
|
||||
headers.Add("If-None-Match", h.etagHeader)
|
||||
}
|
||||
|
||||
if h.lastModifiedHeader != "" {
|
||||
headers.Add("If-Modified-Since", h.lastModifiedHeader)
|
||||
}
|
||||
|
||||
return headers
|
||||
}
|
||||
|
||||
func NewHttpClient(url string) *HttpClient {
|
||||
return &HttpClient{url: url, Insecure: false}
|
||||
}
|
||||
|
||||
func NewHttpClientWithCacheHeaders(url, etagHeader, lastModifiedHeader string) *HttpClient {
|
||||
return &HttpClient{url: url, etagHeader: etagHeader, lastModifiedHeader: lastModifiedHeader, Insecure: false}
|
||||
}
|
32
reader/http/response.go
Normal file
32
reader/http/response.go
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package http
|
||||
|
||||
import "io"
|
||||
|
||||
// ServerResponse carries the parts of an HTTP response that callers use: the
// body, the status code, the URL that was finally fetched, the cache
// validators and the content type.
type ServerResponse struct {
	Body         io.Reader
	StatusCode   int
	EffectiveURL string
	LastModified string
	ETag         string
	ContentType  string
}

// HasServerFailure reports whether the status code is 400 or above.
func (s *ServerResponse) HasServerFailure() bool {
	return s.StatusCode >= 400
}

// IsModified reports whether the resource changed compared to the given
// previously stored validators.
//
// The resource is considered unchanged when the server answered 304, or when
// the response carries an ETag or a Last-Modified value equal to the stored
// one. Each validator is checked on its own, so a server that sends only one
// of them is still handled. Empty response validators never match.
func (s *ServerResponse) IsModified(etag, lastModified string) bool {
	if s.StatusCode == 304 {
		return false
	}

	if s.ETag != "" && s.ETag == etag {
		return false
	}

	if s.LastModified != "" && s.LastModified == lastModified {
		return false
	}

	return true
}
|
109
reader/icon/finder.go
Normal file
109
reader/icon/finder.go
Normal file
|
@ -0,0 +1,109 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package icon
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/reader/http"
|
||||
"github.com/miniflux/miniflux2/reader/url"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// FindIcon try to find the website's icon.
|
||||
func FindIcon(websiteURL string) (*model.Icon, error) {
|
||||
rootURL := url.GetRootURL(websiteURL)
|
||||
client := http.NewHttpClient(rootURL)
|
||||
response, err := client.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to download website index page: %v", err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("unable to download website index page: status=%d", response.StatusCode)
|
||||
}
|
||||
|
||||
iconURL, err := parseDocument(rootURL, response.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Println("[FindIcon] Fetching icon =>", iconURL)
|
||||
icon, err := downloadIcon(iconURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
}
|
||||
|
||||
func parseDocument(websiteURL string, data io.Reader) (string, error) {
|
||||
queries := []string{
|
||||
"link[rel='shortcut icon']",
|
||||
"link[rel='Shortcut Icon']",
|
||||
"link[rel='icon shortcut']",
|
||||
"link[rel='icon']",
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to read document: %v", err)
|
||||
}
|
||||
|
||||
var iconURL string
|
||||
for _, query := range queries {
|
||||
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
||||
if href, exists := s.Attr("href"); exists {
|
||||
iconURL = href
|
||||
}
|
||||
})
|
||||
|
||||
if iconURL != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if iconURL == "" {
|
||||
iconURL = url.GetRootURL(websiteURL) + "favicon.ico"
|
||||
} else {
|
||||
iconURL, _ = url.GetAbsoluteURL(websiteURL, iconURL)
|
||||
}
|
||||
|
||||
return iconURL, nil
|
||||
}
|
||||
|
||||
func downloadIcon(iconURL string) (*model.Icon, error) {
|
||||
client := http.NewHttpClient(iconURL)
|
||||
response, err := client.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to download iconURL: %v", err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("unable to download icon: status=%d", response.StatusCode)
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read downloaded icon: %v", err)
|
||||
}
|
||||
|
||||
if len(body) == 0 {
|
||||
return nil, fmt.Errorf("downloaded icon is empty, iconURL=%s", iconURL)
|
||||
}
|
||||
|
||||
icon := &model.Icon{
|
||||
Hash: helper.HashFromBytes(body),
|
||||
MimeType: response.ContentType,
|
||||
Content: body,
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
}
|
94
reader/opml/handler.go
Normal file
94
reader/opml/handler.go
Normal file
|
@ -0,0 +1,94 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/model"
|
||||
"github.com/miniflux/miniflux2/storage"
|
||||
"io"
|
||||
"log"
|
||||
)
|
||||
|
||||
type OpmlHandler struct {
|
||||
store *storage.Storage
|
||||
}
|
||||
|
||||
func (o *OpmlHandler) Export(userID int64) (string, error) {
|
||||
feeds, err := o.store.GetFeeds(userID)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return "", errors.New("Unable to fetch feeds.")
|
||||
}
|
||||
|
||||
var subscriptions SubcriptionList
|
||||
for _, feed := range feeds {
|
||||
subscriptions = append(subscriptions, &Subcription{
|
||||
Title: feed.Title,
|
||||
FeedURL: feed.FeedURL,
|
||||
SiteURL: feed.SiteURL,
|
||||
CategoryName: feed.Category.Title,
|
||||
})
|
||||
}
|
||||
|
||||
return Serialize(subscriptions), nil
|
||||
}
|
||||
|
||||
func (o *OpmlHandler) Import(userID int64, data io.Reader) (err error) {
|
||||
subscriptions, err := Parse(data)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, subscription := range subscriptions {
|
||||
if !o.store.FeedURLExists(userID, subscription.FeedURL) {
|
||||
var category *model.Category
|
||||
|
||||
if subscription.CategoryName == "" {
|
||||
category, err = o.store.GetFirstCategory(userID)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return errors.New("Unable to find first category.")
|
||||
}
|
||||
} else {
|
||||
category, err = o.store.GetCategoryByTitle(userID, subscription.CategoryName)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return errors.New("Unable to search category by title.")
|
||||
}
|
||||
|
||||
if category == nil {
|
||||
category = &model.Category{
|
||||
UserID: userID,
|
||||
Title: subscription.CategoryName,
|
||||
}
|
||||
|
||||
err := o.store.CreateCategory(category)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return fmt.Errorf(`Unable to create this category: "%s".`, subscription.CategoryName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
feed := &model.Feed{
|
||||
UserID: userID,
|
||||
Title: subscription.Title,
|
||||
FeedURL: subscription.FeedURL,
|
||||
SiteURL: subscription.SiteURL,
|
||||
Category: category,
|
||||
}
|
||||
|
||||
o.store.CreateFeed(feed)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewOpmlHandler(store *storage.Storage) *OpmlHandler {
|
||||
return &OpmlHandler{store: store}
|
||||
}
|
82
reader/opml/opml.go
Normal file
82
reader/opml/opml.go
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import "encoding/xml"
|
||||
|
||||
type Opml struct {
|
||||
XMLName xml.Name `xml:"opml"`
|
||||
Version string `xml:"version,attr"`
|
||||
Outlines []Outline `xml:"body>outline"`
|
||||
}
|
||||
|
||||
// Outline is the XML mapping of an <outline> element. It carries the title,
// text, xmlUrl (feed URL) and htmlUrl (site URL) attributes, plus any nested
// outlines.
type Outline struct {
	Title    string    `xml:"title,attr,omitempty"`
	Text     string    `xml:"text,attr"`
	FeedURL  string    `xml:"xmlUrl,attr,omitempty"`
	SiteURL  string    `xml:"htmlUrl,attr,omitempty"`
	Outlines []Outline `xml:"outline,omitempty"`
}
|
||||
|
||||
func (o *Outline) GetTitle() string {
|
||||
if o.Title != "" {
|
||||
return o.Title
|
||||
}
|
||||
|
||||
if o.Text != "" {
|
||||
return o.Text
|
||||
}
|
||||
|
||||
if o.SiteURL != "" {
|
||||
return o.SiteURL
|
||||
}
|
||||
|
||||
if o.FeedURL != "" {
|
||||
return o.FeedURL
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (o *Outline) GetSiteURL() string {
|
||||
if o.SiteURL != "" {
|
||||
return o.SiteURL
|
||||
}
|
||||
|
||||
return o.FeedURL
|
||||
}
|
||||
|
||||
func (o *Outline) IsCategory() bool {
|
||||
return o.Text != "" && o.SiteURL == "" && o.FeedURL == ""
|
||||
}
|
||||
|
||||
func (o *Outline) Append(subscriptions SubcriptionList, category string) SubcriptionList {
|
||||
if o.FeedURL != "" {
|
||||
subscriptions = append(subscriptions, &Subcription{
|
||||
Title: o.GetTitle(),
|
||||
FeedURL: o.FeedURL,
|
||||
SiteURL: o.GetSiteURL(),
|
||||
CategoryName: category,
|
||||
})
|
||||
}
|
||||
|
||||
return subscriptions
|
||||
}
|
||||
|
||||
func (o *Opml) Transform() SubcriptionList {
|
||||
var subscriptions SubcriptionList
|
||||
|
||||
for _, outline := range o.Outlines {
|
||||
if outline.IsCategory() {
|
||||
for _, element := range outline.Outlines {
|
||||
subscriptions = element.Append(subscriptions, outline.Text)
|
||||
}
|
||||
} else {
|
||||
subscriptions = outline.Append(subscriptions, "")
|
||||
}
|
||||
}
|
||||
|
||||
return subscriptions
|
||||
}
|
26
reader/opml/parser.go
Normal file
26
reader/opml/parser.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
func Parse(data io.Reader) (SubcriptionList, error) {
|
||||
opml := new(Opml)
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.CharsetReader = charset.NewReaderLabel
|
||||
|
||||
err := decoder.Decode(opml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to parse OPML file: %v\n", err)
|
||||
}
|
||||
|
||||
return opml.Transform(), nil
|
||||
}
|
138
reader/opml/parser_test.go
Normal file
138
reader/opml/parser_test.go
Normal file
|
@ -0,0 +1,138 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import "testing"
|
||||
import "bytes"
|
||||
|
||||
func TestParseOpmlWithoutCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
<title>mySubscriptions.opml</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="CNET News.com" description="Tech news and business reports by CNET News.com. Focused on information technology, core topics include computers, hardware, software, networking, and Internet media." htmlUrl="http://news.com.com/" language="unknown" title="CNET News.com" type="rss" version="RSS2" xmlUrl="http://news.com.com/2547-1_3-0-5.xml"/>
|
||||
<outline text="washingtonpost.com - Politics" description="Politics" htmlUrl="http://www.washingtonpost.com/wp-dyn/politics?nav=rss_politics" language="unknown" title="washingtonpost.com - Politics" type="rss" version="RSS2" xmlUrl="http://www.washingtonpost.com/wp-srv/politics/rssheadlines.xml"/>
|
||||
<outline text="Scobleizer: Microsoft Geek Blogger" description="Robert Scoble's look at geek and Microsoft life." htmlUrl="http://radio.weblogs.com/0001011/" language="unknown" title="Scobleizer: Microsoft Geek Blogger" type="rss" version="RSS2" xmlUrl="http://radio.weblogs.com/0001011/rss.xml"/>
|
||||
<outline text="Yahoo! News: Technology" description="Technology" htmlUrl="http://news.yahoo.com/news?tmpl=index&cid=738" language="unknown" title="Yahoo! News: Technology" type="rss" version="RSS2" xmlUrl="http://rss.news.yahoo.com/rss/tech"/>
|
||||
<outline text="Workbench" description="Programming and publishing news and comment" htmlUrl="http://www.cadenhead.org/workbench/" language="unknown" title="Workbench" type="rss" version="RSS2" xmlUrl="http://www.cadenhead.org/workbench/rss.xml"/>
|
||||
<outline text="Christian Science Monitor | Top Stories" description="Read the front page stories of csmonitor.com." htmlUrl="http://csmonitor.com" language="unknown" title="Christian Science Monitor | Top Stories" type="rss" version="RSS" xmlUrl="http://www.csmonitor.com/rss/top.rss"/>
|
||||
<outline text="Dictionary.com Word of the Day" description="A new word is presented every day with its definition and example sentences from actual published works." htmlUrl="http://dictionary.reference.com/wordoftheday/" language="unknown" title="Dictionary.com Word of the Day" type="rss" version="RSS" xmlUrl="http://www.dictionary.com/wordoftheday/wotd.rss"/>
|
||||
<outline text="The Motley Fool" description="To Educate, Amuse, and Enrich" htmlUrl="http://www.fool.com" language="unknown" title="The Motley Fool" type="rss" version="RSS" xmlUrl="http://www.fool.com/xml/foolnews_rss091.xml"/>
|
||||
<outline text="InfoWorld: Top News" description="The latest on Top News from InfoWorld" htmlUrl="http://www.infoworld.com/news/index.html" language="unknown" title="InfoWorld: Top News" type="rss" version="RSS2" xmlUrl="http://www.infoworld.com/rss/news.xml"/>
|
||||
<outline text="NYT > Business" description="Find breaking news & business news on Wall Street, media & advertising, international business, banking, interest rates, the stock market, currencies & funds." htmlUrl="http://www.nytimes.com/pages/business/index.html?partner=rssnyt" language="unknown" title="NYT > Business" type="rss" version="RSS2" xmlUrl="http://www.nytimes.com/services/xml/rss/nyt/Business.xml"/>
|
||||
<outline text="NYT > Technology" description="" htmlUrl="http://www.nytimes.com/pages/technology/index.html?partner=rssnyt" language="unknown" title="NYT > Technology" type="rss" version="RSS2" xmlUrl="http://www.nytimes.com/services/xml/rss/nyt/Technology.xml"/>
|
||||
<outline text="Scripting News" description="It's even worse than it appears." htmlUrl="http://www.scripting.com/" language="unknown" title="Scripting News" type="rss" version="RSS2" xmlUrl="http://www.scripting.com/rss.xml"/>
|
||||
<outline text="Wired News" description="Technology, and the way we do business, is changing the world we know. Wired News is a technology - and business-oriented news service feeding an intelligent, discerning audience. What role does technology play in the day-to-day living of your life? Wired News tells you. How has evolving technology changed the face of the international business world? Wired News puts you in the picture." htmlUrl="http://www.wired.com/" language="unknown" title="Wired News" type="rss" version="RSS" xmlUrl="http://www.wired.com/news_drop/netcenter/netcenter.rdf"/>
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
var expected SubcriptionList
|
||||
expected = append(expected, &Subcription{Title: "CNET News.com", FeedURL: "http://news.com.com/2547-1_3-0-5.xml", SiteURL: "http://news.com.com/"})
|
||||
|
||||
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 13 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 13)
|
||||
}
|
||||
|
||||
if !subscriptions[0].Equals(expected[0]) {
|
||||
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[0], expected[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseOpmlWithCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
<title>mySubscriptions.opml</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="My Category 1">
|
||||
<outline text="Feed 1" xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"/>
|
||||
<outline text="Feed 2" xmlUrl="http://example.org/feed2/" htmlUrl="http://example.org/2"/>
|
||||
</outline>
|
||||
<outline text="My Category 2">
|
||||
<outline text="Feed 3" xmlUrl="http://example.org/feed3/" htmlUrl="http://example.org/3"/>
|
||||
</outline>
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
var expected SubcriptionList
|
||||
expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: "My Category 1"})
|
||||
expected = append(expected, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/2", CategoryName: "My Category 1"})
|
||||
expected = append(expected, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed3/", SiteURL: "http://example.org/3", CategoryName: "My Category 2"})
|
||||
|
||||
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 3 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 3)
|
||||
}
|
||||
|
||||
for i := 0; i < len(subscriptions); i++ {
|
||||
if !subscriptions[i].Equals(expected[i]) {
|
||||
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseOpmlWithEmptyTitleAndEmptySiteURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
<title>mySubscriptions.opml</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"/>
|
||||
<outline xmlUrl="http://example.org/feed2/"/>
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
var expected SubcriptionList
|
||||
expected = append(expected, &Subcription{Title: "http://example.org/1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: ""})
|
||||
expected = append(expected, &Subcription{Title: "http://example.org/feed2/", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/feed2/", CategoryName: ""})
|
||||
|
||||
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 2 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 2)
|
||||
}
|
||||
|
||||
for i := 0; i < len(subscriptions); i++ {
|
||||
if !subscriptions[i].Equals(expected[i]) {
|
||||
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], expected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidXML(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
</head>
|
||||
<body>
|
||||
<outline
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
58
reader/opml/serializer.go
Normal file
58
reader/opml/serializer.go
Normal file
|
@ -0,0 +1,58 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"log"
|
||||
)
|
||||
|
||||
func Serialize(subscriptions SubcriptionList) string {
|
||||
var b bytes.Buffer
|
||||
writer := bufio.NewWriter(&b)
|
||||
writer.WriteString(xml.Header)
|
||||
|
||||
opml := new(Opml)
|
||||
opml.Version = "2.0"
|
||||
for categoryName, subs := range groupSubscriptionsByFeed(subscriptions) {
|
||||
outline := Outline{Text: categoryName}
|
||||
|
||||
for _, subscription := range subs {
|
||||
outline.Outlines = append(outline.Outlines, Outline{
|
||||
Title: subscription.Title,
|
||||
Text: subscription.Title,
|
||||
FeedURL: subscription.FeedURL,
|
||||
SiteURL: subscription.SiteURL,
|
||||
})
|
||||
}
|
||||
|
||||
opml.Outlines = append(opml.Outlines, outline)
|
||||
}
|
||||
|
||||
encoder := xml.NewEncoder(writer)
|
||||
encoder.Indent(" ", " ")
|
||||
if err := encoder.Encode(opml); err != nil {
|
||||
log.Println(err)
|
||||
return ""
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func groupSubscriptionsByFeed(subscriptions SubcriptionList) map[string]SubcriptionList {
|
||||
groups := make(map[string]SubcriptionList)
|
||||
|
||||
for _, subscription := range subscriptions {
|
||||
// if subs, ok := groups[subscription.CategoryName]; !ok {
|
||||
// groups[subscription.CategoryName] = SubcriptionList{}
|
||||
// }
|
||||
|
||||
groups[subscription.CategoryName] = append(groups[subscription.CategoryName], subscription)
|
||||
}
|
||||
|
||||
return groups
|
||||
}
|
31
reader/opml/serializer_test.go
Normal file
31
reader/opml/serializer_test.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
import "testing"
|
||||
import "bytes"
|
||||
|
||||
func TestSerialize(t *testing.T) {
|
||||
var subscriptions SubcriptionList
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed/1", SiteURL: "http://example.org/1", CategoryName: "Category 1"})
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed/2", SiteURL: "http://example.org/2", CategoryName: "Category 1"})
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed/3", SiteURL: "http://example.org/3", CategoryName: "Category 2"})
|
||||
|
||||
output := Serialize(subscriptions)
|
||||
feeds, err := Parse(bytes.NewBufferString(output))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feeds) != 3 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(feeds), 3)
|
||||
}
|
||||
|
||||
for i := 0; i < len(feeds); i++ {
|
||||
if !feeds[i].Equals(subscriptions[i]) {
|
||||
t.Errorf(`Subscription are different: "%v" vs "%v"`, subscriptions[i], feeds[i])
|
||||
}
|
||||
}
|
||||
}
|
18
reader/opml/subscription.go
Normal file
18
reader/opml/subscription.go
Normal file
|
@ -0,0 +1,18 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package opml
|
||||
|
||||
// Subcription represents a feed subscription read from or written to an
// OPML document.
type Subcription struct {
	Title        string
	SiteURL      string
	FeedURL      string
	CategoryName string
}

// Equals reports whether both subscriptions have the same title, site URL,
// feed URL and category name. A nil argument is never equal.
func (s Subcription) Equals(subscription *Subcription) bool {
	if subscription == nil {
		return false
	}

	// Every field is a string, so the struct comparison is field by field.
	return s == *subscription
}

// SubcriptionList is a list of subscriptions.
type SubcriptionList []*Subcription
|
15
reader/processor/processor.go
Normal file
15
reader/processor/processor.go
Normal file
|
@ -0,0 +1,15 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package processor
|
||||
|
||||
import (
|
||||
"github.com/miniflux/miniflux2/reader/rewrite"
|
||||
"github.com/miniflux/miniflux2/reader/sanitizer"
|
||||
)
|
||||
|
||||
func ItemContentProcessor(url, content string) string {
|
||||
content = sanitizer.Sanitize(url, content)
|
||||
return rewrite.Rewriter(url, content)
|
||||
}
|
47
reader/rewrite/rewriter.go
Normal file
47
reader/rewrite/rewriter.go
Normal file
|
@ -0,0 +1,47 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rewrite
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
var rewriteRules = []func(string, string) string{
|
||||
func(url, content string) string {
|
||||
re := regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
matches := re.FindStringSubmatch(url)
|
||||
|
||||
if len(matches) == 2 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/` + matches[1] + `" allowfullscreen></iframe>`
|
||||
return video + "<p>" + content + "</p>"
|
||||
}
|
||||
return content
|
||||
},
|
||||
func(url, content string) string {
|
||||
if strings.HasPrefix(url, "https://xkcd.com") {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(content))
|
||||
if err != nil {
|
||||
return content
|
||||
}
|
||||
|
||||
imgTag := doc.Find("img").First()
|
||||
if titleAttr, found := imgTag.Attr("title"); found {
|
||||
return content + `<blockquote cite="` + url + `">` + titleAttr + "</blockquote>"
|
||||
}
|
||||
}
|
||||
return content
|
||||
},
|
||||
}
|
||||
|
||||
func Rewriter(url, content string) string {
|
||||
for _, rewriteRule := range rewriteRules {
|
||||
content = rewriteRule(url, content)
|
||||
}
|
||||
|
||||
return content
|
||||
}
|
34
reader/rewrite/rewriter_test.go
Normal file
34
reader/rewrite/rewriter_test.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package rewrite
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestRewriteWithNoMatchingRule(t *testing.T) {
|
||||
output := Rewriter("https://example.org/article", `Some text.`)
|
||||
expected := `Some text.`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithYoutubeLink(t *testing.T) {
|
||||
output := Rewriter("https://www.youtube.com/watch?v=1234", `Video Description`)
|
||||
expected := `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><p>Video Description</p>`
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLink(t *testing.T) {
|
||||
description := `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`
|
||||
output := Rewriter("https://xkcd.com/1912/", description)
|
||||
expected := description + `<blockquote cite="https://xkcd.com/1912/">Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</blockquote>`
|
||||
if expected != output {
|
||||
t.Errorf(`Not expected output: got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
360
reader/sanitizer/sanitizer.go
Normal file
360
reader/sanitizer/sanitizer.go
Normal file
|
@ -0,0 +1,360 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/reader/url"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Sanitize returns safe HTML.
|
||||
func Sanitize(baseURL, input string) string {
|
||||
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
||||
var buffer bytes.Buffer
|
||||
var tagStack []string
|
||||
|
||||
for {
|
||||
if tokenizer.Next() == html.ErrorToken {
|
||||
err := tokenizer.Err()
|
||||
if err == io.EOF {
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
token := tokenizer.Token()
|
||||
switch token.Type {
|
||||
case html.TextToken:
|
||||
buffer.WriteString(token.Data)
|
||||
case html.StartTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
|
||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||
|
||||
if hasRequiredAttributes(tagName, attrNames) {
|
||||
if len(attrNames) > 0 {
|
||||
buffer.WriteString("<" + tagName + " " + htmlAttributes + ">")
|
||||
} else {
|
||||
buffer.WriteString("<" + tagName + ">")
|
||||
}
|
||||
|
||||
tagStack = append(tagStack, tagName)
|
||||
}
|
||||
}
|
||||
case html.EndTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
if isValidTag(tagName) && inList(tagName, tagStack) {
|
||||
buffer.WriteString(fmt.Sprintf("</%s>", tagName))
|
||||
}
|
||||
case html.SelfClosingTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||
|
||||
if hasRequiredAttributes(tagName, attrNames) {
|
||||
if len(attrNames) > 0 {
|
||||
buffer.WriteString("<" + tagName + " " + htmlAttributes + "/>")
|
||||
} else {
|
||||
buffer.WriteString("<" + tagName + "/>")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) (attrNames []string, html string) {
|
||||
var htmlAttrs []string
|
||||
var err error
|
||||
|
||||
for _, attribute := range attributes {
|
||||
value := attribute.Val
|
||||
|
||||
if !isValidAttribute(tagName, attribute.Key) {
|
||||
continue
|
||||
}
|
||||
|
||||
if isExternalResourceAttribute(attribute.Key) {
|
||||
if tagName == "iframe" && !isValidIframeSource(attribute.Val) {
|
||||
continue
|
||||
} else {
|
||||
value, err = url.GetAbsoluteURL(baseURL, value)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if !hasValidScheme(value) || isBlacklistedResource(value) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
attrNames = append(attrNames, attribute.Key)
|
||||
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, value))
|
||||
}
|
||||
|
||||
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
|
||||
if len(extraAttrNames) > 0 {
|
||||
attrNames = append(attrNames, extraAttrNames...)
|
||||
htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
|
||||
}
|
||||
|
||||
return attrNames, strings.Join(htmlAttrs, " ")
|
||||
}
|
||||
|
||||
// getExtraAttributes returns the attributes that are always added to a tag:
// their names first, then their rendered HTML form. Links are forced to
// open in a new tab without referrer, and media elements get controls.
// Both results are nil for any other tag.
func getExtraAttributes(tagName string) ([]string, []string) {
	switch tagName {
	case "a":
		names := []string{"rel", "target", "referrerpolicy"}
		rendered := []string{`rel="noopener noreferrer"`, `target="_blank"`, `referrerpolicy="no-referrer"`}
		return names, rendered
	case "video", "audio":
		return []string{"controls"}, []string{"controls"}
	default:
		return nil, nil
	}
}
|
||||
|
||||
func isValidTag(tagName string) bool {
|
||||
for element := range getTagWhitelist() {
|
||||
if tagName == element {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isValidAttribute(tagName, attributeName string) bool {
|
||||
for element, attributes := range getTagWhitelist() {
|
||||
if tagName == element {
|
||||
if inList(attributeName, attributes) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isExternalResourceAttribute reports whether the attribute holds a URL
// (src, href, poster or cite).
func isExternalResourceAttribute(attribute string) bool {
	return attribute == "src" ||
		attribute == "href" ||
		attribute == "poster" ||
		attribute == "cite"
}
|
||||
|
||||
func isPixelTracker(tagName string, attributes []html.Attribute) bool {
|
||||
if tagName == "img" {
|
||||
hasHeight := false
|
||||
hasWidth := false
|
||||
|
||||
for _, attribute := range attributes {
|
||||
if attribute.Key == "height" && attribute.Val == "1" {
|
||||
hasHeight = true
|
||||
}
|
||||
|
||||
if attribute.Key == "width" && attribute.Val == "1" {
|
||||
hasWidth = true
|
||||
}
|
||||
}
|
||||
|
||||
return hasHeight && hasWidth
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// hasRequiredAttributes reports whether the attribute names contain the
// attribute a tag cannot live without: href for links, src for iframes,
// images and sources. Tags without such a requirement always pass.
func hasRequiredAttributes(tagName string, attributes []string) bool {
	var required string

	switch tagName {
	case "a":
		required = "href"
	case "iframe", "img", "source":
		required = "src"
	default:
		return true
	}

	for _, name := range attributes {
		if name == required {
			return true
		}
	}

	return false
}
|
||||
|
||||
// hasValidScheme reports whether the URL starts with a whitelisted scheme.
//
// Schemes that take an authority are matched with "://". Schemes whose
// URLs have no authority part (mailto:user@example.org, tel:+1555...,
// magnet:?xt=...) are matched on the scheme and colon only; this form still
// accepts every URL that the "://" form accepted.
func hasValidScheme(src string) bool {
	// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
	whitelist := []string{
		"apt:",
		"bitcoin:",
		"callto:",
		"ed2k://",
		"facetime://",
		"feed:",
		"ftp://",
		"geo:",
		"gopher://",
		"git://",
		"http://",
		"https://",
		"irc://",
		"irc6://",
		"ircs://",
		"itms://",
		"jabber://",
		"magnet:",
		"mailto:",
		"maps://",
		"news:",
		"nfs://",
		"nntp://",
		"rtmp://",
		"sip:",
		"sips:",
		"skype:",
		"smb://",
		"sms:",
		"spotify://",
		"ssh://",
		"sftp://",
		"steam://",
		"svn://",
		"tel:",
		"webcal://",
		"xmpp:",
	}

	for _, prefix := range whitelist {
		if strings.HasPrefix(src, prefix) {
			return true
		}
	}

	return false
}
|
||||
|
||||
// isBlacklistedResource reports whether the URL contains one of the
// blacklisted host or path fragments (trackers and share buttons).
func isBlacklistedResource(src string) bool {
	for _, fragment := range [...]string{
		"feedsportal.com",
		"api.flattr.com",
		"stats.wordpress.com",
		"plus.google.com/share",
		"twitter.com/share",
		"feeds.feedburner.com",
	} {
		if strings.Contains(src, fragment) {
			return true
		}
	}

	return false
}
|
||||
|
||||
// isValidIframeSource reports whether the iframe source points to one of
// the whitelisted video providers.
//
// The whitelisted origin must be followed by the end of the URL or by the
// start of a path, query or fragment. A bare prefix match would also accept
// hosts such as "www.youtube.com.evil.example" or URLs that use the
// whitelisted host as userinfo ("www.youtube.com@evil.example").
func isValidIframeSource(src string) bool {
	whitelist := []string{
		"http://www.youtube.com",
		"https://www.youtube.com",
		"http://player.vimeo.com",
		"https://player.vimeo.com",
		"http://www.dailymotion.com",
		"https://www.dailymotion.com",
		"http://vk.com",
		"https://vk.com",
	}

	for _, prefix := range whitelist {
		if !strings.HasPrefix(src, prefix) {
			continue
		}

		rest := src[len(prefix):]
		if rest == "" || rest[0] == '/' || rest[0] == '?' || rest[0] == '#' {
			return true
		}
	}

	return false
}
|
||||
|
||||
// getTagWhitelist returns the tags that are allowed in sanitized content,
// each one mapped to the attributes allowed on it. A new map is built on
// every call, so callers may not rely on identity between calls.
//
// Table sections use the real element names (thead, tbody, tfoot).
func getTagWhitelist() map[string][]string {
	return map[string][]string{
		"img":        {"alt", "title", "src"},
		"audio":      {"src"},
		"video":      {"poster", "height", "width", "src"},
		"source":     {"src", "type"},
		"dt":         {},
		"dd":         {},
		"dl":         {},
		"table":      {},
		"caption":    {},
		"thead":      {},
		"tbody":      {},
		"tfoot":      {},
		"tr":         {},
		"td":         {"rowspan", "colspan"},
		"th":         {"rowspan", "colspan"},
		"h1":         {},
		"h2":         {},
		"h3":         {},
		"h4":         {},
		"h5":         {},
		"h6":         {},
		"strong":     {},
		"em":         {},
		"code":       {},
		"pre":        {},
		"blockquote": {},
		"q":          {"cite"},
		"p":          {},
		"ul":         {},
		"li":         {},
		"ol":         {},
		"br":         {},
		"del":        {},
		"a":          {"href", "title"},
		"figure":     {},
		"figcaption": {},
		"cite":       {},
		"time":       {"datetime"},
		"abbr":       {"title"},
		"acronym":    {"title"},
		"wbr":        {},
		"dfn":        {},
		"sub":        {},
		"sup":        {},
		"var":        {},
		"samp":       {},
		"s":          {},
		"ins":        {},
		"kbd":        {},
		"rp":         {},
		"rt":         {},
		"rtc":        {},
		"ruby":       {},
		"iframe":     {"width", "height", "frameborder", "src", "allowfullscreen"},
	}
}
|
||||
|
||||
// inList reports whether needle is one of the elements of haystack.
func inList(needle string, haystack []string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}

	return false
}
|
144
reader/sanitizer/sanitizer_test.go
Normal file
144
reader/sanitizer/sanitizer_test.go
Normal file
|
@ -0,0 +1,144 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestValidInput(t *testing.T) {
|
||||
input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test">.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if input != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelfClosingTags(t *testing.T) {
|
||||
input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test"/>.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if input != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTable(t *testing.T) {
|
||||
input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if input != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRelativeURL(t *testing.T) {
|
||||
input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png"/>`
|
||||
expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a> and this image: <img src="http://example.org/folder/image.png"/>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProtocolRelativeURL(t *testing.T) {
|
||||
input := `This <a href="//static.example.org/index.html">link is relative</a>.`
|
||||
expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a>.`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidTag(t *testing.T) {
|
||||
input := `<p>My invalid <b>tag</b>.</p>`
|
||||
expected := `<p>My invalid tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoTag(t *testing.T) {
|
||||
input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
|
||||
expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioAndSourceTag(t *testing.T) {
|
||||
input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
|
||||
expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnknownTag(t *testing.T) {
|
||||
input := `<p>My invalid <unknown>tag</unknown>.</p>`
|
||||
expected := `<p>My invalid tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidNestedTag(t *testing.T) {
|
||||
input := `<p>My invalid <b>tag with some <em>valid</em> tag</b>.</p>`
|
||||
expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidIFrame(t *testing.T) {
|
||||
input := `<iframe src="http://example.org/"></iframe>`
|
||||
expected := ``
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidURLScheme(t *testing.T) {
|
||||
input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
|
||||
expected := `<p>This link is not valid</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacklistedLink(t *testing.T) {
|
||||
input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
|
||||
expected := `<p>This image is not valid </p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPixelTracker(t *testing.T) {
|
||||
input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
|
||||
expected := `<p> and </p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
35
reader/sanitizer/strip_tags.go
Normal file
35
reader/sanitizer/strip_tags.go
Normal file
|
@ -0,0 +1,35 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// StripTags removes all HTML/XML tags from the input string.
|
||||
func StripTags(input string) string {
|
||||
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
||||
var buffer bytes.Buffer
|
||||
|
||||
for {
|
||||
if tokenizer.Next() == html.ErrorToken {
|
||||
err := tokenizer.Err()
|
||||
if err == io.EOF {
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
token := tokenizer.Token()
|
||||
switch token.Type {
|
||||
case html.TextToken:
|
||||
buffer.WriteString(token.Data)
|
||||
}
|
||||
}
|
||||
}
|
17
reader/sanitizer/strip_tags_test.go
Normal file
17
reader/sanitizer/strip_tags_test.go
Normal file
|
@ -0,0 +1,17 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestStripTags(t *testing.T) {
|
||||
input := `This <a href="/test.html">link is relative</a> and <strong>this</strong> image: <img src="../folder/image.png"/>`
|
||||
expected := `This link is relative and this image: `
|
||||
output := StripTags(input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
96
reader/subscription/finder.go
Normal file
96
reader/subscription/finder.go
Normal file
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package subscription
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"github.com/miniflux/miniflux2/errors"
|
||||
"github.com/miniflux/miniflux2/helper"
|
||||
"github.com/miniflux/miniflux2/reader/feed"
|
||||
"github.com/miniflux/miniflux2/reader/http"
|
||||
"github.com/miniflux/miniflux2/reader/url"
|
||||
"io"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// errConnectionFailure is the message template used when the website
// cannot be downloaded.
var errConnectionFailure = "Unable to open this link: %v"

// errUnreadableDoc is the message template used when the downloaded page
// cannot be analyzed.
var errUnreadableDoc = "Unable to analyze this page: %v"
|
||||
|
||||
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
||||
func FindSubscriptions(websiteURL string) (Subscriptions, error) {
|
||||
defer helper.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))
|
||||
|
||||
client := http.NewHttpClient(websiteURL)
|
||||
response, err := client.Get()
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errConnectionFailure, err)
|
||||
}
|
||||
|
||||
var buffer bytes.Buffer
|
||||
io.Copy(&buffer, response.Body)
|
||||
reader := bytes.NewReader(buffer.Bytes())
|
||||
|
||||
if format := feed.DetectFeedFormat(reader); format != feed.FormatUnknown {
|
||||
var subscriptions Subscriptions
|
||||
subscriptions = append(subscriptions, &Subscription{
|
||||
Title: response.EffectiveURL,
|
||||
URL: response.EffectiveURL,
|
||||
Type: format,
|
||||
})
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
reader.Seek(0, io.SeekStart)
|
||||
return parseDocument(response.EffectiveURL, bytes.NewReader(buffer.Bytes()))
|
||||
}
|
||||
|
||||
func parseDocument(websiteURL string, data io.Reader) (Subscriptions, error) {
|
||||
var subscriptions Subscriptions
|
||||
queries := map[string]string{
|
||||
"link[type='application/rss+xml']": "rss",
|
||||
"link[type='application/atom+xml']": "atom",
|
||||
"link[type='application/json']": "json",
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errUnreadableDoc, err)
|
||||
}
|
||||
|
||||
for query, kind := range queries {
|
||||
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
|
||||
if title, exists := s.Attr("title"); exists {
|
||||
subscription.Title = title
|
||||
} else {
|
||||
subscription.Title = "Feed"
|
||||
}
|
||||
|
||||
if feedURL, exists := s.Attr("href"); exists {
|
||||
subscription.URL, _ = url.GetAbsoluteURL(websiteURL, feedURL)
|
||||
}
|
||||
|
||||
if subscription.Title == "" {
|
||||
subscription.Title = subscription.URL
|
||||
}
|
||||
|
||||
if subscription.URL != "" {
|
||||
log.Println("[FindSubscriptions]", subscription)
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
21
reader/subscription/subscription.go
Normal file
21
reader/subscription/subscription.go
Normal file
|
@ -0,0 +1,21 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package subscription
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Subscription describes a single feed that can be subscribed to.
type Subscription struct {
	// Title is the label of the feed.
	Title string `json:"title"`
	// URL is the location of the feed.
	URL string `json:"url"`
	// Type is the format of the feed.
	Type string `json:"type"`
}

// String renders the subscription as a list of quoted Title, URL and Type
// fields.
func (s Subscription) String() string {
	const layout = `Title="%s", URL="%s", Type="%s"`
	return fmt.Sprintf(layout, s.Title, s.URL, s.Type)
}

// Subscriptions is a list of subscriptions.
type Subscriptions []*Subscription
|
61
reader/url/url.go
Normal file
61
reader/url/url.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
// Copyright 2017 Frédéric Guillot. All rights reserved.
|
||||
// Use of this source code is governed by the Apache 2.0
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package url
|
||||
|
||||
import "net/url"
|
||||
import "fmt"
|
||||
import "strings"
|
||||
|
||||
// GetAbsoluteURL returns input as an absolute URL.
//
// A protocol-relative input ("//host/path") is turned into an https URL.
// An input that is already absolute is returned as parsed; anything else is
// resolved against baseURL. An error is returned when input cannot be parsed,
// or when baseURL is needed and cannot be parsed.
func GetAbsoluteURL(baseURL, input string) (string, error) {
	if strings.HasPrefix(input, "//") {
		input = "https:" + input
	}

	ref, err := url.Parse(input)
	if err != nil {
		return "", fmt.Errorf("unable to parse input URL: %v", err)
	}

	if !ref.IsAbs() {
		base, err := url.Parse(baseURL)
		if err != nil {
			return "", fmt.Errorf("unable to parse base URL: %v", err)
		}
		ref = base.ResolveReference(ref)
	}

	return ref.String(), nil
}
|
||||
|
||||
// GetRootURL returns absolute URL without the path.
|
||||
func GetRootURL(websiteURL string) string {
|
||||
if strings.HasPrefix(websiteURL, "//") {
|
||||
websiteURL = "https://" + websiteURL[2:]
|
||||
}
|
||||
|
||||
absoluteURL, err := GetAbsoluteURL(websiteURL, "")
|
||||
if err != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
u, err := url.Parse(absoluteURL)
|
||||
if err != nil {
|
||||
return absoluteURL
|
||||
}
|
||||
|
||||
return u.Scheme + "://" + u.Host + "/"
|
||||
}
|
||||
|
||||
// IsHTTPS reports whether websiteURL parses successfully and has the "https"
// scheme, compared case-insensitively.
func IsHTTPS(websiteURL string) bool {
	if parsed, err := url.Parse(websiteURL); err == nil {
		return strings.ToLower(parsed.Scheme) == "https"
	}

	return false
}
|
107
reader/url/url_test.go
Normal file
107
reader/url/url_test.go
Normal file
|
@ -0,0 +1,107 @@
|
|||
package url
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestGetAbsoluteURLWithAbsolutePath(t *testing.T) {
|
||||
expected := `https://example.org/path/file.ext`
|
||||
input := `/path/file.ext`
|
||||
output, err := GetAbsoluteURL("https://example.org/folder/", input)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAbsoluteURLWithRelativePath(t *testing.T) {
|
||||
expected := `https://example.org/folder/path/file.ext`
|
||||
input := `path/file.ext`
|
||||
output, err := GetAbsoluteURL("https://example.org/folder/", input)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAbsoluteURLWithRelativePaths(t *testing.T) {
|
||||
expected := `https://example.org/path/file.ext`
|
||||
input := `path/file.ext`
|
||||
output, err := GetAbsoluteURL("https://example.org/folder", input)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWhenInputIsAlreadyAbsolute(t *testing.T) {
|
||||
expected := `https://example.org/path/file.ext`
|
||||
input := `https://example.org/path/file.ext`
|
||||
output, err := GetAbsoluteURL("https://example.org/folder/", input)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAbsoluteURLWithProtocolRelative(t *testing.T) {
|
||||
expected := `https://static.example.org/path/file.ext`
|
||||
input := `//static.example.org/path/file.ext`
|
||||
output, err := GetAbsoluteURL("https://www.example.org/", input)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRootURL(t *testing.T) {
|
||||
expected := `https://example.org/`
|
||||
input := `https://example.org/path/file.ext`
|
||||
output := GetRootURL(input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRootURLWithProtocolRelativePath(t *testing.T) {
|
||||
expected := `https://static.example.org/`
|
||||
input := `//static.example.org/path/file.ext`
|
||||
output := GetRootURL(input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Unexpected output, got "%s" instead of "%s"`, output, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsHTTPS(t *testing.T) {
|
||||
if !IsHTTPS("https://example.org/") {
|
||||
t.Error("Unable to recognize HTTPS URL")
|
||||
}
|
||||
|
||||
if IsHTTPS("http://example.org/") {
|
||||
t.Error("Unable to recognize HTTP URL")
|
||||
}
|
||||
|
||||
if IsHTTPS("") {
|
||||
t.Error("Unable to recognize malformed URL")
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue