mirror of
https://github.com/miniflux/v2.git
synced 2025-08-26 18:21:01 +00:00
Move internal packages to an internal folder
For reference: https://go.dev/doc/go1.4#internalpackages
This commit is contained in:
parent
c234903255
commit
168a870c02
433 changed files with 1121 additions and 1123 deletions
178
internal/reader/atom/atom_03.go
Normal file
178
internal/reader/atom/atom_03.go
Normal file
|
@ -0,0 +1,178 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"html"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
// Specs: http://web.archive.org/web/20060811235523/http://www.mnot.net/drafts/draft-nottingham-atom-format-02.html
|
||||
// atom03Feed maps the root <feed> element of an Atom 0.3 document.
type atom03Feed struct {
	ID      string        `xml:"id"`
	Title   atom03Text    `xml:"title"`
	Author  atomPerson    `xml:"author"`
	Links   atomLinks     `xml:"link"`
	Entries []atom03Entry `xml:"entry"`
}
|
||||
|
||||
func (a *atom03Feed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
|
||||
feed := new(model.Feed)
|
||||
|
||||
feedURL := a.Links.firstLinkWithRelation("self")
|
||||
feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
|
||||
if err != nil {
|
||||
feed.FeedURL = feedURL
|
||||
}
|
||||
|
||||
siteURL := a.Links.originalLink()
|
||||
feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
|
||||
if err != nil {
|
||||
feed.SiteURL = siteURL
|
||||
}
|
||||
|
||||
feed.Title = a.Title.String()
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, entry := range a.Entries {
|
||||
item := entry.Transform()
|
||||
entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
|
||||
if err == nil {
|
||||
item.URL = entryURL
|
||||
}
|
||||
|
||||
if item.Author == "" {
|
||||
item.Author = a.Author.String()
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = sanitizer.TruncateHTML(item.Content, 100)
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = item.URL
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, item)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
// atom03Entry maps an <entry> element of an Atom 0.3 document.
type atom03Entry struct {
	ID       string     `xml:"id"`
	Title    atom03Text `xml:"title"`
	Modified string     `xml:"modified"`
	Issued   string     `xml:"issued"`
	Created  string     `xml:"created"`
	Links    atomLinks  `xml:"link"`
	Summary  atom03Text `xml:"summary"`
	Content  atom03Text `xml:"content"`
	Author   atomPerson `xml:"author"`
}
|
||||
|
||||
func (a *atom03Entry) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = a.Links.originalLink()
|
||||
entry.Date = a.entryDate()
|
||||
entry.Author = a.Author.String()
|
||||
entry.Hash = a.entryHash()
|
||||
entry.Content = a.entryContent()
|
||||
entry.Title = a.entryTitle()
|
||||
return entry
|
||||
}
|
||||
|
||||
func (a *atom03Entry) entryTitle() string {
|
||||
return sanitizer.StripTags(a.Title.String())
|
||||
}
|
||||
|
||||
func (a *atom03Entry) entryContent() string {
|
||||
content := a.Content.String()
|
||||
if content != "" {
|
||||
return content
|
||||
}
|
||||
|
||||
summary := a.Summary.String()
|
||||
if summary != "" {
|
||||
return summary
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (a *atom03Entry) entryDate() time.Time {
|
||||
dateText := ""
|
||||
for _, value := range []string{a.Issued, a.Modified, a.Created} {
|
||||
if value != "" {
|
||||
dateText = value
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if dateText != "" {
|
||||
result, err := date.Parse(dateText)
|
||||
if err != nil {
|
||||
logger.Error("atom: %v", err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (a *atom03Entry) entryHash() string {
|
||||
for _, value := range []string{a.ID, a.Links.originalLink()} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// atom03Text maps an Atom 0.3 text construct, which can be encoded as
// escaped text, inline XML, or base64 depending on the mode attribute.
type atom03Text struct {
	Type     string `xml:"type,attr"`
	Mode     string `xml:"mode,attr"`
	CharData string `xml:",chardata"`
	InnerXML string `xml:",innerxml"`
}

// String decodes the text construct according to its mode attribute and
// returns the trimmed result. Content whose type is not "text/html" is
// HTML-escaped so it can be embedded safely.
func (a *atom03Text) String() string {
	var content string

	switch a.Mode {
	case "xml":
		content = a.InnerXML
	case "base64":
		// Invalid base64 payloads yield an empty string.
		if decoded, err := base64.StdEncoding.DecodeString(a.CharData); err == nil {
			content = string(decoded)
		}
	default:
		// "escaped" and unknown/missing modes use the character data as-is.
		content = a.CharData
	}

	if a.Type != "text/html" {
		content = html.EscapeString(content)
	}

	return strings.TrimSpace(content)
}
|
274
internal/reader/atom/atom_03_test.go
Normal file
274
internal/reader/atom/atom_03_test.go
Normal file
|
@ -0,0 +1,274 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestParseAtom03 checks that a complete Atom 0.3 document yields the
// expected feed metadata, entry date, hash, URL, title, content, and author.
func TestParseAtom03(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<issued>2003-12-13T08:29:29-04:00</issued>
			<modified>2003-12-13T18:30:02Z</modified>
			<summary type="text/plain">It's a test</summary>
			<content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Title != "dive into mark" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "http://diveintomark.org/" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://diveintomark.org/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// The <issued> element carries a -04:00 UTC offset.
	tz := time.FixedZone("Test Case Time", -int((4 * time.Hour).Seconds()))
	if !feed.Entries[0].Date.Equal(time.Date(2003, time.December, 13, 8, 29, 29, 0, tz)) {
		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
	}

	if feed.Entries[0].Hash != "b70d30334b808f32e66eb19fabb263525cecd18f205720b583e84f7f295cf728" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "http://diveintomark.org/2003/12/13/atom03" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Title != "Atom 0.3 snapshot" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}

	if feed.Entries[0].Content != "<p>HTML content</p>" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}

	if feed.Entries[0].Author != "Mark Pilgrim" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
|
||||
|
||||
// TestParseAtom03WithoutFeedTitle checks that the site URL is used as the
// feed title when the document has no <title> element.
func TestParseAtom03WithoutFeedTitle(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Title != "http://diveintomark.org/" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}
}
|
||||
|
||||
// TestParseAtom03WithoutEntryTitleButWithLink checks that an entry with no
// title, summary, or content falls back to its URL as the title.
func TestParseAtom03WithoutEntryTitleButWithLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "http://diveintomark.org/2003/12/13/atom03" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
|
||||
|
||||
// TestParseAtom03WithoutEntryTitleButWithSummary checks that an entry with
// no title derives its title from the summary text.
func TestParseAtom03WithoutEntryTitleButWithSummary(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<summary type="text/plain">It's a test</summary>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "It's a test" {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
|
||||
|
||||
// TestParseAtom03WithoutEntryTitleButWithXMLContent checks that an entry
// with no title derives its title from XML-mode content, with tags stripped.
func TestParseAtom03WithoutEntryTitleButWithXMLContent(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<content mode="xml" type="text/html"><p>Some text.</p></content>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Some text." {
		t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
	}
}
|
||||
|
||||
// TestParseAtom03WithSummaryOnly checks that the summary is used as entry
// content when no <content> element is present.
func TestParseAtom03WithSummaryOnly(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<issued>2003-12-13T08:29:29-04:00</issued>
			<modified>2003-12-13T18:30:02Z</modified>
			<summary type="text/plain">It's a test</summary>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Content != "It's a test" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}
}
|
||||
|
||||
// TestParseAtom03WithXMLContent checks that mode="xml" content keeps its
// inner XML markup intact.
func TestParseAtom03WithXMLContent(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<issued>2003-12-13T08:29:29-04:00</issued>
			<modified>2003-12-13T18:30:02Z</modified>
			<content mode="xml" type="text/html"><p>Some text.</p></content>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Content != "<p>Some text.</p>" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}
}
|
||||
|
||||
// TestParseAtom03WithBase64Content checks that mode="base64" content is
// decoded before being used as entry content.
func TestParseAtom03WithBase64Content(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<issued>2003-12-13T08:29:29-04:00</issued>
			<modified>2003-12-13T18:30:02Z</modified>
			<content mode="base64" type="text/html">PHA+U29tZSB0ZXh0LjwvcD4=</content>
		</entry>
	</feed>`

	feed, err := Parse("http://diveintomark.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Content != "<p>Some text.</p>" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}
}
|
287
internal/reader/atom/atom_10.go
Normal file
287
internal/reader/atom/atom_10.go
Normal file
|
@ -0,0 +1,287 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"html"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/media"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
// atom10Feed maps the root <feed> element of an Atom 1.0 document.
//
// Specs:
// https://tools.ietf.org/html/rfc4287
// https://validator.w3.org/feed/docs/atom.html
type atom10Feed struct {
	XMLName xml.Name      `xml:"http://www.w3.org/2005/Atom feed"`
	ID      string        `xml:"id"`
	Title   atom10Text    `xml:"title"`
	Authors atomAuthors   `xml:"author"`
	Icon    string        `xml:"icon"`
	Links   atomLinks     `xml:"link"`
	Entries []atom10Entry `xml:"entry"`
}
|
||||
|
||||
func (a *atom10Feed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
|
||||
feed := new(model.Feed)
|
||||
|
||||
feedURL := a.Links.firstLinkWithRelation("self")
|
||||
feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
|
||||
if err != nil {
|
||||
feed.FeedURL = feedURL
|
||||
}
|
||||
|
||||
siteURL := a.Links.originalLink()
|
||||
feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
|
||||
if err != nil {
|
||||
feed.SiteURL = siteURL
|
||||
}
|
||||
|
||||
feed.Title = html.UnescapeString(a.Title.String())
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
feed.IconURL = strings.TrimSpace(a.Icon)
|
||||
|
||||
for _, entry := range a.Entries {
|
||||
item := entry.Transform()
|
||||
entryURL, err := url.AbsoluteURL(feed.SiteURL, item.URL)
|
||||
if err == nil {
|
||||
item.URL = entryURL
|
||||
}
|
||||
|
||||
if item.Author == "" {
|
||||
item.Author = a.Authors.String()
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = sanitizer.TruncateHTML(item.Content, 100)
|
||||
}
|
||||
|
||||
if item.Title == "" {
|
||||
item.Title = item.URL
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, item)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
// atom10Entry maps an <entry> element of an Atom 1.0 document, including
// any embedded Media RSS extension elements (via media.Element).
type atom10Entry struct {
	ID         string           `xml:"id"`
	Title      atom10Text       `xml:"title"`
	Published  string           `xml:"published"`
	Updated    string           `xml:"updated"`
	Links      atomLinks        `xml:"link"`
	Summary    atom10Text       `xml:"summary"`
	Content    atom10Text       `xml:"http://www.w3.org/2005/Atom content"`
	Authors    atomAuthors      `xml:"author"`
	Categories []atom10Category `xml:"category"`
	media.Element
}
|
||||
|
||||
func (a *atom10Entry) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = a.Links.originalLink()
|
||||
entry.Date = a.entryDate()
|
||||
entry.Author = a.Authors.String()
|
||||
entry.Hash = a.entryHash()
|
||||
entry.Content = a.entryContent()
|
||||
entry.Title = a.entryTitle()
|
||||
entry.Enclosures = a.entryEnclosures()
|
||||
entry.CommentsURL = a.entryCommentsURL()
|
||||
entry.Tags = a.entryCategories()
|
||||
return entry
|
||||
}
|
||||
|
||||
func (a *atom10Entry) entryTitle() string {
|
||||
return html.UnescapeString(a.Title.String())
|
||||
}
|
||||
|
||||
func (a *atom10Entry) entryContent() string {
|
||||
content := a.Content.String()
|
||||
if content != "" {
|
||||
return content
|
||||
}
|
||||
|
||||
summary := a.Summary.String()
|
||||
if summary != "" {
|
||||
return summary
|
||||
}
|
||||
|
||||
mediaDescription := a.FirstMediaDescription()
|
||||
if mediaDescription != "" {
|
||||
return mediaDescription
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// Note: The published date represents the original creation date for YouTube feeds.
|
||||
// Example:
|
||||
// <published>2019-01-26T08:02:28+00:00</published>
|
||||
// <updated>2019-01-29T07:27:27+00:00</updated>
|
||||
func (a *atom10Entry) entryDate() time.Time {
|
||||
dateText := a.Published
|
||||
if dateText == "" {
|
||||
dateText = a.Updated
|
||||
}
|
||||
|
||||
if dateText != "" {
|
||||
result, err := date.Parse(dateText)
|
||||
if err != nil {
|
||||
logger.Error("atom: %v (entry ID = %s)", err, a.ID)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (a *atom10Entry) entryHash() string {
|
||||
for _, value := range []string{a.ID, a.Links.originalLink()} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (a *atom10Entry) entryEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
duplicates := make(map[string]bool)
|
||||
|
||||
for _, mediaThumbnail := range a.AllMediaThumbnails() {
|
||||
if _, found := duplicates[mediaThumbnail.URL]; !found {
|
||||
duplicates[mediaThumbnail.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaThumbnail.URL,
|
||||
MimeType: mediaThumbnail.MimeType(),
|
||||
Size: mediaThumbnail.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, link := range a.Links {
|
||||
if strings.ToLower(link.Rel) == "enclosure" {
|
||||
if link.URL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, found := duplicates[link.URL]; !found {
|
||||
duplicates[link.URL] = true
|
||||
length, _ := strconv.ParseInt(link.Length, 10, 0)
|
||||
enclosures = append(enclosures, &model.Enclosure{URL: link.URL, MimeType: link.Type, Size: length})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaContent := range a.AllMediaContents() {
|
||||
if _, found := duplicates[mediaContent.URL]; !found {
|
||||
duplicates[mediaContent.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaContent.URL,
|
||||
MimeType: mediaContent.MimeType(),
|
||||
Size: mediaContent.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaPeerLink := range a.AllMediaPeerLinks() {
|
||||
if _, found := duplicates[mediaPeerLink.URL]; !found {
|
||||
duplicates[mediaPeerLink.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaPeerLink.URL,
|
||||
MimeType: mediaPeerLink.MimeType(),
|
||||
Size: mediaPeerLink.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (r *atom10Entry) entryCategories() []string {
|
||||
var categoryList []string
|
||||
|
||||
for _, atomCategory := range r.Categories {
|
||||
if strings.TrimSpace(atomCategory.Label) != "" {
|
||||
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Label))
|
||||
} else {
|
||||
categoryList = append(categoryList, strings.TrimSpace(atomCategory.Term))
|
||||
}
|
||||
}
|
||||
|
||||
return categoryList
|
||||
}
|
||||
|
||||
// See https://tools.ietf.org/html/rfc4685#section-4
|
||||
// If the type attribute of the atom:link is omitted, its value is assumed to be "application/atom+xml".
|
||||
// We accept only HTML or XHTML documents for now since the intention is to have the same behavior as RSS.
|
||||
func (a *atom10Entry) entryCommentsURL() string {
|
||||
commentsURL := a.Links.firstLinkWithRelationAndType("replies", "text/html", "application/xhtml+xml")
|
||||
if url.IsAbsoluteURL(commentsURL) {
|
||||
return commentsURL
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
type atom10Text struct {
|
||||
Type string `xml:"type,attr"`
|
||||
CharData string `xml:",chardata"`
|
||||
InnerXML string `xml:",innerxml"`
|
||||
XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"`
|
||||
}
|
||||
|
||||
type atom10Category struct {
|
||||
Term string `xml:"term,attr"`
|
||||
Label string `xml:"label,attr"`
|
||||
}
|
||||
|
||||
// Text: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.1
|
||||
// HTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.2
|
||||
// XHTML: https://datatracker.ietf.org/doc/html/rfc4287#section-3.1.1.3
|
||||
func (a *atom10Text) String() string {
|
||||
var content string
|
||||
switch {
|
||||
case a.Type == "", a.Type == "text", a.Type == "text/plain":
|
||||
if strings.HasPrefix(strings.TrimSpace(a.InnerXML), `<![CDATA[`) {
|
||||
content = html.EscapeString(a.CharData)
|
||||
} else {
|
||||
content = a.InnerXML
|
||||
}
|
||||
case a.Type == "xhtml":
|
||||
var root = a.XHTMLRootElement
|
||||
if root.XMLName.Local == "div" {
|
||||
content = root.InnerXML
|
||||
} else {
|
||||
content = a.InnerXML
|
||||
}
|
||||
default:
|
||||
content = a.CharData
|
||||
}
|
||||
|
||||
return strings.TrimSpace(content)
|
||||
}
|
||||
|
||||
type atomXHTMLRootElement struct {
|
||||
XMLName xml.Name `xml:"div"`
|
||||
InnerXML string `xml:",innerxml"`
|
||||
}
|
1672
internal/reader/atom/atom_10_test.go
Normal file
1672
internal/reader/atom/atom_10_test.go
Normal file
File diff suppressed because it is too large
Load diff
83
internal/reader/atom/atom_common.go
Normal file
83
internal/reader/atom/atom_common.go
Normal file
|
@ -0,0 +1,83 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import "strings"
|
||||
|
||||
// atomPerson maps an Atom person construct (author or contributor).
type atomPerson struct {
	Name  string `xml:"name"`
	Email string `xml:"email"`
}

// String returns the person's display name, falling back to the email
// address, with surrounding whitespace trimmed.
func (a *atomPerson) String() string {
	if a.Name != "" {
		return strings.TrimSpace(a.Name)
	}
	return strings.TrimSpace(a.Email)
}
|
||||
|
||||
type atomAuthors []*atomPerson
|
||||
|
||||
func (a atomAuthors) String() string {
|
||||
var authors []string
|
||||
|
||||
for _, person := range a {
|
||||
authors = append(authors, person.String())
|
||||
}
|
||||
|
||||
return strings.Join(authors, ", ")
|
||||
}
|
||||
|
||||
// atomLink maps an Atom <link> element.
type atomLink struct {
	URL    string `xml:"href,attr"`
	Type   string `xml:"type,attr"`
	Rel    string `xml:"rel,attr"`
	Length string `xml:"length,attr"`
}

// atomLinks is the list of <link> elements attached to a feed or entry.
type atomLinks []*atomLink

// originalLink returns the permalink: the first link with rel="alternate"
// (case-insensitive), or the first link without a rel attribute whose type
// is missing or "text/html". Returns an empty string when none matches.
func (a atomLinks) originalLink() string {
	for _, link := range a {
		if strings.ToLower(link.Rel) == "alternate" {
			return strings.TrimSpace(link.URL)
		}
		if link.Rel == "" && (link.Type == "" || link.Type == "text/html") {
			return strings.TrimSpace(link.URL)
		}
	}
	return ""
}

// firstLinkWithRelation returns the URL of the first link whose rel
// attribute matches the given relation (case-insensitive on rel).
func (a atomLinks) firstLinkWithRelation(relation string) string {
	for _, link := range a {
		if strings.ToLower(link.Rel) == relation {
			return strings.TrimSpace(link.URL)
		}
	}
	return ""
}

// firstLinkWithRelationAndType returns the URL of the first link matching
// the given relation and one of the given content types (case-insensitive
// on rel and type).
func (a atomLinks) firstLinkWithRelationAndType(relation string, contentTypes ...string) string {
	for _, link := range a {
		if strings.ToLower(link.Rel) != relation {
			continue
		}
		for _, contentType := range contentTypes {
			if strings.ToLower(link.Type) == contentType {
				return strings.TrimSpace(link.URL)
			}
		}
	}
	return ""
}
|
61
internal/reader/atom/parser.go
Normal file
61
internal/reader/atom/parser.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/model"
|
||||
xml_decoder "miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
||||
// atomFeed is implemented by version-specific Atom documents that can be
// converted into the normalized feed model.
type atomFeed interface {
	Transform(baseURL string) *model.Feed
}
|
||||
|
||||
// Parse returns a normalized feed struct from a Atom feed.
|
||||
func Parse(baseURL string, r io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||
var buf bytes.Buffer
|
||||
tee := io.TeeReader(r, &buf)
|
||||
|
||||
var rawFeed atomFeed
|
||||
if getAtomFeedVersion(tee) == "0.3" {
|
||||
rawFeed = new(atom03Feed)
|
||||
} else {
|
||||
rawFeed = new(atom10Feed)
|
||||
}
|
||||
|
||||
decoder := xml_decoder.NewDecoder(&buf)
|
||||
err := decoder.Decode(rawFeed)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err)
|
||||
}
|
||||
|
||||
return rawFeed.Transform(baseURL), nil
|
||||
}
|
||||
|
||||
func getAtomFeedVersion(data io.Reader) string {
|
||||
decoder := xml_decoder.NewDecoder(data)
|
||||
for {
|
||||
token, _ := decoder.Token()
|
||||
if token == nil {
|
||||
break
|
||||
}
|
||||
|
||||
if element, ok := token.(xml.StartElement); ok {
|
||||
if element.Name.Local == "feed" {
|
||||
for _, attr := range element.Attr {
|
||||
if attr.Name.Local == "version" && attr.Value == "0.3" {
|
||||
return "0.3"
|
||||
}
|
||||
}
|
||||
return "1.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
return "1.0"
|
||||
}
|
61
internal/reader/atom/parser_test.go
Normal file
61
internal/reader/atom/parser_test.go
Normal file
|
@ -0,0 +1,61 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package atom // import "miniflux.app/v2/internal/reader/atom"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestDetectAtom10 checks that a namespaced Atom document without a
// version attribute is detected as version "1.0".
func TestDetectAtom10(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed xmlns="http://www.w3.org/2005/Atom">

	  <title>Example Feed</title>
	  <link href="http://example.org/"/>
	  <updated>2003-12-13T18:30:02Z</updated>
	  <author>
		<name>John Doe</name>
	  </author>
	  <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>

	  <entry>
		<title>Atom-Powered Robots Run Amok</title>
		<link href="http://example.org/2003/12/13/atom03"/>
		<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
		<updated>2003-12-13T18:30:02Z</updated>
		<summary>Some text.</summary>
	  </entry>

	</feed>`

	version := getAtomFeedVersion(bytes.NewBufferString(data))
	if version != "1.0" {
		t.Errorf(`Invalid Atom version detected: %s`, version)
	}
}
|
||||
|
||||
// TestDetectAtom03 checks that a document with version="0.3" on the root
// <feed> element is detected as version "0.3".
func TestDetectAtom03(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<feed version="0.3" xmlns="http://purl.org/atom/ns#">
		<title>dive into mark</title>
		<link rel="alternate" type="text/html" href="http://diveintomark.org/"/>
		<modified>2003-12-13T18:30:02Z</modified>
		<author><name>Mark Pilgrim</name></author>
		<entry>
			<title>Atom 0.3 snapshot</title>
			<link rel="alternate" type="text/html" href="http://diveintomark.org/2003/12/13/atom03"/>
			<id>tag:diveintomark.org,2003:3.2397</id>
			<issued>2003-12-13T08:29:29-04:00</issued>
			<modified>2003-12-13T18:30:02Z</modified>
			<summary type="text/plain">This is a test</summary>
			<content type="text/html" mode="escaped"><![CDATA[<p>HTML content</p>]]></content>
		</entry>
	</feed>`

	version := getAtomFeedVersion(bytes.NewBufferString(data))
	if version != "0.3" {
		t.Errorf(`Invalid Atom version detected: %s`, version)
	}
}
|
54
internal/reader/browser/browser.go
Normal file
54
internal/reader/browser/browser.go
Normal file
|
@ -0,0 +1,54 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package browser // import "miniflux.app/v2/internal/reader/browser"
|
||||
|
||||
import (
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
)
|
||||
|
||||
// Localized error message formats passed to errors.NewLocalizedError by Exec.
var (
	errRequestFailed    = "Unable to open this link: %v"
	errServerFailure    = "Unable to fetch this resource (Status Code = %d)"
	errEncoding         = "Unable to normalize encoding: %q"
	errEmptyFeed        = "This feed is empty"
	errResourceNotFound = "Resource not found (404), this feed doesn't exist anymore, check the feed URL"
	errNotAuthorized    = "You are not authorized to access this resource (invalid username/password)"
)
|
||||
|
||||
// Exec executes a HTTP request and handles errors.
|
||||
func Exec(request *client.Client) (*client.Response, *errors.LocalizedError) {
|
||||
response, err := request.Get()
|
||||
if err != nil {
|
||||
if e, ok := err.(*errors.LocalizedError); ok {
|
||||
return nil, e
|
||||
}
|
||||
return nil, errors.NewLocalizedError(errRequestFailed, err)
|
||||
}
|
||||
|
||||
if response.IsNotFound() {
|
||||
return nil, errors.NewLocalizedError(errResourceNotFound)
|
||||
}
|
||||
|
||||
if response.IsNotAuthorized() {
|
||||
return nil, errors.NewLocalizedError(errNotAuthorized)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
|
||||
}
|
||||
|
||||
if response.StatusCode != 304 {
|
||||
// Content-Length = -1 when no Content-Length header is sent.
|
||||
if response.ContentLength == 0 {
|
||||
return nil, errors.NewLocalizedError(errEmptyFeed)
|
||||
}
|
||||
|
||||
if err := response.EnsureUnicodeBody(); err != nil {
|
||||
return nil, errors.NewLocalizedError(errEncoding, err)
|
||||
}
|
||||
}
|
||||
|
||||
return response, nil
|
||||
}
|
372
internal/reader/date/parser.go
Normal file
372
internal/reader/date/parser.go
Normal file
|
@ -0,0 +1,372 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package date // import "miniflux.app/v2/internal/reader/date"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// DateFormats taken from github.com/mjibson/goread
//
// Layouts are tried in order by Parse, so the first matching layout wins;
// do not reorder entries casually. Each string is a Go reference-time
// layout (Mon Jan 2 15:04:05 MST 2006).
var dateFormats = []string{
	time.RFC822,  // RSS
	time.RFC822Z, // RSS
	time.RFC3339, // Atom
	time.UnixDate,
	time.RubyDate,
	time.RFC850,
	time.RFC1123Z,
	time.RFC1123,
	time.ANSIC,
	"Mon, 02 Jan 2006 15:04:05 MST -07:00",
	"Mon, January 2, 2006, 3:04 PM MST",
	"Mon, January 2 2006 15:04:05 -0700",
	"Mon, January 02, 2006, 15:04:05 MST",
	"Mon, January 02, 2006 15:04:05 MST",
	"Mon, Jan 2, 2006 15:04 MST",
	"Mon, Jan 2 2006 15:04 MST",
	"Mon, Jan 2 2006 15:04:05 MST",
	"Mon, Jan 2, 2006 15:04:05 MST",
	"Mon, Jan 2 2006 15:04:05 -700",
	"Mon, Jan 2 2006 15:04:05 -0700",
	"Mon Jan 2 15:04 2006",
	"Mon Jan 2 15:04:05 2006 MST",
	"Mon Jan 02, 2006 3:04 pm",
	"Mon, Jan 02,2006 15:04:05 MST",
	"Mon Jan 02 2006 15:04:05 -0700",
	"Mon, 02/01/2006",
	"Monday, 2. January 2006 - 15:04",
	"Monday 02 January 2006",
	"Monday, January 2, 2006 15:04:05 MST",
	"Monday, January 2, 2006 03:04 PM",
	"Monday, January 2, 2006",
	"Monday, January 02, 2006",
	"Monday, 2 January 2006 15:04:05 MST",
	"Monday, 2 January 2006 15:04:05 -0700",
	"Monday, 2 Jan 2006 15:04:05 MST",
	"Monday, 2 Jan 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05 MST",
	"Monday, 02 January 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05",
	"Monday, January 02, 2006 - 3:04pm",
	"Monday, January 2, 2006 - 3:04pm",
	"Mon, 01/02/2006 - 15:04",
	"Mon, 2 January 2006 15:04 MST",
	"Mon, 2 January 2006, 15:04 -0700",
	"Mon, 2 January 2006, 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 MST",
	"Mon, 2 January 2006 15:04:05 -0700",
	"Mon, 2 January 2006",
	"Mon, 2 Jan 2006 3:04:05 PM -0700",
	"Mon, 2 Jan 2006 15:4:5 MST",
	"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
	"Mon, 2, Jan 2006 15:4",
	"Mon, 2 Jan 2006 15:04 MST",
	"Mon, 2 Jan 2006, 15:04 -0700",
	"Mon, 2 Jan 2006 15:04 -0700",
	"Mon, 2 Jan 2006 15:04:05 UT",
	"Mon, 2 Jan 2006 15:04:05MST",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon 2 Jan 2006 15:04:05 MST",
	"mon,2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 -0700 MST",
	"Mon, 2 Jan 2006 15:04:05-0700",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05",
	"Mon, 2 Jan 2006 15:04",
	"Mon, 02 Jan 2006, 15:04",
	"Mon, 2 Jan 2006, 15:04",
	"Mon,2 Jan 2006",
	"Mon, 2 Jan 2006",
	"Mon, 2 Jan 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 MST",
	"Mon, 2 Jan 06 15:04:05 -0700",
	"Mon, 2006-01-02 15:04",
	"Mon,02 January 2006 14:04:05 MST",
	"Mon, 02 January 2006",
	"Mon, 02 Jan 2006 3:04:05 PM MST",
	"Mon, 02 Jan 2006 15 -0700",
	"Mon,02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15:04 -0700",
	"Mon, 02 Jan 2006 15:04:05 Z",
	"Mon, 02 Jan 2006 15:04:05 UT",
	"Mon, 02 Jan 2006 15:04:05 MST-07:00",
	"Mon, 02 Jan 2006 15:04:05 MST -0700",
	"Mon, 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon , 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 GMT-0700",
	"Mon,02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 -07:00",
	"Mon, 02 Jan 2006 15:04:05 --0700",
	"Mon 02 Jan 2006 15:04:05 -0700",
	"Mon 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 -07",
	"Mon, 02 Jan 2006 15:04:05 00",
	"Mon, 02 Jan 2006 15:04:05",
	"Mon, 02 Jan 2006",
	"Mon, 02 Jan 06 15:04:05 MST",
	"Mon, 02 Jan 2006 3:04 PM MST",
	"Mon Jan 02 2006 15:04:05 MST",
	"Mon, 01 02 2006 15:04:05 -0700",
	"Mon, 2th Jan 2006 15:05:05 MST",
	"Jan. 2, 2006, 3:04 a.m.",
	"fri, 02 jan 2006 15:04:05 -0700",
	"January 02 2006 03:04:05 PM",
	"January 2, 2006 3:04 PM",
	"January 2, 2006, 3:04 p.m.",
	"January 2, 2006 15:04:05 MST",
	"January 2, 2006 15:04:05",
	"January 2, 2006 03:04 PM",
	"January 2, 2006",
	"January 02, 2006 15:04:05 MST",
	"January 02, 2006 15:04",
	"January 02, 2006 03:04 PM",
	"January 02, 2006",
	"Jan 2, 2006 3:04:05 PM MST",
	"Jan 2, 2006 3:04:05 PM",
	"Jan 2, 2006 15:04:05 MST",
	"Jan 2, 2006",
	"Jan 02 2006 03:04:05PM",
	"Jan 02, 2006",
	"6/1/2 15:04",
	"6-1-2 15:04",
	"2 January 2006 15:04:05 MST",
	"2 January 2006 15:04:05 -0700",
	"2 January 2006",
	"2 Jan 2006 15:04:05 Z",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006",
	"2 Jan 2006 15:04 MST",
	"2.1.2006 15:04:05",
	"2/1/2006",
	"2-1-2006",
	"2006 January 02",
	"2006-1-2T15:04:05Z",
	"2006-1-2 15:04:05",
	"2006-1-2",
	"2006-01-02T15:04:05-07:00Z",
	"2006-1-02T15:04:05Z",
	"2006-01-02T15:04Z",
	"2006-01-02T15:04-07:00",
	"2006-01-02T15:04:05Z",
	"2006-01-02T15:04:05-07:00:00",
	"2006-01-02T15:04:05:-0700",
	"2006-01-02T15:04:05-0700",
	"2006-01-02T15:04:05-07:00",
	"2006-01-02T15:04:05 -0700",
	"2006-01-02T15:04:05:00",
	"2006-01-02T15:04:05",
	"2006-01-02T15:04",
	"2006-01-02 at 15:04:05",
	"2006-01-02 15:04:05Z",
	"2006-01-02 15:04:05 MST",
	"2006-01-02 15:04:05-0700",
	"2006-01-02 15:04:05-07:00",
	"2006-01-02 15:04:05 -0700",
	"2006-01-02 15:04",
	"2006-01-02 00:00:00.0 15:04:05.0 -0700",
	"2006/01/02",
	"2006-01-02",
	"15:04 02.01.2006 -0700",
	"1/2/2006 3:04 PM MST",
	"1/2/2006 3:04:05 PM MST",
	"1/2/2006 3:04:05 PM",
	"1/2/2006 15:04:05 MST",
	"1/2/2006",
	"06/1/2 15:04",
	"06-1-2 15:04",
	"02 Monday, Jan 2006 15:04",
	"02 Jan 2006 15:04 MST",
	"02 Jan 2006 15:04:05 UT",
	"02 Jan 2006 15:04:05 MST",
	"02 Jan 2006 15:04:05 -0700",
	"02 Jan 2006 15:04:05",
	"02 Jan 2006",
	"02/01/2006 15:04 MST",
	"02-01-2006 15:04:05 MST",
	"02.01.2006 15:04:05",
	"02/01/2006 15:04:05",
	"02.01.2006 15:04",
	"02/01/2006 - 15:04",
	"02.01.2006 -0700",
	"02/01/2006",
	"02-01-2006",
	"01/02/2006 3:04 PM",
	"01/02/2006 15:04:05 MST",
	"01/02/2006 - 15:04",
	"01/02/2006",
	"01-02-2006",
	"Jan. 2006",
	"Jan. 2, 2006, 03:04 p.m.",
	"2006-01-02 15:04:05 -07:00",
	"2 January, 2006",
	"2 Jan 2006 MST",
	"Mon, January 2, 2006 at 03:04 PM MST",
	"Jan 2, 2006 15:04 MST",
	"01/02/2006 3:04 pm MST",
	"Mon, 2th Jan 2006 15:04:05 MST",
	"Mon, 2rd Jan 2006 15:04:05 MST",
	"Mon, 2nd Jan 2006 15:04:05 MST",
	"Mon, 2st Jan 2006 15:04:05 MST",
}
|
||||
|
||||
// invalidTimezoneReplacer rewrites timezone spellings that Go's time package
// cannot parse into abbreviations it understands. NOTE(review): mapping IANA
// names to a fixed abbreviation ("CET", "PDT") ignores DST transitions —
// presumably an accepted approximation for feed dates; confirm before reuse.
var invalidTimezoneReplacer = strings.NewReplacer(
	"Europe/Brussels", "CET",
	"America/Los_Angeles", "PDT",
	"GMT+0000 (Coordinated Universal Time)", "GMT",
	"GMT-", "GMT -", // insert a space so the offset parses as a separate token
)
|
||||
|
||||
// invalidLocalizedDateReplacer rewrites non-English (mostly German and
// French) day/month names, plus a few malformed English weekday
// abbreviations, into the English forms Go's time layouts expect.
// strings.NewReplacer resolves overlapping/duplicate patterns (e.g. the two
// "mai " pairs) by argument order: the first-listed match wins.
var invalidLocalizedDateReplacer = strings.NewReplacer(
	"Mo,", "Mon,",
	"Di,", "Tue,",
	"Mi,", "Wed,",
	"Do,", "Thu,",
	"Fr,", "Fri,",
	"Sa,", "Sat,",
	"So,", "Sun,",
	"Mär ", "Mar ",
	"Mai ", "May ",
	"Okt ", "Oct ",
	"Dez ", "Dec ",
	"lun,", "Mon,",
	"mar,", "Tue,",
	"mer,", "Wed,",
	"jeu,", "Thu,",
	"ven,", "Fri,",
	"sam,", "Sat,",
	"dim,", "Sun,",
	"lun.", "Mon",
	"mar.", "Tue",
	"mer.", "Wed",
	"jeu.", "Thu",
	"ven.", "Fri",
	"sam.", "Sat",
	"dim.", "Sun",
	"Lundi,", "Monday,",
	"Mardi,", "Tuesday,",
	"Mercredi,", "Wednesday,",
	"Jeudi,", "Thursday,",
	"Vendredi,", "Friday,",
	"Samedi,", "Saturday,",
	"Dimanche,", "Sunday,",
	"jan.", "January ",
	"feb.", "February ",
	"mars.", "March ",
	"avril.", "April ",
	"mai.", "May ",
	"juin.", "June ",
	"juil.", "July",
	"août.", "August",
	"sept.", "September",
	"oct.", "October",
	"nov.", "November",
	"dec.", "December",
	"déc.", "December",
	"janvier ", "January ",
	"février ", "February ",
	"mars ", "March ",
	"avril ", "April ",
	"mai ", "May ",
	"juin ", "June ",
	"juillet ", "July",
	"août ", "August",
	"septembre ", "September",
	"octobre ", "October",
	"november ", "November",
	"décembre ", "December",
	"Janvier", "January",
	"Février", "February",
	"Mars", "March",
	"Avril", "April",
	"Mai", "May",
	"Juin", "June",
	"Juillet", "July",
	"Août", "August",
	"Septembre", "September",
	"Octobre", "October",
	"Novembre", "November",
	"Décembre", "December",
	"avr ", "Apr ",
	"mai ", "May ",
	"jui ", "Jun ",
	"juin ", "June ",
	"Thurs,", "Thu,",
	"Thur,", "Thu,",
)
|
||||
|
||||
// Parse parses a given date string using a large
|
||||
// list of commonly found feed date formats.
|
||||
func Parse(rawInput string) (t time.Time, err error) {
|
||||
timestamp, err := strconv.ParseInt(rawInput, 10, 64)
|
||||
if err == nil {
|
||||
return time.Unix(timestamp, 0), nil
|
||||
}
|
||||
|
||||
processedInput := invalidLocalizedDateReplacer.Replace(rawInput)
|
||||
processedInput = invalidTimezoneReplacer.Replace(processedInput)
|
||||
processedInput = strings.TrimSpace(processedInput)
|
||||
if processedInput == "" {
|
||||
return t, errors.New(`date parser: empty value`)
|
||||
}
|
||||
|
||||
for _, layout := range dateFormats {
|
||||
switch layout {
|
||||
case time.RFC822, time.RFC850, time.RFC1123:
|
||||
if t, err = parseLocalTimeDates(layout, processedInput); err == nil {
|
||||
t = checkTimezoneRange(t)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if t, err = time.Parse(layout, processedInput); err == nil {
|
||||
t = checkTimezoneRange(t)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
err = fmt.Errorf(`date parser: failed to parse date "%s"`, rawInput)
|
||||
return
|
||||
}
|
||||
|
||||
// According to Golang documentation:
|
||||
//
|
||||
// RFC822, RFC850, and RFC1123 formats should be applied only to local times.
|
||||
// Applying them to UTC times will use "UTC" as the time zone abbreviation,
|
||||
// while strictly speaking those RFCs require the use of "GMT" in that case.
|
||||
func parseLocalTimeDates(layout, ds string) (t time.Time, err error) {
|
||||
loc := time.UTC
|
||||
|
||||
// Workaround for dates that don't use GMT.
|
||||
if strings.HasSuffix(ds, "PST") || strings.HasSuffix(ds, "PDT") {
|
||||
loc, _ = time.LoadLocation("America/Los_Angeles")
|
||||
}
|
||||
|
||||
if strings.HasSuffix(ds, "EST") || strings.HasSuffix(ds, "EDT") {
|
||||
loc, _ = time.LoadLocation("America/New_York")
|
||||
}
|
||||
|
||||
return time.ParseInLocation(layout, ds, loc)
|
||||
}
|
||||
|
||||
// https://en.wikipedia.org/wiki/List_of_UTC_offsets
|
||||
// Offset range: westernmost (−12:00) to the easternmost (+14:00)
|
||||
// Avoid "pq: time zone displacement out of range" errors
|
||||
func checkTimezoneRange(t time.Time) time.Time {
|
||||
_, offset := t.Zone()
|
||||
if math.Abs(float64(offset)) > 14*60*60 {
|
||||
t = t.UTC()
|
||||
}
|
||||
return t
|
||||
}
|
237
internal/reader/date/parser_test.go
Normal file
237
internal/reader/date/parser_test.go
Normal file
|
@ -0,0 +1,237 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package date // import "miniflux.app/v2/internal/reader/date"
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestParseEmptyDate ensures whitespace-only input is rejected.
func TestParseEmptyDate(t *testing.T) {
	if _, err := Parse(" "); err == nil {
		t.Fatalf(`Empty dates should return an error`)
	}
}
|
||||
|
||||
// TestParseInvalidDate ensures unparseable input is rejected.
func TestParseInvalidDate(t *testing.T) {
	if _, err := Parse("invalid"); err == nil {
		t.Fatalf(`Invalid dates should return an error`)
	}
}
|
||||
|
||||
// TestParseAtomDate checks RFC 3339 (Atom) parsing: exact timestamp and a
// zero UTC offset for a "+00:00" suffix.
func TestParseAtomDate(t *testing.T) {
	date, err := Parse("2017-12-22T22:09:49+00:00")
	if err != nil {
		t.Fatalf(`Atom dates should be parsed correctly`)
	}

	expectedTS := int64(1513980589)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	_, offset := date.Zone()
	expectedOffset := 0
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
|
||||
// TestParseRSSDateTimezone checks that an IANA timezone name in the date
// string resolves to the expected location, zone name, and offset.
func TestParseRSSDateTimezone(t *testing.T) {
	date, err := Parse("Fri, 31 Mar 2023 20:19:00 America/Los_Angeles")
	if err != nil {
		t.Fatalf(`RSS dates should be parsed correctly`)
	}

	expectedTS := int64(1680319140)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	expectedLocation := "America/Los_Angeles"
	if date.Location().String() != expectedLocation {
		t.Errorf(`The location should be %q instead of %q`, expectedLocation, date.Location())
	}

	name, offset := date.Zone()

	expectedName := "PDT"
	if name != expectedName {
		t.Errorf(`The zone name should be %q instead of %q`, expectedName, name)
	}

	expectedOffset := -25200
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
|
||||
// TestParseRSSDateGMT checks an RFC 1123 date with a GMT abbreviation.
func TestParseRSSDateGMT(t *testing.T) {
	date, err := Parse("Tue, 03 Jun 2003 09:39:21 GMT")
	if err != nil {
		t.Fatalf(`RSS dates should be parsed correctly`)
	}

	expectedTS := int64(1054633161)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	expectedLocation := "GMT"
	if date.Location().String() != expectedLocation {
		t.Errorf(`The location should be %q instead of %q`, expectedLocation, date.Location())
	}

	name, offset := date.Zone()

	expectedName := "GMT"
	if name != expectedName {
		t.Errorf(`The zone name should be %q instead of %q`, expectedName, name)
	}

	expectedOffset := 0
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
|
||||
// TestParseRSSDatePST checks that a bare PST abbreviation resolves to the
// America/Los_Angeles location (see parseLocalTimeDates).
func TestParseRSSDatePST(t *testing.T) {
	date, err := Parse("Wed, 26 Dec 2018 10:00:54 PST")
	if err != nil {
		t.Fatalf(`RSS dates with PST timezone should be parsed correctly: %v`, err)
	}

	expectedTS := int64(1545847254)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	expectedLocation := "America/Los_Angeles"
	if date.Location().String() != expectedLocation {
		t.Errorf(`The location should be %q instead of %q`, expectedLocation, date.Location())
	}

	name, offset := date.Zone()

	expectedName := "PST"
	if name != expectedName {
		t.Errorf(`The zone name should be %q instead of %q`, expectedName, name)
	}

	expectedOffset := -28800
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
|
||||
// TestParseRSSDateEST checks that a bare EST abbreviation resolves to the
// America/New_York location (see parseLocalTimeDates).
func TestParseRSSDateEST(t *testing.T) {
	date, err := Parse("Wed, 10 Feb 2021 22:46:00 EST")
	if err != nil {
		t.Fatalf(`RSS dates with EST timezone should be parsed correctly: %v`, err)
	}

	expectedTS := int64(1613015160)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	expectedLocation := "America/New_York"
	if date.Location().String() != expectedLocation {
		t.Errorf(`The location should be %q instead of %q`, expectedLocation, date.Location())
	}

	name, offset := date.Zone()

	expectedName := "EST"
	if name != expectedName {
		t.Errorf(`The zone name should be %q instead of %q`, expectedName, name)
	}

	expectedOffset := -18000
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
// TestParseRSSDateOffset checks a numeric "+0100" zone offset.
func TestParseRSSDateOffset(t *testing.T) {
	date, err := Parse("Sun, 28 Oct 2018 13:48:00 +0100")
	if err != nil {
		t.Fatalf(`RSS dates with offset should be parsed correctly: %v`, err)
	}

	expectedTS := int64(1540730880)
	if date.Unix() != expectedTS {
		t.Errorf(`The Unix timestamp should be %v instead of %v`, expectedTS, date.Unix())
	}

	_, offset := date.Zone()
	expectedOffset := 3600
	if offset != expectedOffset {
		t.Errorf(`The offset should be %v instead of %v`, expectedOffset, offset)
	}
}
|
||||
|
||||
// TestParseWeirdDateFormat is a smoke test: every string below has been seen
// in a real-world feed and must parse without error (exact values are not
// checked here).
func TestParseWeirdDateFormat(t *testing.T) {
	dates := []string{
		"Sun, 17 Dec 2017 1:55 PM EST",
		"9 Dec 2016 12:00 GMT",
		"Friday, December 22, 2017 - 3:09pm",
		"Friday, December 8, 2017 - 3:07pm",
		"Thu, 25 Feb 2016 00:00:00 Europe/Brussels",
		"Mon, 09 Apr 2018, 16:04",
		"Di, 23 Jan 2018 00:00:00 +0100",
		"Do, 29 Mär 2018 00:00:00 +0200",
		"mer, 9 avr 2018 00:00:00 +0200",
		"1520932969",
		"Tue 16 Feb 2016, 23:16:00 EDT",
		"Tue, 16 Feb 2016 23:16:00 EDT",
		"Tue, Feb 16 2016 23:16:00 EDT",
		"March 30 2020 07:02:38 PM",
		"Mon, 30 Mar 2020 19:53 +0000",
		"Mon, 03/30/2020 - 19:19",
		"2018-12-12T12:12",
		"2020-11-08T16:20:00-05:00Z",
		"Nov. 16, 2020, 10:57 a.m.",
		"Friday 06 November 2020",
		"Mon, November 16, 2020, 11:12 PM EST",
		"Lundi, 16. Novembre 2020 - 15:54",
		"Thu Nov 12 2020 17:00:00 GMT+0000 (Coordinated Universal Time)",
		"Sat, 11 04 2020 08:51:49 +0100",
		"Mon, 16th Nov 2020 13:16:28 GMT",
		"Nov. 2020",
		"ven., 03 juil. 2020 15:09:58 +0000",
		"Fri, 26/06/2020",
		"Thu, 29 Oct 2020 07:36:03 GMT-07:00",
		"jeu., 02 avril 2020 00:00:00 +0200",
		"Jan. 4, 2016, 12:37 p.m.",
		"2018-10-23 04:07:42 +00:00",
		"5 August, 2019",
		"mar., 01 déc. 2020 16:11:02 +0000",
		"Thurs, 15 Oct 2020 00:00:39 +0000",
		"Thur, 19 Nov 2020 00:00:39 +0000",
		"26 Sep 2022 GMT",
		"Thu, June 22, 2023 at 01:11 PM EDT",
		"Apr 16, 2023 08:01 GMT",
		"Jun 23, 2023 19:00 GMT",
		"09/15/2014 4:20 pm PST",
		"Fri, 23rd Jun 2023 09:32:20 GMT",
	}

	for _, date := range dates {
		if _, err := Parse(date); err != nil {
			t.Errorf(`Unable to parse date: %q`, date)
		}
	}
}
|
||||
|
||||
// TestParseDateWithTimezoneOutOfRange checks that an impossible zone offset
// (-23:00) is normalized to UTC by checkTimezoneRange.
func TestParseDateWithTimezoneOutOfRange(t *testing.T) {
	date, err := Parse("2023-05-29 00:00:00-23:00")

	if err != nil {
		t.Errorf(`Unable to parse date: %v`, err)
	}

	_, offset := date.Zone()
	if offset != 0 {
		t.Errorf(`The offset should be reinitialized to 0 instead of %v because it's out of range`, offset)
	}
}
|
37
internal/reader/encoding/encoding.go
Normal file
37
internal/reader/encoding/encoding.go
Normal file
|
@ -0,0 +1,37 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package encoding // import "miniflux.app/v2/internal/reader/encoding"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
// CharsetReader is used when the XML encoding is specified for the input document.
|
||||
//
|
||||
// The document is converted in UTF-8 only if a different encoding is specified
|
||||
// and the document is not already UTF-8.
|
||||
//
|
||||
// Several edge cases could exists:
|
||||
//
|
||||
// - Feeds with encoding specified only in Content-Type header and not in XML document
|
||||
// - Feeds with encoding specified in both places
|
||||
// - Feeds with encoding specified only in XML document and not in HTTP header
|
||||
// - Feeds with wrong encoding defined and already in UTF-8
|
||||
func CharsetReader(label string, input io.Reader) (io.Reader, error) {
|
||||
buffer, _ := io.ReadAll(input)
|
||||
r := bytes.NewReader(buffer)
|
||||
|
||||
// The document is already UTF-8, do not do anything (avoid double-encoding).
|
||||
// That means the specified encoding in XML prolog is wrong.
|
||||
if utf8.Valid(buffer) {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// Transform document to UTF-8 from the specified encoding in XML prolog.
|
||||
return charset.NewReaderLabel(label, r)
|
||||
}
|
226
internal/reader/handler/handler.go
Normal file
226
internal/reader/handler/handler.go
Normal file
|
@ -0,0 +1,226 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package handler // import "miniflux.app/v2/internal/reader/handler"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/locale"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/icon"
|
||||
"miniflux.app/v2/internal/reader/parser"
|
||||
"miniflux.app/v2/internal/reader/processor"
|
||||
"miniflux.app/v2/internal/storage"
|
||||
"miniflux.app/v2/internal/timer"
|
||||
)
|
||||
|
||||
// Localized error message formats used by CreateFeed and RefreshFeed.
var (
	errDuplicate        = "This feed already exists (%s)"
	errNotFound         = "Feed %d not found"
	errCategoryNotFound = "Category not found for this user"
)
|
||||
|
||||
// CreateFeed fetch, parse and store a new feed.
//
// It validates the target category, downloads and parses the feed, copies
// the creation-request options onto the new subscription, processes its
// entries, persists it, and finally tries to fetch a feed icon. Returns the
// stored subscription or the first error encountered.
func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model.FeedCreationRequest) (*model.Feed, error) {
	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[CreateFeed] FeedURL=%s", feedCreationRequest.FeedURL))

	user, storeErr := store.UserByID(userID)
	if storeErr != nil {
		return nil, storeErr
	}

	// The target category must belong to this user.
	if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) {
		return nil, errors.NewLocalizedError(errCategoryNotFound)
	}

	// Build the HTTP client from the per-feed options of the request.
	request := client.NewClientWithConfig(feedCreationRequest.FeedURL, config.Opts)
	request.WithCredentials(feedCreationRequest.Username, feedCreationRequest.Password)
	request.WithUserAgent(feedCreationRequest.UserAgent)
	request.WithCookie(feedCreationRequest.Cookie)
	request.AllowSelfSignedCertificates = feedCreationRequest.AllowSelfSignedCertificates

	if feedCreationRequest.FetchViaProxy {
		request.WithProxy()
	}

	response, requestErr := browser.Exec(request)
	if requestErr != nil {
		return nil, requestErr
	}

	// Duplicate detection uses the effective URL, i.e. after redirects.
	if store.FeedURLExists(userID, response.EffectiveURL) {
		return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
	}

	subscription, parseErr := parser.ParseFeed(response.EffectiveURL, response.BodyAsString())
	if parseErr != nil {
		return nil, parseErr
	}

	// Copy the per-feed options from the creation request onto the new feed.
	subscription.UserID = userID
	subscription.UserAgent = feedCreationRequest.UserAgent
	subscription.Cookie = feedCreationRequest.Cookie
	subscription.Username = feedCreationRequest.Username
	subscription.Password = feedCreationRequest.Password
	subscription.Crawler = feedCreationRequest.Crawler
	subscription.Disabled = feedCreationRequest.Disabled
	subscription.IgnoreHTTPCache = feedCreationRequest.IgnoreHTTPCache
	subscription.AllowSelfSignedCertificates = feedCreationRequest.AllowSelfSignedCertificates
	subscription.FetchViaProxy = feedCreationRequest.FetchViaProxy
	subscription.ScraperRules = feedCreationRequest.ScraperRules
	subscription.RewriteRules = feedCreationRequest.RewriteRules
	subscription.BlocklistRules = feedCreationRequest.BlocklistRules
	subscription.KeeplistRules = feedCreationRequest.KeeplistRules
	subscription.UrlRewriteRules = feedCreationRequest.UrlRewriteRules
	subscription.WithCategoryID(feedCreationRequest.CategoryID)
	subscription.WithClientResponse(response)
	subscription.CheckedNow()

	// Entries are processed before the feed is persisted.
	processor.ProcessFeedEntries(store, subscription, user, true)

	if storeErr := store.CreateFeed(subscription); storeErr != nil {
		return nil, storeErr
	}

	logger.Debug("[CreateFeed] Feed saved with ID: %d", subscription.ID)

	// Icon lookup is best-effort: checkFeedIcon only logs failures.
	checkFeedIcon(
		store,
		subscription.ID,
		subscription.SiteURL,
		subscription.IconURL,
		feedCreationRequest.UserAgent,
		feedCreationRequest.FetchViaProxy,
		feedCreationRequest.AllowSelfSignedCertificates,
	)
	return subscription, nil
}
|
||||
|
||||
// RefreshFeed refreshes a feed.
//
// It re-downloads the feed (honoring HTTP cache headers unless disabled),
// re-parses and stores its entries when the content changed, updates the
// schedule/error state, and returns the first error encountered. On request,
// parse, or store failures the error is also recorded on the feed via
// store.UpdateFeedError.
func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool) error {
	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[RefreshFeed] feedID=%d", feedID))
	user, storeErr := store.UserByID(userID)
	if storeErr != nil {
		return storeErr
	}

	// Printer localizes error messages in the user's language.
	printer := locale.NewPrinter(user.Language)

	originalFeed, storeErr := store.FeedByID(userID, feedID)
	if storeErr != nil {
		return storeErr
	}

	if originalFeed == nil {
		return errors.NewLocalizedError(errNotFound, feedID)
	}

	// The entry-frequency scheduler needs the weekly entry count to
	// compute the next check interval.
	weeklyEntryCount := 0
	if config.Opts.PollingScheduler() == model.SchedulerEntryFrequency {
		var weeklyCountErr error
		weeklyEntryCount, weeklyCountErr = store.WeeklyFeedEntryCount(userID, feedID)
		if weeklyCountErr != nil {
			return weeklyCountErr
		}
	}

	originalFeed.CheckedNow()
	originalFeed.ScheduleNextCheck(weeklyEntryCount)

	request := client.NewClientWithConfig(originalFeed.FeedURL, config.Opts)
	request.WithCredentials(originalFeed.Username, originalFeed.Password)
	request.WithUserAgent(originalFeed.UserAgent)
	request.WithCookie(originalFeed.Cookie)
	request.AllowSelfSignedCertificates = originalFeed.AllowSelfSignedCertificates

	// Conditional requests (ETag/Last-Modified) unless caching is disabled.
	if !originalFeed.IgnoreHTTPCache {
		request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
	}

	if originalFeed.FetchViaProxy {
		request.WithProxy()
	}

	response, requestErr := browser.Exec(request)
	if requestErr != nil {
		originalFeed.WithError(requestErr.Localize(printer))
		store.UpdateFeedError(originalFeed)
		return requestErr
	}

	// Redirects may land on a URL already used by another feed of this user.
	if store.AnotherFeedURLExists(userID, originalFeed.ID, response.EffectiveURL) {
		storeErr := errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
		originalFeed.WithError(storeErr.Error())
		store.UpdateFeedError(originalFeed)
		return storeErr
	}

	if originalFeed.IgnoreHTTPCache || response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
		logger.Debug("[RefreshFeed] Feed #%d has been modified", feedID)

		updatedFeed, parseErr := parser.ParseFeed(response.EffectiveURL, response.BodyAsString())
		if parseErr != nil {
			originalFeed.WithError(parseErr.Localize(printer))
			store.UpdateFeedError(originalFeed)
			return parseErr
		}

		originalFeed.Entries = updatedFeed.Entries
		processor.ProcessFeedEntries(store, originalFeed, user, forceRefresh)

		// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh
		updateExistingEntries := forceRefresh || !originalFeed.Crawler
		if storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries); storeErr != nil {
			originalFeed.WithError(storeErr.Error())
			store.UpdateFeedError(originalFeed)
			return storeErr
		}

		// We update caching headers only if the feed has been modified,
		// because some websites don't return the same headers when replying with a 304.
		originalFeed.WithClientResponse(response)
		checkFeedIcon(
			store,
			originalFeed.ID,
			originalFeed.SiteURL,
			updatedFeed.IconURL,
			originalFeed.UserAgent,
			originalFeed.FetchViaProxy,
			originalFeed.AllowSelfSignedCertificates,
		)
	} else {
		logger.Debug("[RefreshFeed] Feed #%d not modified", feedID)
	}

	// A successful fetch (modified or not) clears the error counter.
	originalFeed.ResetErrorCounter()

	if storeErr := store.UpdateFeed(originalFeed); storeErr != nil {
		originalFeed.WithError(storeErr.Error())
		store.UpdateFeedError(originalFeed)
		return storeErr
	}

	return nil
}
|
||||
|
||||
func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL, iconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) {
|
||||
if !store.HasIcon(feedID) {
|
||||
icon, err := icon.FindIcon(websiteURL, iconURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
|
||||
if err != nil {
|
||||
logger.Debug(`[CheckFeedIcon] %v (feedID=%d websiteURL=%s)`, err, feedID, websiteURL)
|
||||
} else if icon == nil {
|
||||
logger.Debug(`[CheckFeedIcon] No icon found (feedID=%d websiteURL=%s)`, feedID, websiteURL)
|
||||
} else {
|
||||
if err := store.CreateFeedIcon(feedID, icon); err != nil {
|
||||
logger.Debug(`[CheckFeedIcon] %v (feedID=%d websiteURL=%s)`, err, feedID, websiteURL)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
196
internal/reader/icon/finder.go
Normal file
196
internal/reader/icon/finder.go
Normal file
|
@ -0,0 +1,196 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package icon // import "miniflux.app/v2/internal/reader/icon"
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
stdlib_url "net/url"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// FindIcon try to find the website's icon.
|
||||
func FindIcon(websiteURL, iconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
|
||||
if iconURL == "" {
|
||||
rootURL := url.RootURL(websiteURL)
|
||||
logger.Debug("[FindIcon] Trying to find an icon: rootURL=%q websiteURL=%q userAgent=%q", rootURL, websiteURL, userAgent)
|
||||
|
||||
clt := client.NewClientWithConfig(rootURL, config.Opts)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
|
||||
if fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download website index page: %v", err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("icon: unable to download website index page: status=%d", response.StatusCode)
|
||||
}
|
||||
|
||||
iconURL, err = parseDocument(rootURL, response.Body)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(iconURL, "data:") {
|
||||
return parseImageDataURL(iconURL)
|
||||
}
|
||||
|
||||
logger.Debug("[FindIcon] Fetching icon => %s", iconURL)
|
||||
icon, err := downloadIcon(iconURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
}
|
||||
|
||||
func parseDocument(websiteURL string, data io.Reader) (string, error) {
|
||||
queries := []string{
|
||||
"link[rel='shortcut icon']",
|
||||
"link[rel='Shortcut Icon']",
|
||||
"link[rel='icon shortcut']",
|
||||
"link[rel='icon']",
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("icon: unable to read document: %v", err)
|
||||
}
|
||||
|
||||
var iconURL string
|
||||
for _, query := range queries {
|
||||
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
||||
if href, exists := s.Attr("href"); exists {
|
||||
iconURL = strings.TrimSpace(href)
|
||||
}
|
||||
})
|
||||
|
||||
if iconURL != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if iconURL == "" {
|
||||
iconURL = url.RootURL(websiteURL) + "favicon.ico"
|
||||
} else {
|
||||
iconURL, _ = url.AbsoluteURL(websiteURL, iconURL)
|
||||
}
|
||||
|
||||
return iconURL, nil
|
||||
}
|
||||
|
||||
func downloadIcon(iconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) (*model.Icon, error) {
|
||||
clt := client.NewClientWithConfig(iconURL, config.Opts)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
if fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download iconURL: %v", err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("icon: unable to download icon: status=%d", response.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to read downloaded icon: %v", err)
|
||||
}
|
||||
|
||||
if len(body) == 0 {
|
||||
return nil, fmt.Errorf("icon: downloaded icon is empty, iconURL=%s", iconURL)
|
||||
}
|
||||
|
||||
icon := &model.Icon{
|
||||
Hash: crypto.HashFromBytes(body),
|
||||
MimeType: response.ContentType,
|
||||
Content: body,
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
}
|
||||
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs#syntax
|
||||
// data:[<mediatype>][;base64],<data>
|
||||
func parseImageDataURL(value string) (*model.Icon, error) {
|
||||
var mediaType string
|
||||
var encoding string
|
||||
|
||||
if !strings.HasPrefix(value, "data:") {
|
||||
return nil, fmt.Errorf(`icon: invalid data URL (missing data:) %q`, value)
|
||||
}
|
||||
|
||||
value = value[5:]
|
||||
|
||||
comma := strings.Index(value, ",")
|
||||
if comma < 0 {
|
||||
return nil, fmt.Errorf(`icon: invalid data URL (no comma) %q`, value)
|
||||
}
|
||||
|
||||
data := value[comma+1:]
|
||||
semicolon := strings.Index(value[0:comma], ";")
|
||||
|
||||
if semicolon > 0 {
|
||||
mediaType = value[0:semicolon]
|
||||
encoding = value[semicolon+1 : comma]
|
||||
} else {
|
||||
mediaType = value[0:comma]
|
||||
}
|
||||
|
||||
if !strings.HasPrefix(mediaType, "image/") {
|
||||
return nil, fmt.Errorf(`icon: invalid media type %q`, mediaType)
|
||||
}
|
||||
|
||||
var blob []byte
|
||||
switch encoding {
|
||||
case "base64":
|
||||
var err error
|
||||
blob, err = base64.StdEncoding.DecodeString(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`icon: invalid data %q (%v)`, value, err)
|
||||
}
|
||||
case "":
|
||||
decodedData, err := stdlib_url.QueryUnescape(data)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(`icon: unable to decode data URL %q`, value)
|
||||
}
|
||||
blob = []byte(decodedData)
|
||||
default:
|
||||
return nil, fmt.Errorf(`icon: unsupported data URL encoding %q`, value)
|
||||
}
|
||||
|
||||
if len(blob) == 0 {
|
||||
return nil, fmt.Errorf(`icon: empty data URL %q`, value)
|
||||
}
|
||||
|
||||
icon := &model.Icon{
|
||||
Hash: crypto.HashFromBytes(blob),
|
||||
Content: blob,
|
||||
MimeType: mediaType,
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
}
|
103
internal/reader/icon/finder_test.go
Normal file
103
internal/reader/icon/finder_test.go
Normal file
|
@ -0,0 +1,103 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package icon // import "miniflux.app/v2/internal/reader/icon"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseImageDataURL(t *testing.T) {
|
||||
iconURL := "data:image/webp;base64,UklGRhQJAABXRUJQVlA4TAcJAAAvv8AvEIU1atuOza3OCSaanSeobUa17T61bdu2bVtRbdvtDmrb7gSTdibJXOG81/d9z/vsX3utCLi1bbuJ3hKeVEymRRuaSnCVSBWIBmwP410h0IHJXDyfZCfRNhklFS/sufGPbPHPjT0vVJRkhE1BwxFZ5EhDQVjkrEjIJokVOVHMhAuyyoUpUUCbDbLLhjbRFkO+kWG+GRLT0+YTWeaTNjEdW2SaLTEtU2SbOTGVnAuyzY0nYgobZJwtMZkxD2ScB2NiEg2yTkOQcULWOZFRIvOU1Mg8FS/IPC8ckHkOXJF5riRknoT/pb1t6iwPetFIH3jNY660i/khw/3dq4W09ZbNIbN1TjOeFD2iB2T1KmIM0x0yuhOxbod81vueWK0GQDa3IuZ1kM2bifkdZPM94s4CuRxN3GUhl2KvC7kUez3I5TjiLge5/Ji4s0AuBxPzO8jmbsS8GrLZ4G9itVoM8nkssW6CjLb3BDFGaoCcdnU/KXxMb8hrnZ18Ttr82UHqILvtrO50j/vOaDKpyY/ecKWNdYJst1MP/7fxHwtYyprWtrGNrG0pfcyqDjI7r22d6V4faCJttfjOa4Y6155WMwuUpsEw5spQjW62d7tvif+H4YapCAkFYkaofB1DNJEaIqFAzAgVdrCTkaS2SCgQM0Jla/uQ1BoJBWJGqKTBTaT2SCgQM0IFfXxMEkBCgZgR/I2MJSkgoUDMCPaWmkkSSCgQM4K7pmaSBhIKxIxgLqCRJIKEAjEjePWGk1SQUCBmBO8kksgoj0BCgZgRrDn8Q+zfDXKkzaxt0gb2coX3SMVNnnG85XSAlAIxI1hXEneEzbWH6fsYpJX4zV52mlXVQ2qBmBGcWY0jXquTdYC21/En8YY7z7q6QoqBmBGc44jXag8o7Ot3Yp0DiQZiRnDeI97FYGyglTj/mgvSDMSMYCxGvG91BWcQsa6BNAMxIxgHEe9gsBbVSpwxekCSgZgRjCHEGqcBvBeJtRckGYgZwfiGWA+CeSixnoAkAzEjFDcQ73AwBxCrST2kGIgZobgP8VYDs4MWYi0LKQZiRihej3izgvsZsfaEFAMxIxRvR6yJ2oP7IrFOhxQDMSMU70+sRrAfIdYNkGIgZoTi/Yn1I9gDiTUQUgzEjFC8P7F+BHsgsQZCioGYEYp3IlYj2A8TayCkGIgZoXgT4nUE91ViXQ0pBmJGKF6GePOC+w2xTocUAzEjFPcm3sZgdtNKrH0gxUDMCMZvxDoXzDWJtxqkGIgZwXicWO+CeT6xWvWCFAMxIxgnEm9xsNr5mlifQJKBmBGMJYl3K1hbEO8aSDIQM4JR52tiTbQMGPU+It56kGQgZgTndOJ9JEDxecT7XntIMhAzgjO7ZuI9rwGK9tJKvLMhzUDMCNZNxHxXP2izi0u0Em+cWSHNQMwI1hyaiDneXVbTHqad0zF+IO4FkGggZgTveOKP9qLbXOo813vYl8T/XW9INBAzgtfBf0ntdoBUAzEjmPP5m9TqVkg2EDOCu6ZmUps3dYFkAzEj2NtoIbV4z4yQbiBmBH9jY0j1R5gJEg7EjFBBHx+Taj+kAVIOxIxQSReXGU+q2ewYdZB0IGaEyhZzj4mkam/oD4kHYkaosI8PSJW+tb06SD0QM0JFnZyjhVRnuJ3UQ/qBmBEqWcQIUpU/3GAVKEUgZoQKttNEKh/nZWdaVXsoSSBmBP8kraToAdd51Pt+MoZM86v3PetOZ9hBfx2hRIGYEewzSeFZ6mBqnZ4mBShlIGYE9xBSeAOUPRAzgtlfCyn6UTcoeyBmBPNZUngalD4QM4LXjxRvDKUPxIzgnUCKl4XSB2JG8J4kxftB6QMxI3jfkeIfzQ9lD8SM4I0hxm/2UQ/lDsSM4I0i1p/usLul9IDyBmJG8D4jfpPvfekDwxS95RlPutMljrGlxdRD2oGYEbyHS
U1a/Ncl1tcR0g3EjODtT2r2l1stC6kGYkbwehhDavi69SHNQMwI5mmkpk+YF1IMxIxgdvIBqWmj7SDBQMwIbl+NpLZnQHqBmBHsdTST2l4GyQViRvDXMprU9hhILRAzQgWLGkZqOsFqkFggZoRKOtrPd6SWX+oMaQViRqhgUcd7QTOp6dGQViBmBLeXw71Pav6LLpBUIGYEb1aXaSIp7AlJBWJGcDo50RiSxtOQVCBmBKOv90gqE/SClAIxIxRvbSxJZyNIqZ35mF2hcC8TSUJnQwm30krMH93jOJtYTX/zaXNhS5m0lq0c7GxDfWoi8R+B8vXRRKx/3GpVdVBBd1sYrImY70PpOhhJrEHmgIpncivxfofSHUCcJttBVU4g1hgoW72fiNFkFajSY8RC2XYkzh5QrRWJhbI9SIxXoGp1GokxHkpWbxwxNoPqDSPGL1CyZYgxXheo3hvEeBdKthMxPoYqfkaMB6BkJxHjVaheMIEYZ0HJziXGO1C9vYizBZTscmKM1R6q1cnnxJioN5TsLOKsCdW6ljhvQtmOIc7jUKVTiXUElG0HYu0O1ejhJmI1mxHKNoBYzTaFiuvs4mfi3Qql6+RfYk10tk5QUXube4OY4y0I5XuUmF/bUxdwO1jRxb4n9uVQwn2J/ZdbbWNWKGpnXhs42SMaSQXfC1DCHhpJJT97we0uca5jHeJYk45znmsN9JJP/UsqnGAtKOWFJJ2ToZwz+J2kcqs6KOkuJJGB2kNZ69xFkrhaeyhvF2+S2v/jICh1T6+TWn9qAJS8m8dITce7WAOUvs6xWkjtnrEYZGFpw0mNXrMB5KKdPXxNqj/OIMtDTjra0eukqhM9azcBsrOg03xMqvSLIXYzM2RqAfu600cmkIr+9oKL7GQRyFyDFe3hDHd4xcd+NZ601ehbIzzuNqfbyxrmhKx219Ns5jN5bj1N6g6pkZB5EldknisHZJ4DL8g8L9TIPBXPyDwlGSdknRMZQYOs0xCTKEjIOImCmMwKGWdDTCHnimxzJSemMkO2WRDTskWm2RHT0eUTWeaTLjE9Q/6QYX4YEm3RYYvssqVDFDDjgqxyYU4UM2JDQjZJbBgRFgVLzsgiZ5YUhE1GSc0Le+48kC0e3NnzQk1JRrQNAA=="
|
||||
icon, err := parseImageDataURL(iconURL)
|
||||
if err != nil {
|
||||
t.Fatalf(`We should be able to parse valid data URL: %v`, err)
|
||||
}
|
||||
|
||||
if icon.MimeType != "image/webp" {
|
||||
t.Fatal(`Invalid mime type parsed`)
|
||||
}
|
||||
|
||||
if icon.Hash == "" {
|
||||
t.Fatal(`Image hash should be computed`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseImageDataURLWithNoEncoding(t *testing.T) {
|
||||
iconURL := `data:image/webp,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E`
|
||||
icon, err := parseImageDataURL(iconURL)
|
||||
if err != nil {
|
||||
t.Fatalf(`We should be able to parse valid data URL: %v`, err)
|
||||
}
|
||||
|
||||
if icon.MimeType != "image/webp" {
|
||||
t.Fatal(`Invalid mime type parsed`)
|
||||
}
|
||||
|
||||
if string(icon.Content) == "Hello, World!" {
|
||||
t.Fatal(`Value should be URL-decoded`)
|
||||
}
|
||||
|
||||
if icon.Hash == "" {
|
||||
t.Fatal(`Image hash should be computed`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseImageDataURLWithNoMediaTypeAndNoEncoding(t *testing.T) {
|
||||
iconURL := `data:,Hello%2C%20World%21`
|
||||
_, err := parseImageDataURL(iconURL)
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect invalid mime type`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidImageDataURLWithBadMimeType(t *testing.T) {
|
||||
_, err := parseImageDataURL("data:text/plain;base64,blob")
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect invalid mime type`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidImageDataURLWithUnsupportedEncoding(t *testing.T) {
|
||||
_, err := parseImageDataURL("data:image/png;base32,blob")
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect unsupported encoding`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidImageDataURLWithNoData(t *testing.T) {
|
||||
_, err := parseImageDataURL("data:image/png;base64,")
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect invalid encoded data`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidImageDataURL(t *testing.T) {
|
||||
_, err := parseImageDataURL("data:image/jpeg")
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect malformed image data URL`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidImageDataURLWithWrongPrefix(t *testing.T) {
|
||||
_, err := parseImageDataURL("data,test")
|
||||
if err == nil {
|
||||
t.Fatal(`We should detect malformed image data URL`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDocumentWithWhitespaceIconURL(t *testing.T) {
|
||||
html := `<link rel="shortcut icon" href="
|
||||
/static/img/favicon.ico
|
||||
">`
|
||||
|
||||
iconURL, err := parseDocument("http://www.example.org/", strings.NewReader(html))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if iconURL != "http://www.example.org/static/img/favicon.ico" {
|
||||
t.Errorf(`Invalid icon URL, got %q`, iconURL)
|
||||
}
|
||||
}
|
202
internal/reader/json/json.go
Normal file
202
internal/reader/json/json.go
Normal file
|
@ -0,0 +1,202 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package json // import "miniflux.app/v2/internal/reader/json"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
// jsonFeed maps the top-level object of a JSON Feed document.
// Both the singular "author" field and the plural "authors" list are
// decoded; GetAuthor prefers the latter when present.
type jsonFeed struct {
	Version    string       `json:"version"`
	Title      string       `json:"title"`
	SiteURL    string       `json:"home_page_url"`
	IconURL    string       `json:"icon"`
	FaviconURL string       `json:"favicon"`
	FeedURL    string       `json:"feed_url"`
	Authors    []jsonAuthor `json:"authors"`
	Author     jsonAuthor   `json:"author"`
	Items      []jsonItem   `json:"items"`
}

// jsonAuthor maps a JSON Feed author object. Only the name is used by
// this package; the URL is decoded but currently unused here.
type jsonAuthor struct {
	Name string `json:"name"`
	URL  string `json:"url"`
}

// jsonItem maps a single entry of a JSON Feed document. Content may appear
// as plain text ("content_text"), HTML ("content_html"), or a summary;
// the Get* methods define the precedence between them.
type jsonItem struct {
	ID            string           `json:"id"`
	URL           string           `json:"url"`
	Title         string           `json:"title"`
	Summary       string           `json:"summary"`
	Text          string           `json:"content_text"`
	HTML          string           `json:"content_html"`
	DatePublished string           `json:"date_published"`
	DateModified  string           `json:"date_modified"`
	Authors       []jsonAuthor     `json:"authors"`
	Author        jsonAuthor       `json:"author"`
	Attachments   []jsonAttachment `json:"attachments"`
	Tags          []string         `json:"tags"`
}

// jsonAttachment maps an item attachment (enclosure). Attachments without
// a URL are dropped by jsonItem.GetEnclosures.
type jsonAttachment struct {
	URL      string `json:"url"`
	MimeType string `json:"mime_type"`
	Title    string `json:"title"`
	Size     int64  `json:"size_in_bytes"`
	Duration int    `json:"duration_in_seconds"`
}
|
||||
|
||||
func (j *jsonFeed) GetAuthor() string {
|
||||
if len(j.Authors) > 0 {
|
||||
return (getAuthor(j.Authors[0]))
|
||||
}
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *jsonFeed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
|
||||
feed := new(model.Feed)
|
||||
|
||||
feed.FeedURL, err = url.AbsoluteURL(baseURL, j.FeedURL)
|
||||
if err != nil {
|
||||
feed.FeedURL = j.FeedURL
|
||||
}
|
||||
|
||||
feed.SiteURL, err = url.AbsoluteURL(baseURL, j.SiteURL)
|
||||
if err != nil {
|
||||
feed.SiteURL = j.SiteURL
|
||||
}
|
||||
|
||||
feed.IconURL = strings.TrimSpace(j.IconURL)
|
||||
|
||||
if feed.IconURL == "" {
|
||||
feed.IconURL = strings.TrimSpace(j.FaviconURL)
|
||||
}
|
||||
|
||||
feed.Title = strings.TrimSpace(j.Title)
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
for _, item := range j.Items {
|
||||
entry := item.Transform()
|
||||
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
|
||||
if err == nil {
|
||||
entry.URL = entryURL
|
||||
}
|
||||
|
||||
if entry.Author == "" {
|
||||
entry.Author = j.GetAuthor()
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetDate() time.Time {
|
||||
for _, value := range []string{j.DatePublished, j.DateModified} {
|
||||
if value != "" {
|
||||
d, err := date.Parse(value)
|
||||
if err != nil {
|
||||
logger.Error("json: %v", err)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return d
|
||||
}
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetAuthor() string {
|
||||
if len(j.Authors) > 0 {
|
||||
return getAuthor(j.Authors[0])
|
||||
}
|
||||
return getAuthor(j.Author)
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetHash() string {
|
||||
for _, value := range []string{j.ID, j.URL, j.Text + j.HTML + j.Summary} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetTitle() string {
|
||||
if j.Title != "" {
|
||||
return j.Title
|
||||
}
|
||||
|
||||
for _, value := range []string{j.Summary, j.Text, j.HTML} {
|
||||
if value != "" {
|
||||
return sanitizer.TruncateHTML(value, 100)
|
||||
}
|
||||
}
|
||||
|
||||
return j.URL
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetContent() string {
|
||||
for _, value := range []string{j.HTML, j.Text, j.Summary} {
|
||||
if value != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (j *jsonItem) GetEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
|
||||
for _, attachment := range j.Attachments {
|
||||
if attachment.URL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: attachment.URL,
|
||||
MimeType: attachment.MimeType,
|
||||
Size: attachment.Size,
|
||||
})
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (j *jsonItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = j.URL
|
||||
entry.Date = j.GetDate()
|
||||
entry.Author = j.GetAuthor()
|
||||
entry.Hash = j.GetHash()
|
||||
entry.Content = j.GetContent()
|
||||
entry.Title = strings.TrimSpace(j.GetTitle())
|
||||
entry.Enclosures = j.GetEnclosures()
|
||||
entry.Tags = j.Tags
|
||||
return entry
|
||||
}
|
||||
|
||||
func getAuthor(author jsonAuthor) string {
|
||||
if author.Name != "" {
|
||||
return strings.TrimSpace(author.Name)
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
23
internal/reader/json/parser.go
Normal file
23
internal/reader/json/parser.go
Normal file
|
@ -0,0 +1,23 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package json // import "miniflux.app/v2/internal/reader/json"
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/model"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct from a JSON feed.
|
||||
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||
feed := new(jsonFeed)
|
||||
decoder := json.NewDecoder(data)
|
||||
if err := decoder.Decode(&feed); err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %q", err)
|
||||
}
|
||||
|
||||
return feed.Transform(baseURL), nil
|
||||
}
|
654
internal/reader/json/parser_test.go
Normal file
654
internal/reader/json/parser_test.go
Normal file
|
@ -0,0 +1,654 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package json // import "miniflux.app/v2/internal/reader/json"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestParseJsonFeed checks that a basic JSON Feed document is mapped to the
// internal model: feed metadata (title, URLs, icon) and per-entry hash, URL,
// title fallback, and content.
func TestParseJsonFeed(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"title": "My Example Feed",
		"icon": "https://micro.blog/jsonfeed/avatar.jpg",
		"favicon": "https://micro.blog/jsonfeed/favicon.png",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"items": [
			{
				"id": "2",
				"content_text": "This is a second item.",
				"url": "https://example.org/second-item"
			},
			{
				"id": "1",
				"content_html": "<p>Hello, world!</p>",
				"url": "https://example.org/initial-post"
			}
		]
	}`

	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Title != "My Example Feed" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "https://example.org/feed.json" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	// The "icon" field takes precedence over "favicon".
	if feed.IconURL != "https://micro.blog/jsonfeed/avatar.jpg" {
		t.Errorf("Incorrect icon URL, got: %s", feed.IconURL)
	}

	if len(feed.Entries) != 2 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// Expected hash of the item ID "2" (see crypto.Hash).
	if feed.Entries[0].Hash != "d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "https://example.org/second-item" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	// Untitled items fall back to their content as title.
	if feed.Entries[0].Title != "This is a second item." {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
	}

	if feed.Entries[0].Content != "This is a second item." {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
	}

	// Expected hash of the item ID "1" (see crypto.Hash).
	if feed.Entries[1].Hash != "6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[1].Hash)
	}

	if feed.Entries[1].URL != "https://example.org/initial-post" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
	}

	if feed.Entries[1].Title != "Hello, world!" {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[1].Title)
	}

	if feed.Entries[1].Content != "<p>Hello, world!</p>" {
		t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content)
	}
}
|
||||
|
||||
// TestParsePodcast checks a podcast-style JSON Feed: attachment mapping to
// enclosures (URL, MIME type, size), date parsing with a numeric offset,
// and HTML content taking precedence over plain text.
func TestParsePodcast(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
		"title": "The Record",
		"home_page_url": "http://therecord.co/",
		"feed_url": "http://therecord.co/feed.json",
		"items": [
			{
				"id": "http://therecord.co/chris-parrish",
				"title": "Special #1 - Chris Parrish",
				"url": "http://therecord.co/chris-parrish",
				"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
				"content_html": "Chris has worked at <a href=\"http://adobe.com/\">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href=\"http://aged-and-distilled.com/napkin/\">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href=\"http://www.ci.bainbridge-isl.wa.us/\">Bainbridge Island</a>, a quick ferry ride from Seattle.",
				"summary": "Brent interviews Chris Parrish, co-host of The Record and one-half of Aged & Distilled.",
				"date_published": "2014-05-09T14:04:00-07:00",
				"attachments": [
					{
						"url": "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a",
						"mime_type": "audio/x-m4a",
						"size_in_bytes": 89970236,
						"duration_in_seconds": 6629
					}
				]
			}
		]
	}`

	feed, err := Parse("http://therecord.co/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Title != "The Record" {
		t.Errorf("Incorrect title, got: %s", feed.Title)
	}

	if feed.FeedURL != "http://therecord.co/feed.json" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "http://therecord.co/" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Hash != "6b678e57962a1b001e4e873756563cdc08bbd06ca561e764e0baa9a382485797" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	if feed.Entries[0].URL != "http://therecord.co/chris-parrish" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}

	if feed.Entries[0].Title != "Special #1 - Chris Parrish" {
		t.Errorf(`Incorrect entry title, got: "%s"`, feed.Entries[0].Title)
	}

	// content_html wins over content_text and summary.
	if feed.Entries[0].Content != `Chris has worked at <a href="http://adobe.com/">Adobe</a> and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped <a href="http://aged-and-distilled.com/napkin/">Napkin</a>, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on <a href="http://www.ci.bainbridge-isl.wa.us/">Bainbridge Island</a>, a quick ferry ride from Seattle.` {
		t.Errorf(`Incorrect entry content, got: "%s"`, feed.Entries[0].Content)
	}

	location, _ := time.LoadLocation("America/Vancouver")
	if !feed.Entries[0].Date.Equal(time.Date(2014, time.May, 9, 14, 4, 0, 0, location)) {
		t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
	}

	if len(feed.Entries[0].Enclosures) != 1 {
		t.Fatalf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}

	if feed.Entries[0].Enclosures[0].URL != "http://therecord.co/downloads/The-Record-sp1e1-ChrisParrish.m4a" {
		t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL)
	}

	if feed.Entries[0].Enclosures[0].MimeType != "audio/x-m4a" {
		t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType)
	}

	if feed.Entries[0].Enclosures[0].Size != 89970236 {
		t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size)
	}
}
|
||||
|
||||
// TestParseEntryWithoutAttachmentURL checks that attachments with an empty
// URL are dropped and do not produce enclosures.
func TestParseEntryWithoutAttachmentURL(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"user_comment": "This is a podcast feed. You can add this feed to your podcast client using the following URL: http://therecord.co/feed.json",
		"title": "The Record",
		"home_page_url": "http://therecord.co/",
		"feed_url": "http://therecord.co/feed.json",
		"items": [
			{
				"id": "http://therecord.co/chris-parrish",
				"title": "Special #1 - Chris Parrish",
				"url": "http://therecord.co/chris-parrish",
				"content_text": "Chris has worked at Adobe and as a founder of Rogue Sheep, which won an Apple Design Award for Postage. Chris’s new company is Aged & Distilled with Guy English — which shipped Napkin, a Mac app for visual collaboration. Chris is also the co-host of The Record. He lives on Bainbridge Island, a quick ferry ride from Seattle.",
				"date_published": "2014-05-09T14:04:00-07:00",
				"attachments": [
					{
						"url": "",
						"mime_type": "audio/x-m4a",
						"size_in_bytes": 0
					}
				]
			}
		]
	}`

	feed, err := Parse("http://therecord.co/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// The URL-less attachment must have been filtered out.
	if len(feed.Entries[0].Enclosures) != 0 {
		t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures))
	}
}
|
||||
|
||||
// TestParseFeedWithRelativeURL checks that relative item URLs are resolved
// against the feed's home page URL.
func TestParseFeedWithRelativeURL(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"title": "Example",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"items": [
			{
				"id": "2347259",
				"url": "something.html",
				"date_published": "2016-02-09T14:22:00-07:00"
			}
		]
	}`

	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Entries[0].URL != "https://example.org/something.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}
|
||||
|
||||
// TestParseAuthor checks that the feed-level singular "author" field is
// propagated to entries that declare no author of their own.
func TestParseAuthor(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1",
		"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
		"title": "Brent Simmons’s Microblog",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"author": {
			"name": "Brent Simmons",
			"url": "http://example.org/",
			"avatar": "https://example.org/avatar.png"
		},
		"items": [
			{
				"id": "2347259",
				"url": "https://example.org/2347259",
				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
				"date_published": "2016-02-09T14:22:00-07:00"
			}
		]
	}`

	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Author != "Brent Simmons" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
|
||||
|
||||
// TestParseAuthors checks that the JSON Feed 1.1 plural "authors" list is
// preferred over the deprecated singular "author" field.
func TestParseAuthors(t *testing.T) {
	data := `{
		"version": "https://jsonfeed.org/version/1.1",
		"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
		"title": "Brent Simmons’s Microblog",
		"home_page_url": "https://example.org/",
		"feed_url": "https://example.org/feed.json",
		"author": {
			"name": "This field is deprecated, use authors",
			"url": "http://example.org/",
			"avatar": "https://example.org/avatar.png"
		},
		"authors": [
			{
				"name": "Brent Simmons",
				"url": "http://example.org/",
				"avatar": "https://example.org/avatar.png"
			}
		],
		"items": [
			{
				"id": "2347259",
				"url": "https://example.org/2347259",
				"content_text": "Cats are neat. \n\nhttps://example.org/cats",
				"date_published": "2016-02-09T14:22:00-07:00"
			}
		]
	}`

	feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	// The name from "authors" must win over the deprecated "author".
	if feed.Entries[0].Author != "Brent Simmons" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
|
||||
|
||||
func TestParseFeedWithoutTitle(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Title != "https://example.org/" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithInvalidDate(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "Tomorrow"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
duration := time.Since(feed.Entries[0].Date)
|
||||
if duration.Seconds() > 1 {
|
||||
t.Errorf("Incorrect entry date, got: %v", feed.Entries[0].Date)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutID(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_text": "Some text."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "13b4c5aecd1b6d749afcee968fbf9c80f1ed1bbdbe1aaf25cb34ebd01144bbe9" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithURL(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"url": "https://example.org/item"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "https://example.org/item" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithSummary(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"summary": "This is some text content."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is some text content." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithHTMLContent(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_html": "This is <strong>HTML</strong>."
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "This is HTML." {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedItemWithoutTitleButWithTextContent(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"content_text": "` + strings.Repeat("a", 200) + `"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Title) != 103 {
|
||||
t.Errorf("Incorrect entry title, got: %d", len(feed.Entries[0].Title))
|
||||
}
|
||||
|
||||
if len([]rune(feed.Entries[0].Title)) != 101 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTruncateItemTitleUnicode(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"title": "I’m riding my electric bike and came across this castle. It’s called “Schloss Richmond”. 🚴♂️"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Title) != 110 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if len([]rune(feed.Entries[0].Title)) != 93 {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseItemTitleWithXMLTags(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"title": "</example>"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "</example>" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidJSON(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTags(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"user_comment": "This is a microblog feed. You can add this to your feed reader using the following URL: https://example.org/feed.json",
|
||||
"title": "Brent Simmons’s Microblog",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"author": {
|
||||
"name": "Brent Simmons",
|
||||
"url": "http://example.org/",
|
||||
"avatar": "https://example.org/avatar.png"
|
||||
},
|
||||
"items": [
|
||||
{
|
||||
"id": "2347259",
|
||||
"url": "https://example.org/2347259",
|
||||
"content_text": "Cats are neat. \n\nhttps://example.org/cats",
|
||||
"date_published": "2016-02-09T14:22:00-07:00",
|
||||
"tags": [
|
||||
"tag 1",
|
||||
"tag 2"
|
||||
]
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries[0].Tags) != 2 {
|
||||
t.Errorf("Incorrect number of Tags, got: %d", len(feed.Entries[0].Tags))
|
||||
}
|
||||
|
||||
expected := "tag 2"
|
||||
result := feed.Entries[0].Tags[1]
|
||||
if result != expected {
|
||||
t.Errorf("Incorrect entry tag, got %q instead of %q", result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFavicon(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"favicon": "https://micro.blog/jsonfeed/favicon.png",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if feed.IconURL != "https://micro.blog/jsonfeed/favicon.png" {
|
||||
t.Errorf("Incorrect icon URL, got: %s", feed.IconURL)
|
||||
}
|
||||
}
|
175
internal/reader/media/media.go
Normal file
175
internal/reader/media/media.go
Normal file
|
@ -0,0 +1,175 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package media // import "miniflux.app/v2/internal/reader/media"
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
||||
|
||||
// Element represents XML media elements.
|
||||
type Element struct {
|
||||
MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"`
|
||||
MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"`
|
||||
MediaThumbnails []Thumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
|
||||
MediaDescriptions DescriptionList `xml:"http://search.yahoo.com/mrss/ description"`
|
||||
MediaPeerLinks []PeerLink `xml:"http://search.yahoo.com/mrss/ peerLink"`
|
||||
}
|
||||
|
||||
// AllMediaThumbnails returns all thumbnail elements merged together.
|
||||
func (e *Element) AllMediaThumbnails() []Thumbnail {
|
||||
var items []Thumbnail
|
||||
items = append(items, e.MediaThumbnails...)
|
||||
for _, mediaGroup := range e.MediaGroups {
|
||||
items = append(items, mediaGroup.MediaThumbnails...)
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// AllMediaContents returns all content elements merged together.
|
||||
func (e *Element) AllMediaContents() []Content {
|
||||
var items []Content
|
||||
items = append(items, e.MediaContents...)
|
||||
for _, mediaGroup := range e.MediaGroups {
|
||||
items = append(items, mediaGroup.MediaContents...)
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// AllMediaPeerLinks returns all peer link elements merged together.
|
||||
func (e *Element) AllMediaPeerLinks() []PeerLink {
|
||||
var items []PeerLink
|
||||
items = append(items, e.MediaPeerLinks...)
|
||||
for _, mediaGroup := range e.MediaGroups {
|
||||
items = append(items, mediaGroup.MediaPeerLinks...)
|
||||
}
|
||||
return items
|
||||
}
|
||||
|
||||
// FirstMediaDescription returns the first description element.
|
||||
func (e *Element) FirstMediaDescription() string {
|
||||
description := e.MediaDescriptions.First()
|
||||
if description != "" {
|
||||
return description
|
||||
}
|
||||
|
||||
for _, mediaGroup := range e.MediaGroups {
|
||||
description = mediaGroup.MediaDescriptions.First()
|
||||
if description != "" {
|
||||
return description
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
// Group represents a XML element "media:group".
// It holds the same kinds of children as Element, minus nested groups.
type Group struct {
	MediaContents     []Content       `xml:"http://search.yahoo.com/mrss/ content"`
	MediaThumbnails   []Thumbnail     `xml:"http://search.yahoo.com/mrss/ thumbnail"`
	MediaDescriptions DescriptionList `xml:"http://search.yahoo.com/mrss/ description"`
	MediaPeerLinks    []PeerLink      `xml:"http://search.yahoo.com/mrss/ peerLink"`
}
|
||||
|
// Content represents a XML element "media:content".
type Content struct {
	URL      string `xml:"url,attr"`      // direct URL of the media object
	Type     string `xml:"type,attr"`     // MIME type, may be empty
	FileSize string `xml:"fileSize,attr"` // size in bytes as a string, may be empty
	Medium   string `xml:"medium,attr"`   // broad category: "image", "video", "audio", ...
}
|
||||
|
||||
// MimeType returns the attachment mime type.
|
||||
func (mc *Content) MimeType() string {
|
||||
switch {
|
||||
case mc.Type == "" && mc.Medium == "image":
|
||||
return "image/*"
|
||||
case mc.Type == "" && mc.Medium == "video":
|
||||
return "video/*"
|
||||
case mc.Type == "" && mc.Medium == "audio":
|
||||
return "audio/*"
|
||||
case mc.Type == "" && mc.Medium == "video":
|
||||
return "video/*"
|
||||
case mc.Type != "":
|
||||
return mc.Type
|
||||
default:
|
||||
return "application/octet-stream"
|
||||
}
|
||||
}
|
||||
|
||||
// Size returns the attachment size.
|
||||
func (mc *Content) Size() int64 {
|
||||
if mc.FileSize == "" {
|
||||
return 0
|
||||
}
|
||||
size, _ := strconv.ParseInt(mc.FileSize, 10, 0)
|
||||
return size
|
||||
}
|
||||
|
// Thumbnail represents a XML element "media:thumbnail".
type Thumbnail struct {
	URL string `xml:"url,attr"` // image URL of the thumbnail
}

// MimeType returns the attachment mime type.
// Thumbnails are always images, so a wildcard image type is returned.
func (t *Thumbnail) MimeType() string {
	return "image/*"
}

// Size returns the attachment size.
// The media:thumbnail element carries no size information, hence 0.
func (t *Thumbnail) Size() int64 {
	return 0
}
|
||||
|
// PeerLink represents a XML element "media:peerLink".
type PeerLink struct {
	URL  string `xml:"href,attr"` // P2P link (e.g. a torrent file URL)
	Type string `xml:"type,attr"` // MIME type, may be empty
}

// MimeType returns the attachment mime type, falling back to a generic
// binary stream type when no type attribute is present.
func (p *PeerLink) MimeType() string {
	if p.Type != "" {
		return p.Type
	}
	return "application/octet-stream"
}

// Size returns the attachment size.
// The media:peerLink element carries no size information, hence 0.
func (p *PeerLink) Size() int64 {
	return 0
}
|
||||
|
// Description represents a XML element "media:description".
type Description struct {
	Type        string `xml:"type,attr"`  // "html" for HTML content, anything else is treated as plain text
	Description string `xml:",chardata"`  // the description text itself
}
|
||||
|
||||
// HTML returns the description as HTML.
|
||||
func (d *Description) HTML() string {
|
||||
if d.Type == "html" {
|
||||
return d.Description
|
||||
}
|
||||
|
||||
content := strings.Replace(d.Description, "\n", "<br>", -1)
|
||||
return textLinkRegex.ReplaceAllString(content, `<a href="${1}">${1}</a>`)
|
||||
}
|
||||
|
||||
// DescriptionList represents a list of "media:description" XML elements.
|
||||
type DescriptionList []Description
|
||||
|
||||
// First returns the first non-empty description.
|
||||
func (dl DescriptionList) First() string {
|
||||
for _, description := range dl {
|
||||
contents := description.HTML()
|
||||
if contents != "" {
|
||||
return contents
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
109
internal/reader/media/media_test.go
Normal file
109
internal/reader/media/media_test.go
Normal file
|
@ -0,0 +1,109 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package media // import "miniflux.app/v2/internal/reader/media"
|
||||
|
||||
import "testing"
|
||||
|
// TestContentMimeType exercises Content.MimeType across explicit types,
// medium-derived wildcard types, and the octet-stream fallback.
func TestContentMimeType(t *testing.T) {
	scenarios := []struct {
		inputType, inputMedium, expectedMimeType string
	}{
		{"image/png", "image", "image/png"},
		{"", "image", "image/*"},
		{"", "video", "video/*"},
		{"", "audio", "audio/*"},
		{"", "", "application/octet-stream"},
	}

	for _, scenario := range scenarios {
		content := &Content{Type: scenario.inputType, Medium: scenario.inputMedium}
		result := content.MimeType()
		if result != scenario.expectedMimeType {
			t.Errorf(`Unexpected mime type, got %q instead of %q for type=%q medium=%q`,
				result,
				scenario.expectedMimeType,
				scenario.inputType,
				scenario.inputMedium,
			)
		}
	}
}
|
||||
|
// TestContentSize checks that Content.Size parses the fileSize attribute and
// defaults to 0 when it is empty.
func TestContentSize(t *testing.T) {
	scenarios := []struct {
		inputSize    string
		expectedSize int64
	}{
		{"", 0},
		{"123", int64(123)},
	}

	for _, scenario := range scenarios {
		content := &Content{FileSize: scenario.inputSize}
		result := content.Size()
		if result != scenario.expectedSize {
			t.Errorf(`Unexpected size, got %d instead of %d for %q`,
				result,
				scenario.expectedSize,
				scenario.inputSize,
			)
		}
	}
}
|
||||
|
// TestPeerLinkType checks PeerLink.MimeType: explicit type attribute or the
// octet-stream fallback.
func TestPeerLinkType(t *testing.T) {
	scenarios := []struct {
		inputType        string
		expectedMimeType string
	}{
		{"", "application/octet-stream"},
		{"application/x-bittorrent", "application/x-bittorrent"},
	}

	for _, scenario := range scenarios {
		peerLink := &PeerLink{Type: scenario.inputType}
		result := peerLink.MimeType()
		if result != scenario.expectedMimeType {
			t.Errorf(`Unexpected mime type, got %q instead of %q for %q`,
				result,
				scenario.expectedMimeType,
				scenario.inputType,
			)
		}
	}
}
|
||||
|
// TestDescription checks Description.HTML: HTML content passes through while
// plain text gets newline-to-<br> conversion and URL auto-linking.
func TestDescription(t *testing.T) {
	scenarios := []struct {
		inputType           string
		inputContent        string
		expectedDescription string
	}{
		{"", "", ""},
		{"html", "a <b>c</b>", "a <b>c</b>"},
		{"plain", "a\nhttp://www.example.org/", `a<br><a href="http://www.example.org/">http://www.example.org/</a>`},
	}

	for _, scenario := range scenarios {
		desc := &Description{Type: scenario.inputType, Description: scenario.inputContent}
		result := desc.HTML()
		if result != scenario.expectedDescription {
			t.Errorf(`Unexpected description, got %q instead of %q for %q`,
				result,
				scenario.expectedDescription,
				scenario.inputType,
			)
		}
	}
}
|
||||
|
||||
func TestFirstDescription(t *testing.T) {
|
||||
var descList DescriptionList
|
||||
descList = append(descList, Description{})
|
||||
descList = append(descList, Description{Description: "Something"})
|
||||
|
||||
if descList.First() != "Something" {
|
||||
t.Errorf(`Unexpected description`)
|
||||
}
|
||||
}
|
93
internal/reader/opml/handler.go
Normal file
93
internal/reader/opml/handler.go
Normal file
|
@ -0,0 +1,93 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/storage"
|
||||
)
|
||||
|
// Handler handles the logic for OPML import/export.
type Handler struct {
	store *storage.Storage // database layer used to read feeds and create feeds/categories
}
|
||||
|
||||
// Export exports user feeds to OPML.
|
||||
func (h *Handler) Export(userID int64) (string, error) {
|
||||
feeds, err := h.store.Feeds(userID)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
var subscriptions SubcriptionList
|
||||
for _, feed := range feeds {
|
||||
subscriptions = append(subscriptions, &Subcription{
|
||||
Title: feed.Title,
|
||||
FeedURL: feed.FeedURL,
|
||||
SiteURL: feed.SiteURL,
|
||||
CategoryName: feed.Category.Title,
|
||||
})
|
||||
}
|
||||
|
||||
return Serialize(subscriptions), nil
|
||||
}
|
||||
|
// Import parses and create feeds from an OPML import.
//
// For each subscription in the OPML document that does not already exist for
// the user, the target category is resolved (first category as a fallback,
// otherwise looked up by title and created on demand) and a new feed is
// created in that category. Existing feed URLs are silently skipped.
func (h *Handler) Import(userID int64, data io.Reader) error {
	subscriptions, err := Parse(data)
	if err != nil {
		return err
	}

	for _, subscription := range subscriptions {
		if !h.store.FeedURLExists(userID, subscription.FeedURL) {
			var category *model.Category
			var err error

			if subscription.CategoryName == "" {
				// No category in the OPML outline: fall back to the user's
				// first category.
				category, err = h.store.FirstCategory(userID)
				if err != nil {
					logger.Error("[OPML:Import] %v", err)
					return errors.New("unable to find first category")
				}
			} else {
				category, err = h.store.CategoryByTitle(userID, subscription.CategoryName)
				if err != nil {
					logger.Error("[OPML:Import] %v", err)
					return errors.New("unable to search category by title")
				}

				// Category does not exist yet: create it on the fly.
				if category == nil {
					category, err = h.store.CreateCategory(userID, &model.CategoryRequest{Title: subscription.CategoryName})
					if err != nil {
						logger.Error("[OPML:Import] %v", err)
						return fmt.Errorf(`unable to create this category: %q`, subscription.CategoryName)
					}
				}
			}

			feed := &model.Feed{
				UserID:  userID,
				Title:   subscription.Title,
				FeedURL: subscription.FeedURL,
				SiteURL: subscription.SiteURL,
				Category: category,
			}

			// NOTE(review): any error returned by CreateFeed is ignored here,
			// so a failed feed creation does not abort the import — confirm
			// this best-effort behavior is intentional.
			h.store.CreateFeed(feed)
		}
	}

	return nil
}
|
||||
|
// NewHandler creates a new handler for OPML files backed by the given
// storage layer.
func NewHandler(store *storage.Storage) *Handler {
	return &Handler{store: store}
}
|
73
internal/reader/opml/opml.go
Normal file
73
internal/reader/opml/opml.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
)
|
||||
|
// Specs: http://opml.org/spec2.opml
type opmlDocument struct {
	XMLName  xml.Name              `xml:"opml"`
	Version  string                `xml:"version,attr"`
	Header   opmlHeader            `xml:"head"`
	Outlines opmlOutlineCollection `xml:"body>outline"`
}

// NewOPMLDocument returns an empty OPML document.
// NOTE(review): this exported constructor returns an unexported type;
// callers outside the package can use the value but cannot name its type.
func NewOPMLDocument() *opmlDocument {
	return &opmlDocument{}
}
|
||||
|
// opmlHeader maps the OPML <head> element.
type opmlHeader struct {
	Title       string `xml:"title,omitempty"`
	DateCreated string `xml:"dateCreated,omitempty"`
	OwnerName   string `xml:"ownerName,omitempty"`
}
|
||||
|
// opmlOutline maps an OPML <outline> element, which is either a feed
// subscription (has an xmlUrl) or a category grouping nested outlines.
type opmlOutline struct {
	Title    string                `xml:"title,attr,omitempty"`
	Text     string                `xml:"text,attr"`
	FeedURL  string                `xml:"xmlUrl,attr,omitempty"`
	SiteURL  string                `xml:"htmlUrl,attr,omitempty"`
	Outlines opmlOutlineCollection `xml:"outline,omitempty"`
}
|
||||
|
||||
func (o *opmlOutline) IsSubscription() bool {
|
||||
return strings.TrimSpace(o.FeedURL) != ""
|
||||
}
|
||||
|
||||
func (o *opmlOutline) GetTitle() string {
|
||||
if o.Title != "" {
|
||||
return o.Title
|
||||
}
|
||||
|
||||
if o.Text != "" {
|
||||
return o.Text
|
||||
}
|
||||
|
||||
if o.SiteURL != "" {
|
||||
return o.SiteURL
|
||||
}
|
||||
|
||||
if o.FeedURL != "" {
|
||||
return o.FeedURL
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (o *opmlOutline) GetSiteURL() string {
|
||||
if o.SiteURL != "" {
|
||||
return o.SiteURL
|
||||
}
|
||||
|
||||
return o.FeedURL
|
||||
}
|
||||
|
// opmlOutlineCollection is a list of sibling <outline> elements.
type opmlOutlineCollection []opmlOutline

// HasChildren reports whether the collection contains any outline.
func (o opmlOutlineCollection) HasChildren() bool {
	return len(o) > 0
}
|
44
internal/reader/opml/parser.go
Normal file
44
internal/reader/opml/parser.go
Normal file
|
@ -0,0 +1,44 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/reader/encoding"
|
||||
)
|
||||
|
||||
// Parse reads an OPML file and returns a SubcriptionList.
|
||||
func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
|
||||
opmlDocument := NewOPMLDocument()
|
||||
decoder := xml.NewDecoder(data)
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = encoding.CharsetReader
|
||||
|
||||
err := decoder.Decode(opmlDocument)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse OPML file: %q", err)
|
||||
}
|
||||
|
||||
return getSubscriptionsFromOutlines(opmlDocument.Outlines, ""), nil
|
||||
}
|
||||
|
||||
func getSubscriptionsFromOutlines(outlines opmlOutlineCollection, category string) (subscriptions SubcriptionList) {
|
||||
for _, outline := range outlines {
|
||||
if outline.IsSubscription() {
|
||||
subscriptions = append(subscriptions, &Subcription{
|
||||
Title: outline.GetTitle(),
|
||||
FeedURL: outline.FeedURL,
|
||||
SiteURL: outline.GetSiteURL(),
|
||||
CategoryName: category,
|
||||
})
|
||||
} else if outline.Outlines.HasChildren() {
|
||||
subscriptions = append(subscriptions, getSubscriptionsFromOutlines(outline.Outlines, outline.Text)...)
|
||||
}
|
||||
}
|
||||
return subscriptions
|
||||
}
|
277
internal/reader/opml/parser_test.go
Normal file
277
internal/reader/opml/parser_test.go
Normal file
|
@ -0,0 +1,277 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseOpmlWithoutCategories(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<opml version="2.0">
|
||||
<head>
|
||||
<title>mySubscriptions.opml</title>
|
||||
</head>
|
||||
<body>
|
||||
<outline text="CNET News.com" description="Tech news and business reports by CNET News.com. Focused on information technology, core topics include computers, hardware, software, networking, and Internet media." htmlUrl="http://news.com.com/" language="unknown" title="CNET News.com" type="rss" version="RSS2" xmlUrl="http://news.com.com/2547-1_3-0-5.xml"/>
|
||||
<outline text="washingtonpost.com - Politics" description="Politics" htmlUrl="http://www.washingtonpost.com/wp-dyn/politics?nav=rss_politics" language="unknown" title="washingtonpost.com - Politics" type="rss" version="RSS2" xmlUrl="http://www.washingtonpost.com/wp-srv/politics/rssheadlines.xml"/>
|
||||
<outline text="Scobleizer: Microsoft Geek Blogger" description="Robert Scoble's look at geek and Microsoft life." htmlUrl="http://radio.weblogs.com/0001011/" language="unknown" title="Scobleizer: Microsoft Geek Blogger" type="rss" version="RSS2" xmlUrl="http://radio.weblogs.com/0001011/rss.xml"/>
|
||||
<outline text="Yahoo! News: Technology" description="Technology" htmlUrl="http://news.yahoo.com/news?tmpl=index&cid=738" language="unknown" title="Yahoo! News: Technology" type="rss" version="RSS2" xmlUrl="http://rss.news.yahoo.com/rss/tech"/>
|
||||
<outline text="Workbench" description="Programming and publishing news and comment" htmlUrl="http://www.cadenhead.org/workbench/" language="unknown" title="Workbench" type="rss" version="RSS2" xmlUrl="http://www.cadenhead.org/workbench/rss.xml"/>
|
||||
<outline text="Christian Science Monitor | Top Stories" description="Read the front page stories of csmonitor.com." htmlUrl="http://csmonitor.com" language="unknown" title="Christian Science Monitor | Top Stories" type="rss" version="RSS" xmlUrl="http://www.csmonitor.com/rss/top.rss"/>
|
||||
<outline text="Dictionary.com Word of the Day" description="A new word is presented every day with its definition and example sentences from actual published works." htmlUrl="http://dictionary.reference.com/wordoftheday/" language="unknown" title="Dictionary.com Word of the Day" type="rss" version="RSS" xmlUrl="http://www.dictionary.com/wordoftheday/wotd.rss"/>
|
||||
<outline text="The Motley Fool" description="To Educate, Amuse, and Enrich" htmlUrl="http://www.fool.com" language="unknown" title="The Motley Fool" type="rss" version="RSS" xmlUrl="http://www.fool.com/xml/foolnews_rss091.xml"/>
|
||||
<outline text="InfoWorld: Top News" description="The latest on Top News from InfoWorld" htmlUrl="http://www.infoworld.com/news/index.html" language="unknown" title="InfoWorld: Top News" type="rss" version="RSS2" xmlUrl="http://www.infoworld.com/rss/news.xml"/>
|
||||
<outline text="NYT > Business" description="Find breaking news & business news on Wall Street, media & advertising, international business, banking, interest rates, the stock market, currencies & funds." htmlUrl="http://www.nytimes.com/pages/business/index.html?partner=rssnyt" language="unknown" title="NYT > Business" type="rss" version="RSS2" xmlUrl="http://www.nytimes.com/services/xml/rss/nyt/Business.xml"/>
|
||||
<outline text="NYT > Technology" description="" htmlUrl="http://www.nytimes.com/pages/technology/index.html?partner=rssnyt" language="unknown" title="NYT > Technology" type="rss" version="RSS2" xmlUrl="http://www.nytimes.com/services/xml/rss/nyt/Technology.xml"/>
|
||||
<outline text="Scripting News" description="It's even worse than it appears." htmlUrl="http://www.scripting.com/" language="unknown" title="Scripting News" type="rss" version="RSS2" xmlUrl="http://www.scripting.com/rss.xml"/>
|
||||
<outline text="Wired News" description="Technology, and the way we do business, is changing the world we know. Wired News is a technology - and business-oriented news service feeding an intelligent, discerning audience. What role does technology play in the day-to-day living of your life? Wired News tells you. How has evolving technology changed the face of the international business world? Wired News puts you in the picture." htmlUrl="http://www.wired.com/" language="unknown" title="Wired News" type="rss" version="RSS" xmlUrl="http://www.wired.com/news_drop/netcenter/netcenter.rdf"/>
|
||||
</body>
|
||||
</opml>
|
||||
`
|
||||
|
||||
var expected SubcriptionList
|
||||
expected = append(expected, &Subcription{Title: "CNET News.com", FeedURL: "http://news.com.com/2547-1_3-0-5.xml", SiteURL: "http://news.com.com/"})
|
||||
|
||||
subscriptions, err := Parse(bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 13 {
|
||||
t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 13)
|
||||
}
|
||||
|
||||
if !subscriptions[0].Equals(expected[0]) {
|
||||
t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[0], expected[0])
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseOpmlWithCategories verifies that feeds nested inside a parent
// <outline> inherit that outline's text attribute as their CategoryName.
func TestParseOpmlWithCategories(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<opml version="2.0">
<head>
<title>mySubscriptions.opml</title>
</head>
<body>
<outline text="My Category 1">
<outline text="Feed 1" xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"/>
<outline text="Feed 2" xmlUrl="http://example.org/feed2/" htmlUrl="http://example.org/2"/>
</outline>
<outline text="My Category 2">
<outline text="Feed 3" xmlUrl="http://example.org/feed3/" htmlUrl="http://example.org/3"/>
</outline>
</body>
</opml>
`

	// Expected subscriptions carry the enclosing outline's text as category.
	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: "My Category 1"})
	expected = append(expected, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/2", CategoryName: "My Category 1"})
	expected = append(expected, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed3/", SiteURL: "http://example.org/3", CategoryName: "My Category 2"})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 3 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 3)
	}

	// Compare each parsed subscription to its expected counterpart in order.
	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
// TestParseOpmlWithEmptyTitleAndEmptySiteURL verifies the parser's fallbacks:
// a missing title falls back to the htmlUrl, and when htmlUrl is also absent
// both the title and SiteURL fall back to the xmlUrl.
func TestParseOpmlWithEmptyTitleAndEmptySiteURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="ISO-8859-1"?>
<opml version="2.0">
<head>
<title>mySubscriptions.opml</title>
</head>
<body>
<outline xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"/>
<outline xmlUrl="http://example.org/feed2/"/>
</body>
</opml>
`

	// First entry: title falls back to htmlUrl; second: everything falls
	// back to xmlUrl.
	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "http://example.org/1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: ""})
	expected = append(expected, &Subcription{Title: "http://example.org/feed2/", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/feed2/", CategoryName: ""})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 2 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 2)
	}

	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
// TestParseOpmlVersion1 verifies parsing of an OPML 1.0 document where each
// feed outline is wrapped in an outer outline that uses the "title" attribute
// (OPML 1.0 style) instead of "text".
func TestParseOpmlVersion1(t *testing.T) {
	data := `<?xml version="1.0"?>
<opml version="1.0">
<head>
<title>mySubscriptions.opml</title>
<dateCreated>Wed, 13 Mar 2019 11:51:41 GMT</dateCreated>
</head>
<body>
<outline title="Feed 1">
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"></outline>
</outline>
<outline title="Feed 2">
<outline type="rss" title="Feed 2" xmlUrl="http://example.org/feed2/" htmlUrl="http://example.org/2"></outline>
</outline>
</body>
</opml>
`

	// Wrappers whose title matches the feed title yield no category name.
	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: ""})
	expected = append(expected, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/2", CategoryName: ""})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 2 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 2)
	}

	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
// TestParseOpmlVersion1WithoutOuterOutline verifies that flat OPML 1.0
// documents (feed outlines directly under <body>) are parsed correctly.
func TestParseOpmlVersion1WithoutOuterOutline(t *testing.T) {
	data := `<?xml version="1.0"?>
<opml version="1.0">
<head>
<title>mySubscriptions.opml</title>
<dateCreated>Wed, 13 Mar 2019 11:51:41 GMT</dateCreated>
</head>
<body>
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"></outline>
<outline type="rss" title="Feed 2" xmlUrl="http://example.org/feed2/" htmlUrl="http://example.org/2"></outline>
</body>
</opml>
`

	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: ""})
	expected = append(expected, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/2", CategoryName: ""})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 2 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 2)
	}

	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
// TestParseOpmlVersion1WithSeveralNestedOutlines verifies parsing of a
// deeply nested export (RSSOwl-style): feeds are two levels down, and the
// *innermost* enclosing outline provides the category name.
func TestParseOpmlVersion1WithSeveralNestedOutlines(t *testing.T) {
	data := `<?xml version="1.0"?>
<opml xmlns:rssowl="http://www.rssowl.org" version="1.1">
<head>
<title>RSSOwl Subscriptions</title>
<dateCreated>星期二, 26 四月 2022 00:12:04 CST</dateCreated>
</head>
<body>
<outline text="My Feeds" rssowl:isSet="true" rssowl:id="7">
<outline text="Some Category" rssowl:isSet="false" rssowl:id="55">
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/" htmlUrl="http://example.org/1"></outline>
<outline type="rss" title="Feed 2" xmlUrl="http://example.org/feed2/" htmlUrl="http://example.org/2"></outline>
</outline>
<outline text="Another Category" rssowl:isSet="false" rssowl:id="87">
<outline type="rss" title="Feed 3" xmlUrl="http://example.org/feed3/" htmlUrl="http://example.org/3"></outline>
</outline>
</outline>
</body>
</opml>
`

	// The top-level "My Feeds" wrapper is not used as a category; only the
	// direct parent outline of each feed is.
	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/", SiteURL: "http://example.org/1", CategoryName: "Some Category"})
	expected = append(expected, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed2/", SiteURL: "http://example.org/2", CategoryName: "Some Category"})
	expected = append(expected, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed3/", SiteURL: "http://example.org/3", CategoryName: "Another Category"})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 3 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 3)
	}

	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
// TestParseOpmlWithInvalidCharacterEntity verifies that the parser tolerates
// raw, unescaped "&" characters in attribute values (invalid XML entities)
// and keeps them verbatim in the resulting URLs.
func TestParseOpmlWithInvalidCharacterEntity(t *testing.T) {
	data := `<?xml version="1.0"?>
<opml version="1.0">
<head>
<title>mySubscriptions.opml</title>
</head>
<body>
<outline title="Feed 1">
<outline type="rss" title="Feed 1" xmlUrl="http://example.org/feed1/a&b" htmlUrl="http://example.org/c&d"></outline>
</outline>
</body>
</opml>
`

	// The "&" must survive parsing unmodified.
	var expected SubcriptionList
	expected = append(expected, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed1/a&b", SiteURL: "http://example.org/c&d", CategoryName: ""})

	subscriptions, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(subscriptions) != 1 {
		t.Fatalf("Wrong number of subscriptions: %d instead of %d", len(subscriptions), 1)
	}

	for i := 0; i < len(subscriptions); i++ {
		if !subscriptions[i].Equals(expected[i]) {
			t.Errorf(`Subscription is different: "%v" vs "%v"`, subscriptions[i], expected[i])
		}
	}
}
|
||||
|
||||
func TestParseInvalidXML(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse(bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Error("Parse should generate an error")
|
||||
}
|
||||
}
|
71
internal/reader/opml/serializer.go
Normal file
71
internal/reader/opml/serializer.go
Normal file
|
@ -0,0 +1,71 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/logger"
|
||||
)
|
||||
|
||||
// Serialize returns a SubcriptionList in OPML format.
|
||||
func Serialize(subscriptions SubcriptionList) string {
|
||||
var b bytes.Buffer
|
||||
writer := bufio.NewWriter(&b)
|
||||
writer.WriteString(xml.Header)
|
||||
|
||||
opmlDocument := convertSubscriptionsToOPML(subscriptions)
|
||||
encoder := xml.NewEncoder(writer)
|
||||
encoder.Indent("", " ")
|
||||
if err := encoder.Encode(opmlDocument); err != nil {
|
||||
logger.Error("[OPML:Serialize] %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func convertSubscriptionsToOPML(subscriptions SubcriptionList) *opmlDocument {
|
||||
opmlDocument := NewOPMLDocument()
|
||||
opmlDocument.Version = "2.0"
|
||||
opmlDocument.Header.Title = "Miniflux"
|
||||
opmlDocument.Header.DateCreated = time.Now().Format("Mon, 02 Jan 2006 15:04:05 MST")
|
||||
|
||||
groupedSubs := groupSubscriptionsByFeed(subscriptions)
|
||||
var categories []string
|
||||
for k := range groupedSubs {
|
||||
categories = append(categories, k)
|
||||
}
|
||||
sort.Strings(categories)
|
||||
|
||||
for _, categoryName := range categories {
|
||||
category := opmlOutline{Text: categoryName}
|
||||
for _, subscription := range groupedSubs[categoryName] {
|
||||
category.Outlines = append(category.Outlines, opmlOutline{
|
||||
Title: subscription.Title,
|
||||
Text: subscription.Title,
|
||||
FeedURL: subscription.FeedURL,
|
||||
SiteURL: subscription.SiteURL,
|
||||
})
|
||||
}
|
||||
|
||||
opmlDocument.Outlines = append(opmlDocument.Outlines, category)
|
||||
}
|
||||
|
||||
return opmlDocument
|
||||
}
|
||||
|
||||
func groupSubscriptionsByFeed(subscriptions SubcriptionList) map[string]SubcriptionList {
|
||||
groups := make(map[string]SubcriptionList)
|
||||
|
||||
for _, subscription := range subscriptions {
|
||||
groups[subscription.CategoryName] = append(groups[subscription.CategoryName], subscription)
|
||||
}
|
||||
|
||||
return groups
|
||||
}
|
63
internal/reader/opml/serializer_test.go
Normal file
63
internal/reader/opml/serializer_test.go
Normal file
|
@ -0,0 +1,63 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestSerialize(t *testing.T) {
|
||||
var subscriptions SubcriptionList
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed/1", SiteURL: "http://example.org/1", CategoryName: "Category 1"})
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed/2", SiteURL: "http://example.org/2", CategoryName: "Category 1"})
|
||||
subscriptions = append(subscriptions, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed/3", SiteURL: "http://example.org/3", CategoryName: "Category 2"})
|
||||
|
||||
output := Serialize(subscriptions)
|
||||
feeds, err := Parse(bytes.NewBufferString(output))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if len(feeds) != 3 {
|
||||
t.Errorf("Wrong number of subscriptions: %d instead of %d", len(feeds), 3)
|
||||
}
|
||||
|
||||
found := false
|
||||
for _, feed := range feeds {
|
||||
if feed.Title == "Feed 1" && feed.CategoryName == "Category 1" &&
|
||||
feed.FeedURL == "http://example.org/feed/1" && feed.SiteURL == "http://example.org/1" {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
t.Error("Serialized feed is incorrect")
|
||||
}
|
||||
}
|
||||
|
||||
// TestNormalizedCategoriesOrder verifies that convertSubscriptionsToOPML
// emits category outlines in alphabetical order regardless of the order in
// which subscriptions were appended.
func TestNormalizedCategoriesOrder(t *testing.T) {
	// naturalOrderName is the insertion order; correctOrderName is the
	// expected (sorted) order of the resulting outlines.
	var orderTests = []struct {
		naturalOrderName string
		correctOrderName string
	}{
		{"Category 2", "Category 1"},
		{"Category 3", "Category 2"},
		{"Category 1", "Category 3"},
	}

	var subscriptions SubcriptionList
	subscriptions = append(subscriptions, &Subcription{Title: "Feed 1", FeedURL: "http://example.org/feed/1", SiteURL: "http://example.org/1", CategoryName: orderTests[0].naturalOrderName})
	subscriptions = append(subscriptions, &Subcription{Title: "Feed 2", FeedURL: "http://example.org/feed/2", SiteURL: "http://example.org/2", CategoryName: orderTests[1].naturalOrderName})
	subscriptions = append(subscriptions, &Subcription{Title: "Feed 3", FeedURL: "http://example.org/feed/3", SiteURL: "http://example.org/3", CategoryName: orderTests[2].naturalOrderName})

	feeds := convertSubscriptionsToOPML(subscriptions)

	for i, o := range orderTests {
		if feeds.Outlines[i].Text != o.correctOrderName {
			t.Fatalf("need %v, got %v", o.correctOrderName, feeds.Outlines[i].Text)
		}
	}
}
|
21
internal/reader/opml/subscription.go
Normal file
21
internal/reader/opml/subscription.go
Normal file
|
@ -0,0 +1,21 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package opml // import "miniflux.app/v2/internal/reader/opml"
|
||||
|
||||
// Subcription represents a feed that will be imported or exported.
//
// NOTE(review): "Subcription" is a long-standing misspelling of
// "Subscription"; it is kept because it is part of the package's exported
// API.
type Subcription struct {
	Title        string // Feed title.
	SiteURL      string // URL of the feed's website.
	FeedURL      string // URL of the feed itself (xmlUrl in OPML).
	CategoryName string // Category the feed belongs to; empty when uncategorized.
}
|
||||
|
||||
// Equals compare two subscriptions.
|
||||
func (s Subcription) Equals(subscription *Subcription) bool {
|
||||
return s.Title == subscription.Title && s.SiteURL == subscription.SiteURL &&
|
||||
s.FeedURL == subscription.FeedURL && s.CategoryName == subscription.CategoryName
|
||||
}
|
||||
|
||||
// SubcriptionList is a list of subscriptions.
// (Same deliberate "Subcription" spelling as the element type.)
type SubcriptionList []*Subcription
|
49
internal/reader/parser/format.go
Normal file
49
internal/reader/parser/format.go
Normal file
|
@ -0,0 +1,49 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
|
||||
rxml "miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
||||
// List of feed formats recognized by DetectFeedFormat.
const (
	FormatRDF     = "rdf"     // RDF/RSS 1.0 documents (root element "RDF").
	FormatRSS     = "rss"     // RSS 2.0 documents (root element "rss").
	FormatAtom    = "atom"    // Atom documents (root element "feed").
	FormatJSON    = "json"    // JSON Feed documents (body starts with "{").
	FormatUnknown = "unknown" // Anything that matched none of the above.
)
|
||||
|
||||
// DetectFeedFormat tries to guess the feed format from input data.
|
||||
func DetectFeedFormat(data string) string {
|
||||
if strings.HasPrefix(strings.TrimSpace(data), "{") {
|
||||
return FormatJSON
|
||||
}
|
||||
|
||||
decoder := rxml.NewDecoder(strings.NewReader(data))
|
||||
|
||||
for {
|
||||
token, _ := decoder.Token()
|
||||
if token == nil {
|
||||
break
|
||||
}
|
||||
|
||||
if element, ok := token.(xml.StartElement); ok {
|
||||
switch element.Name.Local {
|
||||
case "rss":
|
||||
return FormatRSS
|
||||
case "feed":
|
||||
return FormatAtom
|
||||
case "RDF":
|
||||
return FormatRDF
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return FormatUnknown
|
||||
}
|
78
internal/reader/parser/format_test.go
Normal file
78
internal/reader/parser/format_test.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectRDF(t *testing.T) {
|
||||
data := `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/"></rdf:RDF>`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatRDF {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRDF)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectRSS(t *testing.T) {
|
||||
data := `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatRSS {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatRSS)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectAtom10(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectAtom03(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?><feed version="0.3" xmlns="http://purl.org/atom/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectAtomWithISOCharset(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="ISO-8859-15"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatAtom {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatAtom)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSON(t *testing.T) {
|
||||
data := `
|
||||
{
|
||||
"version" : "https://jsonfeed.org/version/1",
|
||||
"title" : "Example"
|
||||
}
|
||||
`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectUnknown(t *testing.T) {
|
||||
data := `
|
||||
<!DOCTYPE html> <html> </html>
|
||||
`
|
||||
format := DetectFeedFormat(data)
|
||||
|
||||
if format != FormatUnknown {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||
}
|
||||
}
|
31
internal/reader/parser/parser.go
Normal file
31
internal/reader/parser/parser.go
Normal file
|
@ -0,0 +1,31 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/atom"
|
||||
"miniflux.app/v2/internal/reader/json"
|
||||
"miniflux.app/v2/internal/reader/rdf"
|
||||
"miniflux.app/v2/internal/reader/rss"
|
||||
)
|
||||
|
||||
// ParseFeed analyzes the input data and returns a normalized feed object.
|
||||
func ParseFeed(baseURL, data string) (*model.Feed, *errors.LocalizedError) {
|
||||
switch DetectFeedFormat(data) {
|
||||
case FormatAtom:
|
||||
return atom.Parse(baseURL, strings.NewReader(data))
|
||||
case FormatRSS:
|
||||
return rss.Parse(baseURL, strings.NewReader(data))
|
||||
case FormatJSON:
|
||||
return json.Parse(baseURL, strings.NewReader(data))
|
||||
case FormatRDF:
|
||||
return rdf.Parse(baseURL, strings.NewReader(data))
|
||||
default:
|
||||
return nil, errors.NewLocalizedError("Unsupported feed format")
|
||||
}
|
||||
}
|
350
internal/reader/parser/parser_test.go
Normal file
350
internal/reader/parser/parser_test.go
Normal file
|
@ -0,0 +1,350 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
)
|
||||
|
||||
func TestParseAtom(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
|
||||
<title>Example Feed</title>
|
||||
<link href="http://example.org/"/>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
</author>
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
||||
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
</entry>
|
||||
|
||||
</feed>`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseAtomFeedWithRelativeURL checks that relative feed, site, and
// entry links in an Atom document are resolved against the base URL.
func TestParseAtomFeedWithRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
<link href="/blog"/>

<entry>
<title>Test</title>
<link href="/blog/article.html"/>
<link href="/blog/article.html" rel="alternate" type="text/html"/>
<id>/blog/article.html</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>

</feed>`

	feed, err := ParseFeed("https://example.org/blog/atom.xml", data)
	if err != nil {
		t.Fatal(err)
	}

	// All three URLs must be absolute after parsing.
	if feed.FeedURL != "https://example.org/blog/atom.xml" {
		t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
	}

	if feed.SiteURL != "https://example.org/blog" {
		t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
	}

	if feed.Entries[0].URL != "https://example.org/blog/article.html" {
		t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
	}
}
|
||||
|
||||
func TestParseRSS(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<item>
|
||||
<title>Star City</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := ParseFeed("http://liftoff.msfc.nasa.gov/", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Liftoff News" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRSSFeedWithRelativeURL(t *testing.T) {
|
||||
data := `<?xml version="1.0"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>/blog</link>
|
||||
<item>
|
||||
<title>Example Entry</title>
|
||||
<link>/blog/article.html</link>
|
||||
<description>Something</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>1234</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/rss.xml", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://example.org/rss.xml" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/blog" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/blog/article.html" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDF(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel>
|
||||
<title>RDF Example</title>
|
||||
<link>http://example.org/</link>
|
||||
</channel>
|
||||
|
||||
<item>
|
||||
<title>Title</title>
|
||||
<link>http://example.org/item</link>
|
||||
<description>Test</description>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "RDF Example" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDFWithRelativeURL(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel>
|
||||
<title>RDF Example</title>
|
||||
<link>/blog</link>
|
||||
</channel>
|
||||
|
||||
<item>
|
||||
<title>Title</title>
|
||||
<link>/blog/article.html</link>
|
||||
<description>Test</description>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := ParseFeed("http://example.org/rdf.xml", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://example.org/rdf.xml" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://example.org/blog" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://example.org/blog/article.html" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseJson(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "https://example.org/",
|
||||
"feed_url": "https://example.org/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "https://example.org/second-item"
|
||||
},
|
||||
{
|
||||
"id": "1",
|
||||
"content_html": "<p>Hello, world!</p>",
|
||||
"url": "https://example.org/initial-post"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/feed.json", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "My Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseJsonFeedWithRelativeURL(t *testing.T) {
|
||||
data := `{
|
||||
"version": "https://jsonfeed.org/version/1",
|
||||
"title": "My Example Feed",
|
||||
"home_page_url": "/blog",
|
||||
"feed_url": "/blog/feed.json",
|
||||
"items": [
|
||||
{
|
||||
"id": "2",
|
||||
"content_text": "This is a second item.",
|
||||
"url": "/blog/article.html"
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
feed, err := ParseFeed("https://example.org/blog/feed.json", data)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if feed.Title != "My Example Feed" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "https://example.org/blog/feed.json" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "https://example.org/blog" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "https://example.org/blog/article.html" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseUnknownFeed(t *testing.T) {
|
||||
data := `
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>Title of document</title>
|
||||
</head>
|
||||
<body>
|
||||
some content
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
_, err := ParseFeed("https://example.org/", data)
|
||||
if err == nil {
|
||||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseEmptyFeed(t *testing.T) {
|
||||
_, err := ParseFeed("", "")
|
||||
if err == nil {
|
||||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestDifferentEncodingWithResponse verifies end-to-end charset handling:
// fixture files in various encodings are normalized to Unicode through
// client.Response.EnsureUnicodeBody and then parsed, and a known entry
// title is checked in each case.
func TestDifferentEncodingWithResponse(t *testing.T) {
	// Each case pairs a fixture file with the HTTP Content-Type it is
	// served under, plus the entry index/title expected after decoding.
	var unicodeTestCases = []struct {
		filename, contentType string
		index                 int
		title                 string
	}{
		// Arabic language encoded in UTF-8.
		{"urdu_UTF8.xml", "text/xml; charset=utf-8", 0, "امریکی عسکری امداد کی بندش کی وجوہات: انڈیا سے جنگ، جوہری پروگرام اور اب دہشت گردوں کی پشت پناہی"},

		// Windows-1251 encoding and not charset in HTTP header.
		{"encoding_WINDOWS-1251.xml", "text/xml", 0, "Цитата #17703"},

		// No encoding in XML, but defined in HTTP Content-Type header.
		{"no_encoding_ISO-8859-1.xml", "application/xml; charset=ISO-8859-1", 2, "La criminalité liée surtout à... l'ennui ?"},

		// ISO-8859-1 encoding defined in XML and HTTP header.
		{"encoding_ISO-8859-1.xml", "application/rss+xml; charset=ISO-8859-1", 5, "Projekt Jedi: Microsoft will weiter mit US-Militär zusammenarbeiten"},

		// UTF-8 encoding defined in RDF document and HTTP header.
		{"rdf_UTF8.xml", "application/rss+xml; charset=utf-8", 1, "Mega-Deal: IBM übernimmt Red Hat"},

		// UTF-8 encoding defined only in RDF document.
		{"rdf_UTF8.xml", "application/rss+xml", 1, "Mega-Deal: IBM übernimmt Red Hat"},
	}

	for _, tc := range unicodeTestCases {
		content, err := os.ReadFile("testdata/" + tc.filename)
		if err != nil {
			t.Fatalf(`Unable to read file %q: %v`, tc.filename, err)
		}

		// Simulate an HTTP response carrying the fixture bytes, then let
		// the client normalize the body to Unicode before parsing.
		r := &client.Response{Body: bytes.NewReader(content), ContentType: tc.contentType}
		if encodingErr := r.EnsureUnicodeBody(); encodingErr != nil {
			t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
		}

		feed, parseErr := ParseFeed("https://example.org/", r.BodyAsString())
		if parseErr != nil {
			t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
		}

		if feed.Entries[tc.index].Title != tc.title {
			t.Errorf(`Unexpected title, got %q instead of %q`, feed.Entries[tc.index].Title, tc.title)
		}
	}
}
|
422
internal/reader/parser/testdata/encoding_ISO-8859-1.xml
vendored
Normal file
422
internal/reader/parser/testdata/encoding_ISO-8859-1.xml
vendored
Normal file
|
@ -0,0 +1,422 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1"?>
|
||||
<!-- generator="FeedCreator 1.6" -->
|
||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
|
||||
xmlns:atom="http://www.w3.org/2005/Atom"
|
||||
version="2.0">
|
||||
<channel>
|
||||
<title>Golem.de</title>
|
||||
<description>IT-News fuer Profis</description>
|
||||
<link>https://www.golem.de/</link>
|
||||
<atom:link rel="self" href="https://rss.golem.de/rss.php?feed=RSS2.0" />
|
||||
<lastBuildDate>Sun, 28 Oct 2018 13:49:01 +0100</lastBuildDate>
|
||||
<generator>FeedCreator 1.6</generator>
|
||||
<image>
|
||||
<url>https://www.golem.de/staticrl/images/golem-rss.png</url>
|
||||
<title>Golem.de</title>
|
||||
<link>https://www.golem.de/</link>
|
||||
<description>Golem.de News Feed</description>
|
||||
</image>
|
||||
<language>de</language>
|
||||
<atom:link rel="hub" href="http://golem.superfeedr.com/" />
|
||||
<item>
|
||||
<title>Red Dead Redemption 2: Hinweise auf PC-Umsetzung in App von Rockstar Games</title>
|
||||
<link>https://www.golem.de/news/red-dead-redemption-2-hinweise-auf-pc-umsetzung-in-app-von-rockstar-games-1810-137358-rss.html</link>
|
||||
<description>Viele Spieler wünschen sich eine PC-Version von Red Dead Redemption 2, aber Entwickler Rockstar Games schweigt zu dem Thema. Anders die offizielle Companion App: In einigen ihrer Daten gibt es Hinweise auf die Umsetzung. (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/red-dead-redemption/">Red Dead Redemption</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137358&amp;page=1&amp;ts=1540730880" alt="" width="1" height="1" /></description>
|
||||
<pubDate>Sun, 28 Oct 2018 13:48:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137358-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137358-177541-177538_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Viele Spieler wünschen sich eine PC-Version von Red Dead Redemption 2, aber Entwickler Rockstar Games schweigt zu dem Thema. Anders die offizielle Companion App: In einigen ihrer Daten gibt es Hinweise auf die Umsetzung. (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/red-dead-redemption/">Red Dead Redemption</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137358&page=1&ts=1540730880" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments />
|
||||
</item>
|
||||
<item>
|
||||
<title>Let's Play: Twitch will Streamer zusammen spielen und singen lassen</title>
|
||||
<link>https://www.golem.de/news/let-s-play-twitch-will-streamer-zusammen-spielen-und-singen-lassen-1810-137357-rss.html</link>
|
||||
<description>Der Streamingdienst Twitch hat auf seiner Hausmesse neue Funktionen für Kanalbetreiber und Zuschauer vorgestellt. Unter anderem soll es künftig Übertragungen mit bis zu vier Spielern geben - und Singwettbewerbe. (<a href="https://www.golem.de/specials/twitch/">Twitch</a>, <a href="https://www.golem.de/specials/amazon/">Amazon</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137357&amp;page=1&amp;ts=1540728000" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/games/let-s-play-twitch-will-streamer-zusammen-spielen-und-singen-lassen/121579,list.html</comments>
|
||||
<pubDate>Sun, 28 Oct 2018 13:00:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137357-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137357-177536-177533_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Der Streamingdienst Twitch hat auf seiner Hausmesse neue Funktionen für Kanalbetreiber und Zuschauer vorgestellt. Unter anderem soll es künftig Übertragungen mit bis zu vier Spielern geben - und Singwettbewerbe. (<a href="https://www.golem.de/specials/twitch/">Twitch</a>, <a href="https://www.golem.de/specials/amazon/">Amazon</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137357&page=1&ts=1540728000" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments />
|
||||
</item>
|
||||
<item>
|
||||
<title>Zhuque-1: Erste private chinesische Satellitenmission fehlgeschlagen</title>
|
||||
<link>https://www.golem.de/news/zhuque-1-erste-private-chinesische-satellitenmission-fehlgeschlagen-1810-137356-rss.html</link>
|
||||
<description>Die Zhuque-1 hat es nicht in den Orbit geschafft: Beim Zünden der dritten Raketenstufe kam es zu Problemen. Bei einem Erfolg wäre der Hersteller Landspace das erste von rund 60 kommerziellen chinesischen Unternehmen gewesen, das einen Satelliten ins All gebracht hätte. (<a href="https://www.golem.de/specials/raumfahrt/">Raumfahrt</a>, <a href="https://www.golem.de/specials/internet/">Internet</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137356&amp;page=1&amp;ts=1540722420" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/internet/zhuque-1-erste-private-chinesische-satellitenmission-fehlgeschlagen/121578,list.html</comments>
|
||||
<pubDate>Sun, 28 Oct 2018 11:27:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137356-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137356-177532-177529_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die Zhuque-1 hat es nicht in den Orbit geschafft: Beim Zünden der dritten Raketenstufe kam es zu Problemen. Bei einem Erfolg wäre der Hersteller Landspace das erste von rund 60 kommerziellen chinesischen Unternehmen gewesen, das einen Satelliten ins All gebracht hätte. (<a href="https://www.golem.de/specials/raumfahrt/">Raumfahrt</a>, <a href="https://www.golem.de/specials/internet/">Internet</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137356&page=1&ts=1540722420" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>1</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>City Transformer: Startup entwickelt faltbares Elektroauto gegen Parkplatznot</title>
|
||||
<link>https://www.golem.de/news/city-transformer-startup-entwickelt-faltbares-elektroauto-gegen-parkplatznot-1810-137355-rss.html</link>
|
||||
<description>Es passt fast in jede Parklücke: Ein Faltauto des Startups City Transformer soll Städtern künftig das Leben erleichtern. Das innovative Fahrzeug wird zusammen mit Yamaha entwickelt. Vorbestellungen sollen voraussichtlich ab 2020 möglich sein, mehrere Versionen sind geplant. (<a href="https://www.golem.de/specials/elektromobilitaet/">Elektromobilität</a>, <a href="https://www.golem.de/specials/elektroauto/">Elektroauto</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137355&amp;page=1&amp;ts=1540721400" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/automobil/city-transformer-startup-entwickelt-faltbares-elektroauto-gegen-parkplatznot/121577,list.html</comments>
|
||||
<pubDate>Sun, 28 Oct 2018 11:10:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137355-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137355-177527-177524_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Es passt fast in jede Parklücke: Ein Faltauto des Startups City Transformer soll Städtern künftig das Leben erleichtern. Das innovative Fahrzeug wird zusammen mit Yamaha entwickelt. Vorbestellungen sollen voraussichtlich ab 2020 möglich sein, mehrere Versionen sind geplant. (<a href="https://www.golem.de/specials/elektromobilitaet/">Elektromobilität</a>, <a href="https://www.golem.de/specials/elektroauto/">Elektroauto</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137355&page=1&ts=1540721400" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>37</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Machine Learning: Von KI erstelltes Porträt für 432.500 US.Dollar versteigert</title>
|
||||
<link>https://www.golem.de/news/machine-learning-von-ki-erstelltes-portraet-fuer-432-500-us-dollar-versteigert-1810-137353-rss.html</link>
|
||||
<description>Kann Software Kunst erstellen? Eine erfolgreiche Auktion beweist, dass es zumindest Abnehmer dafür gibt. Allerdings hat sich das Entwicklerteam Obvious wohl stark bei anderen KI-Systemen bedient. (<a href="https://www.golem.de/specials/neuronalesnetzwerk/">Neuronales Netzwerk</a>, <a href="https://www.golem.de/specials/ki/">KI</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137353&amp;page=1&amp;ts=1540643580" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/applikationen/machine-learning-von-ki-erstelltes-portraet-fuer-432.500-us.dollar-versteigert/121575,list.html</comments>
|
||||
<pubDate>Sat, 27 Oct 2018 13:33:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137353-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137353-177523-177520_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Kann Software Kunst erstellen? Eine erfolgreiche Auktion beweist, dass es zumindest Abnehmer dafür gibt. Allerdings hat sich das Entwicklerteam Obvious wohl stark bei anderen KI-Systemen bedient. (<a href="https://www.golem.de/specials/neuronalesnetzwerk/">Neuronales Netzwerk</a>, <a href="https://www.golem.de/specials/ki/">KI</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137353&page=1&ts=1540643580" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>31</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Projekt Jedi: Microsoft will weiter mit US-Militär zusammenarbeiten</title>
|
||||
<link>https://www.golem.de/news/project-jedi-microsoft-will-weiter-mit-us-militaer-zusammenarbeiten-1810-137352-rss.html</link>
|
||||
<description>In einem Blogbeitrag hat sich Microsoft-Präsident Brad Smith zur Zusammenarbeit mit dem US-Verteidigungsministerium bekannt. Mitarbeiter, die nicht an derartigen Projekten arbeiten wollen, sollen in andere Bereiche des Unternehmens wechseln können. (<a href="https://www.golem.de/specials/microsoft/">Microsoft</a>, <a href="https://www.golem.de/specials/google/">Google</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137352&amp;page=1&amp;ts=1540641780" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/internet/projekt-jedi-microsoft-will-weiter-mit-us-militaer-zusammenarbeiten/121574,list.html</comments>
|
||||
<pubDate>Sat, 27 Oct 2018 13:03:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137352-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137055-176130-176127_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">In einem Blogbeitrag hat sich Microsoft-Präsident Brad Smith zur Zusammenarbeit mit dem US-Verteidigungsministerium bekannt. Mitarbeiter, die nicht an derartigen Projekten arbeiten wollen, sollen in andere Bereiche des Unternehmens wechseln können. (<a href="https://www.golem.de/specials/microsoft/">Microsoft</a>, <a href="https://www.golem.de/specials/google/">Google</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137352&page=1&ts=1540641780" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>21</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Star Wars: Boba-Fett-Film ist "zu 100 Prozent tot"</title>
|
||||
<link>https://www.golem.de/news/star-wars-boba-fett-film-ist-zu-100-prozent-tot-1810-137351-rss.html</link>
|
||||
<description>Es wird wohl doch keinen dritten Star-Wars-Ableger geben, der sich um den kultigen Kopfgeldjäger Boba Fett dreht. Das wird laut einem Medienbericht teils auf den geringen Erfolg des Han-Solo-Films zurückgeführt. Stattdessen soll ein bisher unbekannter Charakter in einer Serie die mandalorianische Rüstung anziehen. (<a href="https://www.golem.de/specials/star-wars/">Star Wars</a>, <a href="https://www.golem.de/specials/film/">Film</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137351&amp;page=1&amp;ts=1540639620" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/audio-video/star-wars-boba-fett-film-ist-zu-100-prozent-tot/121573,list.html</comments>
|
||||
<pubDate>Sat, 27 Oct 2018 12:27:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137351-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137351-177519-177514_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Es wird wohl doch keinen dritten Star-Wars-Ableger geben, der sich um den kultigen Kopfgeldjäger Boba Fett dreht. Das wird laut einem Medienbericht teils auf den geringen Erfolg des Han-Solo-Films zurückgeführt. Stattdessen soll ein bisher unbekannter Charakter in einer Serie die mandalorianische Rüstung anziehen. (<a href="https://www.golem.de/specials/star-wars/">Star Wars</a>, <a href="https://www.golem.de/specials/film/">Film</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137351&page=1&ts=1540639620" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>148</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Lenovo: Fehlerhafte Bios-Einstellung macht Thinkpads unbrauchbar</title>
|
||||
<link>https://www.golem.de/news/lenovo-fehlerhafte-bios-einstellung-macht-thinkpads-unbrauchbar-1810-137350-rss.html</link>
|
||||
<description>Die Bios-Unterstützung für Thunderbolt bei Thinkpads zu aktivieren, ist derzeit keine gute Idee: Mehrere Nutzer berichten von nicht mehr startenden Notebooks, nachdem sie diese Funktion aktiviert haben. Das konnte auf diversen Linux-Distributionen, aber auch mit Windows 10 repliziert werden. (<a href="https://www.golem.de/specials/lenovo/">Lenovo</a>, <a href="https://www.golem.de/specials/business-notebooks/">Business-Notebooks</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137350&amp;page=1&amp;ts=1540633200" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/applikationen/lenovo-fehlerhafte-bios-einstellung-macht-thinkpads-unbrauchbar/121572,list.html</comments>
|
||||
<pubDate>Sat, 27 Oct 2018 10:40:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137350-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137350-177513-177510_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die Bios-Unterstützung für Thunderbolt bei Thinkpads zu aktivieren, ist derzeit keine gute Idee: Mehrere Nutzer berichten von nicht mehr startenden Notebooks, nachdem sie diese Funktion aktiviert haben. Das konnte auf diversen Linux-Distributionen, aber auch mit Windows 10 repliziert werden. (<a href="https://www.golem.de/specials/lenovo/">Lenovo</a>, <a href="https://www.golem.de/specials/business-notebooks/">Business-Notebooks</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137350&page=1&ts=1540633200" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>16</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Wochenrückblick: Wilder Westen, buntes Handy, nutzloses Siegel</title>
|
||||
<link>https://www.golem.de/news/wochenrueckblick-wilder-westen-buntes-handy-nutzloses-siegel-1810-137318-rss.html</link>
|
||||
<description> Wir testen das iPhone Xr, sind ein Revolverheld und entdecken wieder Sicherheitslücken. Sieben Tage und viele Meldungen im Überblick. (<a href="https://www.golem.de/specials/golemwochenrueckblick/">Golem-Wochenrückblick</a>, <a href="https://www.golem.de/specials/business-notebooks/">Business-Notebooks</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137318&amp;page=1&amp;ts=1540623720" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/politik-recht/wochenrueckblick-wilder-westen-buntes-handy-nutzloses-siegel/121571,list.html</comments>
|
||||
<pubDate>Sat, 27 Oct 2018 08:02:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137318-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137318-177504-177501_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left"> Wir testen das iPhone Xr, sind ein Revolverheld und entdecken wieder Sicherheitslücken. Sieben Tage und viele Meldungen im Überblick. (<a href="https://www.golem.de/specials/golemwochenrueckblick/">Golem-Wochenrückblick</a>, <a href="https://www.golem.de/specials/business-notebooks/">Business-Notebooks</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137318&page=1&ts=1540623720" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments />
|
||||
</item>
|
||||
<item>
|
||||
<title>Fernsehen: 5G-Netz wird so wichtig wie Strom und Wasser</title>
|
||||
<link>https://www.golem.de/news/fernsehen-5g-netz-wird-so-wichtig-wie-strom-und-wasser-1810-137349-rss.html</link>
|
||||
<description>Ein 5G-FeMBMS-Sendernetz für die Fernsehverbreitung sorgt für Aufsehen, noch bevor man weiß, ob es funktioniert. Wie Rundfunkübertragung und Mobilfunk zusammenkommen können, wurde auf den Medientagen München besprochen. (<a href="https://www.golem.de/specials/fernsehen/">Fernsehen</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137349&amp;page=1&amp;ts=1540572960" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/handy/fernsehen-5g-netz-wird-so-wichtig-wie-strom-und-wasser/121570,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 17:56:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137349-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137349-177509-177506_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Ein 5G-FeMBMS-Sendernetz für die Fernsehverbreitung sorgt für Aufsehen, noch bevor man weiß, ob es funktioniert. Wie Rundfunkübertragung und Mobilfunk zusammenkommen können, wurde auf den Medientagen München besprochen. (<a href="https://www.golem.de/specials/fernsehen/">Fernsehen</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137349&page=1&ts=1540572960" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>25</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Linux und BSD: Sicherheitslücke in X.org ermöglicht Root-Rechte</title>
|
||||
<link>https://www.golem.de/news/linux-und-bsd-sicherheitsluecke-in-x-org-ermoeglicht-root-rechte-1810-137347-rss.html</link>
|
||||
<description>Eine Sicherheitslücke im Displayserver X.org erlaubt unter bestimmten Umständen das Überschreiben von Dateien und das Ausweiten der Benutzerrechte. Der passende Exploit passt in einen Tweet. (<a href="https://www.golem.de/specials/sicherheitsluecke/">Sicherheitslücke</a>, <a href="https://www.golem.de/specials/openbsd/">OpenBSD</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137347&amp;page=1&amp;ts=1540564620" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/linux-und-bsd-sicherheitsluecke-in-x.org-ermoeglicht-root-rechte/121569,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 15:37:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137347-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137347-177500-177497_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Eine Sicherheitslücke im Displayserver X.org erlaubt unter bestimmten Umständen das Überschreiben von Dateien und das Ausweiten der Benutzerrechte. Der passende Exploit passt in einen Tweet. (<a href="https://www.golem.de/specials/sicherheitsluecke/">Sicherheitslücke</a>, <a href="https://www.golem.de/specials/openbsd/">OpenBSD</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137347&page=1&ts=1540564620" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>47</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Augsburg: Fujitsu Deutschland macht alles dicht</title>
|
||||
<link>https://www.golem.de/news/augsburg-fujitsu-deutschland-macht-alles-dicht-1810-137348-rss.html</link>
|
||||
<description>Fujitsu will seine gesamte Fertigung außerhalb Japans schließen. In Deutschland ist der Standort in Augsburg komplett betroffen. (<a href="https://www.golem.de/specials/fujitsu/">Fujitsu</a>, <a href="https://www.golem.de/specials/sap/">SAP</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137348&amp;page=1&amp;ts=1540562340" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/augsburg-fujitsu-deutschland-macht-alles-dicht/121568,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 14:59:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137348-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137348-177485-177484_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Fujitsu will seine gesamte Fertigung außerhalb Japans schließen. In Deutschland ist der Standort in Augsburg komplett betroffen. (<a href="https://www.golem.de/specials/fujitsu/">Fujitsu</a>, <a href="https://www.golem.de/specials/sap/">SAP</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137348&page=1&ts=1540562340" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>56</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Bundesnetzagentur: Seehofer fordert Verschiebung von 5G-Auktion</title>
|
||||
<link>https://www.golem.de/news/bundesnetzagentur-seehofer-fordert-verschiebung-von-5g-auktion-1810-137346-rss.html</link>
|
||||
<description>Bundesinnenminister Horst Seehofer will die 5G-Auktion verschieben, bis die ländlichen Regionen besser berücksichtigt werden. Er wird von einer Gruppe um den CDU-Abgeordneten Stefan Rouenhoff unterstützt. (<a href="https://www.golem.de/specials/5g/">5G</a>, <a href="https://www.golem.de/specials/bundesnetzagentur/">Bundesnetzagentur</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137346&amp;page=1&amp;ts=1540557900" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/handy/bundesnetzagentur-seehofer-fordert-verschiebung-von-5g-auktion/121567,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 13:45:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137346-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137346-177483-177480_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Bundesinnenminister Horst Seehofer will die 5G-Auktion verschieben, bis die ländlichen Regionen besser berücksichtigt werden. Er wird von einer Gruppe um den CDU-Abgeordneten Stefan Rouenhoff unterstützt. (<a href="https://www.golem.de/specials/5g/">5G</a>, <a href="https://www.golem.de/specials/bundesnetzagentur/">Bundesnetzagentur</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137346&page=1&ts=1540557900" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>14</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Linux und Patente: Open Source bei Microsoft ist "Kultur statt Strategie"</title>
|
||||
<link>https://www.golem.de/news/linux-und-patente-open-source-bei-microsoft-ist-kultur-statt-strategie-1810-137345-rss.html</link>
|
||||
<description>Der Microsoft-Angestellte Stephen Walli beschreibt den Wandel bei Microsoft hin zu Open Source Software und Linux als kulturell getrieben. Mit Blick auf den Beitritt zu dem Patentpool des Open Invention Network zeigt sich jedoch auch, dass das Unternehmen noch sehr viel Arbeit vor sich hat. Ein Bericht von Sebastian Grüner (<a href="https://www.golem.de/specials/microsoft/">Microsoft</a>, <a href="https://www.golem.de/specials/softwarepatente/">Softwarepatent</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137345&amp;page=1&amp;ts=1540556820" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/politik-recht/linux-und-patente-open-source-bei-microsoft-ist-kultur-statt-strategie/121566,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 13:27:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137345-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137345-177479-177475_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Der Microsoft-Angestellte Stephen Walli beschreibt den Wandel bei Microsoft hin zu Open Source Software und Linux als kulturell getrieben. Mit Blick auf den Beitritt zu dem Patentpool des Open Invention Network zeigt sich jedoch auch, dass das Unternehmen noch sehr viel Arbeit vor sich hat. Ein Bericht von Sebastian Grüner (<a href="https://www.golem.de/specials/microsoft/">Microsoft</a>, <a href="https://www.golem.de/specials/softwarepatente/">Softwarepatent</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137345&page=1&ts=1540556820" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>20</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Sicherheitslücke: Daten von 185.000 weiteren British-Airways-Kunden betroffen</title>
|
||||
<link>https://www.golem.de/news/sicherheitsluecke-daten-von-185-000-weiteren-british-airways-kunden-betroffen-1810-137344-rss.html</link>
|
||||
<description>Von dem Datenleck im Buchungssystem von British Airways waren deutlich mehr Kunden betroffen als bisher bekannt. Die Fluggesellschaft rät betroffenen Kunden, ihre Bank zu kontaktieren. Kreditkarten werden in diesem Fall häufig komplett ausgetauscht. (<a href="https://www.golem.de/specials/sicherheitsluecke/">Sicherheitslücke</a>, <a href="https://www.golem.de/specials/datenschutz/">Datenschutz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137344&amp;page=1&amp;ts=1540553820" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/sicherheitsluecke-daten-von-185.000-weiteren-british-airways-kunden-betroffen/121565,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 12:37:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137344-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137344-177474-177471_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Von dem Datenleck im Buchungssystem von British Airways waren deutlich mehr Kunden betroffen als bisher bekannt. Die Fluggesellschaft rät betroffenen Kunden, ihre Bank zu kontaktieren. Kreditkarten werden in diesem Fall häufig komplett ausgetauscht. (<a href="https://www.golem.de/specials/sicherheitsluecke/">Sicherheitslücke</a>, <a href="https://www.golem.de/specials/datenschutz/">Datenschutz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137344&page=1&ts=1540553820" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments />
|
||||
</item>
|
||||
<item>
|
||||
<title>iPhone Xr im Test: Apples günstigeres iPhone ist nicht günstig</title>
|
||||
<link>https://www.golem.de/news/iphone-xr-im-test-apples-guenstiges-iphone-ist-nicht-guenstig-1810-137327-rss.html</link>
|
||||
<description>Apple versucht es 2018 wieder einmal mit einem relativ preisgünstigen iPhone - weniger teuer als die Xs-Modelle, aber mit 850 Euro auch nicht gerade preiswert. Käufer bekommen dafür allerdings auch ein Smartphone mit sehr guter Ausstattung, in einigen Punkten wurde jedoch auf Hardware der teuren Modelle verzichtet. Ein Test von Tobias Költzsch (<a href="https://www.golem.de/specials/iphone/">iPhone</a>, <a href="https://www.golem.de/specials/smartphone/">Smartphone</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137327&amp;page=1&amp;ts=1540548180" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/handy/iphone-xr-im-test-apples-guenstigeres-iphone-ist-nicht-guenstig/121563,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 11:03:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137327-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137327-177418-177414_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Apple versucht es 2018 wieder einmal mit einem relativ preisgünstigen iPhone - weniger teuer als die Xs-Modelle, aber mit 850 Euro auch nicht gerade preiswert. Käufer bekommen dafür allerdings auch ein Smartphone mit sehr guter Ausstattung, in einigen Punkten wurde jedoch auf Hardware der teuren Modelle verzichtet. Ein Test von Tobias Költzsch (<a href="https://www.golem.de/specials/iphone/">iPhone</a>, <a href="https://www.golem.de/specials/smartphone/">Smartphone</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137327&page=1&ts=1540548180" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>169</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Microsoft: PC-Spieleangebot des Xbox Game Pass wird erweitert</title>
|
||||
<link>https://www.golem.de/news/microsoft-pc-spieleangebot-des-xbox-game-pass-wird-erweitert-1810-137343-rss.html</link>
|
||||
<description>Der Xbox Game Pass soll künftig um mehr Angebote für Windows-PC erweitert werden, sagt Microsoft-Chef Satya Nadella. Derzeit gibt es für 10 Euro nur wenige plattformübergreifend verfügbare Spiele. (<a href="https://www.golem.de/specials/xbox-one/">Xbox One</a>, <a href="https://www.golem.de/specials/microsoft/">Microsoft</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137343&amp;page=1&amp;ts=1540546800" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/microsoft-pc-spieleangebot-des-xbox-game-pass-wird-erweitert/121562,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 10:40:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137343-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137343-177453-177450_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Der Xbox Game Pass soll künftig um mehr Angebote für Windows-PC erweitert werden, sagt Microsoft-Chef Satya Nadella. Derzeit gibt es für 10 Euro nur wenige plattformübergreifend verfügbare Spiele. (<a href="https://www.golem.de/specials/xbox-one/">Xbox One</a>, <a href="https://www.golem.de/specials/microsoft/">Microsoft</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137343&page=1&ts=1540546800" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>19</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Breitbandgesellschaft: Grüne wollen Netzbetreiber zum Ausbau zwingen</title>
|
||||
<link>https://www.golem.de/news/breitbandgesellschaft-gruene-wollen-netzbetreiber-zum-ausbau-zwingen-1810-137342-rss.html</link>
|
||||
<description>Die Grünen haben die Netzversorgung in Deutschland analysiert. Die Partei, die zurzeit in Wählerumfragen stark zugewinnt, fordert den Breitbandausbau auf Kosten der Konzerne und will Glasfaser staatlich durchsetzen. (<a href="https://www.golem.de/specials/breitband/">Breitband</a>, <a href="https://www.golem.de/specials/handy/">Handy</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137342&amp;page=1&amp;ts=1540545840" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/politik-recht/breitbandgesellschaft-gruene-wollen-netzbetreiber-zum-ausbau-zwingen/121561,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 10:24:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137342-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1807/135605-169300-169297_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die Grünen haben die Netzversorgung in Deutschland analysiert. Die Partei, die zurzeit in Wählerumfragen stark zugewinnt, fordert den Breitbandausbau auf Kosten der Konzerne und will Glasfaser staatlich durchsetzen. (<a href="https://www.golem.de/specials/breitband/">Breitband</a>, <a href="https://www.golem.de/specials/handy/">Handy</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137342&page=1&ts=1540545840" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>22</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Studie: Silicon Valley dient als rechte Hand des großen Bruders</title>
|
||||
<link>https://www.golem.de/news/studie-silicon-valley-dient-als-rechte-hand-des-grossen-bruders-1810-137316-rss.html</link>
|
||||
<description>Die US-Hightech-Branche verdingt sich zunehmend als technischer Dienstleister für staatliche Big-Brother-Projekte wie die Überwachung und Abschiebung von Immigranten, heißt es in einem Bericht von Bürgerrechtlern. Amazon und Palantir verdienten damit am meisten. Von Stefan Krempl (<a href="https://www.golem.de/specials/datenschutz/">Datenschutz</a>, <a href="https://www.golem.de/specials/ibm/">IBM</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137316&amp;page=1&amp;ts=1540543080" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/studie-silicon-valley-dient-als-rechte-hand-des-grossen-bruders/121560,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 09:38:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137316-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137316-177360-177357_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die US-Hightech-Branche verdingt sich zunehmend als technischer Dienstleister für staatliche Big-Brother-Projekte wie die Überwachung und Abschiebung von Immigranten, heißt es in einem Bericht von Bürgerrechtlern. Amazon und Palantir verdienten damit am meisten. Von Stefan Krempl (<a href="https://www.golem.de/specials/datenschutz/">Datenschutz</a>, <a href="https://www.golem.de/specials/ibm/">IBM</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137316&page=1&ts=1540543080" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>20</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Bethesda: Postnukleare PC-Systemanforderungen für Fallout 76</title>
|
||||
<link>https://www.golem.de/news/bethesda-postnukleare-pc-systemanforderungen-fuer-fallout-76-1810-137340-rss.html</link>
|
||||
<description>Kurz vor dem Start der Betaversion für PC-Spieler hat Bethesda die Systemanforderung von Fallout 76 veröffentlicht. Hoffnungen auf lange Abenteuer in der Vorabversion gibt es aber zumindest nach den Erfahrungen des Xbox-Zugangs eher nicht. (<a href="https://www.golem.de/specials/fallout-76/">Fallout 76</a>, <a href="https://www.golem.de/specials/rollenspiel/">Rollenspiel</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137340&amp;page=1&amp;ts=1540542180" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/games/bethesda-postnukleare-pc-systemanforderungen-fuer-fallout-76/121559,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 09:23:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137340-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137340-177448-177445_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Kurz vor dem Start der Betaversion für PC-Spieler hat Bethesda die Systemanforderung von Fallout 76 veröffentlicht. Hoffnungen auf lange Abenteuer in der Vorabversion gibt es aber zumindest nach den Erfahrungen des Xbox-Zugangs eher nicht. (<a href="https://www.golem.de/specials/fallout-76/">Fallout 76</a>, <a href="https://www.golem.de/specials/rollenspiel/">Rollenspiel</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137340&page=1&ts=1540542180" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>101</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Quartalszahlen: Intel legt 19-Milliarden-USD-Rekord vor</title>
|
||||
<link>https://www.golem.de/news/quartalszahlen-intel-legt-19-milliarden-usd-rekord-vor-1810-137339-rss.html</link>
|
||||
<description>Ungeachtet der 14-nm-Knappheit und diverser Sicherheitslücken konnte Intel im dritten Quartal 2018 mehr Umsatz erwirtschaften und mehr Gewinn erzielen als jemals zuvor. Vor allem das florierende Server-Geschäft wird bei Intel immer wichtiger. (<a href="https://www.golem.de/specials/intel/">Intel</a>, <a href="https://www.golem.de/specials/cpu/">Prozessor</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137339&amp;page=1&amp;ts=1540540260" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/quartalszahlen-intel-legt-19-milliarden-usd-rekord-vor/121558,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 08:51:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137339-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137339-177444-177441_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Ungeachtet der 14-nm-Knappheit und diverser Sicherheitslücken konnte Intel im dritten Quartal 2018 mehr Umsatz erwirtschaften und mehr Gewinn erzielen als jemals zuvor. Vor allem das florierende Server-Geschäft wird bei Intel immer wichtiger. (<a href="https://www.golem.de/specials/intel/">Intel</a>, <a href="https://www.golem.de/specials/cpu/">Prozessor</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137339&page=1&ts=1540540260" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>10</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Physik: Weg mit der Schönheit!</title>
|
||||
<link>https://www.golem.de/news/physik-weg-mit-der-schoenheit-1810-137161-rss.html</link>
|
||||
<description>Ist eine Theorie richtig, nur weil sie schön ist? Nein, sagt Sabine Hossenfelder. In ihrem Buch "Das hässliche Universum" zeigt die theoretische Physikerin, wie das Schönheitsdenken die Wissenschaft lähmt und erklärt dabei recht unterhaltsam die unterschiedlichen Theorien und Modelle der Teilchenphysik. Eine Rezension von Friedemann Zweynert (<a href="https://www.golem.de/specials/physik/">Physik</a>, <a href="https://www.golem.de/specials/internet/">Internet</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137161&amp;page=1&amp;ts=1540537380" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/internet/physik-weg-mit-der-schoenheit/121557,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 08:03:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137161-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137161-176716-176713_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Ist eine Theorie richtig, nur weil sie schön ist? Nein, sagt Sabine Hossenfelder. In ihrem Buch "Das hässliche Universum" zeigt die theoretische Physikerin, wie das Schönheitsdenken die Wissenschaft lähmt und erklärt dabei recht unterhaltsam die unterschiedlichen Theorien und Modelle der Teilchenphysik. Eine Rezension von Friedemann Zweynert (<a href="https://www.golem.de/specials/physik/">Physik</a>, <a href="https://www.golem.de/specials/internet/">Internet</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137161&page=1&ts=1540537380" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>86</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Elon Musk: Teslas Model 3 für 35.000 US-Dollar derzeit unmöglich</title>
|
||||
<link>https://www.golem.de/news/elon-musk-teslas-model-3-fuer-35-000-us-dollar-derzeit-unmoeglich-1810-137335-rss.html</link>
|
||||
<description>Tesla-Chef Elon Musk hat eingeräumt, das bei 35.000 US-Dollar startende Basismodell des Elektroautos Model 3 immer noch nicht liefern zu können. (<a href="https://www.golem.de/specials/tesla-model-3/">Tesla Model 3</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137335&amp;page=1&amp;ts=1540533540" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/automobil/elon-musk-teslas-model-3-fuer-35.000-us-dollar-derzeit-unmoeglich/121556,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 06:59:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137335-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137335-177428-177424_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Tesla-Chef Elon Musk hat eingeräumt, das bei 35.000 US-Dollar startende Basismodell des Elektroautos Model 3 immer noch nicht liefern zu können. (<a href="https://www.golem.de/specials/tesla-model-3/">Tesla Model 3</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137335&page=1&ts=1540533540" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>235</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Solarzellen als Dach: Tesla-Solarschindeln verzögern sich bis 2019</title>
|
||||
<link>https://www.golem.de/news/solarzellen-als-dach-tesla-solarschindeln-verzoegern-sich-auf-2019-1810-137334-rss.html</link>
|
||||
<description>Tesla wird die Serienproduktion seiner Solardachziegel nicht mehr wie geplant in diesem Jahr starten. Nach Angaben von Firmenchef Elon Musk verschiebt sich das Vorhaben auf 2019. (<a href="https://www.golem.de/specials/solarenergie/">Solarenergie</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137334&amp;page=1&amp;ts=1540532460" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wissenschaft/solarzellen-als-dach-tesla-solarschindeln-verzoegern-sich-bis-2019/121555,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 06:41:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137334-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1705/127761-139613-i_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Tesla wird die Serienproduktion seiner Solardachziegel nicht mehr wie geplant in diesem Jahr starten. Nach Angaben von Firmenchef Elon Musk verschiebt sich das Vorhaben auf 2019. (<a href="https://www.golem.de/specials/solarenergie/">Solarenergie</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137334&page=1&ts=1540532460" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>47</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Uniti One: Elektroauto für 15.000 Euro wird in Großbritannien gebaut</title>
|
||||
<link>https://www.golem.de/news/uniti-one-elektroauto-fuer-15-000-euro-wird-in-grossbritannien-gebaut-1810-137333-rss.html</link>
|
||||
<description>Das schwedische Unternehmen Uniti will sein Elektroauto One in Großbritannien bauen. Einen fahrenden Prototyp des Uniti One gibt es schon. Das Auto soll je nach Modell für 15.000 bis 20.000 Euro auf den Markt kommen. (<a href="https://www.golem.de/specials/elektroauto/">Elektroauto</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137333&amp;page=1&amp;ts=1540531080" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/automobil/uniti-one-elektroauto-fuer-15.000-euro-wird-in-grossbritannien-gebaut/121554,list.html</comments>
|
||||
<pubDate>Fri, 26 Oct 2018 06:18:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137333-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1805/134432-163131-163128_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Das schwedische Unternehmen Uniti will sein Elektroauto One in Großbritannien bauen. Einen fahrenden Prototyp des Uniti One gibt es schon. Das Auto soll je nach Modell für 15.000 bis 20.000 Euro auf den Markt kommen. (<a href="https://www.golem.de/specials/elektroauto/">Elektroauto</a>, <a href="https://www.golem.de/specials/technologie/">Technologie</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137333&page=1&ts=1540531080" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>28</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Quartalsbericht: Alphabet macht in drei Monaten 9,2 Milliarden Dollar Gewinn</title>
|
||||
<link>https://www.golem.de/news/quartalsbericht-alphabet-macht-in-drei-monaten-9-2-milliarden-dollar-gewinn-1810-137337-rss.html</link>
|
||||
<description>Alphabet erwirtschaftet weiter extrem hohe Gewinne, der Umsatz wächst nicht ganz so stark. Google muss zugleich auf eine Enthüllung in der US-Presse zu sexueller Belästigung um Android-Begründer Andy Rubin reagieren. (<a href="https://www.golem.de/specials/google/">Google</a>, <a href="https://www.golem.de/specials/boerse/">Börse</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137337&amp;page=1&amp;ts=1540504320" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/quartalsbericht-alphabet-macht-in-drei-monaten-9-2-milliarden-dollar-gewinn/121553,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 22:52:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137337-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137053-176114-176111_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Alphabet erwirtschaftet weiter extrem hohe Gewinne, der Umsatz wächst nicht ganz so stark. Google muss zugleich auf eine Enthüllung in der US-Presse zu sexueller Belästigung um Android-Begründer Andy Rubin reagieren. (<a href="https://www.golem.de/specials/google/">Google</a>, <a href="https://www.golem.de/specials/boerse/">Börse</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137337&page=1&ts=1540504320" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>12</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Quartalsbericht: Amazon verfehlt die Umsatzprognosen</title>
|
||||
<link>https://www.golem.de/news/quartalsbericht-amazon-verfehlt-die-umsatzprognosen-1810-137336-rss.html</link>
|
||||
<description>Amazon weist erneut einen hohen Gewinn aus. Doch der Konzern lag beim Umsatz unter den Prognosen der Analysten. (<a href="https://www.golem.de/specials/amazon/">Amazon</a>, <a href="https://www.golem.de/specials/onlineshop/">Onlineshop</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137336&amp;page=1&amp;ts=1540500780" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/quartalsbericht-amazon-verfehlt-die-umsatzprognosen/121552,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 21:53:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137336-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137336-177432-177429_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Amazon weist erneut einen hohen Gewinn aus. Doch der Konzern lag beim Umsatz unter den Prognosen der Analysten. (<a href="https://www.golem.de/specials/amazon/">Amazon</a>, <a href="https://www.golem.de/specials/onlineshop/">Onlineshop</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137336&page=1&ts=1540500780" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>24</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Datenskandal: Britische Datenschutzbehörde verurteilt Facebook</title>
|
||||
<link>https://www.golem.de/news/datenskandal-britische-datenschutzbehoerde-verurteilt-facebook-1810-137332-rss.html</link>
|
||||
<description>Im Skandal um Cambridge Analytica hat die britische Datenschutzbehörde die Höchststrafe von 500.000 Pfund verhängt. Facebook habe einen schweren Verstoß gegen geltendes Recht zugelassen. (<a href="https://www.golem.de/specials/facebook/">Facebook</a>, <a href="https://www.golem.de/specials/socialnetwork/">Soziales Netz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137332&amp;page=1&amp;ts=1540483080" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/datenskandal-britische-datenschutzbehoerde-verurteilt-facebook/121550,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 16:58:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137332-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137332-177420-177419_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Im Skandal um Cambridge Analytica hat die britische Datenschutzbehörde die Höchststrafe von 500.000 Pfund verhängt. Facebook habe einen schweren Verstoß gegen geltendes Recht zugelassen. (<a href="https://www.golem.de/specials/facebook/">Facebook</a>, <a href="https://www.golem.de/specials/socialnetwork/">Soziales Netz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137332&page=1&ts=1540483080" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>35</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Corsair: Neue K70 MK.2 kommt mit Cherrys Low-Profile-Switches</title>
|
||||
<link>https://www.golem.de/news/corsair-neue-k70-mk-2-kommt-mit-cherrys-low-profile-switches-1810-137331-rss.html</link>
|
||||
<description>Corsair erweitert sein Tastaturportefeuille um zwei Gaming-Tastaturen mit Cherrys flachen Low-Profile-Switches. Ein Modell hat Schalter mit einem besonders kurzem Auslöseweg von 1 mm - die Schalter darf Corsair exklusiv verwenden. (<a href="https://www.golem.de/specials/corsair/">Corsair</a>, <a href="https://www.golem.de/specials/eingabegeraet/">Eingabegerät</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137331&amp;page=1&amp;ts=1540480320" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/sonstiges/corsair-neue-k70-mk.2-kommt-mit-cherrys-low-profile-switches/121549,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 16:12:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137331-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137331-177413-177410_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Corsair erweitert sein Tastaturportefeuille um zwei Gaming-Tastaturen mit Cherrys flachen Low-Profile-Switches. Ein Modell hat Schalter mit einem besonders kurzem Auslöseweg von 1 mm - die Schalter darf Corsair exklusiv verwenden. (<a href="https://www.golem.de/specials/corsair/">Corsair</a>, <a href="https://www.golem.de/specials/eingabegeraet/">Eingabegerät</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137331&page=1&ts=1540480320" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>19</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Cambridge-Analytica-Skandal: EU-Parlament fordert schärfere Kontrolle von Facebook</title>
|
||||
<link>https://www.golem.de/news/cambridge-analytica-skandal-eu-parlament-fordert-schaerfere-kontrolle-von-facebook-1810-137329-rss.html</link>
|
||||
<description>Die EU-Abgeordneten haben als Reaktion auf die Datenschutzverstöße von Facebook und Cambridge Analytica Behörden angehalten, ihre Aktivitäten auf dem Netzwerk zu überdenken. Profiling zu politischen Zwecken wollen sie verbieten. Von Stefan Krempl (<a href="https://www.golem.de/specials/facebook/">Facebook</a>, <a href="https://www.golem.de/specials/socialnetwork/">Soziales Netz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137329&amp;page=1&amp;ts=1540479120" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/cambridge-analytica-skandal-eu-parlament-fordert-schaerfere-kontrolle-von-facebook/121548,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 15:52:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137329-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137329-177404-177403_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die EU-Abgeordneten haben als Reaktion auf die Datenschutzverstöße von Facebook und Cambridge Analytica Behörden angehalten, ihre Aktivitäten auf dem Netzwerk zu überdenken. Profiling zu politischen Zwecken wollen sie verbieten. Von Stefan Krempl (<a href="https://www.golem.de/specials/facebook/">Facebook</a>, <a href="https://www.golem.de/specials/socialnetwork/">Soziales Netz</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137329&page=1&ts=1540479120" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>3</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Kuriosum: Das Hellgate London öffnet sich mal wieder</title>
|
||||
<link>https://www.golem.de/news/kuriosum-das-hellgate-london-oeffnet-sich-mal-wieder-1810-137328-rss.html</link>
|
||||
<description>Einer der großen Trash-Klassiker der Spielegeschichte wagt einen neuen Anlauf: Mitte November 2018 soll ein neue, für Einzelspieler ausgelegte Windows-Fassung von Hellgate London erscheinen. Das Ursprungskonzept hatten sich ehemalige Blizzard-Chefentwickler ausgedacht. (<a href="https://www.golem.de/specials/rollenspiel/">Rollenspiel</a>, <a href="https://www.golem.de/specials/steam/">Steam</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137328&amp;page=1&amp;ts=1540476600" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/games/kuriosum-das-hellgate-london-oeffnet-sich-mal-wieder/121547,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 15:10:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137328-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137328-177396-177392_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Einer der großen Trash-Klassiker der Spielegeschichte wagt einen neuen Anlauf: Mitte November 2018 soll ein neue, für Einzelspieler ausgelegte Windows-Fassung von Hellgate London erscheinen. Das Ursprungskonzept hatten sich ehemalige Blizzard-Chefentwickler ausgedacht. (<a href="https://www.golem.de/specials/rollenspiel/">Rollenspiel</a>, <a href="https://www.golem.de/specials/steam/">Steam</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137328&page=1&ts=1540476600" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>61</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Tweether: 10 GBit/s über einen Quadratkilometer verteilt</title>
|
||||
<link>https://www.golem.de/news/tweether-10-gbit-s-ueber-einen-quadratkilometer-verteilt-1810-137326-rss.html</link>
|
||||
<description>Eine neue Technologie verteilt 10 GBit/s über eine große Fläche. Der erste Feldversuch ist erfolgreich verlaufen. Zum ersten Mal wurde ein stabiles drahtloses Netzwerk bei diesen Frequenzen und mit diesen Datenraten betrieben. (<a href="https://www.golem.de/specials/wissenschaft/">Wissenschaft</a>, <a href="https://www.golem.de/specials/mobil/">Mobil</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137326&amp;page=1&amp;ts=1540475400" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wissenschaft/tweether-10-gbit-s-ueber-einen-quadratkilometer-verteilt/121546,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 14:50:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137326-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137326-177391-177388_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Eine neue Technologie verteilt 10 GBit/s über eine große Fläche. Der erste Feldversuch ist erfolgreich verlaufen. Zum ersten Mal wurde ein stabiles drahtloses Netzwerk bei diesen Frequenzen und mit diesen Datenraten betrieben. (<a href="https://www.golem.de/specials/wissenschaft/">Wissenschaft</a>, <a href="https://www.golem.de/specials/mobil/">Mobil</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137326&page=1&ts=1540475400" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>20</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Red Dead Redemption 2: Saloon-Prügelei (der Worte) im Livestream</title>
|
||||
<link>https://www.golem.de/news/red-dead-redemption-2-saloon-pruegelei-der-worte-im-livestream-1810-137312-rss.html</link>
|
||||
<description> Die Golem.de-Redakteure Peter Steinlechner und Michael Wieczorek diskutieren gemeinsam mit unserer Community über den Test zu Red Dead Redemption 2 live ab 18 Uhr. (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/spieletest/">Spieletest</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137312&amp;page=1&amp;ts=1540474200" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/games/red-dead-redemption-2-saloon-pruegelei-der-worte-im-livestream/121545,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 14:30:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137312-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137312-177341-177338_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left"> Die Golem.de-Redakteure Peter Steinlechner und Michael Wieczorek diskutieren gemeinsam mit unserer Community über den Test zu Red Dead Redemption 2 live ab 18 Uhr. (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/spieletest/">Spieletest</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137312&page=1&ts=1540474200" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments />
|
||||
</item>
|
||||
<item>
|
||||
<title>Wolf Intelligence: Trojanerfirma aus Deutschland lässt interne Daten im Netz</title>
|
||||
<link>https://www.golem.de/news/wolf-intelligence-trojanerfirma-aus-deutschland-laesst-interne-daten-im-netz-1810-137323-rss.html</link>
|
||||
<description>Wolf Intelligence verkauft Schadsoftware an Staaten. Eine Sicherheitsfirma hat sensible Daten des Unternehmens öffentlich zugänglich im Internet gefunden. In einer Präsentation wurden die Funde gezeigt. (<a href="https://www.golem.de/specials/trojaner/">Trojaner</a>, <a href="https://www.golem.de/specials/virus/">Virus</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137323&amp;page=1&amp;ts=1540472400" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/wolf-intelligence-trojanerfirma-aus-deutschland-laesst-interne-daten-im-netz/121543,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 14:00:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137323-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137323-177383-177380_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Wolf Intelligence verkauft Schadsoftware an Staaten. Eine Sicherheitsfirma hat sensible Daten des Unternehmens öffentlich zugänglich im Internet gefunden. In einer Präsentation wurden die Funde gezeigt. (<a href="https://www.golem.de/specials/trojaner/">Trojaner</a>, <a href="https://www.golem.de/specials/virus/">Virus</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137323&page=1&ts=1540472400" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>16</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>NBN: Der Top-Nutzer verwendet 24 TByte im Monat</title>
|
||||
<link>https://www.golem.de/news/nbn-der-top-nutzer-verwendet-24-tbyte-im-monat-1810-137324-rss.html</link>
|
||||
<description> Ein staatliches FTTH-Netzwerk für fast alle in Australien bis 2017 war einst das Ziel. Doch davon ist beim (NBN) National Broadband Network nicht mehr viel übrig geblieben. In Berlin wurde eine Zwischenbilanz gezogen. (<a href="https://www.golem.de/specials/festnetz/">Festnetz</a>, <a href="https://www.golem.de/specials/dsl/">DSL</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137324&amp;page=1&amp;ts=1540471380" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/internet/nbn-der-top-nutzer-verwendet-24-tbyte-im-monat/121542,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 13:43:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137324-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137324-177387-177384_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left"> Ein staatliches FTTH-Netzwerk für fast alle in Australien bis 2017 war einst das Ziel. Doch davon ist beim (NBN) National Broadband Network nicht mehr viel übrig geblieben. In Berlin wurde eine Zwischenbilanz gezogen. (<a href="https://www.golem.de/specials/festnetz/">Festnetz</a>, <a href="https://www.golem.de/specials/dsl/">DSL</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137324&page=1&ts=1540471380" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>51</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Linux-Kernel: Mit Machine Learning auf der Suche nach Bug-Fixes</title>
|
||||
<link>https://www.golem.de/news/linux-kernel-mit-machine-learning-auf-der-suche-nach-bug-fixes-1810-137321-rss.html</link>
|
||||
<description>Wichtige Patches, die in stabilen Kernel-Versionen landen sollten, werden von der Linux-Community oft vergessen oder übersehen. Abhilfe schaffen soll offenbar Machine Learning, wie die Entwickler Sasha Levin und Julia Lawall erklären. (<a href="https://www.golem.de/specials/linux-kernel/">Linux-Kernel</a>, <a href="https://www.golem.de/specials/linux/">Linux</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137321&amp;page=1&amp;ts=1540468800" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/security/linux-kernel-mit-machine-learning-auf-der-suche-nach-bug-fixes/121541,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 13:00:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137321-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137321-177375-177372_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Wichtige Patches, die in stabilen Kernel-Versionen landen sollten, werden von der Linux-Community oft vergessen oder übersehen. Abhilfe schaffen soll offenbar Machine Learning, wie die Entwickler Sasha Levin und Julia Lawall erklären. (<a href="https://www.golem.de/specials/linux-kernel/">Linux-Kernel</a>, <a href="https://www.golem.de/specials/linux/">Linux</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137321&page=1&ts=1540468800" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>4</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Quartalszahlen: AMDs Aktie gibt wegen mäßiger Aussichten nach</title>
|
||||
<link>https://www.golem.de/news/quartalszahlen-amds-aktie-gibt-wegen-maessiger-aussichten-nach-1810-137320-rss.html</link>
|
||||
<description>Im dritten Quartal 2018 konnte AMD zwar Umsatz und Gewinn steigern, aber nicht so stark wie erwartet. Die Aktie brach dennoch von über 25 US-Dollar auf 17 US-Dollar ein, da das vierte Quartal schlechter laufen wird als von den Anlegern gedacht - hier wurde zu viel erwartet. (<a href="https://www.golem.de/specials/amd/">AMD</a>, <a href="https://www.golem.de/specials/cpu/">Prozessor</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137320&amp;page=1&amp;ts=1540467600" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/wirtschaft/quartalszahlen-amds-aktie-gibt-wegen-maessiger-aussichten-nach/121540,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 12:40:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137320-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137320-177379-177376_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Im dritten Quartal 2018 konnte AMD zwar Umsatz und Gewinn steigern, aber nicht so stark wie erwartet. Die Aktie brach dennoch von über 25 US-Dollar auf 17 US-Dollar ein, da das vierte Quartal schlechter laufen wird als von den Anlegern gedacht - hier wurde zu viel erwartet. (<a href="https://www.golem.de/specials/amd/">AMD</a>, <a href="https://www.golem.de/specials/cpu/">Prozessor</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137320&page=1&ts=1540467600" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>17</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Projekt Broadband: Bahn will 3,5 Milliarden Euro vom Bund für Glasfasernetz</title>
|
||||
<link>https://www.golem.de/news/projekt-broadband-bahn-will-3-5-milliarden-euro-vom-bund-fuer-glasfasernetz-1810-137322-rss.html</link>
|
||||
<description>Die Pläne für das eigene Glasfasernetz der Deutschen Bahn werden konkret. Über die Finanzierung redet man jetzt mit der Regierung. Das Netz soll schnell gebaut werden. (<a href="https://www.golem.de/specials/deutsche-bahn/">Deutsche Bahn</a>, <a href="https://www.golem.de/specials/umts/">UMTS</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137322&amp;page=1&amp;ts=1540466580" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/internet/projekt-broadband-bahn-will-3-5-milliarden-euro-vom-bund-fuer-glasfasernetz/121539,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 12:23:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137322-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1808/136062-171340-171337_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Die Pläne für das eigene Glasfasernetz der Deutschen Bahn werden konkret. Über die Finanzierung redet man jetzt mit der Regierung. Das Netz soll schnell gebaut werden. (<a href="https://www.golem.de/specials/deutsche-bahn/">Deutsche Bahn</a>, <a href="https://www.golem.de/specials/umts/">UMTS</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137322&page=1&ts=1540466580" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>31</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Red Dead Redemption 2 im Test: Der Revolverhelden-Simulator</title>
|
||||
<link>https://www.golem.de/news/red-dead-redemption-2-im-test-der-revolverhelden-simulator-1810-137304-rss.html</link>
|
||||
<description>Reiten, prügeln, kochen, jagen, schießen, böse sein oder (relativ) brav: In Red Dead Redemption 2 gibt es enorme Möglichkeiten, sich als Revolverheld in einer wunderschönen Westernwelt auszuleben. Das Actionspiel von Rockstar Games ist ein großer Spaß - aber nicht ganz so gut wie GTA 5. Von Peter Steinlechner (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/spieletest/">Spieletest</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137304&amp;page=1&amp;ts=1540465260" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/games/red-dead-redemption-2-im-test-der-revolverhelden-simulator/121537,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 12:01:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137304-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137304-177303-177300_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Reiten, prügeln, kochen, jagen, schießen, böse sein oder (relativ) brav: In Red Dead Redemption 2 gibt es enorme Möglichkeiten, sich als Revolverheld in einer wunderschönen Westernwelt auszuleben. Das Actionspiel von Rockstar Games ist ein großer Spaß - aber nicht ganz so gut wie GTA 5. Von Peter Steinlechner (<a href="https://www.golem.de/specials/red-dead-redemption-2/">Red Dead Redemption 2</a>, <a href="https://www.golem.de/specials/spieletest/">Spieletest</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137304&page=1&ts=1540465260" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>107</slash:comments>
|
||||
</item>
|
||||
<item>
|
||||
<title>Xiaomi: Das Mi Mix 3 hat keine Notch und eine versteckte Frontkamera</title>
|
||||
<link>https://www.golem.de/news/xiaomi-das-mi-mix-3-hat-keine-notch-und-eine-versteckte-frontkamera-1810-137319-rss.html</link>
|
||||
<description>Das Display von Xiaomis angekündigtem Smartphone Mi Mix 3 ist nahezu randlos. Die Frontkamera versteckt das Gerät hinter der aufschiebbaren Schale. Neu ist zudem, dass Xiaomi ein OLED-Panel verbaut und wieder den Qi-Ladestandard nutzt. (<a href="https://www.golem.de/specials/xiaomi/">Xiaomi</a>, <a href="https://www.golem.de/specials/smartphone/">Smartphone</a>) <img src="https://cpx.golem.de/cpx.php?class=17&amp;aid=137319&amp;page=1&amp;ts=1540464540" alt="" width="1" height="1" /></description>
|
||||
<comments>https://forum.golem.de/kommentare/handy/xiaomi-das-mi-mix-3-hat-keine-notch-und-eine-versteckte-frontkamera/121536,list.html</comments>
|
||||
<pubDate>Thu, 25 Oct 2018 11:49:00 +0100</pubDate>
|
||||
<guid>https://www.golem.de/1810/137319-rss.html</guid>
|
||||
<content:encoded><![CDATA[<img src="https://www.golem.de/1810/137319-177371-177370_rc.jpg" width="140" height="140" vspace="3" hspace="8" align="left">Das Display von Xiaomis angekündigtem Smartphone Mi Mix 3 ist nahezu randlos. Die Frontkamera versteckt das Gerät hinter der aufschiebbaren Schale. Neu ist zudem, dass Xiaomi ein OLED-Panel verbaut und wieder den Qi-Ladestandard nutzt. (<a href="https://www.golem.de/specials/xiaomi/">Xiaomi</a>, <a href="https://www.golem.de/specials/smartphone/">Smartphone</a>) <img src="https://cpx.golem.de/cpx.php?class=17&aid=137319&page=1&ts=1540464540" alt="" width="1" height="1" />]]></content:encoded>
|
||||
<slash:comments>125</slash:comments>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
359
internal/reader/parser/testdata/encoding_WINDOWS-1251.xml
vendored
Normal file
359
internal/reader/parser/testdata/encoding_WINDOWS-1251.xml
vendored
Normal file
|
@ -0,0 +1,359 @@
|
|||
<?xml version="1.0" encoding="windows-1251"?>
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>iBash.Org.Ru</title>
|
||||
<link>http://ibash.org.ru/</link>
|
||||
<description>Новый цитатник Рунета</description>
|
||||
<language>ru</language>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17703</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17703</link>
|
||||
<title>Цитата #17703</title>
|
||||
<pubDate>Wed, 21 Mar 2018 10:27:32 +0300</pubDate>
|
||||
<description><![CDATA[xxx: есть у кого жаба программер <br />xxx: кот немного пхп знает? <br />yyy: то что кот PHP немного знает, я бы ещё может поверил <br />yyy: но вот жаба-программер - это ты по-моему загнул]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17705</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17705</link>
|
||||
<title>Цитата #17705</title>
|
||||
<pubDate>Wed, 21 Mar 2018 10:27:22 +0300</pubDate>
|
||||
<description><![CDATA[ххх: 1С изначально проектировалась для небольшого количества пользователей, поэтому оператору предоставлялась бо'льшая свобода действий. <br />ууу: Я склоняюсь к версии что 1С изначально вообще не проектировалась, а сразу писалась.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17707</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17707</link>
|
||||
<title>Цитата #17707</title>
|
||||
<pubDate>Wed, 21 Mar 2018 10:26:48 +0300</pubDate>
|
||||
<description><![CDATA[xxx: У меня сейчас такое странное чувство <br />xxx: Вот представь, что ты летишь на самолете. И вдруг ты узнаешь, что двигатель прикреплен к турбине резинкой от трусов <br />xxx: Вот я сейчас прочитал доки ACPI и очень похожее чувство возникает <br />yyy: Гы. А что там? <br />xxx: Там описание бинаря в BNF. И в нем циклы. <br />xxx: А еще виндовом парсере бага. А производители железа пишут ACPI для своего железа по принципу "на винде работает, значит сойдет" <br />xxx: Итого, 60% таблиц не соответствуют стандарту <br />yyy: Тогда это тебе только кажется, что это резинка от трусов. Это резинка от трусов только по документации и внешнему виду. На самом деле это рисунок резинки от трусов, напечатанный на туалетной бумаге]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17698</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17698</link>
|
||||
<title>Цитата #17698</title>
|
||||
<pubDate>Thu, 15 Mar 2018 10:15:42 +0300</pubDate>
|
||||
<description><![CDATA[(о DRM, защите от копирования) <br />— Диссоциативное Расстройство Меркантильности — очень частое психическое расстройство внутренней жабы, встречающееся у разработчиков игр, сопровождаемое сильными приступами паранойи. В восприятии больного его игра, будучи установленной на несколько устройств, как бы расщепляется на разные игры, за которые, по его мнению, должно быть заплачено отдельно. При этом больной становится одержим навязчивой идеей, что все его обворовывают. В большинстве случаев, это сопровождается визуальными галлюцинациями: больному мерещатся некие «пираты». Когда болезнь достигает критической стадии пациент начинает оберегать «свою прелесть» с таким усердием, что поиграть в нее становится затруднительно даже тем, кто честно за все заплатил.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17714</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17714</link>
|
||||
<title>Цитата #17714</title>
|
||||
<pubDate>Thu, 15 Mar 2018 10:14:00 +0300</pubDate>
|
||||
<description><![CDATA[Новость: в споре, что лучше - AMD или nVidia - один программист зарубил другого топором. <br />Лучший камент: Радеон Раскольников.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17715</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17715</link>
|
||||
<title>Цитата #17715</title>
|
||||
<pubDate>Thu, 15 Mar 2018 10:13:35 +0300</pubDate>
|
||||
<description><![CDATA[xxx: есть у кого жаба программер <br />xxx: кот немного пхп знает? <br />yyy: то что кот PHP немного знает, я бы ещё может поверил <br />yyy: но вот жаба-программер - это ты по-моему загнул]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17722</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17722</link>
|
||||
<title>Цитата #17722</title>
|
||||
<pubDate>Thu, 15 Mar 2018 10:13:03 +0300</pubDate>
|
||||
<description><![CDATA[xxx: из машинного перевода инструкции к бытовой технике: "с 1 по 20 мая пройдут выборы ценностей в ранге" (values within range 1 to 20 may be selected)]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17475</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17475</link>
|
||||
<title>Цитата #17475</title>
|
||||
<pubDate>Thu, 15 Mar 2018 10:05:41 +0300</pubDate>
|
||||
<description><![CDATA[<L29Ah> [[ clang++ == *g++ ]] && echo yay <br /><L29Ah> yay <br /><Minoru> «*g++»? Указатели в моём шелле?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17430</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17430</link>
|
||||
<title>Цитата #17430</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:59:50 +0300</pubDate>
|
||||
<description><![CDATA[xxx: Все програмисты поcле смерти в аду варятся в говнокоде :) <br />yyy: нет, в говнокоде мы варимся уже при жизни, в аду мы его рефакторим]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17419</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17419</link>
|
||||
<title>Цитата #17419</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:59:01 +0300</pubDate>
|
||||
<description><![CDATA["OpenNET: QEMU/KVM и Xen подвержены уязвимости в коде эмуляции VGA" <br /> <br />xx: proxmox на форуме написали что у них падает windows в среде эмуляции после патчей, ппц. <br />yy: Правильный патч. Многоцелевой.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17414</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17414</link>
|
||||
<title>Цитата #17414</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:58:30 +0300</pubDate>
|
||||
<description><![CDATA[xxx: 600 Гб логов? У меня >3 ТБ было. Дальше также место кончилось. Причина: случайно запись в лог внутри цикла написал, вместо вне него. Странно, что оно вообще работало.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17396</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17396</link>
|
||||
<title>Цитата #17396</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:56:44 +0300</pubDate>
|
||||
<description><![CDATA[в офисе Apple: <br />- Может сделаем новый дизайн? <br />- Та не, мы меняли его уже пару лет назад, у кого еще идеи? <br />- А давайте исправим глюки на старых девайсах? <br />- Зачем? пусть покупают новые! <br />- Давайте добавим новые Emoji? <br />- Гениально! Так и сделаем!]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17394</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17394</link>
|
||||
<title>Цитата #17394</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:56:28 +0300</pubDate>
|
||||
<description><![CDATA[xxx: Саша, речь не о том, что теоретически возможно прочесть произведения классики, и даже есть те, кто прочитывает. Мануал администратора фриБЗДи Ты тоже, наверно, одолел, и возможно, что на одном дыхании - Ты станешь на этом основании утверждать, что рекомый мануал есть выдающийся памятник словесности, написан легко, увлекательно и очень душевно? <br /> <br />yyy: Мануал администратора фрибсд, сравнительно с оракловой документацией, это выдающийся, мать его так, памятник словесности. Написан легко, увлекательно, и очень душевно.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17386</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17386</link>
|
||||
<title>Цитата #17386</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:55:30 +0300</pubDate>
|
||||
<description><![CDATA[> Компания Mail.Ru <...> будет предоставлять услуги коммерческой поддержки решений на базе свободной СУБД Tarantool <br /> <br />SELECT * FROM cars; <br /> <br />+------+--------------------+ <br />| id | name | <br />+------+--------------------+ <br />| 1 | "Mazda CX-3" | <br />| 2 | "Audi Q1" | <br />| 3 | "BMW X1" | <br />| 4 | "Mazda CX-5" | <br />| 5 | "Cadillac XT5" | <br />| NULL | "Спутник@Mail.Ru" | <br />| NULL | "Guard@Mail.ru" | <br />| NULL | "Агент@Mail.ru " | <br />| NULL | "Mail.ru Updater" | <br />+------+--------------------+]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17381</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17381</link>
|
||||
<title>Цитата #17381</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:54:50 +0300</pubDate>
|
||||
<description><![CDATA[Дело в том, что любые новые фичи C++ должны пройти через комитет трёх фанатиков. <br /> <br />Первый фанатик обожает исключения. Всё собрание этот ворчливый старик кажется спящим, но в самый неудобный момент вскакаивает и перебивает говорящего криком «и тут мы бросаем исключение!» После этого конечно всё ломается. Этот старик мало кому нравится, но все вынуждены его терпеть. <br /> <br />Второй фанатик обожает шаблоны. Любую фичу он нежно оборачивает в шаблоны, которые заворачивает в шаблоны, которые заворачивает в шаблоны… пока она опять не сломается. В свободное время пытается написать программу по вычислению смысла жизни и вообще на этапе компиляции. <br /> <br />Третий фанатик, самый молодой, обожает всё параллельное. В отличие от других, он не критикует сразу. Он с энтузиазмом хватается за предложенную фичу, сразу переводит его в параллельность, убеждается что всё ломается и со вздохом «в наш век параллельного программирования так делать нельзя» отправляет фичу в корзину. Говорят, у него множество личностей, которые друг перебивают часто друга.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17375</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17375</link>
|
||||
<title>Цитата #17375</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:53:54 +0300</pubDate>
|
||||
<description><![CDATA[клиент: Непингуется хост serv29. Посмотрите что с ним. <br />админ: Посмотрел на него, пинг появился. <br />клиент: А что с ним было? <br />админ: хз. я только посмотел. Могу объяснить это приниципом неопределенности: наблюдение влияет на результат.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17371</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17371</link>
|
||||
<title>Цитата #17371</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:53:46 +0300</pubDate>
|
||||
<description><![CDATA[это гениально - вырядиться в костюм пингвина и ходить по улице, приставая к людям с вопросом: "не хотите ли вы поговорить о линуксе?" После чего вручать брошюрки про gentoo... хД]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17370</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17370</link>
|
||||
<title>Цитата #17370</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:53:28 +0300</pubDate>
|
||||
<description><![CDATA[Обсуждение российского моноблока "Таволга": <br />zzz: "Как нам пояснили в компании "???", "не хотелось придумывать очередное безликое латинизированное название, обычно ассоциирующееся с IT, или аббревиатуру. Нам хотелось, чтобы название было узнаваемо русским, а не псевдо-западным, мелодичным и при этом не банальным – вот так из множества вариантов выбрали Таволгу"." <br /> <br />скоро так начнут и кодить на кирили...ах, ну да <br /> <br />xxx: Процессор Intel Core i5-5287U <br /> <br />yyy: Таволга, или Лаба&#769;зник (лат. Filip&#233;ndula) — род многолетних трав семейства Розовые (Rosaceae). Насчитывает 10—13 видов[3], произрастающих в умеренной зоне Северного полушария. <br />Садовое применение: <br />Великолепно отпугивает мух, комаров, слепней. <br /> <br />Походу попали в точку <br /> <br />zzz: Так ребята там здоровенный фумигатор на пятом коре запилили]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17369</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17369</link>
|
||||
<title>Цитата #17369</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:52:38 +0300</pubDate>
|
||||
<description><![CDATA[Grother: все знают много историй про то, как шампунь или ещё какая хрень в ванной путалась с её кремом для депиляции. Но мало кому известны истории о том, как она мазала прыщи из симпатичного маленького тюбика с непонятным названием Pasta silikonova termoprzewodzaca.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17367</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17367</link>
|
||||
<title>Цитата #17367</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:52:20 +0300</pubDate>
|
||||
<description><![CDATA[xxx: Свеже-родившийся анекдот - сколько нужно айтишников чтобы переткнуть сервер в другую подсеть? <br />xxx: Ответ: пять. Два тестировщика, два инжнера и админ. <br />xxx: Тестировщикам нужно но они не знают, инженеры знают но им нельзя. А админ пустил всех в серверную.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17363</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17363</link>
|
||||
<title>Цитата #17363</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:51:55 +0300</pubDate>
|
||||
<description><![CDATA[Программисту по багам программы: <br />1.При выборе даты постоянно вылетает необрабатываемое исключение. <br />2.Если нажать на кнопку "фильтр" пока идёт создание фильтра - возникает необрабатываемое исключение. <br />Ответ: <br />П.1 Это ошибка не программы а её окружения, т.е. пофикси винду. <br />П.2 куда ты торопишься?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17359</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17359</link>
|
||||
<title>Цитата #17359</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:51:10 +0300</pubDate>
|
||||
<description><![CDATA[Обсуждение ReactOS 0.4 на ЛОРе: <br /> <br />Oxdeadbeef: Оно может в x86_64? <br /> <br />Jedi-to-be: Может, но пока нет.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17358</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17358</link>
|
||||
<title>Цитата #17358</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:51:04 +0300</pubDate>
|
||||
<description><![CDATA[<dsmirnov> на супермикро и в отвратительных шкафах ..... а вы лабутены, лабутены ....]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17357</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17357</link>
|
||||
<title>Цитата #17357</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:50:55 +0300</pubDate>
|
||||
<description><![CDATA[разраб: деплоим. 20 пендингов <br />тестер: боже, сохрани <br />разраб: не поможет, место на облаке закончилось]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17356</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17356</link>
|
||||
<title>Цитата #17356</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:50:51 +0300</pubDate>
|
||||
<description><![CDATA[xxx: Программно-аппаратная платформа удаленного администрирования автоматизированных систем. Сокращенно - ПАПУАС]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17354</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17354</link>
|
||||
<title>Цитата #17354</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:50:41 +0300</pubDate>
|
||||
<description><![CDATA[Nick> Что вы вообще понимаете в тормозных серверах <br />Nick> Мой пишет “System information disabled due to load higher than 1.0” <br />Nick> Вот только при этом вилка локалки рядом на столе лежит, вынутая из сетевухи <br />Nick> Для селерона 400, в который через PCI-адаптер SATA воткнут терабайтник… <br />Nick> да для него просто дышать — и то уже физкультура]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17353</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17353</link>
|
||||
<title>Цитата #17353</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:49:52 +0300</pubDate>
|
||||
<description><![CDATA[Мой мир никогда не станет прежним. Сегодня я узнал, что RoHS это не название китайской фирмы по выпуску электронных компонентов, а директива по содержанию в них вредных веществ]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17316</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17316</link>
|
||||
<title>Цитата #17316</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:47:42 +0300</pubDate>
|
||||
<description><![CDATA[Елена: Дали строителям проект, они строили-строили и наконец построили. Приезжает заказчик. В грунте выкопана цилиндрическая яма метров 15 в глубину. На дне сияет прожектор. Заказчик переворачивает чертёж на 180 градусов и говорит: “Всё хорошо, но по проекту здесь должен был быть МАЯК”. <br />Dmitry: Боженьки мои ))))) <br />Елена: немножко похоже на историю из жизни программистов <br />Dmitry: ага ) только программист может потом решить, что будет легче все корабли сделать подземными, чем переделывать маяк.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17314</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17314</link>
|
||||
<title>Цитата #17314</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:46:52 +0300</pubDate>
|
||||
<description><![CDATA[Kikimorra: Изучаю джаваскрипт на онлайн-курсах. Чувак у доски рассказывает, как удалять ноды. Приводимая в пример веб-страница выглядит так: <br /> <br />It's a nice day! <br />и кнопка <br />Delete all children! <br /> <br />Прям кодишь и чувствуешь, как рога с хвостом прорастают >.<]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17312</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17312</link>
|
||||
<title>Цитата #17312</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:46:33 +0300</pubDate>
|
||||
<description><![CDATA[xxx:после апдейта винда взяла и переставила панель задач слева обратно вниз <br />xxx:мол, не выёбывайся <br />ххх:тебе не убунта]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17306</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17306</link>
|
||||
<title>Цитата #17306</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:46:21 +0300</pubDate>
|
||||
<description><![CDATA[aaa: Есть же специальные для тачскринов, там кончики пальцев сделаны из проводящего материала. <br />bbb: Оо, как они называются? А то я видел только вязаные igloves, которые рвутся через месяц использования, И в мороз в них не походишь. <br />ccc: Купите металлизированные нитки и сделайте несколько стежков под подушечками пальцев. Начинайте прошивать изнутри оставив конец нитки после узелка подлиннее, чтобы обеспечить лучшую проводимость. <br />ddd: — Как ты работаешь с айпадом в перчатках? <br />— Я их перепрошил.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17304</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17304</link>
|
||||
<title>Цитата #17304</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:45:51 +0300</pubDate>
|
||||
<description><![CDATA[<> нет ничего приятнее теплого лампового диалапа... <br /><> когда жужжание фрезы сливается с звуками хэндшейка в гармоничную мелодию?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17297</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17297</link>
|
||||
<title>Цитата #17297</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:44:04 +0300</pubDate>
|
||||
<description><![CDATA[[15:15:56] r@ttler: говно успешно прилеплено и оттестировано. говно оказалось говном <br />[15:16:05] ZimM: внезапно <br />[15:17:30] r@ttler: ну я пока его прилеплял мне аж привидилась картина: замок разраба говна. дорога к нему увенчана костылями. как вот копья вешали с черепами врагов, так тут костыли с черепами разрабов <br />[15:17:59] r@ttler: и табличка на воротах "оставь свой мозг, всяк сюда входящий"]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17291</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17291</link>
|
||||
<title>Цитата #17291</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:42:11 +0300</pubDate>
|
||||
<description><![CDATA[Если бы врачи были ИТ-шниками: <br /> <br />- У меня не работает клавиатура. <br /> <br />- Сдайте пробы на подклавиатурный сахар и лактозу, сделайте рентген и функциональный тест клавиш.... <br />[через 2 месяца и 20 тысяч рублей]...Действительно, обнаружена карамельная бляшка под пробелом. <br /> <br />- Что же делать? <br /> <br />- Вам показана консервативная терапия: ежедневно постукивайте 15 минут по перевернутой клавиатуре, затем 15 минут протирайте клавишу спиртом, потом разрабатывайте клавишу вручную. До конца срока службы вашего системного блока старайтесь как можно реже и аккуратнее пользоваться пробелом. Поставьте виртуальную клавиатуру и печатайте мышкой.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17285</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17285</link>
|
||||
<title>Цитата #17285</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:40:57 +0300</pubDate>
|
||||
<description><![CDATA[ZimM: нуу... with great power comes great responsibility <br />r@ttler: great chances to shoot your own leg <br />ZimM: ну это да. другое дело, что для этого все равно нужно постараться <br />r@ttler: work hard to shoot your own leg? <br />r@ttler: ну тогда ты совсем лол <br />r@ttler: впадло было делать по-нормальному, потому помучался и таки сделал через жопу <br />ZimM: ну, я думал, что отстрелю себе фалангу мизинца, а оторвал пол-туловища, потому что пол-туловища мне показались похожими на фалангу мизинца...]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17280</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17280</link>
|
||||
<title>Цитата #17280</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:37:53 +0300</pubDate>
|
||||
<description><![CDATA[xxx: Вот говорят иногда "зоопарк браузеров" (операционных систем, железок и так далее), а я тут внезапно понял, что у меня самый настоящий бордель виртуальных машин. Потому что их у меня четыре, названы женскими именами, чтобы быстро различать, и я с ними трахаюсь. Причём в данный момент со всеми четырьмя одновременно, потому что делаю лабораторную по сетям.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17279</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17279</link>
|
||||
<title>Цитата #17279</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:37:50 +0300</pubDate>
|
||||
<description><![CDATA[Val: США провели третье и последнее испытание новой атомной бомбы B61-12. Бомбу без заряда, в соответствии с международным договором о запрете ядерных взрывов, сбросили с истребителя F-15E на полигоне "Тонопа" в Неваде 20 октября. <br />Val: какое интересное испытание. При ударе о землю она выбросила флажок "БУМ"? <br />Кир: Отправила в твиттер "БДЫЩ!" - она жы высокотехнологичная]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17276</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17276</link>
|
||||
<title>Цитата #17276</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:29:42 +0300</pubDate>
|
||||
<description><![CDATA[dimgel: (ссылка на ленту.ру) "Linux.Encoder.1 — относится к классу троянцев-шифровальщиков. После запуска с правами администратора..." <br />Бгг. Ещё сто лет назад шутка ходила про линуксовые вирусы: "распакуйте меня пожалуйста и пропишите в автозапуск демоном от имени рута". <br />dimgel: Я смотрю, ни хера не меняется в этой жизни. <br />garik: на ещё надо пару патчей найти и накатить <br />garik: иначе не скомпилится <br />dimgel: обязательно <br />dimgel: причём для разных дистров патчи будут разные <br />dimgel: Народ! Как пропатчить Linux.Encoder.1 под FreeBSD?!]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17272</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17272</link>
|
||||
<title>Цитата #17272</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:28:44 +0300</pubDate>
|
||||
<description><![CDATA[xxx: когда читаю такие ограничения дурацкие, хочется спросить, каким местом пишутся драйвера <br />yyy: Но тут хоть плюс, что он сам не падает, а ошибку только выдаёт. <br />xxx: андроид разработчик... <br />xxx: ну хорошо хоть не падает вместе с системой!! <br />xxx: а то что всё в говне и не работает это мелочи <br />xxx: дальше будет "ну хоть не сносит систему" <br />xxx: "ну хоть не выжигает гпу" <br />yyy: "ну, хоть не выжигает глаза" <br />xxx: "ну хоть не подключается к скайнету и не выжигает поверхность планеты" <br />yyy: "ну, хотя бы не уничтожает вселенную" <br />xxx: вот видишь, повезло-то как!]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17259</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17259</link>
|
||||
<title>Цитата #17259</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:25:43 +0300</pubDate>
|
||||
<description><![CDATA[обсуждение странного результата трейсроута <br />[19:27:36] ZimM: пробил по геоип. реально Europe, но если по координатам глянуть - то швейцария <br />[19:28:01] r@ttler: и пробей соседний. реально сша? <br />[19:28:30] ZimM: реально сша <br />[19:28:49] r@ttler: ну значит лол <br />[19:29:01] r@ttler: сто раз туда-сюда по трансатлантике? <br />[19:29:08] ZimM: ну а хуле <br />[19:29:14] r@ttler: вокруг света за 80 хопов?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17236</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17236</link>
|
||||
<title>Цитата #17236</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:23:51 +0300</pubDate>
|
||||
<description><![CDATA[xxx: "Как перестать юзать чужой код и научиться прогать самостоятельно", новый бестселлер Алан Карра]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17235</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17235</link>
|
||||
<title>Цитата #17235</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:23:49 +0300</pubDate>
|
||||
<description><![CDATA[xx: Флеш умер, google его выпилит скоро :) <br /> <br />yy: Джобс тоже так говорил) <br /> <br />zz: Джобса уже бог (или ктулху, или матрица - кому что нравится) выпилил, а флэш еще барахтается <br /> <br />tt: Ну разве не ясно, что это флеш его и выпилил?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17233</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17233</link>
|
||||
<title>Цитата #17233</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:23:33 +0300</pubDate>
|
||||
<description><![CDATA[К новости "Отечественный защищённый Linux-дистрибутив Заря готов к внедрению": <br /> <br />xxx: Теперь будет сборка-разборка не только автомата, но и ядра. Норматив 3 минуты.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17230</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17230</link>
|
||||
<title>Цитата #17230</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:22:21 +0300</pubDate>
|
||||
<description><![CDATA[С Хабра: <br />Смотрели Last Exile, где были шахматы для воздушных кораблей — с фиксацией? Вот термос для поезда. У него горлышко как у чайника, но оно перекрыто. Кнопка на ручке открывает возможность лить из термоса наружу. Это классика страховки от ошибок. Пользоваться потенциально опасной функцией можно только сознательно. <br />[...] <br />Это ещё и защита от дурака, в частности, важная для техники безопасности. У нас на производстве есть станок, который умеет прошибать гильотинным ножиком сразу огромную пачку бумаги. Так вот, чтобы его запустить нужно: <br /> <br />1) Положить бумагу под датчик бумаги <br />2) Положить левую руку на левую пусковую кнопку далеко слева <br />3) Правую руку — на правую пусковую кнопку далеко справа <br />4) Нажать педаль ногой (в этом положении физически невозможно засунуть голову в рабочую область станка) <br />5) Нажать обе пусковые кнопки одновременно <br />6) Но, видимо, рабочие научились действовать вдвоём или блокировать кнопки — и поэтому ещё нужно убрать всё из рабочей зоны, чтобы инфракрасные лучи не пересекались. Только после этого случится пуск.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17226</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17226</link>
|
||||
<title>Цитата #17226</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:20:48 +0300</pubDate>
|
||||
<description><![CDATA[Архитекторы, емае, это источник нескончаемого умиления. Планерка, разбор какого-то легаси модуля. Говорят переписывать будем. Архитектор его анализировал неделю. Читал там код, компилил что-то, виртуалок поднял кучу, думал спеку, ходил курить каждые 15 и посадил картридж в принтере. Вот выходит этот мегачеловече к вайтборду докладывать комманде об устройстве этой вундервафли. Берет маркер, долго думает, рисует кружок. Еще думает, закрашивает. Думает еще, рисует входящую стрелку. Потом еще исходящую. Поворачивается к комманде: <br /> <br />-- В общем, коллеги, это жопа. Работает все через нее. А теперь посмотрим дему и обсудим баги. <br /> <br />Я вообще удивлен что в они в свой UML до сих пор стандартный символ не добавили.]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17227</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17227</link>
|
||||
<title>Цитата #17227</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:20:46 +0300</pubDate>
|
||||
<description><![CDATA[xxx: О, Боже! Я начинаю получать кайф от пользования линуксом... Развернул убунту, понадобилось пару программ поставить, я ему пишу в консоли аптгет инсталл со списком желаемого, а он сам ищет, ставит и настраивает, и не надо дистрибутивы искать, по сайтам лазить, качать...]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17223</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17223</link>
|
||||
<title>Цитата #17223</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:19:38 +0300</pubDate>
|
||||
<description><![CDATA[xxx: 40 Ватт, бывали у АМД и помощнее, если вы понимаете... <br />yyy: у меня был пентиум D <br />yyy: этим все сказано <br />yyy: когда я запускал эклипс - зимой наступало лето <br />zzz: у меня тоже греет комнату хорошо :) <br />zzz: сейчас 80 градусов <br />xxx: да это не комната - это баня ))) <br />yyy: живучий однако <br />yyy: мой спалил три мамки <br />yyy: у всех диагноз - микротрещины <br />yyy: это была середина 2000х, обычный офисный корпус, потолок регистрировал примерно 120 градусов <br />yyy: я летом морозил лед и крошил его в миску перед воздухозаборным отверстием <br />yyy: и менял его раз в час <br />yyy: когда грузилась моя винда, запотевали окна соседних домов <br />yyy: однажды, когда я архивировал сериал на флешку, ученые отметили глобальное потепление на 5 градусов <br />yyy: каждый раз, как я компилировал проект, в антарктиде семейство пингвинов оставалось без гнезда <br />xxx: мне кажется, что Чак тебе начинает завидовать <br />yyy: чак приходил ко мне домой разгоревать свой завтрак <br />yyy: свой процессор я отдал ученым, и в итоге он лег в основу ИТЭР]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17224</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17224</link>
|
||||
<title>Цитата #17224</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:19:36 +0300</pubDate>
|
||||
<description><![CDATA[— Человек, который работает в ИБ, не знает, что такое NDA? <br />— Может быть ему по NDA нельзя говорить, что знает?]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17221</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17221</link>
|
||||
<title>Цитата #17221</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:17:22 +0300</pubDate>
|
||||
<description><![CDATA[duzorg: <br />У кондиционера заклинило дренаж и вся вода вылилась внутрь серверной. Благо я туда случайно зашёл. Там серваки стояли на деревянной столешнице, а её водой размыло, она посередине где то на 15-20см прогнулась. Еще бы немного и рухнули бы все серваки на пол. Тысяч на 100 попали бы... <br />Или скорее даже больше чем на 100. <br />duzorg: <br />А в серверную я зашёл, потому что микротик завис. А микротик впервые за 4 года завис. %) <br />Funkryer: <br />вот так повезло <br />отличный был бы отзыв о микротике =) <br />мол завис 1 раз за 4 года и то только для того, чтобы спасти наши серваки <br />duzorg: <br />Не только работает стабильно, но и заботится о стабильной работе рядом находящегося оборудования ))) <br />duzorg: <br />На самом деле может его от большой влажности чё нить клинануло... хз... <br />Funkryer: <br />ну, начал, не порть магию!]]></description>
|
||||
</item>
|
||||
<item>
|
||||
<guid>http://ibash.org.ru/quote.php?id=17220</guid>
|
||||
<link>http://ibash.org.ru/quote.php?id=17220</link>
|
||||
<title>Цитата #17220</title>
|
||||
<pubDate>Thu, 15 Mar 2018 09:16:48 +0300</pubDate>
|
||||
<description><![CDATA[xxx: конструктор запросов как всегда гентален <br />xxx: :( <br />xxx: хм хотел написать гениален, но генитален тоже пойдет]]></description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
79
internal/reader/parser/testdata/no_encoding_ISO-8859-1.xml
vendored
Normal file
79
internal/reader/parser/testdata/no_encoding_ISO-8859-1.xml
vendored
Normal file
|
@ -0,0 +1,79 @@
|
|||
<rss version="2.0" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
<channel>
|
||||
<title>Flux RSS du magazine de psychologie Le Cercle Psy</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/rss</link>
|
||||
<description>Flux RSS du magazine de psychologie Le Cercle Psy, le magazine de toutes les psychologies.</description>
|
||||
<copyright>Le Cercle Psy</copyright>
|
||||
|
||||
<item>
|
||||
<title>Perturbateurs endocriniens : quels effets sur le cerveau ?</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/perturbateurs-endocriniens-quels-effets-sur-le-cerveau_sh_39995</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Si leur impact semble discret au premier abord, nombre d'études montrent que les perturbateurs endocriniens pourraient être à l'origine de troubles neuro-développementaux chez l'enfant.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Masters en Psycho : une simplificationet#8230; très complexe </title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/masters-en-psycho-une-simplification-tres-complexe_sh_40065</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Une nouvelle nomenclature adoptée en 2014 a voulu simplifier les options proposées aux étudiants en Master de Psychologie. Mais on en revient à des choix aussi illisibles qu'auparavant !</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>La criminalité liée surtout à... l'ennui ?</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/la-criminalite-liee-surtout-a-l-ennui_sh_39986</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>« L'oisiveté est mère de tous les vices », dit le proverbe... Certains chercheurs américains paraissent proches de cette position !</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title></title>
|
||||
<link></link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description></description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Caroline Eliacheff : « Dolto reste authentiquement subversive »</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/caroline-eliacheff-dolto-reste-authentiquement-subversive_sh_39992</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Françoise Dolto est morte il y a trente ans. D'abord adulée par des générations de parents et de collègues, on lui a ensuite reproché d'avoir favorisé l'émergence d'enfants-rois tyranniques. Et s'il existait une autre voie ?</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>L'enfant doué : quand trop comprendre... empêche parfois de comprendre</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/l-enfant-doue-quand-trop-comprendre-empeche-parfois-de-comprendre_sh_40004</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>On ne le répètera jamais assez : réaliser le portrait-robot d'un enfant « doué », « surdoué », « à haut potentiel », peu importe la qualification choisie, est vain. Cet ouvrage nous rappelle que chaque facilité, talent, compétence ou même don, peut s'accompagner d'un versant potentiellement plus problématique. Mais insistons : potentiellement.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Travail, organisations, emploi : les modèles européens</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/travail-organisations-emploi-les-modeles-europeens_sh_33090</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Les pays européens diffèrent en matière de performance, de niveau de chômage et de qualité de vie au travail. Certains pays réussissent mieux que d'autres et sont pris comme «modèles». Comment font-ils?</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Migrants : l'urgence thérapeutique</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/migrants-l-urgence-therapeutique_sh_39180</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Poussés à l'exil par les conflits, la pauvreté et l'espoir d'une vie meilleure, les migrants arrivent après un long parcours. Beaucoup sont blessés, brisés, désespérés parfois. Quelle réponse pour les aider ?</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Psy en prison, une mission impossible ?</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/psy-en-prison-une-mission-impossible_sh_38718</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Détenus proches de la psychose, manque cruel de moyens, hiérarchie intrusive... Les praticiens intervenant en prison n'ont pas un quotidien facile. Retour sur un sacerdoce des temps modernes.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Psychologue à domicile : de la clinique à l'état brut</title>
|
||||
<link>https://le-cercle-psy.scienceshumaines.com/psychologue-a-domicile-de-la-clinique-a-l-etat-brut_sh_35540</link>
|
||||
<pubDate>Wed, 17 Oct 2018 10:30:00 GMT</pubDate>
|
||||
<description>Si tu ne peux pas venir au psychologue, le psychologue viendra à toi ! L'intervention à domicile demeure une pratique encore peu répandue chez les psys. En quoi diffère-t-elle d'une consultation ordinaire ?</description>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
|
374
internal/reader/parser/testdata/rdf_UTF8.xml
vendored
Normal file
374
internal/reader/parser/testdata/rdf_UTF8.xml
vendored
Normal file
|
@ -0,0 +1,374 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
|
||||
<channel>
|
||||
<title>heise online News</title>
|
||||
<link>https://www.heise.de/newsticker/</link>
|
||||
<description>Nachrichten nicht nur aus der Welt der Computer</description>
|
||||
</channel>
|
||||
|
||||
|
||||
<item>
|
||||
<title>OLED-TVs: Vorsichtsmaßnahmen gegen Einbrennen</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/OLED-TVs-Vorsichtsmassnahmen-gegen-Einbrennen-4205274.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Wer gerade einen neuen OLED-Fernseher gekauft hat oder sich zu Weihnachten einen zuzulegen möchte, sollte unbedingt ein paar Hinweise beachten.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Mega-Deal: IBM übernimmt Red Hat</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Mega-Deal-IBM-uebernimmt-Red-Hat-4205582.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Giganten-Hochzeit in den USA: Der Computerkonzern IBM übernimmt den Open-Source-Anbieter Red Hat für umgerechnet 30 Milliarden Euro.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Fortnite-Macher: Epic Games soll 15 Milliarden Dollar wert sein</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Fortnite-Macher-Epic-Games-soll-15-Milliarden-Dollar-wert-sein-4205522.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Epic Games konnte bei einer Investionsrunde einige neue Geldgeber von sich überzeugen. Insgesamt flossen 1,25 Milliarden US-Dollar.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Erster nichtstaatlicher Raketenstart in China fehlgeschlagen</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Erster-nichtstaatlicher-Rekatenstart-in-China-fehlgeschlagen-4205524.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Trägerrakete ZQ-1 hat es wegen unbekannter technischer Probleme nach dem Start nicht in die Erdumlaufbahn geschafft.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>eARC: Immer mehr Hersteller schalten HDMI-Audio-Rückkanal frei</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/eARC-Hersteller-schalten-HDMI-Audio-Rueckkanal-frei-4205518.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Während andere HDMI-2.1-Funktionen auf sich warten lassen, ist der "enhanced Audio Return Channel" nach einem Firmware-Update schon bei AV-Receivern nutzbar.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Vorschau: Neue PC-Spiele im November 2018</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Vorschau-Neue-PC-Spiele-im-November-2018-4202098.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Jeden Monat schicken Spiele-Hersteller zahlreiche neue Titel ins Rennen. Wir haben die wichtigsten Spiele-Neuerscheinungen im November herausgesucht.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Israelisches Start-up baut faltbares Elektroauto</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Israelisches-Start-up-baut-faltbares-Elektroauto-4205501.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das zweisitzige Auto kann sein Fahrgestell zum Parken einklappen und passt dann auf einen Motorradparkplatz.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Flash-Speicher: WD will Produktion einschränken</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Flash-Speicher-WD-will-Produktion-einschraenken-4205498.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Preise für NAND-Flash-Speicher kennen derzeit nur eine Richtung: abwärts. WD will dem mit Produktionseinschränkungen begegnen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>LED-Tastatur Aukey KM-G6 im Test: mechanisch, günstig und laut</title>
|
||||
<link>https://www.techstage.de/news/Mechanische-Tastatur-Aukey-KM-G6-im-Test-guenstig-und-laut-4205068.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Aukey KM-G6 kostet weniger als 50 Euro und zeigt, dass mechanische Tastaturen nicht teuer sein müssen. Wir testen das Keyboard.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Einhörner zum Leben erwecken - Kultur-Hackathon in Mainz gestartet</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Einhoerner-zum-Leben-erwecken-Kultur-Hackathon-in-Mainz-gestartet-4205490.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>In Museen, Bibliotheken und Archive stecken viele Daten, die sich kreativ nutzen lassen. Programmierer, Designer und Historiker haben sich das vorgenommen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Impressionen von der SPIEL 2018: Gesellschaftsspiele für Nerds und Geeks</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Impressionen-von-der-SPIEL-2018-Gesellschaftsspiele-fuer-Nerds-und-Geeks-4205405.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Weg von Bildschirm und Controller, hin zu Würfeln und Karten. Die SPIEL-Messe zeigt Neuheiten bei IT-affinen Brett-, Karten- und Tabletop-Spielen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Missing Link: Vor 100 Jahren begann die deutsche Revolution</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Missing-Link-Vor-100-Jahren-begann-die-deutsche-Revolution-4205422.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Von den Sturmvögeln zu den Stiefkindern der Revolution: Mit dem Matrosenaufstand startete Deutschland in die erste Republik.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>4W: Was war. Was wird. Wettrüsten oder Waffelessen, das ist die Frage.</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Was-war-Wettruesten-oder-Waffelessen-das-ist-die-Frage-4205432.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Zeit! Irgendwann gab es sie gar nicht, heute wird sie uns geschenkt. Ja, auch Hal Faber weiß, dass das Quatsch ist. Wie so vieles andere.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Kommentar: Vom DNS, aktuellen Hypes, Überwachung und Zensur</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Kommentar-Vom-DNS-aktuellen-Hypes-Ueberwachung-und-Zensur-4205380.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das DNS ist gereift, es kann mit den Bedrohungen der Überwachung und Zensur umgehen. Eine Antwort von Lutz Donnerhacke auf "Die Gruft DNS gehört ausgelüftet".</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Microsoft will Militär und Geheimdienste beliefern</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Microsoft-will-Militaer-und-Geheimdienste-beliefern-4205383.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Microsoft ist trotz Protesten von Mitarbeitern bereit, dem Militär und den Geheimdiensten des Landes KI-Systeme und sonstige Technologien zu verkaufen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Zurück zur "Normalzeit": Uhren werden (möglichweise zum letzten Mal) um eine Stunde zurückgestellt</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Zurueck-zur-Normalzeit-Uhren-werden-moeglichweise-zum-letzten-Mal-um-eine-Stunde-zurueckgestellt-4205376.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Zeitumstellung könnte bald Geschichte sein. Es gibt aber Bereiche, in denen eine Umstellung sehr aufwendig werden könnte.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>5G: Seehofer fordert Änderung der Vergaberegeln</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/5G-Seehofer-fordert-Aenderung-der-Vergaberegeln-4205373.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Bundesinnenminister sieht Bewohner ländlicher Gebiete durch die Ausschreibungsregeln für das 5G-Netz benachteiligt und verlangt Nachbesserungen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Ausstellung erinnert an das Lebenswerk von Konrad Zuse</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Ausstellung-erinnert-an-das-Lebenswerk-von-Konrad-Zuse-4205359.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>In Hopferau im Ostallgäu stellte Konrad Zuse seine Rechenmaschine Z4 fertig. Jetzt erinnert eine Ausstellung im Schloss Hopferau an den Computerpionier.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Test TrackerID LTS-450: GPS-Flaschenhalter am Fahrrad</title>
|
||||
<link>https://www.techstage.de/news/Test-TrackerID-LTS-450-GPS-Flaschenhalter-am-Fahrrad-4205272.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Fahrraddiebe sind blöd, aber nicht dumm: Sehen sie einen GPS-Tracker, entfernen sie ihn. Deswegen tarnt sich der TrackerID LTS-450 in einem Flaschenhalter.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Fünf mobile Beamer im Vergleichstest</title>
|
||||
<link>https://www.techstage.de/news/Fuenf-mobile-Beamer-im-Vergleichstest-4204823.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>In den vergangenen Wochen haben wir uns fünf kompakte Beamer mit integriertem Akku angeschaut. Im Vergleichstest zeigen wir Vor- und Nachteile.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>In Japan geht ein weiterer Atomreaktor wieder ans Netz</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/In-Japan-geht-ein-weiterer-Atomreaktor-wieder-ans-Netz-4205351.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das Atomkraftwerk Ikata in Japan hat einen Reaktor wieder hochgefahren, gegen dessen Betrieb eine Bürgergruppe geklagt hatte.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>FlyCroTug: Kleine Drohne mit großer Zugkraft</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/FlyCroTug-Kleine-Drohne-mit-grosser-Zugkraft-4205335.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Forscher haben 100 Gramm leichte Minidrohnen entwickelt, die von einem festen Haltepunkt aus das 40-fache ihres Gewichts bewegen können.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Google Office-Programme: Neue Dokumente in Browserzeile anlegen</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Google-Docs-Neue-Dokumente-in-Browserzeile-anlegen-4205346.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Für seine webbasierten Office-Programme hat Google eine praktische Abkürzung direkt in die Anwendungen Docs, Sheets, Slides und Forms veröffentlicht.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Wo Hobby-Astronomen den Profis voraus sind</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Wo-Hobby-Astronomen-den-Profis-voraus-sind-4205332.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Hobby-Astronomen leisten einen wertvollen Beitrag zur Wissenschaft, etwa durch das Beobachten veränderlicher Sterne oder die Suche nach Meteoriten.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Zahlen geschönt? FBI-Untersuchung gegen Tesla</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Zahlen-geschoent-FBI-Untersuchung-gegen-Tesla-4205285.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Es besteht der Verdacht, dass Tesla Produktionszahlen wissentlich überoptimistisch vorhergesagt hat. Das FBI ermittelt strafrechtlich.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>c't uplink 24.6: Linux mit UEFI / OLED-Defekte / Dropbox-Alternativen</title>
|
||||
<link>https://www.heise.de/ct/artikel/c-t-uplink-24-6-Linux-mit-UEFI-OLED-Defekte-Dropbox-Alternativen-4205070.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die c't-uplink Show: Dieses Mal mit eingebrannten Logos bei OLED-TVs, Alternativen zu Dropbox und wie man Linux am besten mit UEFI verheiratet.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Die Bilder der Woche (KW 43): Von Porträt bis Landschaft</title>
|
||||
<link>https://www.heise.de/foto/meldung/Die-Bilder-der-Woche-KW-43-Von-Portraet-bis-Landschaft-4204550.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Blickfang: Unsere Bilder des Tages haben in dieser Woche keine Scheu vor direktem Augenkontakt und Spiegelbildern.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>BIOS-Option macht ThinkPads zu Briefbeschwerern</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/BIOS-Option-macht-ThinkPads-zu-Briefbeschwerern-4205185.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Ein einziger Klick im BIOS-Setup - und einige aktuelle Lenovo-Notebooks starten nicht mehr; eine Lösung des Problems fehlt noch.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Markenstreit um “Dash”: Bragi beantragt einstweilige Verfügung gegen Oneplus</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Markenstreit-um-Dash-Bragi-beantragt-einstweilige-Verfuegung-gegen-Oneplus-4205223.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Hersteller der smarten Kopfhörer “The Dash” sieht seine Markenrechte durch die Schnelladegeräte der chinesischen Smartphones verletzt.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Übernahme von GitHub durch Microsoft abgeschlossen</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Uebernahme-von-GitHub-durch-Microsoft-abgeschlossen-4205119.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Microsofts Übernahme von GitHub ist abgeschlossen. Ab Montag beginnt die Arbeit des neuen CEO Nat Friedman. Er will GitHub für die Entwickler besser machen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Webhosting und Cloud Computing: Aus 1&1 und ProfitBricks wird 1&1 Ionos</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Webhosting-und-Cloud-Computing-Aus-1-1-und-ProfitBricks-wird-1-1-Ionos-4205066.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>1&1 bietet seine Webhosting- und Cloud-Produkte künftig unter dem Namen 1&1 Ionos an. Besserer Service soll Unternehmen den Cloud-Start schmackhaft machen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>iPhone XR: Die 10 wichtigsten Testergebnisse</title>
|
||||
<link>https://www.heise.de/mac-and-i/meldung/iPhone-XR-Die-10-wichtigsten-Testergebnisse-4204845.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Seit heute ist das dritte und günstigste von Apples neuen Smartphones im Handel. Mac & i konnte es bereits testen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Motorola One: Android-One-Smartphone für 299 Euro im Test</title>
|
||||
<link>https://www.techstage.de/news/Motorola-One-im-Test-Android-One-Smartphone-mit-Notch-4203618.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das Motorola One ist ein Smartphone mit schickem Design und Android One als Betriebssystem. Ob und für wen sich der Kauf lohnt, hat TechStage getestet.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>US Copyright Office: DRM darf für Reparaturen umgangen werden</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/US-Copyright-Office-DRM-darf-fuer-Reparaturen-umgangen-werden-4205173.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>In den USA ist es künftig legal, den Kopierschutz elektronischer Geräte zu knacken, um etwa sein Smartphone, Auto oder den vernetzten Kühlschrank zu reparieren.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Ausprobiert: Haptik-Datenhandschuhe von Senseglove</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Ausprobiert-Haptik-Datenhandschuhe-von-Senseglove-4205142.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Senseglove-Datenhandschuh trackt nicht nur jeden Finger, sondern simuliert auch Widerstand und Haptik. Wir haben ihn ausprobiert.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Apple News soll "Netflix für Nachrichten" werden</title>
|
||||
<link>https://www.heise.de/mac-and-i/meldung/Apple-News-soll-Netflix-fuer-Nachrichten-werden-4204886.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Apple betreibt für seinen hauseigenen Infodienst eine eigene Redaktion – und setzt kaum auf Algorithmen. Im Abo könnten bald diverse Magazine hinzukommen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Xbox One X im 4K-Test: Spaßzentrale für UHD-TVs</title>
|
||||
<link>https://www.techstage.de/news/Xbox-One-X-im-4K-Test-Spasszentrale-fuer-UHD-TVs-4204803.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die aktuelle Xbox One X bewirbt Microsoft als idealen Zuspieler für UHD-Fernseher. Wir haben ihre 4K-Fähigkeiten bei Filmen und Spielen getestet. </description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Red Dead Redemption 2: Die Entschleunigung des Action-Adventures</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Red-Dead-Redemption-2-Die-Entschleunigung-des-Action-Adventures-4205034.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Nach dem Live-Stream: Unsere Eindrücke aus den ersten Stunden des Gaming-Blockbusters Red Dead Redemption 2.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Grüne: Netzbetreiber sollen Breitband-Universaldienst finanzieren</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Gruene-Netzbetreiber-sollen-Breitband-Universaldienst-finanzieren-4205022.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Schwarz-Rot will bis spätestens 2025 einen Anspruch auf Breitband für alle gesetzlich verankern. Die Grünen sehen dagegen sofortigen Handlungsbedarf.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Programmiersprache: Rust 1.30 will mehr Klarheit schaffen</title>
|
||||
<link>https://www.heise.de/developer/meldung/Programmiersprache-Rust-1-30-will-mehr-Klarheit-schaffen-4204893.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Umgang mit absoluten Pfaden und neue prozedurale Makros sind die Highlights der aktuellen Rust-Version.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Apples Oktober-Event: iPad Pro 2018 und neue Macs vor der Tür</title>
|
||||
<link>https://www.heise.de/mac-and-i/meldung/Apples-Oktober-Event-iPad-Pro-2018-und-neue-Macs-vor-der-Tuer-4204922.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Nach der Einführung der iPhones wird sich Apple der Zukunft seiner Computer zuwenden: Wichtige Neuerungen stehen an. </description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Magento-Shops: Verwundbare Add-ons als Schlupfloch für Kreditkarten-Skimmer</title>
|
||||
<link>https://www.heise.de/security/meldung/Magento-Shops-Verwundbare-Add-ons-als-Schlupfloch-fuer-Kreditkarten-Skimmer-4204828.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Ein Sicherheitsforscher warnt vor knapp über 20 Add-ons, die Onlineshops basierend auf der Magento-Software angreifbar machen. </description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Atomkraft: Gericht kippt Schließungs-Dekret für Fessenheim</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Atomkraft-Gericht-kippt-Schliessungs-Dekret-fuer-Fessenheim-4204853.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Conseil d'État hat zu Gunsten der Gemeinde Fessenheim und der Gewerkschaften entschieden.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Qt Design Studio erreicht Version 1.0</title>
|
||||
<link>https://www.heise.de/developer/meldung/Qt-Design-Studio-erreicht-Version-1-0-4204902.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Neue Funktionen wie die Qt Photoshop Bridge und zeitachsenbasierte Animationen sollen die Zusammenarbeit von Entwicklern und Designern vereinfachen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Vodafone bringt Mini-Handy von Palm nach Deutschland</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Vodafone-bringt-Mini-Handy-von-Palm-nach-Deutschland-4204878.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das neue Palm kommt auch in Deutschland auf den Markt: Vodafone bringt das kuriose Mini-Handy exklusiv in den Handel. </description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Xeons und Modems bescheren Intel Rekordquartal</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Xeons-und-Modems-bescheren-Intel-Rekordquartal-4204869.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Starke Nachfrage nach Prozessoren für Cloud-Rechenzentren sowie LTE-Modems lassen bei Intel die Gewinne sprudeln und bescheren gute Aussichten.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>KI druckt Kunst: Auktionshaus Christie's versteigert KI-Gemälde für 380.000 Euro</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/KI-druckt-Kunst-Auktionshaus-Christie-s-versteigert-KI-Gemaelde-fuer-380-000-Euro-4204793.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Das renommierte Auktionshaus Christie's hat erstmals ein von einer KI erschaffenes Bild versteigert. Der Preis war wesentlich höher als erwartet.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>EU-Parlament verabschiedet Resolution zur Datenschutzuntersuchung bei Facebook </title>
|
||||
<link>https://www.heise.de/newsticker/meldung/EU-Parlament-verabschiedet-Resolution-zur-Datenschutzuntersuchung-bei-Facebook-4204766.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Facebook soll mehr Aufklärung über seine Datenschutzpraxis leisten. Das EU-Parlament hat deshalb eine Untersuchung durch EU-Institutionen verabschiedet.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>IBM setzt auf 277.000 Apple-Geräte</title>
|
||||
<link>https://www.heise.de/mac-and-i/meldung/IBM-setzt-auf-277-000-Apple-Geraete-4204728.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Bei IBM stellen Macs inzwischen ein Viertel aller Laptops. Das Open-Source-Tool Mac@IBM soll auch Admins anderer Firmen die Einrichtung erleichtern.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>heise-Angebot: #TGIQF - das c't-Retroquiz: 8 Bit & mehr</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/TGIQF-das-c-t-Retroquiz-8-Bit-mehr-4202717.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>C64, ZX Spectrum, Apple II ... die Heimcomputer lösten eine IT-Revolution in den Kinderzimmern aus. Wie viel ist aus der Zeit bei Ihnen hängen geblieben?</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>heise-Angebot: c't Fotografie: Spiegeloses Vollformat im Test</title>
|
||||
<link>https://www.heise.de/foto/meldung/c-t-Fotografie-Spiegeloses-Vollformat-im-Test-4201826.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Lange war Sony der einzige Anbieter für spiegellose Vollformatkameras. Mit der Canon EOS R und der Nikon Z-Serie werden die Karten neu gemischt.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>British-Airways-Hack: 185.000 weitere Kunden betroffen</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/British-Airways-Hack-185-000-weitere-Kunden-betroffen-4204675.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Fluggesellschaft hat nun bekanntgegeben, dass bis dato Unbekannte Kreditkartendaten von noch mehr Kunden als bislang bekannt kopiert haben.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Google: 48 Mitarbeiter wegen sexueller Belästigung gefeuert</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Google-48-Mitarbeiter-wegen-sexueller-Belaestigung-gefeuert-4204687.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Der Weggang von Android-Schöpfer Andy Rubin von Google war wohl nicht freiwillig – ihm wurde sexuelle Nötigung vorgeworfen. Und er war nicht der einzige.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Fujitsu schließt Werk in Augsburg</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Fujitsu-schliesst-Werk-in-Augsburg-4204722.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Fujitsu plant einen Konzernumbau. In Augsburg sind davon 1500 Beschäftigte betroffen.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Ego-Shooter Metro 2033 bei Steam kostenlos</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Metro-2033-ist-bei-Steam-heute-kostenlos-4204706.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Metro 2033 wird am heutigen Freitag auf Steam kostenlos angeboten. Der Ego-Shooter basiert auf dem gleichnamigen Roman von Dmitri Gluchowski.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Lightroom-Alternative: DxO bringt PhotoLab 2 </title>
|
||||
<link>https://www.heise.de/foto/meldung/Lightroom-Alternative-DxO-bringt-PhotoLab-2-4204614.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>PhotoLab gibt es nun in Version 2: Verbessert wurde unter anderem die Bildverwaltung, zudem integriert DxO die von den Nik-Filtern bekannte U-Point-Technologie.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Google schaltet Nearby Notifcations in Android ab</title>
|
||||
<link>https://www.heise.de/developer/meldung/Google-schaltet-Nearby-Notifcations-in-Android-ab-4204667.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Die Funktion für standortbasierte Benachrichtigungen lieferte wohl mehr Spam als nützliche Inhalte.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>iPhone XR: Verkaufsstart ohne Ansturm</title>
|
||||
<link>https://www.heise.de/mac-and-i/meldung/iPhone-XR-Verkaufsstart-ohne-Ansturm-4204679.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Apple bringt die billigeren und bunten iPhone-Modellreihe in den Handel. Groß anstehen mussten Kunden dafür nicht.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Snapchat: Aktienabsturz durch Nutzerschwund</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Snapchat-Aktienabsturz-durch-Nutzerschwund-4204631.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Snapchat laufen weiterhin die Nutzer weg – und das wird sich vorerst nicht ändern, sagt Snap. Die Aktie stürzte trotz geringerer Verluste um zehn Prozent ab.</description>
|
||||
</item>
|
||||
|
||||
<item>
|
||||
<title>Mi Mix 3: Xiaomi-Flaggschiff mit Kamera-Slider und 10 GByte RAM</title>
|
||||
<link>https://www.heise.de/newsticker/meldung/Mi-Mix-3-Xiaomi-Flaggschiff-mit-Kamera-Slider-und-10-GByte-RAM-4204655.html?wt_mc=rss.ho.beitrag.rdf</link>
|
||||
<description>Xiaomis nächstes Flaggschiff bietet eine fast randlose Display-Front. Die Selfie-Kamera ist in einem magnetischen Slider-Mechanismus untergebracht.</description>
|
||||
</item>
|
||||
|
||||
|
||||
|
||||
</rdf:RDF>
|
226
internal/reader/parser/testdata/urdu_UTF8.xml
vendored
Normal file
226
internal/reader/parser/testdata/urdu_UTF8.xml
vendored
Normal file
|
@ -0,0 +1,226 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
|
||||
<channel>
|
||||
<title><![CDATA[BBC News اردو - پاکستان کے لیے امریکی امداد کی بہار و خزاں]]></title>
|
||||
<description><![CDATA[BBC News اردو - پاکستان کے لیے امریکی امداد کی بہار و خزاں]]></description>
|
||||
<link>http://www.bbcurdu.com</link>
|
||||
<image>
|
||||
<url>http://www.bbc.co.uk/urdu/images/gel/rss_logo.gif</url>
|
||||
<title>BBC News اردو - پاکستان کے لیے امریکی امداد کی بہار و خزاں</title>
|
||||
<link>http://www.bbcurdu.com</link>
|
||||
</image>
|
||||
<generator>RSS for Node</generator>
|
||||
<lastBuildDate>Wed, 24 Oct 2018 07:25:10 GMT</lastBuildDate>
|
||||
<copyright><![CDATA[کاپی رائٹ بی بی سی ]]></copyright>
|
||||
<language><![CDATA[ur]]></language>
|
||||
<managingEditor><![CDATA[urdu@bbc.co.uk]]></managingEditor>
|
||||
<ttl>15</ttl>
|
||||
<item>
|
||||
<title><![CDATA[امریکی عسکری امداد کی بندش کی وجوہات: انڈیا سے جنگ، جوہری پروگرام اور اب دہشت گردوں کی پشت پناہی]]></title>
|
||||
<description><![CDATA[امریکہ اور پاکستان کے 70 سالہ تعلقات میں جب بھی زیادہ کشیدگی آتی ہے تو اس میں پہلی تلوار پاکستان کو ملنے والی عسکری امداد پر چلتی ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42575603</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42575603</guid>
|
||||
<pubDate>Fri, 05 Jan 2018 16:51:00 GMT</pubDate>
|
||||
<media:thumbnail width="1024" height="576" url="http://c.files.bbci.co.uk/A787/production/_99478824_gettyimages-856735580.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ کے ساتھ خفیہ معلومات کا تبادلہ اور فوجی تعاون معطل کر دیا: وزیر دفاع]]></title>
|
||||
<description><![CDATA[پاکستان کے وزیر دفاع خرم دستگیر نے کہا ہے کہ امریکہ کی جانب سے عسکری امداد کی معطلی کے بعد پاکستان نے امریکہ سے خفیہ معلومات کا تبادلہ اور فوجی تعاون بند کر دیا ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42645212</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42645212</guid>
|
||||
<pubDate>Thu, 11 Jan 2018 13:20:55 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/13C09/production/_99550908_3f6467e7-5086-43e5-9c31-918be66a17ad.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان ان دہشت گرد گروہوں کے خلاف کارروائی کرے جن کے خلاف ہم چاہتے ہیں: امریکہ]]></title>
|
||||
<description><![CDATA[امریکی محکمۂ دفاع کا کہنا ہے کہ امریکہ چاہتا ہے کہ پاکستان دہشت گردوں کے خلاف فیصلہ کن کارروائی کرے اور یہ کہ امریکہ کو بعض معاملات پر شدید اختلافات ہیں اور ان پر کام کیا جا رہا ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/world-42615276</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/world-42615276</guid>
|
||||
<pubDate>Tue, 09 Jan 2018 02:59:43 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/5B55/production/_99518332_mediaitem99518331.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستانی وزیر دفاع کہتے ہیں کہ امریکہ کو ایک کامیابی ملی ہے، وہ بھی پاکستان کی مرہون منت]]></title>
|
||||
<description><![CDATA[پاکستان کے وزیر دفاع خرم دستگیر نے کہا ہے کہ افغانستان کی صورتحال کی تمام ذمہ داری پاکستان پر نہیں ڈالی جا سکتی۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42554318</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42554318</guid>
|
||||
<pubDate>Wed, 03 Jan 2018 15:50:55 GMT</pubDate>
|
||||
<media:thumbnail width="1024" height="576" url="http://c.files.bbci.co.uk/16765/production/_99450029_p05snlw4.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[صدر ٹرمپ کے ٹویٹ کو سنجیدگی سے لیتے ہیں: وزیر دفاع]]></title>
|
||||
<description><![CDATA[پاکستان کے وزیر دفاع خرم دستگیر نے کہا کہ پاکستان امریکی صدر ٹرمپ کے پاکستان کے بارے میں ٹویٹ کو سنجیدگی سے لیتے ہیں اور تہذیب کے دائرے میں رہتے ہوئے امریکہ سے بے لاگ بات ہو گی۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/42547605</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/42547605</guid>
|
||||
<pubDate>Tue, 02 Jan 2018 17:27:36 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/5AA4/production/_99440232_p05sk783.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکی وزیرِ خارجہ کی پاکستان آمد]]></title>
|
||||
<description><![CDATA[امریکی وزیرِ خارجہ ریکس ٹلرسن نے پاکستان آمد کے بعد وزیراعظم ہاؤس میں پاکستان کی اعلیٰ سول اور فوجی قیادت سے ملاقات کی۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41739055</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41739055</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 13:08:06 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/10293/production/_98459166_p05ktkrj.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ اور پاکستان ایک دوسرے کا کیا بگاڑ سکتے ہیں؟]]></title>
|
||||
<description><![CDATA[تجزیہ کار کہتے ہیں کہ حقیقتاً افغانستان میں پاکستان اور امریکہ دونوں ہی ناکام ہوئے ہیں اور دونوں یہ سمجھتے ہیں کہ دوسرے کو شکست دے کر وہ پورے افغانستان پر اثر و رسوخ قائم کر سکیں گے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42542988</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42542988</guid>
|
||||
<pubDate>Tue, 02 Jan 2018 16:28:18 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/1AED/production/_99439860_gettyimages-839854052.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ اور پاکستان کے کشیدہ تعلقات میں اربوں ڈالر کی امداد پر بھی تنازع]]></title>
|
||||
<description><![CDATA[پاکستان اور امریکہ کے کشیدہ تعلقات میں اربوں ڈالر کی امداد پر بھی تنازع ہے جس میں دونوں ممالک الگ الگ اعداد و شمار بتاتے ہیں۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42532582</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42532582</guid>
|
||||
<pubDate>Tue, 02 Jan 2018 10:28:02 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/9281/production/_99050573__98456547_d3ee46a1-51a1-48b1-a24f-9a41cf21361e.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[’امریکہ انسداد دہشتگردی سیکھنے کے بجائے دشنام طرازی کر رہا ہے‘]]></title>
|
||||
<description><![CDATA[پاکستان کے وزیر دفاع خرم دستگیر خان نے کہا ہے کہ پاکستان میں دہشت گردوں کی خفیہ پناہ گاہیں نہیں ہیں اور اگر باقیات ہیں تو انہیں رد الفساد کے تحت ختم کیا جا رہا ہے تاکہ پاکستان کا مستقبل محفوظ ہو سکے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42548010</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42548010</guid>
|
||||
<pubDate>Wed, 03 Jan 2018 05:04:13 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/9B36/production/_99443793_image1.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان بھی ’مذہبی آزادیوں کی خلاف ورزیاں‘ کرنے والے ممالک میں شامل]]></title>
|
||||
<description><![CDATA[امریکہ کی وزارتِ خارجہ نے پاکستان کا نام ان ملکوں کی فہرست میں شامل کر دیا ہے جہاں مبینہ طور پر مذہبی آزادیوں کی یا تو سنگین خلاف ورزیوں کا ارتکاب کیا جاتا ہے یا مذہبی آزادیوں پر پابندیاں عائد ہیں۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/world-42571559</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/world-42571559</guid>
|
||||
<pubDate>Thu, 04 Jan 2018 17:12:58 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/184A8/production/_99469499_gettyimages-894786806.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان کی سکیورٹی امداد معطل: ’دہشت گردی کے خلاف عزم پر اثرانداز نہیں ہو سکتی‘]]></title>
|
||||
<description><![CDATA[امریکہ کی طرف سے پاکستان کی فوجی امداد کے بند کیے جانے پر پاکستان کے دفتر خارجہ نے کہا ہے کہ یک طرفہ بیانات، مرضی سے دی گئی ڈیڈ لائنز اور اہداف کی مستقل تبدیلی مشترکہ مفادات کے حصول میں سودمند ثابت نہیں ہو سکتی۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42577314</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42577314</guid>
|
||||
<pubDate>Fri, 05 Jan 2018 12:32:27 GMT</pubDate>
|
||||
<media:thumbnail width="640" height="360" url="http://c.files.bbci.co.uk/0935/production/_99475320_556f3020-6286-47a8-a4b1-3026ff6dc95f.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[ڈونلڈ ٹرمپ: پاکستان نے ہمیں جھوٹ اور دھوکے کے سوا کچھ نہیں دیا]]></title>
|
||||
<description><![CDATA[امریکی صدر ڈونلڈ ٹرمپ کا کہنا ہے کہ گذشتہ15 برس میں 33 ارب ڈالر کی امداد لینے کے باوجود پاکستان نے امریکہ کو سوائے جھوٹ اور دھوکے کے کچھ نہیں دیا۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/world-42534486</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/world-42534486</guid>
|
||||
<pubDate>Mon, 01 Jan 2018 17:59:24 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/A5CE/production/_99264424_gettyimages-894923566.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان: اتحادی ایک دوسرے کو تنبیہ جاری نہیں کیا کرتے]]></title>
|
||||
<description><![CDATA[امریکی نائب صدر مائیک پینس کے پاکستان کے بارے میں بگرام کے ہوائی اڈے پر دیے جانے والے بیان پر تبصرہ کرتے ہوئے پاکستان کی وزارتِ خارجہ کے ترجمان نے کہا کہ یہ بیان امریکی انتظامیہ کے ساتھ ہونے والے تفصیلی مذاکرات کے منافی ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/world-42451883</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/world-42451883</guid>
|
||||
<pubDate>Fri, 22 Dec 2017 08:50:54 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/12825/production/_99331857_gettyimages-896767012.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ: کون سے ممالک ہیں جن پر امداد بند کر دینے کی دھمکی کارگر ثابت نہیں ہوئی]]></title>
|
||||
<description><![CDATA[سب سے زیادہ امریکی امداد وصول کرنے والے 12 ملکوں کی فہرست میں اسرائیل کے علاوہ ایک بھی ملک ایسا نہیں جس نے جنرل اسمبلی میں یروشلم کے بارے میں امریکی صدر کے فیصلے کے خلاف قرارداد کی مخالفت میں ووٹ دیا ہو۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/world-42457273</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/world-42457273</guid>
|
||||
<pubDate>Fri, 22 Dec 2017 15:29:44 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/1331F/production/_99332687_gettyimages-882119996.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان امریکہ تعلقات، بیان بدلتے ہیں لیکن عینک نہیں]]></title>
|
||||
<description><![CDATA[پاکستان اور امریکہ کے نرم گرم تعلقات کی تاریخ صرف افغانستان تک محدود نہیں، لیکن حالیہ برسوں میں دونوں اکثر ایک دوسرے کو ایک ہی عینک سے دیکھنے کی کوشش کرتے ہیں۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42225606</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42225606</guid>
|
||||
<pubDate>Mon, 04 Dec 2017 13:36:14 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/9281/production/_99050573__98456547_d3ee46a1-51a1-48b1-a24f-9a41cf21361e.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان امریکہ تعلقات: ’باتیں دھمکی کی زبان سے نہیں صلح کی زبان سے طے ہوں گی‘]]></title>
|
||||
<description><![CDATA[پاکستان کے وزیر خارجہ خواجہ آصف نے کہا ہے کہ امریکہ اور پاکستان کے درمیان اعتماد کا فقدان راتوں رات ختم نہیں ہو گا کیونکہ دونوں ممالک کے تعلقات پر جمی برف پگھلنے میں وقت لگے گا۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41742387</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41742387</guid>
|
||||
<pubDate>Wed, 25 Oct 2017 01:19:34 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/15BE/production/_98466550_844059008.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ کے ساتھ تعاون ختم کرنے پر غور کیا جائے: قرارداد]]></title>
|
||||
<description><![CDATA[پاکستان کی پارلیمان نے ایک متفقہ قرارداد میں امریکی صدر کی حالیہ تقریراور افغانستان میں امریکی کمانڈر جنرل جان نکلسن کے بیان کو مسترد کر دیا ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41097529</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41097529</guid>
|
||||
<pubDate>Wed, 30 Aug 2017 16:19:55 GMT</pubDate>
|
||||
<media:thumbnail width="640" height="360" url="http://c.files.bbci.co.uk/8C50/production/_97602953_3ad0abf1-2480-41b0-bc0a-9d89393c71e4.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[’پاکستان کو قربانی کا بکرا بناکر افغانستان میں امن نہیں لایا جاسکتا‘]]></title>
|
||||
<description><![CDATA[پاکستان کی اعلیٰ سیاسی اور عسکری قیادت نے امریکی صدر ڈونلڈ ٹرمپ کی جانب سے اپنے حالیہ خطاب میں پاکستان پر لگائے گئے الزامات کو مسترد کرتے ہوئے کہا ہے کہ پاکستان پر الزام تراشیوں سے افغانستان کو مستحکم نہیں کیا جا سکتا۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41038882</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41038882</guid>
|
||||
<pubDate>Thu, 24 Aug 2017 14:35:50 GMT</pubDate>
|
||||
<media:thumbnail width="640" height="360" url="http://c.files.bbci.co.uk/38EA/production/_97507541_gettyimages-831217164.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ سے امداد کے نہیں اعتماد کے خواہاں ہیں: جنرل باجوہ]]></title>
|
||||
<description><![CDATA[پاکستان کے آرمی چیف قمر جاوید باجوہ نے کہا ہے کہ پاکستان امریکہ سے کسی مادی یا مالی امداد کا خواہاں نہیں بلکہ چاہتا ہے کہ اس پر اعتماد کرتے ہوئے اس کی کارکردگی کا اعتراف کیا جائے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41024731</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41024731</guid>
|
||||
<pubDate>Wed, 23 Aug 2017 13:11:20 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/E6B3/production/_97495095_dh6cwc3xuaawzfm.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[’پاکستان نے اپنے رویے کو تبدیل نہ کیا تو امریکی مراعات کھو سکتا ہے‘]]></title>
|
||||
<description><![CDATA[امریکی وزیر خارجہ ریکس ٹیلرسن نے افغان طالبان کی مبینہ حمایت پر کہا ہے کہ اگر پاکستان اپنے رویے میں تبدیلی لانے میں ناکام رہتا ہے تو امریکی مراعات کھو سکتا ہے جبکہ پاکستان نے کہا ہے کہ امریکہ محفوظ پناہ گاہوں کے جھوٹے بیانیے کے بجائے دہشت گردی کے خلاف مل کر کام کرے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41019799</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41019799</guid>
|
||||
<pubDate>Tue, 22 Aug 2017 23:06:07 GMT</pubDate>
|
||||
<media:thumbnail width="640" height="360" url="http://c.files.bbci.co.uk/1710E/production/_97487449_gettyimages-825293218.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ کا پاکستان کو ’نوٹس‘ کیا اور کتنا سنگین ہے؟]]></title>
|
||||
<description><![CDATA[امریکی صدر ڈونلڈ ٹرمپ نے ٹویٹ کے ذریعے اپنے ارادے تو ظاہر کر دیے ہیں لیکن دیکھنا یہ ہے کیا امریکہ کی دھمکی محض افغان طالبان تک محدود ہے یا پھر انڈیا مخالف گروپس بھی اس میں شامل ہوں گے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42550677</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42550677</guid>
|
||||
<pubDate>Wed, 03 Jan 2018 07:47:12 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/14B12/production/_99445748_18215753-eb0c-4c5d-b4b7-06aa87bfcd39.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[کمال ہے، اتنی دہشت؟]]></title>
|
||||
<description><![CDATA[مجھے تو تینتیس ارب ڈالر کے پاکستانی روپے ہی بنانے نہیں آ ت ، بھیا ٹرمپ! ہم سے حساب کیا مانگتے ہو؟ جن کو دیا تھا صاف صاف ان کا نام لو یا تم بھی غائب ہونے سے ڈرتے ہو؟]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42594820</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42594820</guid>
|
||||
<pubDate>Sun, 07 Jan 2018 03:49:01 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/5F27/production/_99495342_cf7d6805-64db-4438-8fe7-85fd61e470cf.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[کیا امداد بند کرنے سے امریکہ کا مقصد پورا ہو گا؟]]></title>
|
||||
<description><![CDATA[امریکہ کی جانب سے پاکستان کی عسکری امداد بند کیے جانے پر ماہرین کا کہنا ہے کہ اس اقدام سے کمزور اقتصادی صورتحال میں پاکستان پر دباؤ پڑے گا اور دہشت گردی کے خلاف جنگ بھی متاثر ہو گئی۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42575493</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42575493</guid>
|
||||
<pubDate>Fri, 05 Jan 2018 08:27:04 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/D4AF/production/_99474445_gettyimages-482348300.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[وہ تازیانے لگے ہوش سب ٹھکانے لگے]]></title>
|
||||
<description><![CDATA[وہ قوم جو 30 برس پہلے تک ناک پے مکھی نہیں بیٹھنے دیتی تھی اس کا یہ حال ہوگیا کہ چابک چھوڑ چابک کے سائے سے بھی ڈر جاتی ہے۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/42596931</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/42596931</guid>
|
||||
<pubDate>Sun, 07 Jan 2018 12:37:26 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/863B/production/_99036343_batsebat.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[امریکہ پاکستان سے چاہتا کیا ہے؟]]></title>
|
||||
<description><![CDATA[امریکی وزیر خارجہ ریکس ٹلرسن پاکستان کے مختصر دورے پر اسلام آباد پہنچ گئے جہاں وہ چند ’مخصوص‘ مطالبات بھی پیش کریں گے۔ آخر امریکہ پاکستان سے چاہتا کیا ہے اور یہ مطالبات کیا ہو سکتے ہیں؟]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-41736761</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-41736761</guid>
|
||||
<pubDate>Tue, 24 Oct 2017 12:53:37 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/12345/production/_98456547_d3ee46a1-51a1-48b1-a24f-9a41cf21361e.jpg"/>
|
||||
</item>
|
||||
<item>
|
||||
<title><![CDATA[پاکستان امریکہ تعلقات میں ’ڈو مور‘ کا نیا ایڈیشن جو آج تک چل رہا]]></title>
|
||||
<description><![CDATA[افغانستان میں بین الاقوامی سیاست اور ترجیحات سمجھنے کے لیے یہ جاننا ضروری ہے کہ 2001 میں امریکی آمد کے بعد سے بعض ایسی چیزیں ہیں جو تبدیل نہیں ہو رہیں۔]]></description>
|
||||
<link>http://www.bbc.com/urdu/pakistan-42422392</link>
|
||||
<guid isPermaLink="true">http://www.bbc.com/urdu/pakistan-42422392</guid>
|
||||
<pubDate>Wed, 20 Dec 2017 12:23:23 GMT</pubDate>
|
||||
<media:thumbnail width="976" height="549" url="http://c.files.bbci.co.uk/7CB9/production/_99292913_gettyimages-884411870.jpg"/>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
346
internal/reader/processor/processor.go
Normal file
346
internal/reader/processor/processor.go
Normal file
|
@ -0,0 +1,346 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package processor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"miniflux.app/v2/internal/integration"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/metric"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/rewrite"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/reader/scraper"
|
||||
"miniflux.app/v2/internal/storage"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/rylans/getlang"
|
||||
)
|
||||
|
||||
var (
	// youtubeRegex captures the video ID from a youtube.com watch URL.
	youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
	// odyseeRegex matches entry URLs hosted on odysee.com.
	odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
	// iso8601Regex parses an ISO 8601 duration string (e.g. "PT1H30M")
	// into named year/month/week/day/hour/minute/second groups.
	iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
	// customReplaceRuleRegex extracts the search and replacement parts of a
	// user-defined rewrite("search"|"replace") rule.
	customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
)
|
||||
|
||||
// ProcessFeedEntries downloads original web page for entries and apply filters.
|
||||
func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.User, forceRefresh bool) {
|
||||
var filteredEntries model.Entries
|
||||
|
||||
// array used for bulk push
|
||||
entriesToPush := model.Entries{}
|
||||
|
||||
// Process older entries first
|
||||
for i := len(feed.Entries) - 1; i >= 0; i-- {
|
||||
entry := feed.Entries[i]
|
||||
|
||||
logger.Debug("[Processor] Processing entry %q from feed %q", entry.URL, feed.FeedURL)
|
||||
|
||||
if isBlockedEntry(feed, entry) || !isAllowedEntry(feed, entry) {
|
||||
continue
|
||||
}
|
||||
|
||||
url := getUrlFromEntry(feed, entry)
|
||||
entryIsNew := !store.EntryURLExists(feed.ID, entry.URL)
|
||||
if feed.Crawler && (entryIsNew || forceRefresh) {
|
||||
logger.Debug("[Processor] Crawling entry %q from feed %q", url, feed.FeedURL)
|
||||
|
||||
startTime := time.Now()
|
||||
content, scraperErr := scraper.Fetch(
|
||||
url,
|
||||
feed.ScraperRules,
|
||||
feed.UserAgent,
|
||||
feed.Cookie,
|
||||
feed.AllowSelfSignedCertificates,
|
||||
feed.FetchViaProxy,
|
||||
)
|
||||
|
||||
if config.Opts.HasMetricsCollector() {
|
||||
status := "success"
|
||||
if scraperErr != nil {
|
||||
status = "error"
|
||||
}
|
||||
metric.ScraperRequestDuration.WithLabelValues(status).Observe(time.Since(startTime).Seconds())
|
||||
}
|
||||
|
||||
if scraperErr != nil {
|
||||
logger.Error(`[Processor] Unable to crawl this entry: %q => %v`, entry.URL, scraperErr)
|
||||
} else if content != "" {
|
||||
// We replace the entry content only if the scraper doesn't return any error.
|
||||
entry.Content = content
|
||||
}
|
||||
}
|
||||
|
||||
rewrite.Rewriter(url, entry, feed.RewriteRules)
|
||||
|
||||
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
||||
entry.Content = sanitizer.Sanitize(url, entry.Content)
|
||||
|
||||
if entryIsNew {
|
||||
intg, err := store.Integration(feed.UserID)
|
||||
if err != nil {
|
||||
logger.Error("[Processor] Get integrations for user %d failed: %v; the refresh process will go on, but no integrations will run this time.", feed.UserID, err)
|
||||
} else if intg != nil {
|
||||
localEntry := entry
|
||||
go func() {
|
||||
integration.PushEntry(localEntry, intg)
|
||||
}()
|
||||
entriesToPush = append(entriesToPush, localEntry)
|
||||
}
|
||||
}
|
||||
|
||||
updateEntryReadingTime(store, feed, entry, entryIsNew, user)
|
||||
filteredEntries = append(filteredEntries, entry)
|
||||
}
|
||||
|
||||
intg, err := store.Integration(feed.UserID)
|
||||
if err != nil {
|
||||
logger.Error("[Processor] Get integrations for user %d failed: %v; the refresh process will go on, but no integrations will run this time.", feed.UserID, err)
|
||||
} else if intg != nil && len(entriesToPush) > 0 {
|
||||
go func() {
|
||||
integration.PushEntries(entriesToPush, intg)
|
||||
}()
|
||||
}
|
||||
|
||||
feed.Entries = filteredEntries
|
||||
}
|
||||
|
||||
func isBlockedEntry(feed *model.Feed, entry *model.Entry) bool {
|
||||
if feed.BlocklistRules != "" {
|
||||
match, _ := regexp.MatchString(feed.BlocklistRules, entry.Title)
|
||||
if match {
|
||||
logger.Debug("[Processor] Blocking entry %q from feed %q based on rule %q", entry.Title, feed.FeedURL, feed.BlocklistRules)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
|
||||
if feed.KeeplistRules != "" {
|
||||
match, _ := regexp.MatchString(feed.KeeplistRules, entry.Title)
|
||||
if match {
|
||||
logger.Debug("[Processor] Allow entry %q from feed %q based on rule %q", entry.Title, feed.FeedURL, feed.KeeplistRules)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
// On success the entry content (and reading time) is replaced with the
// scraped page content; the sanitizer always runs last.
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
	startTime := time.Now()
	// Apply the feed's optional URL rewrite rule before fetching.
	url := getUrlFromEntry(feed, entry)

	// NOTE(review): scraper options come from entry.Feed while the TLS/proxy
	// flags come from the feed parameter — presumably the same feed; confirm.
	content, scraperErr := scraper.Fetch(
		url,
		entry.Feed.ScraperRules,
		entry.Feed.UserAgent,
		entry.Feed.Cookie,
		feed.AllowSelfSignedCertificates,
		feed.FetchViaProxy,
	)

	// Record the fetch duration, labeled by outcome, before handling the error.
	if config.Opts.HasMetricsCollector() {
		status := "success"
		if scraperErr != nil {
			status = "error"
		}
		metric.ScraperRequestDuration.WithLabelValues(status).Observe(time.Since(startTime).Seconds())
	}

	if scraperErr != nil {
		return scraperErr
	}

	// Replace the entry content only when the scraper returned something.
	if content != "" {
		entry.Content = content
		entry.ReadingTime = calculateReadingTime(content, user)
	}

	rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
	// The sanitizer must run at the end of the process so unsafe HTML
	// introduced by scraping or rewriting is always filtered.
	entry.Content = sanitizer.Sanitize(url, entry.Content)

	return nil
}
|
||||
|
||||
func getUrlFromEntry(feed *model.Feed, entry *model.Entry) string {
|
||||
var url = entry.URL
|
||||
if feed.UrlRewriteRules != "" {
|
||||
parts := customReplaceRuleRegex.FindStringSubmatch(feed.UrlRewriteRules)
|
||||
|
||||
if len(parts) >= 3 {
|
||||
re := regexp.MustCompile(parts[1])
|
||||
url = re.ReplaceAllString(entry.URL, parts[2])
|
||||
logger.Debug(`[Processor] Rewriting entry URL %s to %s`, entry.URL, url)
|
||||
} else {
|
||||
logger.Debug("[Processor] Cannot find search and replace terms for replace rule %s", feed.UrlRewriteRules)
|
||||
}
|
||||
}
|
||||
return url
|
||||
}
|
||||
|
||||
// updateEntryReadingTime computes and stores the entry's reading time.
// For YouTube and Odysee videos the "reading time" is the video duration:
// fetched remotely for new entries, read back from storage otherwise.
// Anything else falls back to a text-based estimate of the content.
func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *model.Entry, entryIsNew bool, user *model.User) {
	if shouldFetchYouTubeWatchTime(entry) {
		if entryIsNew {
			watchTime, err := fetchYouTubeWatchTime(entry.URL)
			if err != nil {
				logger.Error("[Processor] Unable to fetch YouTube watch time: %q => %v", entry.URL, err)
			}
			// On fetch error watchTime is 0, which triggers the fallback below.
			entry.ReadingTime = watchTime
		} else {
			entry.ReadingTime = store.GetReadTime(entry, feed)
		}
	}

	if shouldFetchOdyseeWatchTime(entry) {
		if entryIsNew {
			watchTime, err := fetchOdyseeWatchTime(entry.URL)
			if err != nil {
				logger.Error("[Processor] Unable to fetch Odysee watch time: %q => %v", entry.URL, err)
			}
			// On fetch error watchTime is 0, which triggers the fallback below.
			entry.ReadingTime = watchTime
		} else {
			entry.ReadingTime = store.GetReadTime(entry, feed)
		}
	}
	// Handle YT error case and non-YT entries.
	if entry.ReadingTime == 0 {
		entry.ReadingTime = calculateReadingTime(entry.Content, user)
	}
}
|
||||
|
||||
func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
|
||||
if !config.Opts.FetchYouTubeWatchTime() {
|
||||
return false
|
||||
}
|
||||
matches := youtubeRegex.FindStringSubmatch(entry.URL)
|
||||
urlMatchesYouTubePattern := len(matches) == 2
|
||||
return urlMatchesYouTubePattern
|
||||
}
|
||||
|
||||
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
|
||||
if !config.Opts.FetchOdyseeWatchTime() {
|
||||
return false
|
||||
}
|
||||
matches := odyseeRegex.FindStringSubmatch(entry.URL)
|
||||
return matches != nil
|
||||
}
|
||||
|
||||
func fetchYouTubeWatchTime(url string) (int, error) {
|
||||
clt := client.NewClientWithConfig(url, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return 0, browserErr
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
if docErr != nil {
|
||||
return 0, docErr
|
||||
}
|
||||
|
||||
durs, exists := doc.Find(`meta[itemprop="duration"]`).First().Attr("content")
|
||||
if !exists {
|
||||
return 0, errors.New("duration has not found")
|
||||
}
|
||||
|
||||
dur, err := parseISO8601(durs)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
|
||||
}
|
||||
|
||||
return int(dur.Minutes()), nil
|
||||
}
|
||||
|
||||
func fetchOdyseeWatchTime(url string) (int, error) {
|
||||
clt := client.NewClientWithConfig(url, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return 0, browserErr
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
if docErr != nil {
|
||||
return 0, docErr
|
||||
}
|
||||
|
||||
durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
|
||||
// durs contains video watch time in seconds
|
||||
if !exists {
|
||||
return 0, errors.New("duration has not found")
|
||||
}
|
||||
|
||||
dur, err := strconv.ParseInt(durs, 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("unable to parse duration %s: %v", durs, err)
|
||||
}
|
||||
|
||||
return int(dur / 60), nil
|
||||
}
|
||||
|
||||
// parseISO8601 parses an ISO 8601 duration string.
|
||||
func parseISO8601(from string) (time.Duration, error) {
|
||||
var match []string
|
||||
var d time.Duration
|
||||
|
||||
if iso8601Regex.MatchString(from) {
|
||||
match = iso8601Regex.FindStringSubmatch(from)
|
||||
} else {
|
||||
return 0, errors.New("could not parse duration string")
|
||||
}
|
||||
|
||||
for i, name := range iso8601Regex.SubexpNames() {
|
||||
part := match[i]
|
||||
if i == 0 || name == "" || part == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
val, err := strconv.ParseInt(part, 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
switch name {
|
||||
case "hour":
|
||||
d = d + (time.Duration(val) * time.Hour)
|
||||
case "minute":
|
||||
d = d + (time.Duration(val) * time.Minute)
|
||||
case "second":
|
||||
d = d + (time.Duration(val) * time.Second)
|
||||
default:
|
||||
return 0, fmt.Errorf("unknown field %s", name)
|
||||
}
|
||||
}
|
||||
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func calculateReadingTime(content string, user *model.User) int {
|
||||
sanitizedContent := sanitizer.StripTags(content)
|
||||
languageInfo := getlang.FromString(sanitizedContent)
|
||||
|
||||
var timeToReadInt int
|
||||
if languageInfo.LanguageCode() == "ko" || languageInfo.LanguageCode() == "zh" || languageInfo.LanguageCode() == "jp" {
|
||||
timeToReadInt = int(math.Ceil(float64(utf8.RuneCountInString(sanitizedContent)) / float64(user.CJKReadingSpeed)))
|
||||
} else {
|
||||
nbOfWords := len(strings.Fields(sanitizedContent))
|
||||
timeToReadInt = int(math.Ceil(float64(nbOfWords) / float64(user.DefaultReadingSpeed)))
|
||||
}
|
||||
|
||||
return timeToReadInt
|
||||
}
|
78
internal/reader/processor/processor_test.go
Normal file
78
internal/reader/processor/processor_test.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package processor // import "miniflux.app/v2/internal/reader/processor"
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/model"
|
||||
)
|
||||
|
||||
func TestBlockingEntries(t *testing.T) {
|
||||
var scenarios = []struct {
|
||||
feed *model.Feed
|
||||
entry *model.Entry
|
||||
expected bool
|
||||
}{
|
||||
{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, true},
|
||||
{&model.Feed{ID: 1, BlocklistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, false},
|
||||
{&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, false},
|
||||
}
|
||||
|
||||
for _, tc := range scenarios {
|
||||
result := isBlockedEntry(tc.feed, tc.entry)
|
||||
if tc.expected != result {
|
||||
t.Errorf(`Unexpected result, got %v for entry %q`, result, tc.entry.Title)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAllowEntries(t *testing.T) {
|
||||
var scenarios = []struct {
|
||||
feed *model.Feed
|
||||
entry *model.Entry
|
||||
expected bool
|
||||
}{
|
||||
{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Some Example"}, true},
|
||||
{&model.Feed{ID: 1, KeeplistRules: "(?i)example"}, &model.Entry{Title: "Something different"}, false},
|
||||
{&model.Feed{ID: 1}, &model.Entry{Title: "No rule defined"}, true},
|
||||
}
|
||||
|
||||
for _, tc := range scenarios {
|
||||
result := isAllowedEntry(tc.feed, tc.entry)
|
||||
if tc.expected != result {
|
||||
t.Errorf(`Unexpected result, got %v for entry %q`, result, tc.entry.Title)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseISO8601(t *testing.T) {
|
||||
var scenarios = []struct {
|
||||
duration string
|
||||
expected time.Duration
|
||||
}{
|
||||
// Live streams and radio.
|
||||
{"PT0M0S", 0},
|
||||
// https://www.youtube.com/watch?v=HLrqNhgdiC0
|
||||
{"PT6M20S", (6 * time.Minute) + (20 * time.Second)},
|
||||
// https://www.youtube.com/watch?v=LZa5KKfqHtA
|
||||
{"PT5M41S", (5 * time.Minute) + (41 * time.Second)},
|
||||
// https://www.youtube.com/watch?v=yIxEEgEuhT4
|
||||
{"PT51M52S", (51 * time.Minute) + (52 * time.Second)},
|
||||
// https://www.youtube.com/watch?v=bpHf1XcoiFs
|
||||
{"PT80M42S", (1 * time.Hour) + (20 * time.Minute) + (42 * time.Second)},
|
||||
}
|
||||
|
||||
for _, tc := range scenarios {
|
||||
result, err := parseISO8601(tc.duration)
|
||||
if err != nil {
|
||||
t.Errorf("Got an error when parsing %q: %v", tc.duration, err)
|
||||
}
|
||||
|
||||
if tc.expected != result {
|
||||
t.Errorf(`Unexpected result, got %v for duration %q`, result, tc.duration)
|
||||
}
|
||||
}
|
||||
}
|
16
internal/reader/rdf/dublincore.go
Normal file
16
internal/reader/rdf/dublincore.go
Normal file
|
@ -0,0 +1,16 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||
|
||||
// DublinCoreFeedElement represents Dublin Core feed XML elements.
type DublinCoreFeedElement struct {
	// Feed-level author, mapped from <dc:creator> inside <channel>.
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
}

// DublinCoreEntryElement represents Dublin Core entry XML elements.
type DublinCoreEntryElement struct {
	// Publication date, mapped from <dc:date>.
	DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
	// Entry author, mapped from <dc:creator>.
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
	// Full entry content from the RSS 1.0 content module (<content:encoded>).
	DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}
|
24
internal/reader/rdf/parser.go
Normal file
24
internal/reader/rdf/parser.go
Normal file
|
@ -0,0 +1,24 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct from a RDF feed.
|
||||
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||
feed := new(rdfFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
err := decoder.Decode(feed)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse RDF feed: %q", err)
|
||||
}
|
||||
|
||||
return feed.Transform(baseURL), nil
|
||||
}
|
578
internal/reader/rdf/parser_test.go
Normal file
578
internal/reader/rdf/parser_test.go
Normal file
|
@ -0,0 +1,578 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseRDFSample(t *testing.T) {
|
||||
data := `
|
||||
<?xml version="1.0"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://www.xml.com/xml/news.rss">
|
||||
<title>XML.com</title>
|
||||
<link>http://xml.com/pub</link>
|
||||
<description>
|
||||
XML.com features a rich mix of information and services
|
||||
for the XML community.
|
||||
</description>
|
||||
|
||||
<image rdf:resource="http://xml.com/universal/images/xml_tiny.gif" />
|
||||
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li resource="http://xml.com/pub/2000/08/09/xslt/xslt.html" />
|
||||
<rdf:li resource="http://xml.com/pub/2000/08/09/rdfdb/index.html" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
|
||||
<textinput rdf:resource="http://search.xml.com" />
|
||||
|
||||
</channel>
|
||||
|
||||
<image rdf:about="http://xml.com/universal/images/xml_tiny.gif">
|
||||
<title>XML.com</title>
|
||||
<link>http://www.xml.com</link>
|
||||
<url>http://xml.com/universal/images/xml_tiny.gif</url>
|
||||
</image>
|
||||
|
||||
<item rdf:about="http://xml.com/pub/2000/08/09/xslt/xslt.html">
|
||||
<title>Processing Inclusions with XSLT</title>
|
||||
<link>http://xml.com/pub/2000/08/09/xslt/xslt.html</link>
|
||||
<description>
|
||||
Processing document inclusions with general XML tools can be
|
||||
problematic. This article proposes a way of preserving inclusion
|
||||
information through SAX-based processing.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<item rdf:about="http://xml.com/pub/2000/08/09/rdfdb/index.html">
|
||||
<title>Putting RDF to Work</title>
|
||||
<link>http://xml.com/pub/2000/08/09/rdfdb/index.html</link>
|
||||
<description>
|
||||
Tool and API support for the Resource Description Framework
|
||||
is slowly coming of age. Edd Dumbill takes a look at RDFDB,
|
||||
one of the most exciting new RDF toolkits.
|
||||
</description>
|
||||
</item>
|
||||
|
||||
<textinput rdf:about="http://search.xml.com">
|
||||
<title>Search XML.com</title>
|
||||
<description>Search XML.com's XML collection</description>
|
||||
<name>s</name>
|
||||
<link>http://search.xml.com</link>
|
||||
</textinput>
|
||||
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse("http://xml.com/pub/rdf.xml", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Title != "XML.com" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://xml.com/pub/rdf.xml" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://xml.com/pub" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 2 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Title != "Putting RDF to Work" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[1].Date.Year() != time.Now().Year() {
|
||||
t.Errorf("Entry date should not be empty")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDFSampleWithDublinCore(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
|
||||
xmlns:co="http://purl.org/rss/1.0/modules/company/"
|
||||
xmlns:ti="http://purl.org/rss/1.0/modules/textinput/"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
>
|
||||
|
||||
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||
<title>Meerkat</title>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
<description>Meerkat: An Open Wire Service</description>
|
||||
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||
<dc:date>2000-01-01T12:00+00:00</dc:date>
|
||||
<sy:updatePeriod>hourly</sy:updatePeriod>
|
||||
<sy:updateFrequency>2</sy:updateFrequency>
|
||||
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
||||
|
||||
<image rdf:resource="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg" />
|
||||
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li resource="http://c.moreover.com/click/here.pl?r123" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
|
||||
<textinput rdf:resource="http://meerkat.oreillynet.com" />
|
||||
|
||||
</channel>
|
||||
|
||||
<image rdf:about="http://meerkat.oreillynet.com/icons/meerkat-powered.jpg">
|
||||
<title>Meerkat Powered!</title>
|
||||
<url>http://meerkat.oreillynet.com/icons/meerkat-powered.jpg</url>
|
||||
<link>http://meerkat.oreillynet.com</link>
|
||||
</image>
|
||||
|
||||
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||
<title>XML: A Disruptive Technology</title>
|
||||
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||
<dc:description>
|
||||
XML is placing increasingly heavy loads on the existing technical
|
||||
infrastructure of the Internet.
|
||||
</dc:description>
|
||||
<dc:publisher>The O'Reilly Network</dc:publisher>
|
||||
<dc:creator>Simon St.Laurent (mailto:simonstl@simonstl.com)</dc:creator>
|
||||
<dc:rights>Copyright © 2000 O'Reilly & Associates, Inc.</dc:rights>
|
||||
<dc:subject>XML</dc:subject>
|
||||
<co:name>XML.com</co:name>
|
||||
<co:market>NASDAQ</co:market>
|
||||
<co:symbol>XML</co:symbol>
|
||||
</item>
|
||||
|
||||
<textinput rdf:about="http://meerkat.oreillynet.com">
|
||||
<title>Search Meerkat</title>
|
||||
<description>Search Meerkat's RSS Database...</description>
|
||||
<name>s</name>
|
||||
<link>http://meerkat.oreillynet.com/</link>
|
||||
<ti:function>search</ti:function>
|
||||
<ti:inputType>regex</ti:inputType>
|
||||
</textinput>
|
||||
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse("http://meerkat.oreillynet.com/feed.rdf", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Meerkat" {
|
||||
t.Errorf("Incorrect title, got: %s", feed.Title)
|
||||
}
|
||||
|
||||
if feed.FeedURL != "http://meerkat.oreillynet.com/feed.rdf" {
|
||||
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
|
||||
}
|
||||
|
||||
if feed.SiteURL != "http://meerkat.oreillynet.com" {
|
||||
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries))
|
||||
}
|
||||
|
||||
if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" {
|
||||
t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
|
||||
}
|
||||
|
||||
if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" {
|
||||
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Title != "XML: A Disruptive Technology" {
|
||||
t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title)
|
||||
}
|
||||
|
||||
if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") {
|
||||
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
|
||||
}
|
||||
|
||||
if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" {
|
||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseItemWithOnlyFeedAuthor checks that an entry without its own
// <dc:creator> inherits the channel-level Dublin Core creator.
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>

	<rdf:RDF
	  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
	  xmlns:dc="http://purl.org/dc/elements/1.1/"
	  xmlns="http://purl.org/rss/1.0/"
	>

	  <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
	    <title>Meerkat</title>
	    <link>http://meerkat.oreillynet.com</link>
	    <dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
	  </channel>

	  <item rdf:about="http://c.moreover.com/click/here.pl?r123">
	    <title>XML: A Disruptive Technology</title>
	    <link>http://c.moreover.com/click/here.pl?r123</link>
	    <dc:description>
	    XML is placing increasingly heavy loads on the existing technical
	    infrastructure of the Internet.
	    </dc:description>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// NOTE(review): Entries[0] is indexed without a preceding length check;
	// a parser regression would panic here rather than fail cleanly.
	if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
|
||||
|
||||
// TestParseItemRelativeURL checks that a relative item <link> is resolved
// against the channel's site URL.
func TestParseItemRelativeURL(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
	  <channel>
	    <title>Example</title>
	    <link>http://example.org</link>
	  </channel>

	  <item>
	    <title>Title</title>
	    <description>Test</description>
	    <link>something.html</link>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Resolved against the channel link, not the document URL passed to Parse.
	if feed.Entries[0].URL != "http://example.org/something.html" {
		t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
	}
}
|
||||
|
||||
// TestParseItemWithoutLink checks that an entry without a <link> falls back
// to the channel URL, and that its hash stays stable.
func TestParseItemWithoutLink(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>

	<rdf:RDF
	  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
	  xmlns="http://purl.org/rss/1.0/"
	>

	  <channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
	    <title>Meerkat</title>
	    <link>http://meerkat.oreillynet.com</link>
	  </channel>

	  <item rdf:about="http://c.moreover.com/click/here.pl?r123">
	    <title>Title</title>
	    <description>Test</description>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" {
		t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash)
	}

	// The channel link is used when the item has no <link> of its own.
	if feed.Entries[0].URL != "http://meerkat.oreillynet.com" {
		t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
	}
}
|
||||
|
||||
// TestParseItemWithDublicCoreDate checks that <dc:date> is parsed into the
// entry date. (Note: "Dublic" is a typo for "Dublin" in the test name; it
// is kept to avoid changing -run filters.)
func TestParseItemWithDublicCoreDate(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
	  <channel>
	    <title>Example</title>
	    <link>http://example.org</link>
	  </channel>

	  <item>
	    <title>Title</title>
	    <description>Test</description>
	    <link>http://example.org/test.html</link>
	    <dc:creator>Tester</dc:creator>
	    <dc:date>2018-04-10T05:00:00+00:00</dc:date>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
	if !feed.Entries[0].Date.Equal(expectedDate) {
		t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
	}
}
|
||||
|
||||
// TestParseItemWithoutDate checks that an entry without any date element is
// stamped with (approximately) the current time.
func TestParseItemWithoutDate(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
	  <channel>
	    <title>Example</title>
	    <link>http://example.org</link>
	  </channel>

	  <item>
	    <title>Title</title>
	    <description>Test</description>
	    <link>http://example.org/test.html</link>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// One-second tolerance between parsing and asserting; could flake on a
	// very slow machine, but is deliberate to keep the check simple.
	expectedDate := time.Now().In(time.Local)
	diff := expectedDate.Sub(feed.Entries[0].Date)
	if diff > time.Second {
		t.Errorf("Incorrect entry date, got: %v", diff)
	}
}
|
||||
|
||||
// TestParseItemWithEncodedHTMLTitle checks that an XML-escaped entity in a
// title (&amp;) is decoded in the parsed entry title.
func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
	  <channel>
	    <title>Example</title>
	    <link>http://example.org</link>
	  </channel>

	  <item>
	    <title>AT&amp;T</title>
	    <description>Test</description>
	    <link>http://example.org/test.html</link>
	  </item>
	</rdf:RDF>`

	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.Entries[0].Title != `AT&T` {
		t.Errorf("Incorrect entry title, got: %v", feed.Entries[0].Title)
	}
}
|
||||
|
||||
func TestParseInvalidXml(t *testing.T) {
|
||||
data := `garbage`
|
||||
_, err := Parse("http://example.org", bytes.NewBufferString(data))
|
||||
if err == nil {
|
||||
t.Fatal("Parse should returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseFeedWithHTMLEntity(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>http://example.org</link>
|
||||
</channel>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse("http://example.org", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if feed.Title != "Example \u00a0 Feed" {
|
||||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseFeedWithInvalidCharacterEntity checks that a raw, unescaped "&"
// in element content does not break parsing and survives into the site URL.
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
	  <channel>
	    <title>Example Feed</title>
	    <link>http://example.org/a&b</link>
	  </channel>
	</rdf:RDF>`

	feed, err := Parse("http://example.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.SiteURL != "http://example.org/a&b" {
		t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
	}
}
|
||||
|
||||
// TestParseFeedWithURLWrappedInSpaces checks that an item <link> whose value
// is wrapped in whitespace/newlines (as produced by bioRxiv) is trimmed to a
// clean entry URL.
func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
	<rdf:RDF xmlns:admin="http://webns.net/mvcb/" xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:prism="http://purl.org/rss/1.0/modules/prism/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
	<channel rdf:about="http://biorxiv.org">
		<title>bioRxiv Subject Collection: Bioengineering</title>
		<link>http://biorxiv.org</link>
		<description>
			This feed contains articles for bioRxiv Subject Collection "Bioengineering"
		</description>
		<items>
			<rdf:Seq>
				<rdf:li rdf:resource="http://biorxiv.org/cgi/content/short/857789v1?rss=1"/>
			</rdf:Seq>
		</items>
		<prism:eIssn/>
		<prism:publicationName>bioRxiv</prism:publicationName>
		<prism:issn/>
		<image rdf:resource=""/>
	</channel>
	<image rdf:about="">
		<title>bioRxiv</title>
		<url/>
		<link>http://biorxiv.org</link>
	</image>
	<item rdf:about="http://biorxiv.org/cgi/content/short/857789v1?rss=1">
		<title>
			<![CDATA[
				Microscale Collagen and Fibroblast Interactions Enhance Primary Human Hepatocyte Functions in 3-Dimensional Models
			]]>
		</title>
		<link>
			http://biorxiv.org/cgi/content/short/857789v1?rss=1
		</link>
		<description><![CDATA[
			Human liver models that are 3-dimensional (3D) in architecture are proving to be indispensable for diverse applications, including compound metabolism and toxicity screening during preclinical drug development, to model human liver diseases for the discovery of novel therapeutics, and for cell-based therapies in the clinic; however, further development of such models is needed to maintain high levels of primary human hepatocyte (PHH) functions for weeks to months in vitro. Therefore, here we determined how microscale 3D collagen-I presentation and fibroblast interaction could affect the long-term functions of PHHs. High-throughput droplet microfluidics was utilized to rapidly generate reproducibly-sized (~300 micron diameter) microtissues containing PHHs encapsulated in collagen-I +/- supportive fibroblasts, namely 3T3-J2 murine embryonic fibroblasts or primary human hepatic stellate cells (HSCs); self-assembled spheroids and bulk collagen gels (macrogels) containing PHHs served as gold-standard controls. Hepatic functions (e.g. albumin and cytochrome-P450 or CYP activities) and gene expression were subsequently measured for up to 6 weeks. We found that collagen-based 3D microtissues rescued PHH functions within static multi-well plates at 2- to 30-fold higher levels than self-assembled spheroids or macrogels. Further coating of PHH microtissues with 3T3-J2s led to higher hepatic functions than when the two cell types were either coencapsulated together or when HSCs were used for the coating instead. Additionally, the 3T3-J2-coated PHH microtissues displayed 6+ weeks of relatively stable hepatic gene expression and function at levels similar to freshly thawed PHHs. Lastly, microtissues responded in a clinically-relevant manner to drug-mediated CYP induction or hepatotoxicity. 
In conclusion, fibroblast-coated collagen microtissues containing PHHs display hepatic functions for 6+ weeks without any fluid perfusion at higher levels than spheroids and macrogels, and such microtissues can be used to assess drug-mediated CYP induction and hepatotoxicity. Ultimately, microtissues may find broader utility for modeling liver diseases and as building blocks for cell-based therapies.
		]]></description>
		<dc:creator><![CDATA[ Kukla, D., Crampton, A., Wood, D., Khetani, S. ]]></dc:creator>
		<dc:date>2019-11-29</dc:date>
		<dc:identifier>doi:10.1101/857789</dc:identifier>
		<dc:title><![CDATA[Microscale Collagen and Fibroblast Interactions Enhance Primary Human Hepatocyte Functions in 3-Dimensional Models]]></dc:title>
		<dc:publisher>Cold Spring Harbor Laboratory</dc:publisher>
		<prism:publicationDate>2019-11-29</prism:publicationDate>
		<prism:section></prism:section>
	</item>
	</rdf:RDF>`

	feed, err := Parse("http://biorxiv.org", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if feed.SiteURL != "http://biorxiv.org" {
		t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
	}

	if len(feed.Entries) != 1 {
		t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
	}

	// The surrounding whitespace/newlines in <link> must have been trimmed.
	if feed.Entries[0].URL != `http://biorxiv.org/cgi/content/short/857789v1?rss=1` {
		t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL)
	}
}
|
||||
|
||||
func TestParseRDFWithContentEncoded(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>http://example.org/</link>
|
||||
</channel>
|
||||
<item>
|
||||
<title>Item Title</title>
|
||||
<link>http://example.org/</link>
|
||||
<content:encoded><![CDATA[<p>Test</p>]]></content:encoded>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse("http://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||
}
|
||||
|
||||
expected := `<p>Test</p>`
|
||||
result := feed.Entries[0].Content
|
||||
if result != expected {
|
||||
t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseRDFWithEncodedHTMLDescription(t *testing.T) {
|
||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||
<rdf:RDF
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||
<channel>
|
||||
<title>Example Feed</title>
|
||||
<link>http://example.org/</link>
|
||||
</channel>
|
||||
<item>
|
||||
<title>Item Title</title>
|
||||
<link>http://example.org/</link>
|
||||
<description>AT&amp;T <img src="https://example.org/img.png"></a></description>
|
||||
</item>
|
||||
</rdf:RDF>`
|
||||
|
||||
feed, err := Parse("http://example.org/", bytes.NewBufferString(data))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if len(feed.Entries) != 1 {
|
||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||
}
|
||||
|
||||
expected := `AT&T <img src="https://example.org/img.png"></a>`
|
||||
result := feed.Entries[0].Content
|
||||
if result != expected {
|
||||
t.Errorf(`Unexpected entry URL, got %v instead of %v`, result, expected)
|
||||
}
|
||||
}
|
119
internal/reader/rdf/rdf.go
Normal file
119
internal/reader/rdf/rdf.go
Normal file
|
@ -0,0 +1,119 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"html"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
type rdfFeed struct {
|
||||
XMLName xml.Name `xml:"RDF"`
|
||||
Title string `xml:"channel>title"`
|
||||
Link string `xml:"channel>link"`
|
||||
Items []rdfItem `xml:"item"`
|
||||
DublinCoreFeedElement
|
||||
}
|
||||
|
||||
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
feed := new(model.Feed)
|
||||
feed.Title = sanitizer.StripTags(r.Title)
|
||||
feed.FeedURL = baseURL
|
||||
feed.SiteURL, err = url.AbsoluteURL(baseURL, r.Link)
|
||||
if err != nil {
|
||||
feed.SiteURL = r.Link
|
||||
}
|
||||
|
||||
for _, item := range r.Items {
|
||||
entry := item.Transform()
|
||||
if entry.Author == "" && r.DublinCoreCreator != "" {
|
||||
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
|
||||
}
|
||||
|
||||
if entry.URL == "" {
|
||||
entry.URL = feed.SiteURL
|
||||
} else {
|
||||
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
|
||||
if err == nil {
|
||||
entry.URL = entryURL
|
||||
}
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
type rdfItem struct {
|
||||
Title string `xml:"title"`
|
||||
Link string `xml:"link"`
|
||||
Description string `xml:"description"`
|
||||
DublinCoreEntryElement
|
||||
}
|
||||
|
||||
func (r *rdfItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.Title = r.entryTitle()
|
||||
entry.Author = r.entryAuthor()
|
||||
entry.URL = r.entryURL()
|
||||
entry.Content = r.entryContent()
|
||||
entry.Hash = r.entryHash()
|
||||
entry.Date = r.entryDate()
|
||||
return entry
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryTitle() string {
|
||||
return html.UnescapeString(strings.TrimSpace(r.Title))
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryContent() string {
|
||||
switch {
|
||||
case r.DublinCoreContent != "":
|
||||
return r.DublinCoreContent
|
||||
default:
|
||||
return r.Description
|
||||
}
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryAuthor() string {
|
||||
return strings.TrimSpace(r.DublinCoreCreator)
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryURL() string {
|
||||
return strings.TrimSpace(r.Link)
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryDate() time.Time {
|
||||
if r.DublinCoreDate != "" {
|
||||
result, err := date.Parse(r.DublinCoreDate)
|
||||
if err != nil {
|
||||
logger.Error("rdf: %v (entry link = %s)", err, r.Link)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (r *rdfItem) entryHash() string {
|
||||
value := r.Link
|
||||
if value == "" {
|
||||
value = r.Title + r.Description
|
||||
}
|
||||
|
||||
return crypto.Hash(value)
|
||||
}
|
305
internal/reader/readability/readability.go
Normal file
305
internal/reader/readability/readability.go
Normal file
|
@ -0,0 +1,305 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package readability // import "miniflux.app/v2/internal/reader/readability"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/logger"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultTagsToScore = "section,h2,h3,h4,h5,h6,p,td,pre,div"
|
||||
)
|
||||
|
||||
var (
|
||||
divToPElementsRegexp = regexp.MustCompile(`(?i)<(a|blockquote|dl|div|img|ol|p|pre|table|ul)`)
|
||||
sentenceRegexp = regexp.MustCompile(`\.( |$)`)
|
||||
|
||||
blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
|
||||
okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
|
||||
unlikelyCandidatesRegexp = regexp.MustCompile(`(?i)banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`)
|
||||
|
||||
negativeRegexp = regexp.MustCompile(`(?i)hidden|^hid$|hid$|hid|^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget|byline|author|dateline|writtenby|p-author`)
|
||||
positiveRegexp = regexp.MustCompile(`(?i)article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story`)
|
||||
)
|
||||
|
||||
type candidate struct {
|
||||
selection *goquery.Selection
|
||||
score float32
|
||||
}
|
||||
|
||||
func (c *candidate) Node() *html.Node {
|
||||
return c.selection.Get(0)
|
||||
}
|
||||
|
||||
func (c *candidate) String() string {
|
||||
id, _ := c.selection.Attr("id")
|
||||
class, _ := c.selection.Attr("class")
|
||||
|
||||
if id != "" && class != "" {
|
||||
return fmt.Sprintf("%s#%s.%s => %f", c.Node().DataAtom, id, class, c.score)
|
||||
} else if id != "" {
|
||||
return fmt.Sprintf("%s#%s => %f", c.Node().DataAtom, id, c.score)
|
||||
} else if class != "" {
|
||||
return fmt.Sprintf("%s.%s => %f", c.Node().DataAtom, class, c.score)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s => %f", c.Node().DataAtom, c.score)
|
||||
}
|
||||
|
||||
type candidateList map[*html.Node]*candidate
|
||||
|
||||
func (c candidateList) String() string {
|
||||
var output []string
|
||||
for _, candidate := range c {
|
||||
output = append(output, candidate.String())
|
||||
}
|
||||
|
||||
return strings.Join(output, ", ")
|
||||
}
|
||||
|
||||
// ExtractContent returns relevant content.
|
||||
func ExtractContent(page io.Reader) (string, error) {
|
||||
document, err := goquery.NewDocumentFromReader(page)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
document.Find("script,style").Each(func(i int, s *goquery.Selection) {
|
||||
removeNodes(s)
|
||||
})
|
||||
|
||||
transformMisusedDivsIntoParagraphs(document)
|
||||
removeUnlikelyCandidates(document)
|
||||
|
||||
candidates := getCandidates(document)
|
||||
logger.Debug("[Readability] Candidates: %v", candidates)
|
||||
|
||||
topCandidate := getTopCandidate(document, candidates)
|
||||
logger.Debug("[Readability] TopCandidate: %v", topCandidate)
|
||||
|
||||
output := getArticle(topCandidate, candidates)
|
||||
return output, nil
|
||||
}
|
||||
|
||||
// Now that we have the top candidate, look through its siblings for content that might also be related.
|
||||
// Things like preambles, content split by ads that we removed, etc.
|
||||
func getArticle(topCandidate *candidate, candidates candidateList) string {
|
||||
output := bytes.NewBufferString("<div>")
|
||||
siblingScoreThreshold := float32(math.Max(10, float64(topCandidate.score*.2)))
|
||||
|
||||
topCandidate.selection.Siblings().Union(topCandidate.selection).Each(func(i int, s *goquery.Selection) {
|
||||
append := false
|
||||
node := s.Get(0)
|
||||
|
||||
if node == topCandidate.Node() {
|
||||
append = true
|
||||
} else if c, ok := candidates[node]; ok && c.score >= siblingScoreThreshold {
|
||||
append = true
|
||||
}
|
||||
|
||||
if s.Is("p") {
|
||||
linkDensity := getLinkDensity(s)
|
||||
content := s.Text()
|
||||
contentLength := len(content)
|
||||
|
||||
if contentLength >= 80 && linkDensity < .25 {
|
||||
append = true
|
||||
} else if contentLength < 80 && linkDensity == 0 && sentenceRegexp.MatchString(content) {
|
||||
append = true
|
||||
}
|
||||
}
|
||||
|
||||
if append {
|
||||
tag := "div"
|
||||
if s.Is("p") {
|
||||
tag = node.Data
|
||||
}
|
||||
|
||||
html, _ := s.Html()
|
||||
fmt.Fprintf(output, "<%s>%s</%s>", tag, html, tag)
|
||||
}
|
||||
})
|
||||
|
||||
output.Write([]byte("</div>"))
|
||||
return output.String()
|
||||
}
|
||||
|
||||
func removeUnlikelyCandidates(document *goquery.Document) {
|
||||
document.Find("*").Not("html,body").Each(func(i int, s *goquery.Selection) {
|
||||
class, _ := s.Attr("class")
|
||||
id, _ := s.Attr("id")
|
||||
str := class + id
|
||||
|
||||
if blacklistCandidatesRegexp.MatchString(str) || (unlikelyCandidatesRegexp.MatchString(str) && !okMaybeItsACandidateRegexp.MatchString(str)) {
|
||||
removeNodes(s)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func getTopCandidate(document *goquery.Document, candidates candidateList) *candidate {
|
||||
var best *candidate
|
||||
|
||||
for _, c := range candidates {
|
||||
if best == nil {
|
||||
best = c
|
||||
} else if best.score < c.score {
|
||||
best = c
|
||||
}
|
||||
}
|
||||
|
||||
if best == nil {
|
||||
best = &candidate{document.Find("body"), 0}
|
||||
}
|
||||
|
||||
return best
|
||||
}
|
||||
|
||||
// Loop through all paragraphs, and assign a score to them based on how content-y they look.
|
||||
// Then add their score to their parent node.
|
||||
// A score is determined by things like number of commas, class names, etc.
|
||||
// Maybe eventually link density.
|
||||
func getCandidates(document *goquery.Document) candidateList {
|
||||
candidates := make(candidateList)
|
||||
|
||||
document.Find(defaultTagsToScore).Each(func(i int, s *goquery.Selection) {
|
||||
text := s.Text()
|
||||
|
||||
// If this paragraph is less than 25 characters, don't even count it.
|
||||
if len(text) < 25 {
|
||||
return
|
||||
}
|
||||
|
||||
parent := s.Parent()
|
||||
parentNode := parent.Get(0)
|
||||
|
||||
grandParent := parent.Parent()
|
||||
var grandParentNode *html.Node
|
||||
if grandParent.Length() > 0 {
|
||||
grandParentNode = grandParent.Get(0)
|
||||
}
|
||||
|
||||
if _, found := candidates[parentNode]; !found {
|
||||
candidates[parentNode] = scoreNode(parent)
|
||||
}
|
||||
|
||||
if grandParentNode != nil {
|
||||
if _, found := candidates[grandParentNode]; !found {
|
||||
candidates[grandParentNode] = scoreNode(grandParent)
|
||||
}
|
||||
}
|
||||
|
||||
// Add a point for the paragraph itself as a base.
|
||||
contentScore := float32(1.0)
|
||||
|
||||
// Add points for any commas within this paragraph.
|
||||
contentScore += float32(strings.Count(text, ",") + 1)
|
||||
|
||||
// For every 100 characters in this paragraph, add another point. Up to 3 points.
|
||||
contentScore += float32(math.Min(float64(int(len(text)/100.0)), 3))
|
||||
|
||||
candidates[parentNode].score += contentScore
|
||||
if grandParentNode != nil {
|
||||
candidates[grandParentNode].score += contentScore / 2.0
|
||||
}
|
||||
})
|
||||
|
||||
// Scale the final candidates score based on link density. Good content
|
||||
// should have a relatively small link density (5% or less) and be mostly
|
||||
// unaffected by this operation
|
||||
for _, candidate := range candidates {
|
||||
candidate.score = candidate.score * (1 - getLinkDensity(candidate.selection))
|
||||
}
|
||||
|
||||
return candidates
|
||||
}
|
||||
|
||||
func scoreNode(s *goquery.Selection) *candidate {
|
||||
c := &candidate{selection: s, score: 0}
|
||||
|
||||
switch s.Get(0).DataAtom.String() {
|
||||
case "div":
|
||||
c.score += 5
|
||||
case "pre", "td", "blockquote", "img":
|
||||
c.score += 3
|
||||
case "address", "ol", "ul", "dl", "dd", "dt", "li", "form":
|
||||
c.score -= 3
|
||||
case "h1", "h2", "h3", "h4", "h5", "h6", "th":
|
||||
c.score -= 5
|
||||
}
|
||||
|
||||
c.score += getClassWeight(s)
|
||||
return c
|
||||
}
|
||||
|
||||
// Get the density of links as a percentage of the content
|
||||
// This is the amount of text that is inside a link divided by the total text in the node.
|
||||
func getLinkDensity(s *goquery.Selection) float32 {
|
||||
linkLength := len(s.Find("a").Text())
|
||||
textLength := len(s.Text())
|
||||
|
||||
if textLength == 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
return float32(linkLength) / float32(textLength)
|
||||
}
|
||||
|
||||
// Get an elements class/id weight. Uses regular expressions to tell if this
|
||||
// element looks good or bad.
|
||||
func getClassWeight(s *goquery.Selection) float32 {
|
||||
weight := 0
|
||||
class, _ := s.Attr("class")
|
||||
id, _ := s.Attr("id")
|
||||
|
||||
if class != "" {
|
||||
if negativeRegexp.MatchString(class) {
|
||||
weight -= 25
|
||||
}
|
||||
|
||||
if positiveRegexp.MatchString(class) {
|
||||
weight += 25
|
||||
}
|
||||
}
|
||||
|
||||
if id != "" {
|
||||
if negativeRegexp.MatchString(id) {
|
||||
weight -= 25
|
||||
}
|
||||
|
||||
if positiveRegexp.MatchString(id) {
|
||||
weight += 25
|
||||
}
|
||||
}
|
||||
|
||||
return float32(weight)
|
||||
}
|
||||
|
||||
func transformMisusedDivsIntoParagraphs(document *goquery.Document) {
|
||||
document.Find("div").Each(func(i int, s *goquery.Selection) {
|
||||
html, _ := s.Html()
|
||||
if !divToPElementsRegexp.MatchString(html) {
|
||||
node := s.Get(0)
|
||||
node.Data = "p"
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func removeNodes(s *goquery.Selection) {
|
||||
s.Each(func(i int, s *goquery.Selection) {
|
||||
parent := s.Parent()
|
||||
if parent.Length() > 0 {
|
||||
parent.Get(0).RemoveChild(s.Get(0))
|
||||
}
|
||||
})
|
||||
}
|
383
internal/reader/rewrite/rewrite_functions.go
Normal file
383
internal/reader/rewrite/rewrite_functions.go
Normal file
|
@ -0,0 +1,383 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"html"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/yuin/goldmark"
|
||||
goldmarkhtml "github.com/yuin/goldmark/renderer/html"
|
||||
)
|
||||
|
||||
var (
|
||||
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
youtubeIdRegex = regexp.MustCompile(`youtube_id"?\s*[:=]\s*"([a-zA-Z0-9_-]{11})"`)
|
||||
invidioRegex = regexp.MustCompile(`https?:\/\/(.*)\/watch\?v=(.*)`)
|
||||
imgRegex = regexp.MustCompile(`<img [^>]+>`)
|
||||
textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`)
|
||||
)
|
||||
|
||||
func addImageTitle(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
matches := doc.Find("img[src][title]")
|
||||
|
||||
if matches.Length() > 0 {
|
||||
matches.Each(func(i int, img *goquery.Selection) {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
srcAttr, _ := img.Attr("src")
|
||||
titleAttr, _ := img.Attr("title")
|
||||
|
||||
img.ReplaceWithHtml(`<figure><img src="` + srcAttr + `" alt="` + altAttr + `"/><figcaption><p>` + html.EscapeString(titleAttr) + `</p></figcaption></figure>`)
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addMailtoSubject(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
matches := doc.Find(`a[href^="mailto:"]`)
|
||||
|
||||
if matches.Length() > 0 {
|
||||
matches.Each(func(i int, a *goquery.Selection) {
|
||||
hrefAttr, _ := a.Attr("href")
|
||||
|
||||
mailto, err := url.Parse(hrefAttr)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
subject := mailto.Query().Get("subject")
|
||||
if subject == "" {
|
||||
return
|
||||
}
|
||||
|
||||
a.AppendHtml(" [" + html.EscapeString(subject) + "]")
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addDynamicImage(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
// Ordered most preferred to least preferred.
|
||||
candidateAttrs := []string{
|
||||
"data-src",
|
||||
"data-original",
|
||||
"data-orig",
|
||||
"data-url",
|
||||
"data-orig-file",
|
||||
"data-large-file",
|
||||
"data-medium-file",
|
||||
"data-2000src",
|
||||
"data-1000src",
|
||||
"data-800src",
|
||||
"data-655src",
|
||||
"data-500src",
|
||||
"data-380src",
|
||||
}
|
||||
|
||||
candidateSrcsetAttrs := []string{
|
||||
"data-srcset",
|
||||
}
|
||||
|
||||
changed := false
|
||||
|
||||
doc.Find("img,div").Each(func(i int, img *goquery.Selection) {
|
||||
// Src-linked candidates
|
||||
for _, candidateAttr := range candidateAttrs {
|
||||
if srcAttr, found := img.Attr(candidateAttr); found {
|
||||
changed = true
|
||||
|
||||
if img.Is("img") {
|
||||
img.SetAttr("src", srcAttr)
|
||||
} else {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
img.ReplaceWithHtml(`<img src="` + srcAttr + `" alt="` + altAttr + `"/>`)
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Srcset-linked candidates
|
||||
for _, candidateAttr := range candidateSrcsetAttrs {
|
||||
if srcAttr, found := img.Attr(candidateAttr); found {
|
||||
changed = true
|
||||
|
||||
if img.Is("img") {
|
||||
img.SetAttr("srcset", srcAttr)
|
||||
} else {
|
||||
altAttr := img.AttrOr("alt", "")
|
||||
img.ReplaceWithHtml(`<img srcset="` + srcAttr + `" alt="` + altAttr + `"/>`)
|
||||
}
|
||||
|
||||
break
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
if !changed {
|
||||
doc.Find("noscript").Each(func(i int, noscript *goquery.Selection) {
|
||||
matches := imgRegex.FindAllString(noscript.Text(), 2)
|
||||
|
||||
if len(matches) == 1 {
|
||||
changed = true
|
||||
|
||||
noscript.ReplaceWithHtml(matches[0])
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
if changed {
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func fixMediumImages(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find("figure.paragraph-image").Each(func(i int, paragraphImage *goquery.Selection) {
|
||||
noscriptElement := paragraphImage.Find("noscript")
|
||||
if noscriptElement.Length() > 0 {
|
||||
paragraphImage.ReplaceWithHtml(noscriptElement.Text())
|
||||
}
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func useNoScriptImages(entryURL, entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find("figure").Each(func(i int, figureElement *goquery.Selection) {
|
||||
imgElement := figureElement.Find("img")
|
||||
if imgElement.Length() > 0 {
|
||||
noscriptElement := figureElement.Find("noscript")
|
||||
if noscriptElement.Length() > 0 {
|
||||
figureElement.PrependHtml(noscriptElement.Text())
|
||||
imgElement.Remove()
|
||||
noscriptElement.Remove()
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func addYoutubeVideo(entryURL, entryContent string) string {
|
||||
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="` + config.Opts.YouTubeEmbedUrlOverride() + matches[1] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addYoutubeVideoUsingInvidiousPlayer(entryURL, entryContent string) string {
|
||||
matches := youtubeRegex.FindStringSubmatch(entryURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="https://` + config.Opts.InvidiousInstance() + `/embed/` + matches[1] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addYoutubeVideoFromId(entryContent string) string {
|
||||
matches := youtubeIdRegex.FindAllStringSubmatch(entryContent, -1)
|
||||
if matches == nil {
|
||||
return entryContent
|
||||
}
|
||||
sb := strings.Builder{}
|
||||
for _, match := range matches {
|
||||
if len(match) == 2 {
|
||||
sb.WriteString(`<iframe width="650" height="350" frameborder="0" src="`)
|
||||
sb.WriteString(config.Opts.YouTubeEmbedUrlOverride())
|
||||
sb.WriteString(match[1])
|
||||
sb.WriteString(`" allowfullscreen></iframe><br>`)
|
||||
}
|
||||
}
|
||||
sb.WriteString(entryContent)
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func addInvidiousVideo(entryURL, entryContent string) string {
|
||||
matches := invidioRegex.FindStringSubmatch(entryURL)
|
||||
if len(matches) == 3 {
|
||||
video := `<iframe width="650" height="350" frameborder="0" src="https://` + matches[1] + `/embed/` + matches[2] + `" allowfullscreen></iframe>`
|
||||
return video + `<br>` + entryContent
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func addPDFLink(entryURL, entryContent string) string {
|
||||
if strings.HasSuffix(entryURL, ".pdf") {
|
||||
return fmt.Sprintf(`<a href="%s">PDF</a><br>%s`, entryURL, entryContent)
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func replaceTextLinks(input string) string {
|
||||
return textLinkRegex.ReplaceAllString(input, `<a href="${1}">${1}</a>`)
|
||||
}
|
||||
|
||||
func replaceLineFeeds(input string) string {
|
||||
return strings.Replace(input, "\n", "<br>", -1)
|
||||
}
|
||||
|
||||
func replaceCustom(entryContent string, searchTerm string, replaceTerm string) string {
|
||||
re, err := regexp.Compile(searchTerm)
|
||||
if err == nil {
|
||||
return re.ReplaceAllString(entryContent, replaceTerm)
|
||||
}
|
||||
return entryContent
|
||||
}
|
||||
|
||||
func removeCustom(entryContent string, selector string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find(selector).Remove()
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func addCastopodEpisode(entryURL, entryContent string) string {
|
||||
player := `<iframe width="650" frameborder="0" src="` + entryURL + `/embed/light"></iframe>`
|
||||
|
||||
return player + `<br>` + entryContent
|
||||
}
|
||||
|
||||
func applyFuncOnTextContent(entryContent string, selector string, repl func(string) string) string {
|
||||
var treatChildren func(i int, s *goquery.Selection)
|
||||
treatChildren = func(i int, s *goquery.Selection) {
|
||||
if s.Nodes[0].Type == 1 {
|
||||
s.ReplaceWithHtml(repl(s.Nodes[0].Data))
|
||||
} else {
|
||||
s.Contents().Each(treatChildren)
|
||||
}
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
doc.Find(selector).Each(treatChildren)
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func decodeBase64Content(entryContent string) string {
|
||||
if ret, err := base64.StdEncoding.DecodeString(strings.TrimSpace(entryContent)); err != nil {
|
||||
return entryContent
|
||||
} else {
|
||||
return html.EscapeString(string(ret))
|
||||
}
|
||||
}
|
||||
|
||||
func parseMarkdown(entryContent string) string {
|
||||
var sb strings.Builder
|
||||
md := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
goldmarkhtml.WithUnsafe(),
|
||||
),
|
||||
)
|
||||
|
||||
if err := md.Convert([]byte(entryContent), &sb); err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
func removeTables(entryContent string) string {
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent))
|
||||
if err != nil {
|
||||
return entryContent
|
||||
}
|
||||
|
||||
selectors := []string{"table", "tbody", "thead", "td", "th", "td"}
|
||||
|
||||
var loopElement *goquery.Selection
|
||||
|
||||
for _, selector := range selectors {
|
||||
for {
|
||||
loopElement = doc.Find(selector).First()
|
||||
|
||||
if loopElement.Length() == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
innerHtml, err := loopElement.Html()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
loopElement.Parent().AppendHtml(innerHtml)
|
||||
loopElement.Remove()
|
||||
}
|
||||
}
|
||||
|
||||
output, _ := doc.Find("body").First().Html()
|
||||
return output
|
||||
}
|
||||
|
||||
func removeClickbait(entryTitle string) string {
|
||||
titleWords := []string{}
|
||||
for _, word := range strings.Fields(entryTitle) {
|
||||
runes := []rune(word)
|
||||
if len(runes) > 1 {
|
||||
// keep first rune as is to keep the first capital letter
|
||||
titleWords = append(titleWords, string([]rune{runes[0]})+strings.ToLower(string(runes[1:])))
|
||||
} else {
|
||||
titleWords = append(titleWords, word)
|
||||
}
|
||||
}
|
||||
return strings.Join(titleWords, " ")
|
||||
}
|
127
internal/reader/rewrite/rewriter.go
Normal file
127
internal/reader/rewrite/rewriter.go
Normal file
|
@ -0,0 +1,127 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"text/scanner"
|
||||
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
type rule struct {
|
||||
name string
|
||||
args []string
|
||||
}
|
||||
|
||||
// Rewriter modify item contents with a set of rewriting rules.
|
||||
func Rewriter(entryURL string, entry *model.Entry, customRewriteRules string) {
|
||||
rulesList := getPredefinedRewriteRules(entryURL)
|
||||
if customRewriteRules != "" {
|
||||
rulesList = customRewriteRules
|
||||
}
|
||||
|
||||
rules := parseRules(rulesList)
|
||||
rules = append(rules, rule{name: "add_pdf_download_link"})
|
||||
|
||||
logger.Debug(`[Rewrite] Applying rules %v for %q`, rules, entryURL)
|
||||
|
||||
for _, rule := range rules {
|
||||
applyRule(entryURL, entry, rule)
|
||||
}
|
||||
}
|
||||
|
||||
func parseRules(rulesText string) (rules []rule) {
|
||||
scan := scanner.Scanner{Mode: scanner.ScanIdents | scanner.ScanStrings}
|
||||
scan.Init(strings.NewReader(rulesText))
|
||||
|
||||
for {
|
||||
switch scan.Scan() {
|
||||
case scanner.Ident:
|
||||
rules = append(rules, rule{name: scan.TokenText()})
|
||||
|
||||
case scanner.String:
|
||||
if l := len(rules) - 1; l >= 0 {
|
||||
text := scan.TokenText()
|
||||
text, _ = strconv.Unquote(text)
|
||||
|
||||
rules[l].args = append(rules[l].args, text)
|
||||
}
|
||||
|
||||
case scanner.EOF:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func applyRule(entryURL string, entry *model.Entry, rule rule) {
|
||||
switch rule.name {
|
||||
case "add_image_title":
|
||||
entry.Content = addImageTitle(entryURL, entry.Content)
|
||||
case "add_mailto_subject":
|
||||
entry.Content = addMailtoSubject(entryURL, entry.Content)
|
||||
case "add_dynamic_image":
|
||||
entry.Content = addDynamicImage(entryURL, entry.Content)
|
||||
case "add_youtube_video":
|
||||
entry.Content = addYoutubeVideo(entryURL, entry.Content)
|
||||
case "add_invidious_video":
|
||||
entry.Content = addInvidiousVideo(entryURL, entry.Content)
|
||||
case "add_youtube_video_using_invidious_player":
|
||||
entry.Content = addYoutubeVideoUsingInvidiousPlayer(entryURL, entry.Content)
|
||||
case "add_youtube_video_from_id":
|
||||
entry.Content = addYoutubeVideoFromId(entry.Content)
|
||||
case "add_pdf_download_link":
|
||||
entry.Content = addPDFLink(entryURL, entry.Content)
|
||||
case "nl2br":
|
||||
entry.Content = replaceLineFeeds(entry.Content)
|
||||
case "convert_text_link", "convert_text_links":
|
||||
entry.Content = replaceTextLinks(entry.Content)
|
||||
case "fix_medium_images":
|
||||
entry.Content = fixMediumImages(entryURL, entry.Content)
|
||||
case "use_noscript_figure_images":
|
||||
entry.Content = useNoScriptImages(entryURL, entry.Content)
|
||||
case "replace":
|
||||
// Format: replace("search-term"|"replace-term")
|
||||
if len(rule.args) >= 2 {
|
||||
entry.Content = replaceCustom(entry.Content, rule.args[0], rule.args[1])
|
||||
} else {
|
||||
logger.Debug("[Rewrite] Cannot find search and replace terms for replace rule %s", rule)
|
||||
}
|
||||
case "remove":
|
||||
// Format: remove("#selector > .element, .another")
|
||||
if len(rule.args) >= 1 {
|
||||
entry.Content = removeCustom(entry.Content, rule.args[0])
|
||||
} else {
|
||||
logger.Debug("[Rewrite] Cannot find selector for remove rule %s", rule)
|
||||
}
|
||||
case "add_castopod_episode":
|
||||
entry.Content = addCastopodEpisode(entryURL, entry.Content)
|
||||
case "base64_decode":
|
||||
if len(rule.args) >= 1 {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, rule.args[0], decodeBase64Content)
|
||||
} else {
|
||||
entry.Content = applyFuncOnTextContent(entry.Content, "body", decodeBase64Content)
|
||||
}
|
||||
case "parse_markdown":
|
||||
entry.Content = parseMarkdown(entry.Content)
|
||||
case "remove_tables":
|
||||
entry.Content = removeTables(entry.Content)
|
||||
case "remove_clickbait":
|
||||
entry.Title = removeClickbait(entry.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func getPredefinedRewriteRules(entryURL string) string {
|
||||
urlDomain := url.Domain(entryURL)
|
||||
for domain, rules := range predefinedRules {
|
||||
if strings.Contains(urlDomain, domain) {
|
||||
return rules
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
563
internal/reader/rewrite/rewriter_test.go
Normal file
563
internal/reader/rewrite/rewriter_test.go
Normal file
|
@ -0,0 +1,563 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
import (
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/model"
|
||||
)
|
||||
|
||||
func TestParseRules(t *testing.T) {
|
||||
rulesText := `add_dynamic_image,replace("article/(.*).svg"|"article/$1.png"),remove(".spam, .ads:not(.keep)")`
|
||||
expected := []rule{
|
||||
{name: "add_dynamic_image"},
|
||||
{name: "replace", args: []string{"article/(.*).svg", "article/$1.png"}},
|
||||
{name: "remove", args: []string{".spam, .ads:not(.keep)"}},
|
||||
}
|
||||
|
||||
actual := parseRules(rulesText)
|
||||
|
||||
if !reflect.DeepEqual(expected, actual) {
|
||||
t.Errorf(`Parsed rules do not match expected rules: got %v instead of %v`, actual, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceTextLinks(t *testing.T) {
|
||||
scenarios := map[string]string{
|
||||
`This is a link to example.org`: `This is a link to example.org`,
|
||||
`This is a link to ftp://example.org`: `This is a link to ftp://example.org`,
|
||||
`This is a link to www.example.org`: `This is a link to www.example.org`,
|
||||
`This is a link to http://example.org`: `This is a link to <a href="http://example.org">http://example.org</a>`,
|
||||
`This is a link to http://example.org, end of sentence.`: `This is a link to <a href="http://example.org">http://example.org</a>, end of sentence.`,
|
||||
`This is a link to https://example.org`: `This is a link to <a href="https://example.org">https://example.org</a>`,
|
||||
`This is a link to https://www.example.org/path/to?q=s`: `This is a link to <a href="https://www.example.org/path/to?q=s">https://www.example.org/path/to?q=s</a>`,
|
||||
`This is a link to https://example.org/index#hash-tag, http://example.org/.`: `This is a link to <a href="https://example.org/index#hash-tag">https://example.org/index#hash-tag</a>, <a href="http://example.org/">http://example.org/</a>.`,
|
||||
}
|
||||
|
||||
for input, expected := range scenarios {
|
||||
actual := replaceTextLinks(input)
|
||||
if actual != expected {
|
||||
t.Errorf(`Unexpected link replacement, got "%s" instead of "%s"`, actual, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithNoMatchingRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Some text.`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Some text.`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithYoutubeLink(t *testing.T) {
|
||||
config.Opts = config.NewOptions()
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" height="350" frameborder="0" src="https://www.youtube-nocookie.com/embed/1234" allowfullscreen></iframe><br>Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithYoutubeLinkAndCustomEmbedURL(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" height="350" frameborder="0" src="https://invidious.custom/embed/1234" allowfullscreen></iframe><br>Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithInexistingCustomRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Video Description`,
|
||||
}
|
||||
Rewriter("https://www.youtube.com/watch?v=1234", testEntry, `some rule`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you."/><figcaption><p>Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you.</p></figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkHtmlInjection(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://imgs.xkcd.com/comics/thermostat.png" alt="<foo>"/><figcaption><p><foo></p></figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" title="<foo>" alt="<foo>" />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkAndImageNoTitle(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://imgs.xkcd.com/comics/thermostat.png" alt="Your problem is so terrible, I worry that, if I help you, I risk drawing the attention of whatever god of technology inflicted it on you." />`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdLinkAndNoImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithXkcdAndNoImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://xkcd.com/1912/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteMailtoLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact [blah blah]</a>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="mailto:ryan@qwantz.com?subject=blah%20blah">contact</a>`,
|
||||
}
|
||||
Rewriter("https://www.qwantz.com/", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithPDFLink(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<a href="https://example.org/document.pdf">PDF</a><br>test`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `test`,
|
||||
}
|
||||
Rewriter("https://example.org/document.pdf", testEntry, ``)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithNoLazyImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"><noscript><p>Some text</p></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazyImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" data-url="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-url="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazyDivImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="https://example.org/image.jpg" alt="Image"/><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div data-url="https://example.org/image.jpg" alt="Image"></div><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithUnknownLazyNoScriptImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"/><img src="https://example.org/fallback.jpg" alt="Fallback"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="" data-non-candidate="https://example.org/image.jpg" alt="Image"><noscript><img src="https://example.org/fallback.jpg" alt="Fallback"></noscript>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithLazySrcset(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteWithImageAndLazySrcset(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="meow" srcset="https://example.org/image.jpg" data-srcset="https://example.org/image.jpg" alt="Image"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="meow" srcset="" data-srcset="https://example.org/image.jpg" alt="Image">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "add_dynamic_image")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewLineRewriteRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `A<br>B<br>C`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: "A\nB\nC",
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "nl2br")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertTextLinkRewriteRule(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Test: <a href="http://example.org/a/b">http://example.org/a/b</a>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Test: http://example.org/a/b`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "convert_text_link")
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMediumImage(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcset="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `
|
||||
<figure class="ht hu hv hw hx hy cy cz paragraph-image">
|
||||
<div class="hz ia ib ic aj">
|
||||
<div class="cy cz hs">
|
||||
<div class="ii s ib ij">
|
||||
<div class="ik il s">
|
||||
<div class="id ie t u v if aj bk ig ih">
|
||||
<img alt="Image for post" class="t u v if aj im in io" src="https://miro.medium.com/max/60/1*ephLSqSzQYLvb7faDwzRbw.jpeg?q=20" width="1280" height="720"/>
|
||||
</div>
|
||||
<img alt="Image for post" class="id ie t u v if aj c" width="1280" height="720"/>
|
||||
<noscript>
|
||||
<img alt="Image for post" class="t u v if aj" src="https://miro.medium.com/max/2560/1*ephLSqSzQYLvb7faDwzRbw.jpeg" width="1280" height="720" srcSet="https://miro.medium.com/max/552/1*ephLSqSzQYLvb7faDwzRbw.jpeg 276w, https://miro.medium.com/max/1104/1*ephLSqSzQYLvb7faDwzRbw.jpeg 552w, https://miro.medium.com/max/1280/1*ephLSqSzQYLvb7faDwzRbw.jpeg 640w, https://miro.medium.com/max/1400/1*ephLSqSzQYLvb7faDwzRbw.jpeg 700w" sizes="700px"/>
|
||||
</noscript>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</figure>
|
||||
`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "fix_medium_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteNoScriptImageWithoutNoScriptTag(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."/><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteNoScriptImageWithNoScriptTag(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="http://example.org/logo.svg"/><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<figure><img src="https://developer.mozilla.org/static/img/favicon144.png" alt="The beautiful MDN logo."><noscript><img src="http://example.org/logo.svg"></noscript><figcaption>MDN Logo</figcaption></figure>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, "use_noscript_figure_images")
|
||||
testEntry.Content = strings.TrimSpace(testEntry.Content)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteReplaceCustom(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.png">`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<img src="http://example.org/logo.svg"><img src="https://example.org/article/picture.svg">`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `replace("article/(.*).svg"|"article/$1.png")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteRemoveCustom(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum <span class="ads keep">Super important info</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum <span class="spam">I dont want to see this</span><span class="ads keep">Super important info</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove(".spam, .ads:not(.keep)")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteAddCastopodEpisode(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<iframe width="650" frameborder="0" src="https://podcast.demo/@demo/episodes/test/embed/light"></iframe><br>Episode Description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `Episode Description`,
|
||||
}
|
||||
Rewriter("https://podcast.demo/@demo/episodes/test", testEntry, `add_castopod_episode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64Decode(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `This is some base64 encoded content`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64DecodeInHTML(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum not valid base64<span class="base64">This is some base64 encoded content</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum not valid base64<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteBase64DecodeArgs(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum<span class="base64">This is some base64 encoded content</span></div>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<div>Lorem Ipsum<span class="base64">VGhpcyBpcyBzb21lIGJhc2U2NCBlbmNvZGVkIGNvbnRlbnQ=</span></div>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `base64_decode(".base64")`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRewriteRemoveTables(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<p>Test</p><p>Hello World!</p><p>Test</p>`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `A title`,
|
||||
Content: `<table class="container"><tbody><tr><td><p>Test</p><table class="row"><tbody><tr><td><p>Hello World!</p></td><td><p>Test</p></td></tr></tbody></table></td></tr></tbody></table>`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove_tables`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveClickbait(t *testing.T) {
|
||||
controlEntry := &model.Entry{
|
||||
Title: `This Is Amazing`,
|
||||
Content: `Some description`,
|
||||
}
|
||||
testEntry := &model.Entry{
|
||||
Title: `THIS IS AMAZING`,
|
||||
Content: `Some description`,
|
||||
}
|
||||
Rewriter("https://example.org/article", testEntry, `remove_clickbait`)
|
||||
|
||||
if !reflect.DeepEqual(testEntry, controlEntry) {
|
||||
t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry)
|
||||
}
|
||||
}
|
40
internal/reader/rewrite/rules.go
Normal file
40
internal/reader/rewrite/rules.go
Normal file
|
@ -0,0 +1,40 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rewrite // import "miniflux.app/v2/internal/reader/rewrite"
|
||||
|
||||
// predefinedRules maps a domain-name fragment to the rewrite rules applied
// automatically to entries whose URL domain contains that fragment
// (see getPredefinedRewriteRules). Entries are kept alphabetically sorted
// by domain.
var predefinedRules = map[string]string{
	"abstrusegoose.com":      "add_image_title",
	"amazingsuperpowers.com": "add_image_title",
	"blog.cloudflare.com":    `add_image_title,remove("figure.kg-image-card figure.kg-image + img")`,
	"blog.laravel.com":       "parse_markdown",
	"cowbirdsinlove.com":     "add_image_title",
	"drawingboardcomic.com":  "add_image_title",
	"exocomics.com":          "add_image_title",
	"framatube.org":          "nl2br,convert_text_link",
	"happletea.com":          "add_image_title",
	"ilpost.it":              `remove(".art_tag, #audioPlayerArticle, .author-container, .caption, .ilpostShare, .lastRecents, #mc_embed_signup, .outbrain_inread, p:has(.leggi-anche), .youtube-overlay")`,
	"imogenquest.net":        "add_image_title",
	"lukesurl.com":           "add_image_title",
	"medium.com":             "fix_medium_images",
	"mercworks.net":          "add_image_title",
	"monkeyuser.com":         "add_image_title",
	"mrlovenstein.com":       "add_image_title",
	"nedroid.com":            "add_image_title",
	"oglaf.com":              "add_image_title",
	"optipess.com":           "add_image_title",
	"peebleslab.com":         "add_image_title",
	"quantamagazine.org":     `add_youtube_video_from_id, remove("h6:not(.byline,.post__title__kicker), #comments, .next-post__content, .footer__section, figure .outer--content, script")`,
	"sentfromthemoon.com":    "add_image_title",
	"thedoghousediaries.com": "add_image_title",
	"theverge.com":           `add_dynamic_image, remove("div.duet--recirculation--related-list")`,
	"treelobsters.com":       "add_image_title",
	"webtoons.com":           `add_dynamic_image,replace("webtoon"|"swebtoon")`,
	"www.qwantz.com":         "add_image_title,add_mailto_subject",
	"www.recalbox.com":       "parse_markdown",
	"xkcd.com":               "add_image_title",
	"youtube.com":            "add_youtube_video",
}
|
11
internal/reader/rss/dublincore.go
Normal file
11
internal/reader/rss/dublincore.go
Normal file
|
@ -0,0 +1,11 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
||||
|
||||
// DublinCoreElement represents Dublin Core XML elements.
//
// The fields map the http://purl.org/dc/elements/1.1/ namespace (date and
// creator) plus the RSS 1.0 content module's <content:encoded> element,
// which carries the full entry body when present.
type DublinCoreElement struct {
	DublinCoreDate    string `xml:"http://purl.org/dc/elements/1.1/ date"`               // <dc:date>
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`            // <dc:creator>
	DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`    // <content:encoded>
}
|
10
internal/reader/rss/feedburner.go
Normal file
10
internal/reader/rss/feedburner.go
Normal file
|
@ -0,0 +1,10 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
||||
|
||||
// FeedBurnerElement represents FeedBurner XML elements.
//
// FeedBurner rewrites item and enclosure links when proxying a feed; these
// fields expose the original URLs it preserves under the
// http://rssnamespace.org/feedburner/ext/1.0 namespace.
type FeedBurnerElement struct {
	FeedBurnerLink          string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`          // original item link
	FeedBurnerEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"` // original enclosure link
}
|
24
internal/reader/rss/parser.go
Normal file
24
internal/reader/rss/parser.go
Normal file
|
@ -0,0 +1,24 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
||||
// Parse returns a normalized feed struct from a RSS feed.
|
||||
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
|
||||
feed := new(rssFeed)
|
||||
decoder := xml.NewDecoder(data)
|
||||
err := decoder.Decode(feed)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("Unable to parse RSS feed: %q", err)
|
||||
}
|
||||
|
||||
return feed.Transform(baseURL), nil
|
||||
}
|
1502
internal/reader/rss/parser_test.go
Normal file
1502
internal/reader/rss/parser_test.go
Normal file
File diff suppressed because it is too large
Load diff
69
internal/reader/rss/podcast.go
Normal file
69
internal/reader/rss/podcast.go
Normal file
|
@ -0,0 +1,69 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
||||
|
||||
import "strings"
|
||||
|
||||
// PodcastFeedElement represents iTunes and GooglePlay feed XML elements.
// Specs:
// - https://github.com/simplepie/simplepie-ng/wiki/Spec:-iTunes-Podcast-RSS
// - https://developers.google.com/search/reference/podcast/rss-feed
type PodcastFeedElement struct {
	ItunesAuthor     string       `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>author"`
	Subtitle         string       `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>subtitle"`
	Summary          string       `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>summary"`
	PodcastOwner     PodcastOwner `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd channel>owner"`
	GooglePlayAuthor string       `xml:"http://www.google.com/schemas/play-podcasts/1.0 channel>author"`
}

// PodcastEntryElement represents iTunes and GooglePlay entry XML elements.
type PodcastEntryElement struct {
	Subtitle              string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
	Summary               string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
	GooglePlayDescription string `xml:"http://www.google.com/schemas/play-podcasts/1.0 description"`
}

// PodcastOwner represents contact information for the podcast owner.
type PodcastOwner struct {
	Name  string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd name"`
	Email string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd email"`
}

// Image represents podcast artwork.
type Image struct {
	URL string `xml:"href,attr"`
}

// PodcastAuthor returns the author of the podcast.
//
// Candidate fields are probed in priority order (iTunes author, Google Play
// author, owner name, owner email); the first non-empty one wins, and the
// result is returned with surrounding whitespace stripped.
func (e *PodcastFeedElement) PodcastAuthor() string {
	for _, candidate := range []string{e.ItunesAuthor, e.GooglePlayAuthor, e.PodcastOwner.Name, e.PodcastOwner.Email} {
		if candidate != "" {
			return strings.TrimSpace(candidate)
		}
	}
	return ""
}

// PodcastDescription returns the description of the podcast.
//
// Candidate fields are probed in priority order (Google Play description,
// iTunes summary, iTunes subtitle); the first non-empty one wins, trimmed
// of surrounding whitespace.
func (e *PodcastEntryElement) PodcastDescription() string {
	for _, candidate := range []string{e.GooglePlayDescription, e.Summary, e.Subtitle} {
		if candidate != "" {
			return strings.TrimSpace(candidate)
		}
	}
	return ""
}
|
425
internal/reader/rss/rss.go
Normal file
425
internal/reader/rss/rss.go
Normal file
|
@ -0,0 +1,425 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package rss // import "miniflux.app/v2/internal/reader/rss"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"html"
|
||||
"path"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/date"
|
||||
"miniflux.app/v2/internal/reader/media"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
"miniflux.app/v2/internal/url"
|
||||
)
|
||||
|
||||
// Specs: https://cyber.harvard.edu/rss/rss.html
type rssFeed struct {
	XMLName        xml.Name  `xml:"rss"`
	Version        string    `xml:"version,attr"`
	Title          string    `xml:"channel>title"`
	Links          []rssLink `xml:"channel>link"`
	ImageURL       string    `xml:"channel>image>url"`
	Language       string    `xml:"channel>language"`
	Description    string    `xml:"channel>description"`
	PubDate        string    `xml:"channel>pubDate"`
	ManagingEditor string    `xml:"channel>managingEditor"`
	Webmaster      string    `xml:"channel>webMaster"`
	Items          []rssItem `xml:"channel>item"`
	// Embedded iTunes/GooglePlay channel-level elements (author, owner, ...).
	PodcastFeedElement
}
|
||||
|
||||
func (r *rssFeed) Transform(baseURL string) *model.Feed {
|
||||
var err error
|
||||
|
||||
feed := new(model.Feed)
|
||||
|
||||
siteURL := r.siteURL()
|
||||
feed.SiteURL, err = url.AbsoluteURL(baseURL, siteURL)
|
||||
if err != nil {
|
||||
feed.SiteURL = siteURL
|
||||
}
|
||||
|
||||
feedURL := r.feedURL()
|
||||
feed.FeedURL, err = url.AbsoluteURL(baseURL, feedURL)
|
||||
if err != nil {
|
||||
feed.FeedURL = feedURL
|
||||
}
|
||||
|
||||
feed.Title = html.UnescapeString(strings.TrimSpace(r.Title))
|
||||
if feed.Title == "" {
|
||||
feed.Title = feed.SiteURL
|
||||
}
|
||||
|
||||
feed.IconURL = strings.TrimSpace(r.ImageURL)
|
||||
|
||||
for _, item := range r.Items {
|
||||
entry := item.Transform()
|
||||
if entry.Author == "" {
|
||||
entry.Author = r.feedAuthor()
|
||||
}
|
||||
|
||||
if entry.URL == "" {
|
||||
entry.URL = feed.SiteURL
|
||||
} else {
|
||||
entryURL, err := url.AbsoluteURL(feed.SiteURL, entry.URL)
|
||||
if err == nil {
|
||||
entry.URL = entryURL
|
||||
}
|
||||
}
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = sanitizer.TruncateHTML(entry.Content, 100)
|
||||
}
|
||||
|
||||
if entry.Title == "" {
|
||||
entry.Title = entry.URL
|
||||
}
|
||||
|
||||
feed.Entries = append(feed.Entries, entry)
|
||||
}
|
||||
|
||||
return feed
|
||||
}
|
||||
|
||||
func (r *rssFeed) siteURL() string {
|
||||
for _, element := range r.Links {
|
||||
if element.XMLName.Space == "" {
|
||||
return strings.TrimSpace(element.Data)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssFeed) feedURL() string {
|
||||
for _, element := range r.Links {
|
||||
if element.XMLName.Space == "http://www.w3.org/2005/Atom" {
|
||||
return strings.TrimSpace(element.Href)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r rssFeed) feedAuthor() string {
|
||||
author := r.PodcastAuthor()
|
||||
switch {
|
||||
case r.ManagingEditor != "":
|
||||
author = r.ManagingEditor
|
||||
case r.Webmaster != "":
|
||||
author = r.Webmaster
|
||||
}
|
||||
return sanitizer.StripTags(strings.TrimSpace(author))
|
||||
}
|
||||
|
||||
// rssGUID represents the item <guid> element.
// isPermaLink defaults to "true" when absent, in which case the GUID is a URL.
type rssGUID struct {
	XMLName     xml.Name
	Data        string `xml:",chardata"`
	IsPermaLink string `xml:"isPermaLink,attr"`
}

// rssLink represents a <link> element; the XML namespace distinguishes
// plain RSS links (chardata) from Atom links (href/rel attributes).
type rssLink struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Href    string `xml:"href,attr"`
	Rel     string `xml:"rel,attr"`
}

// rssCommentLink represents the item <comments> element.
type rssCommentLink struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
}

// rssAuthor represents an author element from any namespace; Inner keeps the
// raw XML so CDATA-wrapped values can be detected.
type rssAuthor struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Name    string `xml:"name"`
	Email   string `xml:"email"`
	Inner   string `xml:",innerxml"`
}

// rssTitle represents a <title> element from any namespace.
type rssTitle struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Inner   string `xml:",innerxml"`
}

// rssEnclosure represents the item <enclosure> element (media attachment).
type rssEnclosure struct {
	URL    string `xml:"url,attr"`
	Type   string `xml:"type,attr"`
	Length string `xml:"length,attr"`
}

// rssCategory represents an item <category> element; Inner keeps the raw XML
// so CDATA-wrapped values can be detected.
type rssCategory struct {
	XMLName xml.Name
	Data    string `xml:",chardata"`
	Inner   string `xml:",innerxml"`
}
|
||||
|
||||
// Size returns the enclosure length in bytes.
// An empty or unparsable length yields 0; the parse error is deliberately ignored.
func (enclosure *rssEnclosure) Size() int64 {
	if enclosure.Length == "" {
		return 0
	}
	size, _ := strconv.ParseInt(enclosure.Length, 10, 0)
	return size
}
|
||||
|
||||
// rssItem represents a channel <item> with all supported namespace extensions
// (Dublin Core, FeedBurner, iTunes/GooglePlay podcast, Media RSS).
type rssItem struct {
	GUID           rssGUID          `xml:"guid"`
	Title          []rssTitle       `xml:"title"`
	Links          []rssLink        `xml:"link"`
	Description    string           `xml:"description"`
	PubDate        string           `xml:"pubDate"`
	Authors        []rssAuthor      `xml:"author"`
	CommentLinks   []rssCommentLink `xml:"comments"`
	EnclosureLinks []rssEnclosure   `xml:"enclosure"`
	Categories     []rssCategory    `xml:"category"`
	DublinCoreElement
	FeedBurnerElement
	PodcastEntryElement
	media.Element
}
|
||||
|
||||
func (r *rssItem) Transform() *model.Entry {
|
||||
entry := new(model.Entry)
|
||||
entry.URL = r.entryURL()
|
||||
entry.CommentsURL = r.entryCommentsURL()
|
||||
entry.Date = r.entryDate()
|
||||
entry.Author = r.entryAuthor()
|
||||
entry.Hash = r.entryHash()
|
||||
entry.Content = r.entryContent()
|
||||
entry.Title = r.entryTitle()
|
||||
entry.Enclosures = r.entryEnclosures()
|
||||
entry.Tags = r.entryCategories()
|
||||
|
||||
return entry
|
||||
}
|
||||
|
||||
func (r *rssItem) entryDate() time.Time {
|
||||
value := r.PubDate
|
||||
if r.DublinCoreDate != "" {
|
||||
value = r.DublinCoreDate
|
||||
}
|
||||
|
||||
if value != "" {
|
||||
result, err := date.Parse(value)
|
||||
if err != nil {
|
||||
logger.Error("rss: %v (entry GUID = %s)", err, r.GUID)
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
return time.Now()
|
||||
}
|
||||
|
||||
func (r *rssItem) entryAuthor() string {
|
||||
author := ""
|
||||
|
||||
for _, rssAuthor := range r.Authors {
|
||||
switch rssAuthor.XMLName.Space {
|
||||
case "http://www.itunes.com/dtds/podcast-1.0.dtd", "http://www.google.com/schemas/play-podcasts/1.0":
|
||||
author = rssAuthor.Data
|
||||
case "http://www.w3.org/2005/Atom":
|
||||
if rssAuthor.Name != "" {
|
||||
author = rssAuthor.Name
|
||||
} else if rssAuthor.Email != "" {
|
||||
author = rssAuthor.Email
|
||||
}
|
||||
default:
|
||||
if rssAuthor.Name != "" {
|
||||
author = rssAuthor.Name
|
||||
} else if strings.Contains(rssAuthor.Inner, "<![CDATA[") {
|
||||
author = rssAuthor.Data
|
||||
} else {
|
||||
author = rssAuthor.Inner
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if author == "" {
|
||||
author = r.DublinCoreCreator
|
||||
}
|
||||
|
||||
return sanitizer.StripTags(strings.TrimSpace(author))
|
||||
}
|
||||
|
||||
func (r *rssItem) entryHash() string {
|
||||
for _, value := range []string{r.GUID.Data, r.entryURL()} {
|
||||
if value != "" {
|
||||
return crypto.Hash(value)
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssItem) entryTitle() string {
|
||||
var title string
|
||||
|
||||
for _, rssTitle := range r.Title {
|
||||
switch rssTitle.XMLName.Space {
|
||||
case "http://search.yahoo.com/mrss/":
|
||||
// Ignore title in media namespace
|
||||
case "http://purl.org/dc/elements/1.1/":
|
||||
title = rssTitle.Data
|
||||
default:
|
||||
title = rssTitle.Data
|
||||
}
|
||||
|
||||
if title != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return html.UnescapeString(strings.TrimSpace(title))
|
||||
}
|
||||
|
||||
func (r *rssItem) entryContent() string {
|
||||
for _, value := range []string{r.DublinCoreContent, r.Description, r.PodcastDescription()} {
|
||||
if value != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssItem) entryURL() string {
|
||||
if r.FeedBurnerLink != "" {
|
||||
return r.FeedBurnerLink
|
||||
}
|
||||
|
||||
for _, link := range r.Links {
|
||||
if link.XMLName.Space == "http://www.w3.org/2005/Atom" && link.Href != "" && isValidLinkRelation(link.Rel) {
|
||||
return strings.TrimSpace(link.Href)
|
||||
}
|
||||
|
||||
if link.Data != "" {
|
||||
return strings.TrimSpace(link.Data)
|
||||
}
|
||||
}
|
||||
|
||||
// Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
|
||||
// isPermaLink is optional, its default value is true.
|
||||
// If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
|
||||
if r.GUID.IsPermaLink == "true" || r.GUID.IsPermaLink == "" {
|
||||
return strings.TrimSpace(r.GUID.Data)
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
func (r *rssItem) entryEnclosures() model.EnclosureList {
|
||||
enclosures := make(model.EnclosureList, 0)
|
||||
duplicates := make(map[string]bool)
|
||||
|
||||
for _, mediaThumbnail := range r.AllMediaThumbnails() {
|
||||
if _, found := duplicates[mediaThumbnail.URL]; !found {
|
||||
duplicates[mediaThumbnail.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaThumbnail.URL,
|
||||
MimeType: mediaThumbnail.MimeType(),
|
||||
Size: mediaThumbnail.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, enclosure := range r.EnclosureLinks {
|
||||
enclosureURL := enclosure.URL
|
||||
|
||||
if r.FeedBurnerEnclosureLink != "" {
|
||||
filename := path.Base(r.FeedBurnerEnclosureLink)
|
||||
if strings.Contains(enclosureURL, filename) {
|
||||
enclosureURL = r.FeedBurnerEnclosureLink
|
||||
}
|
||||
}
|
||||
|
||||
if enclosureURL == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if _, found := duplicates[enclosureURL]; !found {
|
||||
duplicates[enclosureURL] = true
|
||||
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: enclosureURL,
|
||||
MimeType: enclosure.Type,
|
||||
Size: enclosure.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaContent := range r.AllMediaContents() {
|
||||
if _, found := duplicates[mediaContent.URL]; !found {
|
||||
duplicates[mediaContent.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaContent.URL,
|
||||
MimeType: mediaContent.MimeType(),
|
||||
Size: mediaContent.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
for _, mediaPeerLink := range r.AllMediaPeerLinks() {
|
||||
if _, found := duplicates[mediaPeerLink.URL]; !found {
|
||||
duplicates[mediaPeerLink.URL] = true
|
||||
enclosures = append(enclosures, &model.Enclosure{
|
||||
URL: mediaPeerLink.URL,
|
||||
MimeType: mediaPeerLink.MimeType(),
|
||||
Size: mediaPeerLink.Size(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return enclosures
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCategories() []string {
|
||||
var categoryList []string
|
||||
|
||||
for _, rssCategory := range r.Categories {
|
||||
if strings.Contains(rssCategory.Inner, "<![CDATA[") {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Data))
|
||||
} else {
|
||||
categoryList = append(categoryList, strings.TrimSpace(rssCategory.Inner))
|
||||
}
|
||||
}
|
||||
|
||||
return categoryList
|
||||
}
|
||||
|
||||
func (r *rssItem) entryCommentsURL() string {
|
||||
for _, commentLink := range r.CommentLinks {
|
||||
if commentLink.XMLName.Space == "" {
|
||||
commentsURL := strings.TrimSpace(commentLink.Data)
|
||||
// The comments URL is supposed to be absolute (some feeds publishes incorrect comments URL)
|
||||
// See https://cyber.harvard.edu/rss/rss.html#ltcommentsgtSubelementOfLtitemgt
|
||||
if url.IsAbsoluteURL(commentsURL) {
|
||||
return commentsURL
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// isValidLinkRelation reports whether an Atom link relation identifies a
// usable entry URL: the standard relations, an empty relation, or any
// extension relation expressed as a URI.
func isValidLinkRelation(rel string) bool {
	switch rel {
	case "", "alternate", "enclosure", "related", "self", "via":
		return true
	}
	return strings.HasPrefix(rel, "http")
}
|
523
internal/reader/sanitizer/sanitizer.go
Normal file
523
internal/reader/sanitizer/sanitizer.go
Normal file
|
@ -0,0 +1,523 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/url"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var (
	// youtubeEmbedRegex captures everything after /embed/ in a YouTube embed
	// URL (scheme-relative or absolute), used to rewrite iframes.
	youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
)
|
||||
|
||||
// Sanitize returns safe HTML.
//
// The input is re-tokenized and rebuilt from scratch: only allow-listed tags
// are kept, their attributes are filtered and rewritten relative to baseURL,
// and the content of blocked tags (script, style, noscript) is dropped.
func Sanitize(baseURL, input string) string {
	var buffer bytes.Buffer
	// tagStack records opened tags that were emitted, so only their closing
	// tags are emitted too.
	var tagStack []string
	var parentTag string
	// Depth counter: >0 while inside a blocked tag, so nested text is skipped.
	blacklistedTagDepth := 0

	tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
	for {
		if tokenizer.Next() == html.ErrorToken {
			err := tokenizer.Err()
			if err == io.EOF {
				return buffer.String()
			}

			// Any tokenizer error other than EOF yields an empty result.
			return ""
		}

		token := tokenizer.Token()
		switch token.Type {
		case html.TextToken:
			if blacklistedTagDepth > 0 {
				continue
			}

			// An iframe element never has fallback content.
			// See https://www.w3.org/TR/2010/WD-html5-20101019/the-iframe-element.html#the-iframe-element
			// NOTE(review): parentTag is set on start tags but never reset on
			// end tags — presumably acceptable because iframes carry no nested
			// markup; confirm.
			if parentTag == "iframe" {
				continue
			}

			buffer.WriteString(html.EscapeString(token.Data))
		case html.StartTagToken:
			tagName := token.DataAtom.String()
			parentTag = tagName

			if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
				attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)

				// Tags such as <a>, <img>, <iframe> are dropped entirely when
				// their required attribute did not survive sanitization.
				if hasRequiredAttributes(tagName, attrNames) {
					if len(attrNames) > 0 {
						buffer.WriteString("<" + tagName + " " + htmlAttributes + ">")
					} else {
						buffer.WriteString("<" + tagName + ">")
					}

					tagStack = append(tagStack, tagName)
				}
			} else if isBlockedTag(tagName) {
				blacklistedTagDepth++
			}
		case html.EndTagToken:
			tagName := token.DataAtom.String()
			// Only emit a closing tag when the matching opening tag was kept.
			if isValidTag(tagName) && inList(tagName, tagStack) {
				buffer.WriteString(fmt.Sprintf("</%s>", tagName))
			} else if isBlockedTag(tagName) {
				blacklistedTagDepth--
			}
		case html.SelfClosingTagToken:
			tagName := token.DataAtom.String()
			if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
				attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)

				if hasRequiredAttributes(tagName, attrNames) {
					if len(attrNames) > 0 {
						buffer.WriteString("<" + tagName + " " + htmlAttributes + "/>")
					} else {
						buffer.WriteString("<" + tagName + "/>")
					}
				}
			}
		}
	}
}
|
||||
|
||||
// sanitizeAttributes filters, rewrites and augments the attributes of one
// tag. It returns the list of kept attribute names and the serialized
// key="value" pairs joined with spaces.
func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([]string, string) {
	var htmlAttrs, attrNames []string
	var err error
	var isImageLargerThanLayout bool
	var isAnchorLink bool

	if tagName == "img" {
		imgWidth := getIntegerAttributeValue("width", attributes)
		// Images wider than 750px have their explicit dimensions stripped so
		// they scale to the layout width.
		isImageLargerThanLayout = imgWidth > 750
	}

	for _, attribute := range attributes {
		value := attribute.Val

		if !isValidAttribute(tagName, attribute.Key) {
			continue
		}

		if (tagName == "img" || tagName == "source") && attribute.Key == "srcset" {
			// Each URL inside the srcset is made absolute.
			value = sanitizeSrcsetAttr(baseURL, value)
		}

		if tagName == "img" && (attribute.Key == "width" || attribute.Key == "height") {
			// Drop non-numeric or non-positive dimensions.
			if !isPositiveInteger(value) {
				continue
			}

			if isImageLargerThanLayout {
				continue
			}
		}

		if isExternalResourceAttribute(attribute.Key) {
			if tagName == "iframe" {
				if isValidIframeSource(baseURL, attribute.Val) {
					value = rewriteIframeURL(attribute.Val)
				} else {
					continue
				}
			} else if tagName == "img" && attribute.Key == "src" && isValidDataAttribute(attribute.Val) {
				// Inline data: URIs are kept only for allow-listed image types.
				value = attribute.Val
			} else if isAnchor("a", attribute) {
				// Fragment-only links are kept untouched; they also suppress
				// the extra rel/target/referrerpolicy attributes added below.
				value = attribute.Val
				isAnchorLink = true
			} else {
				value, err = url.AbsoluteURL(baseURL, value)
				if err != nil {
					continue
				}

				if !hasValidURIScheme(value) || isBlockedResource(value) {
					continue
				}
			}
		}

		attrNames = append(attrNames, attribute.Key)
		htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
	}

	if !isAnchorLink {
		extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
		if len(extraAttrNames) > 0 {
			attrNames = append(attrNames, extraAttrNames...)
			htmlAttrs = append(htmlAttrs, extraHTMLAttributes...)
		}
	}

	return attrNames, strings.Join(htmlAttrs, " ")
}
|
||||
|
||||
// getExtraAttributes returns the hardening attributes that are forced onto
// certain tags (safe link targets, sandboxed iframes, lazy-loaded media),
// as parallel slices of attribute names and serialized attributes.
func getExtraAttributes(tagName string) ([]string, []string) {
	switch tagName {
	case "a":
		names := []string{"rel", "target", "referrerpolicy"}
		attrs := []string{`rel="noopener noreferrer"`, `target="_blank"`, `referrerpolicy="no-referrer"`}
		return names, attrs
	case "video", "audio":
		return []string{"controls"}, []string{"controls"}
	case "iframe":
		names := []string{"sandbox", "loading"}
		attrs := []string{`sandbox="allow-scripts allow-same-origin allow-popups"`, `loading="lazy"`}
		return names, attrs
	case "img":
		return []string{"loading"}, []string{`loading="lazy"`}
	}
	return nil, nil
}
|
||||
|
||||
func isValidTag(tagName string) bool {
|
||||
for element := range getTagAllowList() {
|
||||
if tagName == element {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func isValidAttribute(tagName, attributeName string) bool {
|
||||
for element, attributes := range getTagAllowList() {
|
||||
if tagName == element {
|
||||
if inList(attributeName, attributes) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// isExternalResourceAttribute reports whether the attribute points to an
// external resource and therefore needs URL validation/rewriting.
func isExternalResourceAttribute(attribute string) bool {
	switch attribute {
	case "src", "href", "poster", "cite":
		return true
	}
	return false
}
|
||||
|
||||
func isPixelTracker(tagName string, attributes []html.Attribute) bool {
|
||||
if tagName == "img" {
|
||||
hasHeight := false
|
||||
hasWidth := false
|
||||
|
||||
for _, attribute := range attributes {
|
||||
if attribute.Key == "height" && attribute.Val == "1" {
|
||||
hasHeight = true
|
||||
}
|
||||
|
||||
if attribute.Key == "width" && attribute.Val == "1" {
|
||||
hasWidth = true
|
||||
}
|
||||
}
|
||||
|
||||
return hasHeight && hasWidth
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// hasRequiredAttributes reports whether the tag carries at least one of the
// attributes it cannot function without (e.g. an <a> without href is useless).
// Tags with no requirement always pass. A switch replaces the previous
// per-call map construction.
func hasRequiredAttributes(tagName string, attributes []string) bool {
	var required []string

	switch tagName {
	case "a":
		required = []string{"href"}
	case "iframe", "img":
		required = []string{"src"}
	case "source":
		required = []string{"src", "srcset"}
	default:
		return true
	}

	for _, attribute := range attributes {
		for _, requiredAttribute := range required {
			if attribute == requiredAttribute {
				return true
			}
		}
	}

	return false
}
|
||||
|
||||
// hasValidURIScheme reports whether src begins with an allow-listed URI
// scheme prefix.
// See https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
func hasValidURIScheme(src string) bool {
	allowedPrefixes := []string{
		"apt:",
		"bitcoin:",
		"callto:",
		"dav:",
		"davs:",
		"ed2k://",
		"facetime://",
		"feed:",
		"ftp://",
		"geo:",
		"gopher://",
		"git://",
		"http://",
		"https://",
		"irc://",
		"irc6://",
		"ircs://",
		"itms://",
		"itms-apps://",
		"magnet:",
		"mailto:",
		"news:",
		"nntp:",
		"rtmp://",
		"sip:",
		"sips:",
		"skype:",
		"spotify:",
		"ssh://",
		"sftp://",
		"steam://",
		"svn://",
		"svn+ssh://",
		"tel:",
		"webcal://",
		"xmpp:",
	}

	for _, prefix := range allowedPrefixes {
		if strings.HasPrefix(src, prefix) {
			return true
		}
	}

	return false
}
|
||||
|
||||
// isBlockedResource reports whether the URL contains a known tracker or
// social-sharing host fragment and must be dropped.
func isBlockedResource(src string) bool {
	blockedFragments := []string{
		"feedsportal.com",
		"api.flattr.com",
		"stats.wordpress.com",
		"plus.google.com/share",
		"twitter.com/share",
		"feeds.feedburner.com",
	}

	for _, fragment := range blockedFragments {
		if strings.Contains(src, fragment) {
			return true
		}
	}

	return false
}
|
||||
|
||||
func isValidIframeSource(baseURL, src string) bool {
|
||||
whitelist := []string{
|
||||
"//www.youtube.com",
|
||||
"http://www.youtube.com",
|
||||
"https://www.youtube.com",
|
||||
"https://www.youtube-nocookie.com",
|
||||
"http://player.vimeo.com",
|
||||
"https://player.vimeo.com",
|
||||
"http://www.dailymotion.com",
|
||||
"https://www.dailymotion.com",
|
||||
"http://vk.com",
|
||||
"https://vk.com",
|
||||
"http://soundcloud.com",
|
||||
"https://soundcloud.com",
|
||||
"http://w.soundcloud.com",
|
||||
"https://w.soundcloud.com",
|
||||
"http://bandcamp.com",
|
||||
"https://bandcamp.com",
|
||||
"https://cdn.embedly.com",
|
||||
"https://player.bilibili.com",
|
||||
}
|
||||
|
||||
// allow iframe from same origin
|
||||
if url.Domain(baseURL) == url.Domain(src) {
|
||||
return true
|
||||
}
|
||||
|
||||
// allow iframe from custom invidious instance
|
||||
if config.Opts != nil && config.Opts.InvidiousInstance() == url.Domain(src) {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, prefix := range whitelist {
|
||||
if strings.HasPrefix(src, prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// getTagAllowList returns the map of allowed HTML tags to the attributes
// each one may keep. A composite literal replaces the previous long chain of
// assignments, which also contained a duplicate "del" entry.
func getTagAllowList() map[string][]string {
	return map[string][]string{
		"img":        {"alt", "title", "src", "srcset", "sizes", "width", "height"},
		"picture":    {},
		"audio":      {"src"},
		"video":      {"poster", "height", "width", "src"},
		"source":     {"src", "type", "srcset", "sizes", "media"},
		"dt":         {"id"},
		"dd":         {"id"},
		"dl":         {"id"},
		"table":      {},
		"caption":    {},
		"thead":      {},
		// "tfoot" is the actual HTML element; "tfooter" is kept from the
		// previous list for backward compatibility.
		"tfoot":      {},
		"tfooter":    {},
		"tr":         {},
		"td":         {"rowspan", "colspan"},
		"th":         {"rowspan", "colspan"},
		"h1":         {"id"},
		"h2":         {"id"},
		"h3":         {"id"},
		"h4":         {"id"},
		"h5":         {"id"},
		"h6":         {"id"},
		"strong":     {},
		"em":         {},
		"code":       {},
		"pre":        {},
		"blockquote": {},
		"q":          {"cite"},
		"p":          {},
		"ul":         {"id"},
		"li":         {"id"},
		"ol":         {"id"},
		"br":         {},
		"del":        {},
		"a":          {"href", "title", "id"},
		"figure":     {},
		"figcaption": {},
		"cite":       {},
		"time":       {"datetime"},
		"abbr":       {"title"},
		"acronym":    {"title"},
		"wbr":        {},
		"dfn":        {},
		"sub":        {},
		"sup":        {"id"},
		"var":        {},
		"samp":       {},
		"s":          {},
		"ins":        {},
		"kbd":        {},
		"rp":         {},
		"rt":         {},
		"rtc":        {},
		"ruby":       {},
		"iframe":     {"width", "height", "frameborder", "src", "allowfullscreen"},
	}
}
|
||||
|
||||
// inList reports whether needle occurs in haystack.
func inList(needle string, haystack []string) bool {
	for _, candidate := range haystack {
		if candidate == needle {
			return true
		}
	}
	return false
}
|
||||
|
||||
func rewriteIframeURL(link string) string {
|
||||
matches := youtubeEmbedRegex.FindStringSubmatch(link)
|
||||
if len(matches) == 2 {
|
||||
return config.Opts.YouTubeEmbedUrlOverride() + matches[1]
|
||||
}
|
||||
|
||||
return link
|
||||
}
|
||||
|
||||
// isBlockedTag reports whether the tag's entire content must be dropped
// (scripts, styles and their noscript fallbacks).
func isBlockedTag(tagName string) bool {
	switch tagName {
	case "noscript", "script", "style":
		return true
	}
	return false
}
|
||||
|
||||
// sanitizeSrcsetAttr rewrites every URL inside a srcset attribute so that it
// is absolute relative to baseURL, then re-serializes the attribute.
func sanitizeSrcsetAttr(baseURL, value string) string {
	imageCandidates := ParseSrcSetAttribute(value)

	for _, imageCandidate := range imageCandidates {
		absoluteURL, err := url.AbsoluteURL(baseURL, imageCandidate.ImageURL)
		if err == nil {
			// NOTE(review): this assignment only affects the serialized output
			// if ParseSrcSetAttribute returns pointer elements; with value
			// elements the range copy would be mutated instead — confirm
			// against the srcset parser.
			imageCandidate.ImageURL = absoluteURL
		}
	}

	return imageCandidates.String()
}
|
||||
|
||||
// isValidDataAttribute reports whether a data: URI uses one of the
// allow-listed image MIME types.
func isValidDataAttribute(value string) bool {
	allowedPrefixes := []string{
		"data:image/avif",
		"data:image/apng",
		"data:image/png",
		"data:image/svg",
		"data:image/svg+xml",
		"data:image/jpg",
		"data:image/jpeg",
		"data:image/gif",
		"data:image/webp",
	}

	for _, prefix := range allowedPrefixes {
		if strings.HasPrefix(value, prefix) {
			return true
		}
	}

	return false
}
|
||||
|
||||
func isAnchor(tagName string, attribute html.Attribute) bool {
|
||||
return tagName == "a" && attribute.Key == "href" && strings.HasPrefix(attribute.Val, "#")
|
||||
}
|
||||
|
||||
// isPositiveInteger reports whether value parses as a base-10 integer
// strictly greater than zero.
func isPositiveInteger(value string) bool {
	number, err := strconv.Atoi(value)
	return err == nil && number > 0
}
|
||||
|
||||
func getAttributeValue(name string, attributes []html.Attribute) string {
|
||||
for _, attribute := range attributes {
|
||||
if attribute.Key == name {
|
||||
return attribute.Val
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getIntegerAttributeValue(name string, attributes []html.Attribute) int {
|
||||
number, _ := strconv.Atoi(getAttributeValue(name, attributes))
|
||||
return number
|
||||
}
|
613
internal/reader/sanitizer/sanitizer_test.go
Normal file
613
internal/reader/sanitizer/sanitizer_test.go
Normal file
|
@ -0,0 +1,613 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
)
|
||||
|
||||
// TestMain initializes the global configuration the sanitizer reads
// (e.g. for iframe/Invidious checks) before running the test suite.
func TestMain(m *testing.M) {
	config.Opts = config.NewOptions()
	exitCode := m.Run()
	os.Exit(exitCode)
}
||||
|
||||
// TestValidInput verifies that already-clean markup passes through unchanged.
func TestValidInput(t *testing.T) {
	input := `<p>This is a <strong>text</strong> with an image: <img src="http://example.org/" alt="Test" loading="lazy">.</p>`
	output := Sanitize("http://example.org/", input)

	if input != output {
		t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
	}
}

// TestImgWithWidthAndHeightAttribute verifies that small explicit image
// dimensions are preserved.
func TestImgWithWidthAndHeightAttribute(t *testing.T) {
	input := `<img src="https://example.org/image.png" width="10" height="20">`
	expected := `<img src="https://example.org/image.png" width="10" height="20" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestImgWithWidthAndHeightAttributeLargerThanMinifluxLayout verifies that
// dimensions wider than the layout (750px) are stripped.
func TestImgWithWidthAndHeightAttributeLargerThanMinifluxLayout(t *testing.T) {
	input := `<img src="https://example.org/image.png" width="1200" height="675">`
	expected := `<img src="https://example.org/image.png" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestImgWithIncorrectWidthAndHeightAttribute verifies that non-numeric
// dimensions are dropped.
func TestImgWithIncorrectWidthAndHeightAttribute(t *testing.T) {
	input := `<img src="https://example.org/image.png" width="10px" height="20px">`
	expected := `<img src="https://example.org/image.png" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestImgWithTextDataURL verifies that non-image data: URIs are removed,
// which drops the whole img element (src is required).
func TestImgWithTextDataURL(t *testing.T) {
	input := `<img src="data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==" alt="Example">`
	expected := ``
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestImgWithDataURL verifies that allow-listed image data: URIs are kept.
func TestImgWithDataURL(t *testing.T) {
	input := `<img src="data:image/gif;base64,test" alt="Example">`
	expected := `<img src="data:image/gif;base64,test" alt="Example" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestImgWithSrcset verifies that every srcset candidate URL is made absolute.
func TestImgWithSrcset(t *testing.T) {
	input := `<img srcset="example-320w.jpg, example-480w.jpg 1.5x, example-640w.jpg 2x, example-640w.jpg 640w" src="example-640w.jpg" alt="Example">`
	expected := `<img srcset="http://example.org/example-320w.jpg, http://example.org/example-480w.jpg 1.5x, http://example.org/example-640w.jpg 2x, http://example.org/example-640w.jpg 640w" src="http://example.org/example-640w.jpg" alt="Example" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestSourceWithSrcsetAndMedia verifies srcset rewriting on <source> inside
// <picture>, with the media attribute preserved.
func TestSourceWithSrcsetAndMedia(t *testing.T) {
	input := `<picture><source media="(min-width: 800px)" srcset="elva-800w.jpg"></picture>`
	expected := `<picture><source media="(min-width: 800px)" srcset="http://example.org/elva-800w.jpg"></picture>`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestMediumImgWithSrcset verifies that disallowed attributes (class) and
// oversized dimensions are stripped while srcset/sizes survive.
func TestMediumImgWithSrcset(t *testing.T) {
	input := `<img alt="Image for post" class="t u v ef aj" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" width="2730" height="3407">`
	expected := `<img alt="Image for post" src="https://miro.medium.com/max/5460/1*aJ9JibWDqO81qMfNtqgqrw.jpeg" srcset="https://miro.medium.com/max/552/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 276w, https://miro.medium.com/max/1000/1*aJ9JibWDqO81qMfNtqgqrw.jpeg 500w" sizes="500px" loading="lazy">`
	output := Sanitize("http://example.org/", input)

	if output != expected {
		t.Errorf(`Wrong output: %s`, output)
	}
}

// TestSelfClosingTags verifies that self-closing syntax is preserved.
func TestSelfClosingTags(t *testing.T) {
	input := `<p>This <br> is a <strong>text</strong> <br/>with an image: <img src="http://example.org/" alt="Test" loading="lazy"/>.</p>`
	output := Sanitize("http://example.org/", input)

	if input != output {
		t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
	}
}

// TestTable verifies that table markup with rowspan/colspan passes through.
func TestTable(t *testing.T) {
	input := `<table><tr><th>A</th><th colspan="2">B</th></tr><tr><td>C</td><td>D</td><td>E</td></tr></table>`
	output := Sanitize("http://example.org/", input)

	if input != output {
		t.Errorf(`Wrong output: "%s" != "%s"`, input, output)
	}
}
|
||||
|
||||
func TestRelativeURL(t *testing.T) {
|
||||
input := `This <a href="/test.html">link is relative</a> and this image: <img src="../folder/image.png"/>`
|
||||
expected := `This <a href="http://example.org/test.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a> and this image: <img src="http://example.org/folder/image.png" loading="lazy"/>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestProtocolRelativeURL(t *testing.T) {
|
||||
input := `This <a href="//static.example.org/index.html">link is relative</a>.`
|
||||
expected := `This <a href="https://static.example.org/index.html" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">link is relative</a>.`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidTag(t *testing.T) {
|
||||
input := `<p>My invalid <b>tag</b>.</p>`
|
||||
expected := `<p>My invalid tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVideoTag(t *testing.T) {
|
||||
input := `<p>My valid <video src="videofile.webm" autoplay poster="posterimage.jpg">fallback</video>.</p>`
|
||||
expected := `<p>My valid <video src="http://example.org/videofile.webm" poster="http://example.org/posterimage.jpg" controls>fallback</video>.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAudioAndSourceTag(t *testing.T) {
|
||||
input := `<p>My music <audio controls="controls"><source src="foo.wav" type="audio/wav"></audio>.</p>`
|
||||
expected := `<p>My music <audio controls><source src="http://example.org/foo.wav" type="audio/wav"></audio>.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnknownTag(t *testing.T) {
|
||||
input := `<p>My invalid <unknown>tag</unknown>.</p>`
|
||||
expected := `<p>My invalid tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidNestedTag(t *testing.T) {
|
||||
input := `<p>My invalid <b>tag with some <em>valid</em> tag</b>.</p>`
|
||||
expected := `<p>My invalid tag with some <em>valid</em> tag.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidIFrame(t *testing.T) {
|
||||
input := `<iframe src="http://example.org/"></iframe>`
|
||||
expected := ``
|
||||
output := Sanitize("http://example.com/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIFrameWithChildElements(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube.com/"><p>test</p></iframe>`
|
||||
expected := `<iframe src="https://www.youtube.com/" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.com/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAnchorLink(t *testing.T) {
|
||||
input := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
|
||||
expected := `<p>This link is <a href="#some-anchor">an anchor</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidURLScheme(t *testing.T) {
|
||||
input := `<p>This link is <a src="file:///etc/passwd">not valid</a></p>`
|
||||
expected := `<p>This link is not valid</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAPTURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="apt:some-package?channel=test">valid</a></p>`
|
||||
expected := `<p>This link is <a href="apt:some-package?channel=test" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBitcoinURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W">valid</a></p>`
|
||||
expected := `<p>This link is <a href="bitcoin:175tWpb8K1S7NmH4Zx6rewF9WQrcZv245W" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCallToURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="callto:12345679">valid</a></p>`
|
||||
expected := `<p>This link is <a href="callto:12345679" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFeedURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="feed://example.com/rss.xml">valid</a></p>`
|
||||
expected := `<p>This link is <a href="feed://example.com/rss.xml" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="feed:https://example.com/rss.xml">valid</a></p>`
|
||||
expected = `<p>This link is <a href="feed:https://example.com/rss.xml" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeoURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="geo:13.4125,103.8667">valid</a></p>`
|
||||
expected := `<p>This link is <a href="geo:13.4125,103.8667" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestItunesURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="itms://itunes.com/apps/my-app-name">valid</a></p>`
|
||||
expected := `<p>This link is <a href="itms://itunes.com/apps/my-app-name" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name">valid</a></p>`
|
||||
expected = `<p>This link is <a href="itms-apps://itunes.com/apps/my-app-name" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMagnetURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7">valid</a></p>`
|
||||
expected := `<p>This link is <a href="magnet:?xt.1=urn:sha1:YNCKHTQCWBTRNJIV4WNAE52SJUQCZO5C&xt.2=urn:sha1:TXGCZQTH26NL6OUQAJJPFALHG2LTGBC7" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMailtoURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&body=My%20idea%20is%3A%20%0A">valid</a></p>`
|
||||
expected := `<p>This link is <a href="mailto:jsmith@example.com?subject=A%20Test&body=My%20idea%20is%3A%20%0A" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewsURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="news://news.server.example/*">valid</a></p>`
|
||||
expected := `<p>This link is <a href="news://news.server.example/*" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="news:example.group.this">valid</a></p>`
|
||||
expected = `<p>This link is <a href="news:example.group.this" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="nntp://news.server.example/example.group.this">valid</a></p>`
|
||||
expected = `<p>This link is <a href="nntp://news.server.example/example.group.this" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRTMPURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov">valid</a></p>`
|
||||
expected := `<p>This link is <a href="rtmp://mycompany.com/vod/mp4:mycoolvideo.mov" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSIPURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone">valid</a></p>`
|
||||
expected := `<p>This link is <a href="sip:+1-212-555-1212:1234@gateway.com;user=phone" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&priority=urgent">valid</a></p>`
|
||||
expected = `<p>This link is <a href="sips:alice@atlanta.com?subject=project%20x&priority=urgent" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSkypeURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="skype:echo123?call">valid</a></p>`
|
||||
expected := `<p>This link is <a href="skype:echo123?call" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSpotifyURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx">valid</a></p>`
|
||||
expected := `<p>This link is <a href="spotify:track:2jCnn1QPQ3E8ExtLe6INsx" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSteamURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="steam://settings/account">valid</a></p>`
|
||||
expected := `<p>This link is <a href="steam://settings/account" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSubversionURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="svn://example.org">valid</a></p>`
|
||||
expected := `<p>This link is <a href="svn://example.org" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
|
||||
input = `<p>This link is <a href="svn+ssh://example.org">valid</a></p>`
|
||||
expected = `<p>This link is <a href="svn+ssh://example.org" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output = Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTelURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="tel:+1-201-555-0123">valid</a></p>`
|
||||
expected := `<p>This link is <a href="tel:+1-201-555-0123" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebcalURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="webcal://example.com/calendar.ics">valid</a></p>`
|
||||
expected := `<p>This link is <a href="webcal://example.com/calendar.ics" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestXMPPURIScheme(t *testing.T) {
|
||||
input := `<p>This link is <a href="xmpp:user@host?subscribe&type=subscribed">valid</a></p>`
|
||||
expected := `<p>This link is <a href="xmpp:user@host?subscribe&type=subscribed" rel="noopener noreferrer" target="_blank" referrerpolicy="no-referrer">valid</a></p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlacklistedLink(t *testing.T) {
|
||||
input := `<p>This image is not valid <img src="https://stats.wordpress.com/some-tracker"></p>`
|
||||
expected := `<p>This image is not valid </p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPixelTracker(t *testing.T) {
|
||||
input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
|
||||
expected := `<p> and </p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestXmlEntities(t *testing.T) {
|
||||
input := `<pre>echo "test" > /etc/hosts</pre>`
|
||||
expected := `<pre>echo "test" > /etc/hosts</pre>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEspaceAttributes(t *testing.T) {
|
||||
input := `<td rowspan="<b>test</b>">test</td>`
|
||||
expected := `<td rowspan="<b>test</b>">test</td>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="http://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceSecureYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&controls=0"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceYoutubeURLWithCustomURL(t *testing.T) {
|
||||
os.Clearenv()
|
||||
os.Setenv("YOUTUBE_EMBED_URL_OVERRIDE", "https://invidious.custom/embed/")
|
||||
|
||||
var err error
|
||||
parser := config.NewParser()
|
||||
config.Opts, err = parser.ParseEnvironmentVariables()
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing failure: %v`, err)
|
||||
}
|
||||
|
||||
input := `<iframe src="https://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
|
||||
expected := `<iframe src="https://invidious.custom/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceIframeURL(t *testing.T) {
|
||||
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0"></iframe>`
|
||||
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceNoScript(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><noscript>Inside <code>noscript</code> tag with an image: <img src="http://example.org/" alt="Test" loading="lazy"></noscript><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceScript(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><script type="text/javascript">alert("1");</script><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceStyle(t *testing.T) {
|
||||
input := `<p>Before paragraph.</p><style>body { background-color: #ff0000; }</style><p>After paragraph.</p>`
|
||||
expected := `<p>Before paragraph.</p><p>After paragraph.</p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
81
internal/reader/sanitizer/srcset.go
Normal file
81
internal/reader/sanitizer/srcset.go
Normal file
|
@ -0,0 +1,81 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ImageCandidate represents one image source parsed from a srcset attribute:
// a URL plus an optional width ("640w") or pixel-density ("2x") descriptor.
type ImageCandidate struct {
	ImageURL   string
	Descriptor string
}

// ImageCandidates is the ordered list of candidates from one srcset attribute.
type ImageCandidates []*ImageCandidate

// String serializes the candidates back into a valid srcset attribute value,
// e.g. `a.jpg, b.jpg 2x`.
func (c ImageCandidates) String() string {
	// Pre-size: the output has exactly one entry per candidate.
	htmlCandidates := make([]string, 0, len(c))

	for _, imageCandidate := range c {
		htmlCandidate := imageCandidate.ImageURL
		if imageCandidate.Descriptor != "" {
			htmlCandidate += " " + imageCandidate.Descriptor
		}
		htmlCandidates = append(htmlCandidates, htmlCandidate)
	}

	return strings.Join(htmlCandidates, ", ")
}

// ParseSrcSetAttribute returns the list of image candidates from the set.
// Candidates with an invalid or extra descriptor are silently dropped.
// NOTE(review): candidates are split on the exact separator ", " rather than
// the full grammar below — commas not followed by a space stay inside a URL.
// https://html.spec.whatwg.org/#parse-a-srcset-attribute
func ParseSrcSetAttribute(attributeValue string) (imageCandidates ImageCandidates) {
	for _, unparsedCandidate := range strings.Split(attributeValue, ", ") {
		if candidate, err := parseImageCandidate(unparsedCandidate); err == nil {
			imageCandidates = append(imageCandidates, candidate)
		}
	}

	return imageCandidates
}

// parseImageCandidate parses a single "URL [descriptor]" entry.
// It returns an error when there is more than one descriptor or the
// descriptor is malformed.
func parseImageCandidate(input string) (*ImageCandidate, error) {
	// A single TrimSpace is enough (the original trimmed twice).
	parts := strings.Split(strings.TrimSpace(input), " ")

	switch len(parts) {
	case 1:
		return &ImageCandidate{ImageURL: parts[0]}, nil
	case 2:
		if !isValidWidthOrDensityDescriptor(parts[1]) {
			return nil, fmt.Errorf(`srcset: invalid descriptor`)
		}
		return &ImageCandidate{ImageURL: parts[0], Descriptor: parts[1]}, nil
	default:
		return nil, fmt.Errorf(`srcset: invalid number of descriptors`)
	}
}

// isValidWidthOrDensityDescriptor reports whether value is a width ("640w")
// or pixel-density ("1.5x") descriptor: a number followed by 'w' or 'x'.
func isValidWidthOrDensityDescriptor(value string) bool {
	if value == "" {
		return false
	}

	lastChar := value[len(value)-1:]
	if lastChar != "w" && lastChar != "x" {
		return false
	}

	_, err := strconv.ParseFloat(value[:len(value)-1], 32)
	return err == nil
}
|
84
internal/reader/sanitizer/srcset_test.go
Normal file
84
internal/reader/sanitizer/srcset_test.go
Normal file
|
@ -0,0 +1,84 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseSrcSetAttributeWithRelativeURLs(t *testing.T) {
|
||||
input := `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 4 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `example-320w.jpg, example-480w.jpg 1.5x, example-640,w.jpg 2x, example-640w.jpg 640w` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSrcSetAttributeWithAbsoluteURLs(t *testing.T) {
|
||||
input := `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 2 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `http://example.org/example-320w.jpg 320w, http://example.org/example-480w.jpg 1.5x` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSrcSetAttributeWithOneCandidate(t *testing.T) {
|
||||
input := `http://example.org/example-320w.jpg`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 1 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `http://example.org/example-320w.jpg` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSrcSetAttributeWithCommaURL(t *testing.T) {
|
||||
input := `http://example.org/example,a:b/d.jpg , example-480w.jpg 1.5x`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 2 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `http://example.org/example,a:b/d.jpg, example-480w.jpg 1.5x` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSrcSetAttributeWithIncorrectDescriptor(t *testing.T) {
|
||||
input := `http://example.org/example-320w.jpg test`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 0 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSrcSetAttributeWithTooManyDescriptors(t *testing.T) {
|
||||
input := `http://example.org/example-320w.jpg 10w 1x`
|
||||
candidates := ParseSrcSetAttribute(input)
|
||||
|
||||
if len(candidates) != 0 {
|
||||
t.Error(`Incorrect number of candidates`)
|
||||
}
|
||||
|
||||
if candidates.String() != `` {
|
||||
t.Errorf(`Unexpected string output`)
|
||||
}
|
||||
}
|
34
internal/reader/sanitizer/strip_tags.go
Normal file
34
internal/reader/sanitizer/strip_tags.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// StripTags removes all HTML/XML tags from the input string.
|
||||
func StripTags(input string) string {
|
||||
tokenizer := html.NewTokenizer(bytes.NewBufferString(input))
|
||||
var buffer bytes.Buffer
|
||||
|
||||
for {
|
||||
if tokenizer.Next() == html.ErrorToken {
|
||||
err := tokenizer.Err()
|
||||
if err == io.EOF {
|
||||
return buffer.String()
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
token := tokenizer.Token()
|
||||
switch token.Type {
|
||||
case html.TextToken:
|
||||
buffer.WriteString(token.Data)
|
||||
}
|
||||
}
|
||||
}
|
16
internal/reader/sanitizer/strip_tags_test.go
Normal file
16
internal/reader/sanitizer/strip_tags_test.go
Normal file
|
@ -0,0 +1,16 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer // import "miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestStripTags(t *testing.T) {
|
||||
input := `This <a href="/test.html">link is relative</a> and <strong>this</strong> image: <img src="../folder/image.png"/>`
|
||||
expected := `This link is relative and this image: `
|
||||
output := StripTags(input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
22
internal/reader/sanitizer/truncate.go
Normal file
22
internal/reader/sanitizer/truncate.go
Normal file
|
@ -0,0 +1,22 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "strings"
|
||||
|
||||
func TruncateHTML(input string, max int) string {
|
||||
text := StripTags(input)
|
||||
text = strings.ReplaceAll(text, "\n", " ")
|
||||
text = strings.ReplaceAll(text, "\t", " ")
|
||||
text = strings.ReplaceAll(text, " ", " ")
|
||||
text = strings.TrimSpace(text)
|
||||
|
||||
// Convert to runes to be safe with unicode
|
||||
runes := []rune(text)
|
||||
if len(runes) > max {
|
||||
return strings.TrimSpace(string(runes[:max])) + "…"
|
||||
}
|
||||
|
||||
return text
|
||||
}
|
64
internal/reader/sanitizer/truncate_test.go
Normal file
64
internal/reader/sanitizer/truncate_test.go
Normal file
|
@ -0,0 +1,64 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package sanitizer
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTruncateHTMWithTextLowerThanLimitL(t *testing.T) {
|
||||
input := `This is a <strong>bug 🐛</strong>.`
|
||||
expected := `This is a bug 🐛.`
|
||||
output := TruncateHTML(input, 50)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithTextAboveLimit(t *testing.T) {
|
||||
input := `This is <strong>HTML</strong>.`
|
||||
expected := `This…`
|
||||
output := TruncateHTML(input, 4)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithUnicodeTextAboveLimit(t *testing.T) {
|
||||
input := `This is a <strong>bike 🚲</strong>.`
|
||||
expected := `This…`
|
||||
output := TruncateHTML(input, 4)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithMultilineTextAboveLimit(t *testing.T) {
|
||||
input := `
|
||||
This is a <strong>bike
|
||||
🚲</strong>.
|
||||
|
||||
`
|
||||
expected := `This is a bike…`
|
||||
output := TruncateHTML(input, 15)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestTruncateHTMLWithMultilineTextLowerThanLimit(t *testing.T) {
|
||||
input := `
|
||||
This is a <strong>bike
|
||||
🚲</strong>.
|
||||
|
||||
`
|
||||
expected := `This is a bike 🚲.`
|
||||
output := TruncateHTML(input, 20)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: %q != %q`, expected, output)
|
||||
}
|
||||
}
|
57
internal/reader/scraper/rules.go
Normal file
57
internal/reader/scraper/rules.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package scraper // import "miniflux.app/v2/internal/reader/scraper"
|
||||
|
||||
// List of predefined scraper rules (alphabetically sorted)
// domain => CSS selectors
//
// Note: the map previously had "opensource.com" before "openingsource.org"
// and "zeit.de" before "zdnet.com", contradicting the sorted-order claim
// above; the entries are now truly in alphabetical order.
var predefinedRules = map[string]string{
	"bbc.co.uk":            "div.vxp-column--single, div.story-body__inner, ul.gallery-images__list",
	"blog.cloudflare.com":  "div.post-content",
	"cbc.ca":               ".story-content",
	"darkreading.com":      "#article-main:not(header)",
	"developpez.com":       "div[itemprop=articleBody]",
	"dilbert.com":          "span.comic-title-name, img.img-comic",
	"explosm.net":          "div#comic",
	"financialsamurai.com": "article",
	"francetvinfo.fr":      ".text",
	"github.com":           "article.entry-content",
	"heise.de":             "header .article-content__lead, header .article-image, div.article-layout__content.article-content",
	"igen.fr":              "section.corps",
	"ikiwiki.iki.fi":       ".page.group",
	"ilpost.it":            ".entry-content",
	"ing.dk":               "section.body",
	"lapresse.ca":          ".amorce, .entry",
	"lemonde.fr":           "article",
	"lepoint.fr":           ".art-text",
	"lesjoiesducode.fr":    ".blog-post-content img",
	"lesnumeriques.com":    ".text",
	"linux.com":            "div.content, div[property]",
	"mac4ever.com":         "div[itemprop=articleBody]",
	"monwindows.com":       ".blog-post-body",
	"npr.org":              "#storytext",
	"oneindia.com":         ".io-article-body",
	"openingsource.org":    "article.suxing-popup-gallery",
	"opensource.com":       "div[property]",
	"osnews.com":           "div.newscontent1",
	"phoronix.com":         "div.content",
	"pseudo-sciences.org":  "#art_main",
	"quantamagazine.org":   ".outer--content, figure, script",
	"raywenderlich.com":    "article",
	"royalroad.com":        ".author-note-portlet,.chapter-content",
	"slate.fr":             ".field-items",
	"smbc-comics.com":      "div#cc-comicbody, div#aftercomic",
	"swordscomic.com":      "img#comic-image, div#info-frame.tab-content-area",
	"techcrunch.com":       "div.article-entry",
	"theoatmeal.com":       "div#comic",
	"theregister.com":      "#top-col-story h2, #body",
	"theverge.com":         "h2.inline:nth-child(2),h2.duet--article--dangerously-set-cms-markup,figure.w-full,div.duet--article--article-body-component",
	"turnoff.us":           "article.post-content",
	"universfreebox.com":   "#corps_corps",
	"version2.dk":          "section.body",
	"wdwnt.com":            "div.entry-content",
	"webtoons.com":         ".viewer_img",
	"wired.com":            "main figure, article",
	"zdnet.com":            "div.storyBody",
	"zeit.de":              ".summary, .article-body",
}
|
105
internal/reader/scraper/scraper.go
Normal file
105
internal/reader/scraper/scraper.go
Normal file
|
@ -0,0 +1,105 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package scraper // import "miniflux.app/v2/internal/reader/scraper"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/logger"
|
||||
"miniflux.app/v2/internal/reader/readability"
|
||||
"miniflux.app/v2/internal/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Fetch downloads a web page and returns relevant contents.
|
||||
func Fetch(websiteURL, rules, userAgent string, cookie string, allowSelfSignedCertificates, useProxy bool) (string, error) {
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
if useProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return "", errors.New("scraper: unable to download web page")
|
||||
}
|
||||
|
||||
if !isAllowedContentType(response.ContentType) {
|
||||
return "", fmt.Errorf("scraper: this resource is not a HTML document (%s)", response.ContentType)
|
||||
}
|
||||
|
||||
if err = response.EnsureUnicodeBody(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// The entry URL could redirect somewhere else.
|
||||
sameSite := url.Domain(websiteURL) == url.Domain(response.EffectiveURL)
|
||||
websiteURL = response.EffectiveURL
|
||||
|
||||
if rules == "" {
|
||||
rules = getPredefinedScraperRules(websiteURL)
|
||||
}
|
||||
|
||||
var content string
|
||||
if sameSite && rules != "" {
|
||||
logger.Debug(`[Scraper] Using rules %q for %q`, rules, websiteURL)
|
||||
content, err = scrapContent(response.Body, rules)
|
||||
} else {
|
||||
logger.Debug(`[Scraper] Using readability for %q`, websiteURL)
|
||||
content, err = readability.ExtractContent(response.Body)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return content, nil
|
||||
}
|
||||
|
||||
func scrapContent(page io.Reader, rules string) (string, error) {
|
||||
document, err := goquery.NewDocumentFromReader(page)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
contents := ""
|
||||
document.Find(rules).Each(func(i int, s *goquery.Selection) {
|
||||
var content string
|
||||
|
||||
content, _ = goquery.OuterHtml(s)
|
||||
contents += content
|
||||
})
|
||||
|
||||
return contents, nil
|
||||
}
|
||||
|
||||
func getPredefinedScraperRules(websiteURL string) string {
|
||||
urlDomain := url.Domain(websiteURL)
|
||||
|
||||
for domain, rules := range predefinedRules {
|
||||
if strings.Contains(urlDomain, domain) {
|
||||
return rules
|
||||
}
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// isAllowedContentType reports whether the given Content-Type header value
// designates an HTML or XHTML document (parameters such as charset may
// follow the media type).
func isAllowedContentType(contentType string) bool {
	normalized := strings.ToLower(contentType)
	for _, prefix := range []string{"text/html", "application/xhtml+xml"} {
		if strings.HasPrefix(normalized, prefix) {
			return true
		}
	}
	return false
}
|
75
internal/reader/scraper/scraper_test.go
Normal file
75
internal/reader/scraper/scraper_test.go
Normal file
|
@ -0,0 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package scraper // import "miniflux.app/v2/internal/reader/scraper"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetPredefinedRules(t *testing.T) {
|
||||
if getPredefinedScraperRules("http://www.phoronix.com/") == "" {
|
||||
t.Error("Unable to find rule for phoronix.com")
|
||||
}
|
||||
|
||||
if getPredefinedScraperRules("https://www.linux.com/") == "" {
|
||||
t.Error("Unable to find rule for linux.com")
|
||||
}
|
||||
|
||||
if getPredefinedScraperRules("https://example.org/") != "" {
|
||||
t.Error("A rule not defined should not return anything")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWhitelistedContentTypes(t *testing.T) {
|
||||
scenarios := map[string]bool{
|
||||
"text/html": true,
|
||||
"TeXt/hTmL": true,
|
||||
"application/xhtml+xml": true,
|
||||
"text/html; charset=utf-8": true,
|
||||
"application/xhtml+xml; charset=utf-8": true,
|
||||
"text/css": false,
|
||||
"application/javascript": false,
|
||||
"image/png": false,
|
||||
"application/pdf": false,
|
||||
}
|
||||
|
||||
for inputValue, expectedResult := range scenarios {
|
||||
actualResult := isAllowedContentType(inputValue)
|
||||
if actualResult != expectedResult {
|
||||
t.Errorf(`Unexpected result for content type whitelist, got "%v" instead of "%v"`, actualResult, expectedResult)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectorRules(t *testing.T) {
|
||||
var ruleTestCases = map[string]string{
|
||||
"img.html": "article > img",
|
||||
"iframe.html": "article > iframe",
|
||||
"p.html": "article > p",
|
||||
}
|
||||
|
||||
for filename, rule := range ruleTestCases {
|
||||
html, err := os.ReadFile("testdata/" + filename)
|
||||
if err != nil {
|
||||
t.Fatalf(`Unable to read file %q: %v`, filename, err)
|
||||
}
|
||||
|
||||
actualResult, err := scrapContent(bytes.NewReader(html), rule)
|
||||
if err != nil {
|
||||
t.Fatalf(`Scraping error for %q - %q: %v`, filename, rule, err)
|
||||
}
|
||||
|
||||
expectedResult, err := os.ReadFile("testdata/" + filename + "-result")
|
||||
if err != nil {
|
||||
t.Fatalf(`Unable to read file %q: %v`, filename, err)
|
||||
}
|
||||
|
||||
if actualResult != strings.TrimSpace(string(expectedResult)) {
|
||||
t.Errorf(`Unexpected result for %q, got "%s" instead of "%s"`, rule, actualResult, expectedResult)
|
||||
}
|
||||
}
|
||||
}
|
12
internal/reader/scraper/testdata/iframe.html
vendored
Normal file
12
internal/reader/scraper/testdata/iframe.html
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<body>
|
||||
<article>
|
||||
<iframe id="1" src="about:blank"></iframe>
|
||||
<iframe id="2" src="about:blank"></iframe>
|
||||
<iframe id="3" src="about:blank"></iframe>
|
||||
<iframe id="4" src="about:blank"></iframe>
|
||||
<iframe id="5" src="about:blank"></iframe>
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
1
internal/reader/scraper/testdata/iframe.html-result
vendored
Normal file
1
internal/reader/scraper/testdata/iframe.html-result
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
<iframe id="1" src="about:blank"></iframe><iframe id="2" src="about:blank"></iframe><iframe id="3" src="about:blank"></iframe><iframe id="4" src="about:blank"></iframe><iframe id="5" src="about:blank"></iframe>
|
12
internal/reader/scraper/testdata/img.html
vendored
Normal file
12
internal/reader/scraper/testdata/img.html
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<body>
|
||||
<article>
|
||||
<img id="1" src="#" alt="" />
|
||||
<img id="2" src="#" alt="" />
|
||||
<img id="3" src="#" alt="" />
|
||||
<img id="4" src="#" alt="" />
|
||||
<img id="5" src="#" alt="" />
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
1
internal/reader/scraper/testdata/img.html-result
vendored
Normal file
1
internal/reader/scraper/testdata/img.html-result
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
<img id="1" src="#" alt=""/><img id="2" src="#" alt=""/><img id="3" src="#" alt=""/><img id="4" src="#" alt=""/><img id="5" src="#" alt=""/>
|
10
internal/reader/scraper/testdata/p.html
vendored
Normal file
10
internal/reader/scraper/testdata/p.html
vendored
Normal file
|
@ -0,0 +1,10 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en-US">
|
||||
<body>
|
||||
<article>
|
||||
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing ept.</p>
|
||||
<p>Apquam tincidunt mauris eu risus.</p>
|
||||
<p>Vestibulum auctor dapibus neque.</p>
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
1
internal/reader/scraper/testdata/p.html-result
vendored
Normal file
1
internal/reader/scraper/testdata/p.html-result
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
<p>Lorem ipsum dolor sit amet, consectetuer adipiscing ept.</p><p>Apquam tincidunt mauris eu risus.</p><p>Vestibulum auctor dapibus neque.</p>
|
197
internal/reader/subscription/finder.go
Normal file
197
internal/reader/subscription/finder.go
Normal file
|
@ -0,0 +1,197 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/parser"
|
||||
"miniflux.app/v2/internal/url"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
var (
	// errUnreadableDoc is the localized error format used when an HTML
	// page cannot be parsed.
	errUnreadableDoc = "Unable to analyze this page: %v"

	// Patterns recognizing YouTube channel and video page URLs.
	youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
	youtubeVideoRegex   = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
)
|
||||
|
||||
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
||||
func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *errors.LocalizedError) {
|
||||
websiteURL = findYoutubeChannelFeed(websiteURL)
|
||||
websiteURL = parseYoutubeVideoPage(websiteURL)
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
|
||||
if fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := browser.Exec(clt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body := response.BodyAsString()
|
||||
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
|
||||
var subscriptions Subscriptions
|
||||
subscriptions = append(subscriptions, &Subscription{
|
||||
Title: response.EffectiveURL,
|
||||
URL: response.EffectiveURL,
|
||||
Type: format,
|
||||
})
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
|
||||
if err != nil || subscriptions != nil {
|
||||
return subscriptions, err
|
||||
}
|
||||
|
||||
return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
|
||||
}
|
||||
|
||||
func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
|
||||
var subscriptions Subscriptions
|
||||
queries := map[string]string{
|
||||
"link[type='application/rss+xml']": "rss",
|
||||
"link[type='application/atom+xml']": "atom",
|
||||
"link[type='application/json']": "json",
|
||||
"link[type='application/feed+json']": "json",
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errUnreadableDoc, err)
|
||||
}
|
||||
|
||||
for query, kind := range queries {
|
||||
doc.Find(query).Each(func(i int, s *goquery.Selection) {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
|
||||
if title, exists := s.Attr("title"); exists {
|
||||
subscription.Title = title
|
||||
}
|
||||
|
||||
if feedURL, exists := s.Attr("href"); exists {
|
||||
if feedURL != "" {
|
||||
subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
|
||||
}
|
||||
}
|
||||
|
||||
if subscription.Title == "" {
|
||||
subscription.Title = subscription.URL
|
||||
}
|
||||
|
||||
if subscription.URL != "" {
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
func findYoutubeChannelFeed(websiteURL string) string {
|
||||
matches := youtubeChannelRegex.FindStringSubmatch(websiteURL)
|
||||
|
||||
if len(matches) == 2 {
|
||||
return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, matches[1])
|
||||
}
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
func parseYoutubeVideoPage(websiteURL string) string {
|
||||
if !youtubeVideoRegex.MatchString(websiteURL) {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
if docErr != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
if channelID, exists := doc.Find(`meta[itemprop="channelId"]`).First().Attr("content"); exists {
|
||||
return fmt.Sprintf(`https://www.youtube.com/feeds/videos.xml?channel_id=%s`, channelID)
|
||||
}
|
||||
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
|
||||
var subscriptions Subscriptions
|
||||
knownURLs := map[string]string{
|
||||
"atom.xml": "atom",
|
||||
"feed.xml": "atom",
|
||||
"feed/": "atom",
|
||||
"rss.xml": "rss",
|
||||
"rss/": "rss",
|
||||
}
|
||||
|
||||
websiteURLRoot := url.RootURL(websiteURL)
|
||||
baseURLs := []string{
|
||||
// Look for knownURLs in the root.
|
||||
websiteURLRoot,
|
||||
}
|
||||
// Look for knownURLs in current subdirectory, such as 'example.com/blog/'.
|
||||
websiteURL, _ = url.AbsoluteURL(websiteURL, "./")
|
||||
if websiteURL != websiteURLRoot {
|
||||
baseURLs = append(baseURLs, websiteURL)
|
||||
}
|
||||
|
||||
for _, baseURL := range baseURLs {
|
||||
for knownURL, kind := range knownURLs {
|
||||
fullURL, err := url.AbsoluteURL(baseURL, knownURL)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
clt := client.NewClientWithConfig(fullURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
|
||||
// Some websites redirects unknown URLs to the home page.
|
||||
// As result, the list of known URLs is returned to the subscription list.
|
||||
// We don't want the user to choose between invalid feed URLs.
|
||||
clt.WithoutRedirects()
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if response != nil && response.StatusCode == 200 {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
subscription.Title = fullURL
|
||||
subscription.URL = fullURL
|
||||
if subscription.URL != "" {
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
285
internal/reader/subscription/finder_test.go
Normal file
285
internal/reader/subscription/finder_test.go
Normal file
|
@ -0,0 +1,285 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestFindYoutubeChannelFeed(t *testing.T) {
|
||||
scenarios := map[string]string{
|
||||
"https://www.youtube.com/channel/UC-Qj80avWItNRjkZ41rzHyw": "https://www.youtube.com/feeds/videos.xml?channel_id=UC-Qj80avWItNRjkZ41rzHyw",
|
||||
"http://example.org/feed": "http://example.org/feed",
|
||||
}
|
||||
|
||||
for websiteURL, expectedFeedURL := range scenarios {
|
||||
result := findYoutubeChannelFeed(websiteURL)
|
||||
if result != expectedFeedURL {
|
||||
t.Errorf(`Unexpected Feed, got %s, instead of %s`, result, expectedFeedURL)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithRssFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/rss" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "rss" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithAtomFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/atom.xml" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "atom" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithJSONFeed(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "Some Title" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithEmptyTitle(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="/feed.json" rel="alternate" type="application/feed+json">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 1 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
|
||||
if subscriptions[0].Title != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
|
||||
}
|
||||
|
||||
if subscriptions[0].URL != "http://example.org/feed.json" {
|
||||
t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
|
||||
}
|
||||
|
||||
if subscriptions[0].Type != "json" {
|
||||
t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithMultipleFeeds(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
|
||||
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 2 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link href rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 0 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseWebPageWithNoHref(t *testing.T) {
|
||||
htmlPage := `
|
||||
<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<link rel="alternate" type="application/feed+json" title="Some Title">
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
|
||||
if err != nil {
|
||||
t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
|
||||
}
|
||||
|
||||
if len(subscriptions) != 0 {
|
||||
t.Fatal(`Incorrect number of subscriptions returned`)
|
||||
}
|
||||
}
|
20
internal/reader/subscription/subscription.go
Normal file
20
internal/reader/subscription/subscription.go
Normal file
|
@ -0,0 +1,20 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Subscription represents a feed subscription.
type Subscription struct {
	Title string `json:"title"`
	URL   string `json:"url"`
	Type  string `json:"type"`
}

// String returns a human-readable representation of the subscription,
// suitable for logging.
func (s Subscription) String() string {
	return fmt.Sprintf(`Title="%s", URL="%s", Type="%s"`, s.Title, s.URL, s.Type)
}

// Subscriptions represents a list of subscription.
type Subscriptions []*Subscription
|
85
internal/reader/xml/decoder.go
Normal file
85
internal/reader/xml/decoder.go
Normal file
|
@ -0,0 +1,85 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package xml // import "miniflux.app/v2/internal/reader/xml"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/reader/encoding"
|
||||
)
|
||||
|
||||
// NewDecoder returns a XML decoder that filters illegal characters.
|
||||
func NewDecoder(data io.Reader) *xml.Decoder {
|
||||
var decoder *xml.Decoder
|
||||
buffer, _ := io.ReadAll(data)
|
||||
enc := procInst("encoding", string(buffer))
|
||||
if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
|
||||
// filter invalid chars later within decoder.CharsetReader
|
||||
decoder = xml.NewDecoder(bytes.NewReader(buffer))
|
||||
} else {
|
||||
// filter invalid chars now, since decoder.CharsetReader not called for utf-8 content
|
||||
filteredBytes := bytes.Map(filterValidXMLChar, buffer)
|
||||
decoder = xml.NewDecoder(bytes.NewReader(filteredBytes))
|
||||
}
|
||||
|
||||
decoder.Entity = xml.HTMLEntity
|
||||
decoder.Strict = false
|
||||
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
||||
utf8Reader, err := encoding.CharsetReader(charset, input)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rawData, err := io.ReadAll(utf8Reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Unable to read data: %q", err)
|
||||
}
|
||||
filteredBytes := bytes.Map(filterValidXMLChar, rawData)
|
||||
return bytes.NewReader(filteredBytes), nil
|
||||
}
|
||||
|
||||
return decoder
|
||||
}
|
||||
|
||||
// filterValidXMLChar returns r unchanged when it is a legal XML 1.0
// character, and -1 (the bytes.Map deletion sentinel) otherwise. The valid
// ranges mirror the check used inside the encoding/xml package.
func filterValidXMLChar(r rune) rune {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return r
	case 0x20 <= r && r <= 0xD7FF:
		return r
	case 0xE000 <= r && r <= 0xFFFD:
		return r
	case 0x10000 <= r && r <= 0x10FFFF:
		return r
	default:
		return -1
	}
}
|
||||
|
||||
// procInst extracts the value of the `param="..."` (or `param='...'`)
// attribute out of s, returning "" when the attribute is absent or
// malformed. Copied from the encoding/xml package; the parsing is
// approximate but works for all practical processing instructions.
func procInst(param, s string) string {
	needle := param + "="
	idx := strings.Index(s, needle)
	if idx < 0 {
		return ""
	}
	rest := s[idx+len(needle):]
	if len(rest) == 0 {
		return ""
	}
	quote := rest[0]
	if quote != '\'' && quote != '"' {
		return ""
	}
	end := strings.IndexRune(rest[1:], rune(quote))
	if end < 0 {
		return ""
	}
	return rest[1 : end+1]
}
|
83
internal/reader/xml/decoder_test.go
Normal file
83
internal/reader/xml/decoder_test.go
Normal file
|
@ -0,0 +1,83 @@
|
|||
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
package xml // import "miniflux.app/v2/internal/reader/xml"
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestUTF8WithIllegalCharacters(t *testing.T) {
|
||||
type myxml struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Title string `xml:"title"`
|
||||
}
|
||||
|
||||
expected := "Title & 中文标题"
|
||||
data := fmt.Sprintf(`<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
|
||||
reader := strings.NewReader(data)
|
||||
|
||||
var x myxml
|
||||
|
||||
decoder := NewDecoder(reader)
|
||||
err := decoder.Decode(&x)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
if x.Title != expected {
|
||||
t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWindows251WithIllegalCharacters(t *testing.T) {
|
||||
type myxml struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Title string `xml:"title"`
|
||||
}
|
||||
|
||||
expected := "Title & 中文标题"
|
||||
data := fmt.Sprintf(`<?xml version="1.0" encoding="windows-1251"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
|
||||
reader := strings.NewReader(data)
|
||||
|
||||
var x myxml
|
||||
|
||||
decoder := NewDecoder(reader)
|
||||
err := decoder.Decode(&x)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
if x.Title != expected {
|
||||
t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIllegalEncodingField(t *testing.T) {
|
||||
type myxml struct {
|
||||
XMLName xml.Name `xml:"rss"`
|
||||
Version string `xml:"version,attr"`
|
||||
Title string `xml:"title"`
|
||||
}
|
||||
|
||||
expected := "Title & 中文标题"
|
||||
data := fmt.Sprintf(`<?xml version="1.0" encoding="invalid"?><rss version="2.0"><title>Title & 中文%s标题</title></rss>`, "\x10")
|
||||
reader := strings.NewReader(data)
|
||||
|
||||
var x myxml
|
||||
|
||||
decoder := NewDecoder(reader)
|
||||
err := decoder.Decode(&x)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
if x.Title != expected {
|
||||
t.Errorf("Incorrect entry title, expected: %s, got: %s", expected, x.Title)
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue