From cc3e65dd3c5a5e14afeafd81ef0d866c0fccabdf Mon Sep 17 00:00:00 2001 From: Adrian Smith Date: Mon, 17 Jan 2022 21:31:11 +0000 Subject: [PATCH] Handle atom feed with space around CDATA Trim space around CDATA elements before extracting the CharData. This problem was discovered when reading https://www.sethvargo.com/feed.xml. Title and Summary fields have newlines and space between the element and the CDATA element. e.g. <title> <![CDATA[Entry title here]]> This meant the title of the feed was coming into MiniFlux as, --- reader/atom/atom_10.go | 3 +-- reader/atom/atom_10_test.go | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index cad011ae..71e7c697 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -229,10 +229,9 @@ type atom10Text struct { func (a *atom10Text) String() string { var content string - switch { case a.Type == "", a.Type == "text", a.Type == "text/plain": - if strings.HasPrefix(a.InnerXML, `Some text. + + + <![CDATA[Entry title with space around CDATA]]> + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + ` feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) @@ -317,6 +327,10 @@ func TestParseEntryWithHTMLTitle(t *testing.T) { if feed.Entries[1].Title != "Test “Test”" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title) } + + if feed.Entries[2].Title != "Entry title with space around CDATA" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[2].Title) + } } func TestParseEntryWithXHTMLTitle(t *testing.T) {