From c8c1f0532871302bee9f8f2f84d51a462b240aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Fri, 19 Mar 2021 21:49:35 -0700 Subject: [PATCH] Add better support of Atom text constructs - Note that Miniflux does not render entry title with HTML tags as of now - Omit XHTML div element because it should not be part of the content --- reader/atom/atom_10.go | 24 +++- reader/atom/atom_10_test.go | 252 ++++++++++++++++++++++++------------ 2 files changed, 190 insertions(+), 86 deletions(-) diff --git a/reader/atom/atom_10.go b/reader/atom/atom_10.go index 64f79214..8718dca1 100644 --- a/reader/atom/atom_10.go +++ b/reader/atom/atom_10.go @@ -221,19 +221,33 @@ func (a *atom10Entry) entryCommentsURL() string { } type atom10Text struct { - Type string `xml:"type,attr"` - CharData string `xml:",chardata"` - InnerXML string `xml:",innerxml"` + Type string `xml:"type,attr"` + CharData string `xml:",chardata"` + InnerXML string `xml:",innerxml"` + XHTMLRootElement atomXHTMLRootElement `xml:"http://www.w3.org/1999/xhtml div"` } func (a *atom10Text) String() string { var content string - if a.Type == "xhtml" { + switch { + case strings.HasPrefix(a.InnerXML, `Some text. - ` - - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != `AT&T bought by SBC!` { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryWithHTMLAndCDATATitle(t *testing.T) { - data := ` - - Example Feed - - - <![CDATA[Test “Test”]]> + AT&T bought by SBC! urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -291,8 +273,11 @@ func TestParseEntryWithHTMLAndCDATATitle(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != "Test “Test”" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + expected := `AT&T bought by SBC!` + for i := 0; i < 2; i++ { + if feed.Entries[i].Title != expected { + t.Errorf("Incorrect title for entry #%d, got: %q", i, feed.Entries[i].Title) + } } } @@ -310,26 +295,8 @@ func TestParseEntryWithHTMLTitle(t *testing.T) { Some text. - ` - - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Title != "Test Test" { - t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) - } -} - -func TestParseEntryWithXHTMLTitle(t *testing.T) { - data := ` - - Example Feed - - - <code>Test</code> Test + <![CDATA[Test “Test”]]> urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -346,6 +313,52 @@ func TestParseEntryWithXHTMLTitle(t *testing.T) { if feed.Entries[0].Title != "Test Test" { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } + + if feed.Entries[1].Title != "Test “Test”" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title) + } +} + +func TestParseEntryWithXHTMLTitle(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + + + <div xmlns="http://www.w3.org/1999/xhtml"> + This is <b>XHTML</b> content. + </div> + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != `Test Test` { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } + + if feed.Entries[1].Title != `This is XHTML content.` { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[1].Title) + } } func TestParseEntryWithNumericCharacterReferenceTitle(t *testing.T) { @@ -395,7 +408,7 @@ func TestParseEntryWithDoubleEncodedEntitiesTitle(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Title != `'AT&T'` { + if feed.Entries[0].Title != `'AT&T'` { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } @@ -414,30 +427,12 @@ func TestParseEntryWithXHTMLSummary(t *testing.T) {

Some text.

-
` - - feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) - if err != nil { - t.Fatal(err) - } - - if feed.Entries[0].Content != "

Some text.

" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) - } -} - -func TestParseEntryWithHTMLAndCDATASummary(t *testing.T) { - data := ` - - Example Feed - - - Example + Example urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - Some text.

]]>
+

Test: std::unique_ptr<S>

` @@ -447,12 +442,16 @@ func TestParseEntryWithHTMLAndCDATASummary(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != "

Some text.

" { + if feed.Entries[0].Content != `

Some text.

` { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } + + if feed.Entries[1].Content != `

Test: std::unique_ptr<S>

` { + t.Errorf("Incorrect entry content, got: %s", feed.Entries[1].Content) + } } -func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) { +func TestParseEntryWithHTMLSummary(t *testing.T) { data := ` Example Feed @@ -460,10 +459,26 @@ func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) { Example - + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - ]]> + <code>std::unique_ptr&lt;S&gt;</code> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + <code>std::unique_ptr&lt;S&gt;</code> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + std::unique_ptr<S>]]> ` @@ -473,12 +488,15 @@ func TestParseEntryWithPlainTextAndCDATASummary(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != "" { - t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) + expected := `std::unique_ptr<S>` + for i := 0; i < 3; i++ { + if feed.Entries[i].Content != expected { + t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) + } } } -func TestParseEntryWithTextAndCDATAContent(t *testing.T) { +func TestParseEntryWithTextSummary(t *testing.T) { data := ` Example Feed @@ -486,12 +504,35 @@ func TestParseEntryWithTextAndCDATAContent(t *testing.T) { Example - + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - + AT&T <S> + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&T <S> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&T <S> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + + ` feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) @@ -499,8 +540,11 @@ func TestParseEntryWithTextAndCDATAContent(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != "AT&T bought by SBC!" { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + expected := `AT&T <S>` + for i := 0; i < 4; i++ { + if feed.Entries[i].Content != expected { + t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) + } } } @@ -512,10 +556,34 @@ func TestParseEntryWithTextContent(t *testing.T) { Example - + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z - AT&T bought by SBC! + AT&T <S> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&T <S> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&T <S> + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + ` @@ -525,8 +593,11 @@ func TestParseEntryWithTextContent(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != "AT&T bought by SBC!" { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + expected := `AT&T <S>` + for i := 0; i < 4; i++ { + if feed.Entries[i].Content != expected { + t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) + } } } @@ -538,12 +609,28 @@ func TestParseEntryWithHTMLContent(t *testing.T) { Example - + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z AT&amp;T bought <b>by SBC</b>! + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + AT&amp;T bought <b>by SBC</b>! + + + + Example + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + by SBC!]]> + + ` feed, err := Parse("https://example.org/", bytes.NewBufferString(data)) @@ -551,8 +638,11 @@ func TestParseEntryWithHTMLContent(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != "AT&T bought by SBC!" { - t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) + expected := `AT&T bought by SBC!` + for i := 0; i < 3; i++ { + if feed.Entries[i].Content != expected { + t.Errorf("Incorrect content for entry #%d, got: %q", i, feed.Entries[i].Content) + } } } @@ -563,7 +653,7 @@ func TestParseEntryWithXHTMLContent(t *testing.T) { - Example + Example urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z @@ -579,7 +669,7 @@ func TestParseEntryWithXHTMLContent(t *testing.T) { t.Fatal(err) } - if feed.Entries[0].Content != `
AT&T bought by SBC!
` { + if feed.Entries[0].Content != `AT&T bought by SBC!` { t.Errorf("Incorrect entry content, got: %q", feed.Entries[0].Content) } }