diff --git a/internal/reader/rewrite/rewrite_functions.go b/internal/reader/rewrite/rewrite_functions.go index 1b48eb9b..e128a22a 100644 --- a/internal/reader/rewrite/rewrite_functions.go +++ b/internal/reader/rewrite/rewrite_functions.go @@ -455,3 +455,55 @@ func removeTables(entryContent string) string { output, _ := doc.FindMatcher(goquery.Single("body")).Html() return output } + +func fixGhostCards(entryContent string) string { + doc, err := goquery.NewDocumentFromReader(strings.NewReader(entryContent)) + if err != nil { + return entryContent + } + + const cardSelector = "figure.kg-card" + var currentList *goquery.Selection + + doc.Find(cardSelector).Each(func(i int, s *goquery.Selection) { + title := s.Find(".kg-bookmark-title").First().Text() + author := s.Find(".kg-bookmark-author").First().Text() + href := s.Find("a.kg-bookmark-container").First().AttrOr("href", "") + + // if there is no link or title, skip processing + if href == "" || title == "" { + return + } + + link := "" + if author == "" || strings.HasSuffix(title, author) { + link = fmt.Sprintf("%s", href, title) + } else { + link = fmt.Sprintf("%s - %s", href, title, author) + } + + next := s.Next() + + // if the next element is also a card, start a list + if next.Is(cardSelector) && currentList == nil { + currentList = s.BeforeHtml("").Prev() + } + + if currentList != nil { + // add this card to the list, then delete it + currentList.AppendHtml("
  • " + link + "
  • ") + s.Remove() + } else { + // replace single card + s.ReplaceWithHtml(link) + } + + // if the next element is not a card, start a new list + if !next.Is(cardSelector) && currentList != nil { + currentList = nil + } + }) + + output, _ := doc.FindMatcher(goquery.Single("body")).Html() + return strings.TrimSpace(output) +} diff --git a/internal/reader/rewrite/rewriter.go b/internal/reader/rewrite/rewriter.go index e2c26b6c..35395ac9 100644 --- a/internal/reader/rewrite/rewriter.go +++ b/internal/reader/rewrite/rewriter.go @@ -92,6 +92,8 @@ func (rule rule) applyRule(entryURL string, entry *model.Entry) { entry.Content = removeTables(entry.Content) case "remove_clickbait": entry.Title = titlelize(entry.Title) + case "fix_ghost_cards": + entry.Content = fixGhostCards(entry.Content) } } diff --git a/internal/reader/rewrite/rewriter_test.go b/internal/reader/rewrite/rewriter_test.go index 93123dbb..52ea5c01 100644 --- a/internal/reader/rewrite/rewriter_test.go +++ b/internal/reader/rewrite/rewriter_test.go @@ -703,3 +703,231 @@ func TestAddImageTitle(t *testing.T) { t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) } } + +func TestFixGhostCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardNoCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardInvalidCard(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + This card does not have the required fields +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `
    + This card does not have the required fields +
    `, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardMissingAuthor(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardDuplicatedAuthor(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardMultiple(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article 1 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    +
    + +
    +
    Example Article 2 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: ``, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +} + +func TestFixGhostCardMultipleSplit(t *testing.T) { + testEntry := &model.Entry{ + Title: `A title`, + Content: `
    + +
    +
    Example Article 1 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    +

    This separates the two cards

    +
    + +
    +
    Example Article 2 - Example
    +
    Lorem ipsum odor amet, consectetuer adipiscing elit. Pretium magnis luctus ligula conubia quam, donec orci vehicula efficitur...
    + +
    +
    + +
    +
    +
    `, + } + + controlEntry := &model.Entry{ + Title: `A title`, + Content: `Example Article 1 - Example +

    This separates the two cards

    + Example Article 2 - Example`, + } + Rewriter("https://example.org/article", testEntry, `fix_ghost_cards`) + + if !reflect.DeepEqual(testEntry, controlEntry) { + t.Errorf(`Not expected output: got "%+v" instead of "%+v"`, testEntry, controlEntry) + } +}