Some content
Code block with nested span # exit 1
diff --git a/internal/reader/readability/readability.go b/internal/reader/readability/readability.go index f3d2e976..bad0424c 100644 --- a/internal/reader/readability/readability.go +++ b/internal/reader/readability/readability.go @@ -162,6 +162,11 @@ func removeUnlikelyCandidates(document *goquery.Document) { return } + // Don't remove elements within code blocks (pre or code tags) + if s.Closest("pre, code").Length() > 0 { + return + } + if class, ok := s.Attr("class"); ok { if shouldRemove(class) { s.Remove() diff --git a/internal/reader/readability/readability_test.go b/internal/reader/readability/readability_test.go index e6deb889..ddb11afe 100644 --- a/internal/reader/readability/readability_test.go +++ b/internal/reader/readability/readability_test.go @@ -164,6 +164,28 @@ func TestRemoveBlacklist(t *testing.T) { } } +func TestNestedSpanInCodeBlock(t *testing.T) { + html := ` + +
+Some content
Code block with nested span # exit 1
Some content
Code block with nested span # exit 1