mirror of
https://github.com/miniflux/v2.git
synced 2025-07-22 17:18:37 +00:00
refactor(readability): various improvements and optimizations
- Replace a completely overkill regex
- Use `.Remove()` instead of a hand-rolled loop
- Use a strings.Builder instead of a bytes.NewBufferString
- Replace a call to Fprintf with string concatenation, as the latter is much faster
- Remove a superfluous cast
- Delay some computations
- Add some tests
This commit is contained in:
parent
113abeea59
commit
6ad5ad0bb2
2 changed files with 84 additions and 29 deletions
|
@ -100,3 +100,64 @@ func TestWithoutBaseURL(t *testing.T) {
|
|||
t.Errorf(`Unexpected base URL, got %q instead of ""`, baseURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveStyleScript(t *testing.T) {
|
||||
html := `
|
||||
<html>
|
||||
<head>
|
||||
<title>Test</title>
|
||||
<script src="tololo.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<script src="tololo.js"></script>
|
||||
<style>
|
||||
h1 {color:red;}
|
||||
p {color:blue;}
|
||||
</style>
|
||||
<article>Some content</article>
|
||||
</body>
|
||||
</html>`
|
||||
want := `<div><div><article>Somecontent</article></div></div>`
|
||||
|
||||
_, content, err := ExtractContent(strings.NewReader(html))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
content = strings.ReplaceAll(content, "\n", "")
|
||||
content = strings.ReplaceAll(content, " ", "")
|
||||
content = strings.ReplaceAll(content, "\t", "")
|
||||
|
||||
if content != want {
|
||||
t.Errorf(`Invalid content, got %s instead of %s`, content, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRemoveBlacklist(t *testing.T) {
|
||||
html := `
|
||||
<html>
|
||||
<head>
|
||||
<title>Test</title>
|
||||
</head>
|
||||
<body>
|
||||
<article class="super-ad">Some content</article>
|
||||
<article class="g-plus-crap">Some other thing</article>
|
||||
<article class="stuff popupbody">And more</article>
|
||||
<article class="legit">Valid!</article>
|
||||
</body>
|
||||
</html>`
|
||||
want := `<div><div><articleclass="legit">Valid!</article></div></div>`
|
||||
|
||||
_, content, err := ExtractContent(strings.NewReader(html))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
content = strings.ReplaceAll(content, "\n", "")
|
||||
content = strings.ReplaceAll(content, " ", "")
|
||||
content = strings.ReplaceAll(content, "\t", "")
|
||||
|
||||
if content != want {
|
||||
t.Errorf(`Invalid content, got %s instead of %s`, content, want)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue