// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package readability // import "miniflux.app/v2/internal/reader/readability" import ( "bytes" "os" "strings" "testing" ) func TestBaseURL(t *testing.T) { html := `
Some content
` baseURL, _, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } if baseURL != "https://example.org/" { t.Errorf(`Unexpected base URL, got %q instead of "https://example.org/"`, baseURL) } } func TestMultipleBaseURL(t *testing.T) { html := `
Some content
` baseURL, _, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } if baseURL != "https://example.org/" { t.Errorf(`Unexpected base URL, got %q instead of "https://example.org/"`, baseURL) } } func TestRelativeBaseURL(t *testing.T) { html := `
Some content
` baseURL, _, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } if baseURL != "" { t.Errorf(`Unexpected base URL, got %q`, baseURL) } } func TestWithoutBaseURL(t *testing.T) { html := ` Test
Some content
` baseURL, _, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } if baseURL != "" { t.Errorf(`Unexpected base URL, got %q instead of ""`, baseURL) } } func TestRemoveStyleScript(t *testing.T) { html := ` Test
Some content
` want := `
Somecontent
` _, content, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } content = strings.ReplaceAll(content, "\n", "") content = strings.ReplaceAll(content, " ", "") content = strings.ReplaceAll(content, "\t", "") if content != want { t.Errorf(`Invalid content, got %s instead of %s`, content, want) } } func TestRemoveBlacklist(t *testing.T) { html := ` Test
Some content
Some other thing
And more
Valid!
` want := `
Valid!
` _, content, err := ExtractContent(strings.NewReader(html)) if err != nil { t.Fatal(err) } content = strings.ReplaceAll(content, "\n", "") content = strings.ReplaceAll(content, " ", "") content = strings.ReplaceAll(content, "\t", "") if content != want { t.Errorf(`Invalid content, got %s instead of %s`, content, want) } } func BenchmarkExtractContent(b *testing.B) { var testCases = map[string][]byte{ "miniflux_github.html": {}, "miniflux_wikipedia.html": {}, } for filename := range testCases { data, err := os.ReadFile("testdata/" + filename) if err != nil { b.Fatalf(`Unable to read file %q: %v`, filename, err) } testCases[filename] = data } for range b.N { for _, v := range testCases { ExtractContent(bytes.NewReader(v)) } } }