1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

Return outer HTML when scraping elements

This commit is contained in:
cinput 2019-12-21 21:18:31 -08:00 committed by Frédéric Guillot
parent 30f22fbd78
commit 8e1ed8bef3
8 changed files with 73 additions and 8 deletions

View file

@ -4,7 +4,12 @@
package scraper // import "miniflux.app/reader/scraper"
import "testing"
import (
"bytes"
"io/ioutil"
"strings"
"testing"
)
func TestGetPredefinedRules(t *testing.T) {
if getPredefinedScraperRules("http://www.phoronix.com/") == "" {
@ -40,3 +45,32 @@ func TestWhitelistedContentTypes(t *testing.T) {
}
}
}
// TestSelectorRules runs scrapContent against each HTML fixture in testdata
// with its associated selector rule and compares the output with the contents
// of the matching "<fixture>-result" file (surrounding whitespace of the
// expected file is trimmed before comparing).
func TestSelectorRules(t *testing.T) {
	// Maps a fixture file name in testdata to the selector rule applied to it.
	ruleTestCases := map[string]string{
		"img.html":    "article > img",
		"iframe.html": "article > iframe",
		"p.html":      "article > p",
	}

	for filename, rule := range ruleTestCases {
		// Each fixture is its own subtest so that a fatal error on one
		// fixture does not hide the results of the others.
		t.Run(filename, func(t *testing.T) {
			html, err := ioutil.ReadFile("testdata/" + filename)
			if err != nil {
				t.Fatalf(`Unable to read file %q: %v`, filename, err)
			}

			actualResult, err := scrapContent(bytes.NewReader(html), rule)
			if err != nil {
				t.Fatalf(`Scraping error for %q - %q: %v`, filename, rule, err)
			}

			resultFilename := filename + "-result"
			rawExpected, err := ioutil.ReadFile("testdata/" + resultFilename)
			if err != nil {
				// Report the file that actually failed to load, not the input fixture.
				t.Fatalf(`Unable to read file %q: %v`, resultFilename, err)
			}

			// Report the same trimmed value that is used for the comparison.
			expectedResult := strings.TrimSpace(string(rawExpected))
			if actualResult != expectedResult {
				t.Errorf(`Unexpected result for %q, got "%s" instead of "%s"`, rule, actualResult, expectedResult)
			}
		})
	}
}