mirror of
https://github.com/miniflux/v2.git
synced 2025-06-27 16:36:00 +00:00
Return outer HTML when scraping elements
This commit is contained in:
parent
30f22fbd78
commit
8e1ed8bef3
8 changed files with 73 additions and 8 deletions
|
@ -4,7 +4,12 @@
|
|||
|
||||
package scraper // import "miniflux.app/reader/scraper"
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetPredefinedRules(t *testing.T) {
|
||||
if getPredefinedScraperRules("http://www.phoronix.com/") == "" {
|
||||
|
@ -40,3 +45,32 @@ func TestWhitelistedContentTypes(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectorRules(t *testing.T) {
|
||||
var ruleTestCases = map[string]string {
|
||||
"img.html": "article > img",
|
||||
"iframe.html": "article > iframe",
|
||||
"p.html": "article > p",
|
||||
}
|
||||
|
||||
for filename, rule := range ruleTestCases {
|
||||
html, err := ioutil.ReadFile("testdata/" + filename)
|
||||
if err != nil {
|
||||
t.Fatalf(`Unable to read file %q: %v`, filename, err)
|
||||
}
|
||||
|
||||
actualResult, err := scrapContent(bytes.NewReader(html), rule)
|
||||
if err != nil {
|
||||
t.Fatalf(`Scraping error for %q - %q: %v`, filename, rule, err)
|
||||
}
|
||||
|
||||
expectedResult, err := ioutil.ReadFile("testdata/" + filename + "-result")
|
||||
if err != nil {
|
||||
t.Fatalf(`Unable to read file %q: %v`, filename, err)
|
||||
}
|
||||
|
||||
if actualResult != strings.TrimSpace(string(expectedResult)) {
|
||||
t.Errorf(`Unexpected result for %q, got "%s" instead of "%s"`, rule, actualResult, expectedResult)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue