1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

Allow the scraper to parse XHTML documents

Only "text/html" was authorized before.
This commit is contained in:
Frédéric Guillot 2018-11-03 13:44:13 -07:00
parent 1ff9950a55
commit 3b6e44c331
2 changed files with 28 additions and 1 deletions

View file

@ -34,7 +34,7 @@ func Fetch(websiteURL, rules, userAgent string) (string, error) {
return "", errors.New("scraper: unable to download web page")
}
if !strings.Contains(response.ContentType, "text/html") {
if !isWhitelistedContentType(response.ContentType) {
return "", fmt.Errorf("scraper: this resource is not a HTML document (%s)", response.ContentType)
}
@ -99,3 +99,9 @@ func getPredefinedScraperRules(websiteURL string) string {
return ""
}
// isWhitelistedContentType reports whether contentType identifies a document
// type the scraper accepts: HTML ("text/html") or XHTML
// ("application/xhtml+xml").
//
// The comparison ignores case and only inspects the beginning of the value,
// so trailing parameters such as "; charset=utf-8" do not prevent a match.
func isWhitelistedContentType(contentType string) bool {
	normalized := strings.ToLower(contentType)
	for _, prefix := range []string{"text/html", "application/xhtml+xml"} {
		if strings.HasPrefix(normalized, prefix) {
			return true
		}
	}
	return false
}