1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-01 17:38:37 +00:00

Simplify feed parser and format detection

- Avoid doing multiple buffer copies
- Move parser and format detection logic to its own package
This commit is contained in:
Frédéric Guillot 2018-10-14 11:46:41 -07:00
parent d5ff4191b6
commit 5870f04260
11 changed files with 229 additions and 221 deletions

View file

@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
import (
"io"
"io/ioutil"
"mime"
"strings"
@ -56,23 +57,32 @@ func (r *Response) IsModified(etag, lastModified string) bool {
return true
}
// NormalizeBodyEncoding make sure the body is encoded in UTF-8.
// EnsureUnicodeBody makes sure the body is encoded in UTF-8.
//
// If a charset other than UTF-8 is detected, we convert the document to UTF-8.
// This is used by the scraper and feed readers.
//
// Do not forget edge cases:
// - Some non-utf8 feeds specify encoding only in Content-Type, not in XML document.
func (r *Response) NormalizeBodyEncoding() (io.Reader, error) {
func (r *Response) EnsureUnicodeBody() error {
_, params, err := mime.ParseMediaType(r.ContentType)
if err == nil {
if enc, found := params["charset"]; found {
enc = strings.ToLower(enc)
if enc != "utf-8" && enc != "utf8" && enc != "" {
logger.Debug("[NormalizeBodyEncoding] Convert body to UTF-8 from %s", enc)
return charset.NewReader(r.Body, r.ContentType)
logger.Debug("[EnsureUnicodeBody] Convert body to utf-8 from %s", enc)
r.Body, err = charset.NewReader(r.Body, r.ContentType)
if err != nil {
return err
}
}
}
}
return r.Body, nil
return nil
}
// String reads the response body until EOF (or the first read error) and
// returns everything that was read as a string.
//
// The read error, if any, is intentionally dropped because the signature has
// no way to report it; callers get whatever data was read before the failure.
// The body reader is consumed by this call.
func (r *Response) String() string {
	content, _ := ioutil.ReadAll(r.Body) // best effort: read error is ignored on purpose
	return string(content)
}