mirror of
https://github.com/miniflux/v2.git
synced 2025-08-01 17:38:37 +00:00
Simplify feed parser and format detection
- Avoid doing multiple buffer copies
- Move parser and format detection logic to its own package
This commit is contained in:
parent
d5ff4191b6
commit
5870f04260
11 changed files with 229 additions and 221 deletions
|
@@ -6,6 +6,7 @@ package client // import "miniflux.app/http/client"
|
|||
|
||||
import (
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"mime"
|
||||
"strings"
|
||||
|
||||
|
@@ -56,23 +57,32 @@ func (r *Response) IsModified(etag, lastModified string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// NormalizeBodyEncoding make sure the body is encoded in UTF-8.
|
||||
// EnsureUnicodeBody makes sure the body is encoded in UTF-8.
|
||||
//
|
||||
// If a charset other than UTF-8 is detected, we convert the document to UTF-8.
|
||||
// This is used by the scraper and feed readers.
|
||||
//
|
||||
// Do not forget edge cases:
|
||||
// - Some non-utf8 feeds specify encoding only in Content-Type, not in XML document.
|
||||
func (r *Response) NormalizeBodyEncoding() (io.Reader, error) {
|
||||
func (r *Response) EnsureUnicodeBody() error {
|
||||
_, params, err := mime.ParseMediaType(r.ContentType)
|
||||
if err == nil {
|
||||
if enc, found := params["charset"]; found {
|
||||
enc = strings.ToLower(enc)
|
||||
if enc != "utf-8" && enc != "utf8" && enc != "" {
|
||||
logger.Debug("[NormalizeBodyEncoding] Convert body to UTF-8 from %s", enc)
|
||||
return charset.NewReader(r.Body, r.ContentType)
|
||||
logger.Debug("[EnsureUnicodeBody] Convert body to utf-8 from %s", enc)
|
||||
r.Body, err = charset.NewReader(r.Body, r.ContentType)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return r.Body, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// String returns the response body as string.
|
||||
func (r *Response) String() string {
|
||||
bytes, _ := ioutil.ReadAll(r.Body)
|
||||
return string(bytes)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue