1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-26 18:21:01 +00:00

fix(scraper): avoid encoding issue if charset meta tag is after 1024 bytes

This commit is contained in:
Frédéric Guillot 2025-02-15 16:58:06 -08:00
parent af1f966250
commit 6eedf4111f
12 changed files with 352 additions and 10 deletions

View file

@@ -16,12 +16,12 @@ import (
"miniflux.app/v2/internal/integration/rssbridge"
"miniflux.app/v2/internal/locale"
"miniflux.app/v2/internal/model"
+"miniflux.app/v2/internal/reader/encoding"
"miniflux.app/v2/internal/reader/fetcher"
"miniflux.app/v2/internal/reader/parser"
"miniflux.app/v2/internal/urllib"
"github.com/PuerkitoBio/goquery"
-"golang.org/x/net/html/charset"
)
var (
@@ -136,7 +136,7 @@ func (f *SubscriptionFinder) FindSubscriptionsFromWebPage(websiteURL, contentTyp
"link[type='application/feed+json']": parser.FormatJSON,
}
-htmlDocumentReader, err := charset.NewReader(body, contentType)
+htmlDocumentReader, err := encoding.NewCharsetReader(body, contentType)
if err != nil {
return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
}