1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-01 17:38:37 +00:00

fix(scraper): avoid encoding issue if charset meta tag is after 1024 bytes

This commit is contained in:
Frédéric Guillot 2025-02-15 16:58:06 -08:00
parent af1f966250
commit 6eedf4111f
12 changed files with 352 additions and 10 deletions

View file

@ -2,6 +2,6 @@
<feed>
<title>테스트 피드</title>
<entry>
<title>こんにちは世界</title>
<title>Café</title>
</entry>
</feed>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html>
<!---
charset.NewReader uses the first 1024 bytes of the input to determine the encoding of the file.
This comment pads the document so that it is longer than 1024 bytes before the <meta> tag is reached.
The <meta> charset tag is therefore located after the first 1024 bytes; this fixture is used to check that the encoding is still detected correctly in that case.
---
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
-->
<head>
<meta charset="iso-8859-1">
<title>Frédéric</title>
</head>
<body>
<p>Café</p>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="iso-8859-1">
<!-- Test fixture: the charset declaration above appears near the start of the document; the title and body contain non-ASCII characters. -->
<title>Frédéric</title>
</head>
<body>
<p>Café</p>
</body>
</html>

View file

@ -2,6 +2,6 @@
<feed>
<title>테스트 피드</title>
<entry>
<title>こんにちは世界</title>
<title>Café</title>
</entry>
</feed>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html>
<!---
charset.NewReader uses the first 1024 bytes of the input to determine the encoding of the file.
This comment pads the document so that it is longer than 1024 bytes before the <meta> tag is reached.
The <meta> charset tag is therefore located after the first 1024 bytes; this fixture is used to check that the encoding is still detected correctly in that case.
---
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
More text to pad the file to 1024 bytes.
-->
<head>
<meta charset="utf-8">
<title>Frédéric</title>
</head>
<body>
<p>Café</p>
</body>
</html>

View file

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<!-- Test fixture: the charset declaration above appears near the start of the document; the title and body contain non-ASCII characters. -->
<title>Café</title>
</head>
<body>
<p>Café</p>
</body>
</html>

View file

@ -2,6 +2,6 @@
<feed>
<title>테스트 피드</title>
<entry>
<title>こんにちは世界</title>
<title>Café</title>
</entry>
</feed>