1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-06 17:41:00 +00:00

fix(parser): handle feeds with leading whitespace that exceeds buffer size

This commit is contained in:
Frédéric Guillot 2025-07-23 20:51:40 -07:00
parent 5eab4753e8
commit 54abd0a736
2 changed files with 88 additions and 6 deletions

View file

@ -4,9 +4,9 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"bytes"
"encoding/xml"
"io"
"unicode"
rxml "miniflux.app/v2/internal/reader/xml"
)
@ -22,11 +22,7 @@ const (
// DetectFeedFormat tries to guess the feed format from input data.
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
var dataArray = [32]byte{}
data := dataArray[:]
r.Read(data)
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
return FormatJSON, ""
}
@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
return FormatUnknown, ""
}
// detectJSONFormat reports whether the first non-whitespace byte produced by r
// is '{'.
//
// The reader is consumed in small fixed-size chunks so that an arbitrarily long
// run of leading whitespace is handled without loading the whole input. The
// function does not seek; r is left positioned just after the last chunk read.
//
// It returns (false, nil) when the input ends before any non-whitespace byte is
// seen, and (false, err) when Read fails with an error other than io.EOF.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)

		// Process whatever was read before looking at err: Read may return
		// data together with an error (including io.EOF).
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		switch {
		case err == io.EOF:
			return false, nil // No non-whitespace content found
		case err != nil:
			return false, err
		case n == 0:
			// Read made no progress and reported no error; stop instead of
			// spinning on a reader that never advances.
			return false, nil
		}

		// A short read (n < bufferSize) is NOT treated as end of input:
		// io.Reader allows Read to return fewer bytes than requested while
		// more data is still available. Only io.EOF ends the scan.
	}
}

View file

@ -77,3 +77,56 @@ func TestDetectUnknown(t *testing.T) {
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
}
}
// TestDetectJSONWithLargeLeadingWhitespace checks that a JSON feed preceded by
// 10000 space characters is still detected as FormatJSON.
func TestDetectJSONWithLargeLeadingWhitespace(t *testing.T) {
	const payload = `{
"version" : "https://jsonfeed.org/version/1",
"title" : "Example with lots of leading whitespace"
}`
	input := strings.NewReader(strings.Repeat(" ", 10000) + payload)

	if got, _ := DetectFeedFormat(input); got != FormatJSON {
		t.Errorf(`Wrong format detected: %q instead of %q`, got, FormatJSON)
	}
}
// TestDetectJSONWithMixedWhitespace checks that a JSON feed preceded by 10000
// repetitions of newline, tab and space is still detected as FormatJSON.
func TestDetectJSONWithMixedWhitespace(t *testing.T) {
	const payload = `{
"version" : "https://jsonfeed.org/version/1",
"title" : "Example with mixed whitespace"
}`
	input := strings.NewReader(strings.Repeat("\n\t ", 10000) + payload)

	if got, _ := DetectFeedFormat(input); got != FormatJSON {
		t.Errorf(`Wrong format detected: %q instead of %q`, got, FormatJSON)
	}
}
// TestDetectOnlyWhitespace checks that input made up solely of whitespace is
// reported as FormatUnknown.
func TestDetectOnlyWhitespace(t *testing.T) {
	input := strings.NewReader(strings.Repeat(" \t\n\r", 10000))

	if got, _ := DetectFeedFormat(input); got != FormatUnknown {
		t.Errorf(`Wrong format detected: %q instead of %q`, got, FormatUnknown)
	}
}
// TestDetectJSONSmallerThanBuffer checks that a JSON document shorter than the
// detector's 32-byte read buffer is detected as FormatJSON.
func TestDetectJSONSmallerThanBuffer(t *testing.T) {
	// The payload is 15 bytes long, so a single read returns all of it.
	const payload = `{"version":"1"}`

	got, _ := DetectFeedFormat(strings.NewReader(payload))
	if got == FormatJSON {
		return
	}
	t.Errorf(`Wrong format detected: %q instead of %q`, got, FormatJSON)
}
// TestDetectJSONWithWhitespaceSmallerThanBuffer checks that a short JSON
// document surrounded by whitespace is detected as FormatJSON.
func TestDetectJSONWithWhitespaceSmallerThanBuffer(t *testing.T) {
	const payload = ` {"title":"test"} `

	got, _ := DetectFeedFormat(strings.NewReader(payload))
	if got == FormatJSON {
		return
	}
	t.Errorf(`Wrong format detected: %q instead of %q`, got, FormatJSON)
}