mirror of
https://github.com/miniflux/v2.git
synced 2025-08-06 17:41:00 +00:00
fix(parser): handle feeds with leading whitespace that exceeds buffer size
This commit is contained in:
parent
5eab4753e8
commit
54abd0a736
2 changed files with 88 additions and 6 deletions
|
@ -4,9 +4,9 @@
|
|||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"unicode"
|
||||
|
||||
rxml "miniflux.app/v2/internal/reader/xml"
|
||||
)
|
||||
|
@ -22,11 +22,7 @@ const (
|
|||
|
||||
// DetectFeedFormat tries to guess the feed format from input data.
|
||||
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
|
||||
var dataArray = [32]byte{}
|
||||
data := dataArray[:]
|
||||
r.Read(data)
|
||||
|
||||
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
|
||||
if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
|
||||
return FormatJSON, ""
|
||||
}
|
||||
|
||||
|
@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
|
|||
|
||||
return FormatUnknown, ""
|
||||
}
|
||||
|
||||
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is an opening brace ('{').
//
// The input is consumed in small fixed-size chunks, so any amount of leading
// whitespace is skipped without buffering the whole input. Reading starts at
// the reader's current offset; the offset is not restored afterwards.
//
// It returns (false, nil) when the input is empty or holds only whitespace,
// and (false, err) when a read fails with a non-EOF error before any
// non-whitespace byte has been seen.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	var buffer [bufferSize]byte

	for {
		n, err := r.Read(buffer[:])

		// Inspect the returned bytes before looking at err: an io.Reader may
		// hand back data together with an error in the same call.
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		switch {
		case err == io.EOF:
			// No non-whitespace content found
			return false, nil
		case err != nil:
			return false, err
		case n == 0:
			// The reader made no progress and reported no error; stop here
			// rather than risk spinning forever.
			return false, nil
		}

		// A short read (0 < n < bufferSize) does NOT mean end of input:
		// io.Reader is allowed to return fewer bytes than requested while
		// more data is still available, so keep reading until EOF.
	}
}
|
||||
|
|
|
@ -77,3 +77,56 @@ func TestDetectUnknown(t *testing.T) {
|
|||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSONWithLargeLeadingWhitespace(t *testing.T) {
|
||||
leadingWhitespace := strings.Repeat(" ", 10000)
|
||||
data := leadingWhitespace + `{
|
||||
"version" : "https://jsonfeed.org/version/1",
|
||||
"title" : "Example with lots of leading whitespace"
|
||||
}`
|
||||
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSONWithMixedWhitespace(t *testing.T) {
|
||||
leadingWhitespace := strings.Repeat("\n\t ", 10000)
|
||||
data := leadingWhitespace + `{
|
||||
"version" : "https://jsonfeed.org/version/1",
|
||||
"title" : "Example with mixed whitespace"
|
||||
}`
|
||||
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectOnlyWhitespace(t *testing.T) {
|
||||
data := strings.Repeat(" \t\n\r", 10000)
|
||||
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatUnknown {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSONSmallerThanBuffer(t *testing.T) {
|
||||
data := `{"version":"1"}` // This is only 15 bytes, well below the 32-byte buffer
|
||||
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectJSONWithWhitespaceSmallerThanBuffer(t *testing.T) {
|
||||
data := ` {"title":"test"} `
|
||||
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||
|
||||
if format != FormatJSON {
|
||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue