mirror of
https://github.com/miniflux/v2.git
synced 2025-08-11 17:51:01 +00:00
fix(parser): handle feeds with leading whitespace that exceeds buffer size
This commit is contained in:
parent
5eab4753e8
commit
54abd0a736
2 changed files with 88 additions and 6 deletions
|
@ -4,9 +4,9 @@
|
||||||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"io"
|
"io"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
rxml "miniflux.app/v2/internal/reader/xml"
|
rxml "miniflux.app/v2/internal/reader/xml"
|
||||||
)
|
)
|
||||||
|
@ -22,11 +22,7 @@ const (
|
||||||
|
|
||||||
// DetectFeedFormat tries to guess the feed format from input data.
|
// DetectFeedFormat tries to guess the feed format from input data.
|
||||||
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
|
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
|
||||||
var dataArray = [32]byte{}
|
if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
|
||||||
data := dataArray[:]
|
|
||||||
r.Read(data)
|
|
||||||
|
|
||||||
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
|
|
||||||
return FormatJSON, ""
|
return FormatJSON, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
|
||||||
|
|
||||||
return FormatUnknown, ""
|
return FormatUnknown, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is an opening curly brace, i.e. whether the input looks like a JSON document.
//
// The input is consumed in small fixed-size chunks, so an arbitrary amount of
// leading whitespace is handled without buffering the whole document.
//
// It returns (false, nil) when the input is empty or contains only whitespace,
// and (false, err) when a read fails with an error other than io.EOF before a
// non-whitespace byte has been seen.
//
// This function only reads from r and never seeks; the reader is left wherever
// the last Read stopped.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)

		// Look at the returned bytes before looking at err: a Read call may
		// return data together with an error (including io.EOF).
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		// A short read is NOT treated as end of input: readers are allowed to
		// return fewer bytes than requested. Only err decides when to stop.
		switch {
		case err == io.EOF:
			return false, nil // No non-whitespace content found
		case err != nil:
			return false, err
		case n == 0:
			// No data and no error means the reader made no progress;
			// give up instead of spinning forever.
			return false, nil
		}
	}
}
|
||||||
|
|
|
@ -77,3 +77,56 @@ func TestDetectUnknown(t *testing.T) {
|
||||||
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDetectJSONWithLargeLeadingWhitespace(t *testing.T) {
|
||||||
|
leadingWhitespace := strings.Repeat(" ", 10000)
|
||||||
|
data := leadingWhitespace + `{
|
||||||
|
"version" : "https://jsonfeed.org/version/1",
|
||||||
|
"title" : "Example with lots of leading whitespace"
|
||||||
|
}`
|
||||||
|
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||||
|
|
||||||
|
if format != FormatJSON {
|
||||||
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectJSONWithMixedWhitespace(t *testing.T) {
|
||||||
|
leadingWhitespace := strings.Repeat("\n\t ", 10000)
|
||||||
|
data := leadingWhitespace + `{
|
||||||
|
"version" : "https://jsonfeed.org/version/1",
|
||||||
|
"title" : "Example with mixed whitespace"
|
||||||
|
}`
|
||||||
|
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||||
|
|
||||||
|
if format != FormatJSON {
|
||||||
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectOnlyWhitespace(t *testing.T) {
|
||||||
|
data := strings.Repeat(" \t\n\r", 10000)
|
||||||
|
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||||
|
|
||||||
|
if format != FormatUnknown {
|
||||||
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectJSONSmallerThanBuffer(t *testing.T) {
|
||||||
|
data := `{"version":"1"}` // This is only 15 bytes, well below the 32-byte buffer
|
||||||
|
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||||
|
|
||||||
|
if format != FormatJSON {
|
||||||
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDetectJSONWithWhitespaceSmallerThanBuffer(t *testing.T) {
|
||||||
|
data := ` {"title":"test"} `
|
||||||
|
format, _ := DetectFeedFormat(strings.NewReader(data))
|
||||||
|
|
||||||
|
if format != FormatJSON {
|
||||||
|
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue