1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-11 17:51:01 +00:00

fix(parser): handle feeds with leading whitespace that exceeds buffer size

This commit is contained in:
Frédéric Guillot 2025-07-23 20:51:40 -07:00
parent 5eab4753e8
commit 54abd0a736
2 changed files with 88 additions and 6 deletions

View file

@ -4,9 +4,9 @@
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"bytes"
"encoding/xml"
"io"
"unicode"
rxml "miniflux.app/v2/internal/reader/xml"
)
@ -22,11 +22,7 @@ const (
// DetectFeedFormat tries to guess the feed format from input data.
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
var dataArray = [32]byte{}
data := dataArray[:]
r.Read(data)
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
return FormatJSON, ""
}
@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
return FormatUnknown, ""
}
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{'.
//
// The input is consumed in small fixed-size chunks, so any amount of leading
// whitespace is skipped regardless of how it is split across reads. It returns
// (false, nil) when the input ends before a non-whitespace byte is seen, and
// (false, err) when a read fails before one is seen. If r repeatedly returns
// no data and no error, io.ErrNoProgress is returned instead of looping
// forever.
//
// The read position of r is left wherever scanning stopped; this function
// never seeks.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const (
		bufferSize    = 32
		maxEmptyReads = 100
	)

	buffer := make([]byte, bufferSize)
	emptyReads := 0

	for {
		n, err := r.Read(buffer)

		// Per the io.Reader contract, process the n bytes that were read
		// before looking at err: both can be set by the same call.
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		switch {
		case err == io.EOF:
			return false, nil // No non-whitespace content found
		case err != nil:
			return false, err
		case n == 0:
			// A (0, nil) read means "nothing happened", not EOF. Retry a
			// bounded number of times so a misbehaving reader cannot hang us.
			emptyReads++
			if emptyReads >= maxEmptyReads {
				return false, io.ErrNoProgress
			}
		default:
			// A short read is not EOF; only an explicit io.EOF ends the scan.
			emptyReads = 0
		}
	}
}