// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{', i.e. whether the payload looks like a JSON object.
//
// The reader is consumed from its current position and is not rewound;
// callers that need to re-read the data must seek back themselves.
//
// It returns (false, nil) when the input is empty, contains only whitespace,
// or starts with any byte other than '{'. It returns (false, err) when the
// reader fails before a non-whitespace byte has been seen, and
// (false, io.ErrNoProgress) when the reader repeatedly returns no data and no
// error.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const (
		bufferSize    = 32
		maxEmptyReads = 100
	)

	buffer := make([]byte, bufferSize)
	emptyReads := 0

	for {
		n, err := r.Read(buffer)

		// The io.Reader contract requires handling the n bytes that were read
		// before looking at err: a reader may return data together with an
		// error (including io.EOF).
		for _, ch := range buffer[:n] {
			// Each byte is classified as a Latin-1 code point, so besides the
			// ASCII whitespace characters, 0x85 and 0xA0 are skipped as well.
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// The first non-whitespace byte decides the outcome.
			return ch == '{', nil
		}

		switch {
		case err == io.EOF:
			// Only whitespace (or nothing at all) before the end of input.
			return false, nil
		case err != nil:
			return false, err
		case n > 0:
			// A short read is not EOF; keep reading until the reader says so.
			emptyReads = 0
		default:
			// (0, nil) means "nothing happened"; bound the retries so that a
			// misbehaving reader cannot make this loop spin forever.
			emptyReads++
			if emptyReads >= maxEmptyReads {
				return false, io.ErrNoProgress
			}
		}
	}
}