1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-11 17:51:01 +00:00
miniflux-v2/internal/reader/parser/format.go
Julien Voisin a43d150a27
refactor(parser): centralize seek logic and provide a hint for the compiler to eliminate a useless bound check
- Move the seeking inside of DetectFeedFormat instead of having it everywhere
  in ParseFeed
- Provide a hint for the compiler to eliminate a useless bound check in
  DetectJSONFormat, otherwise it'll check that buffer[i] is valid on every
  iteration of the loop. This shouldn't make a big difference, but oh well.
2025-08-03 12:53:10 -07:00

96 lines
2.1 KiB
Go

// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
package parser // import "miniflux.app/v2/internal/reader/parser"
import (
"encoding/xml"
"io"
"unicode"
rxml "miniflux.app/v2/internal/reader/xml"
)
// List of feed formats.
//
// These are the values DetectFeedFormat returns as its first result.
const (
// FormatRDF is reported when the first recognized XML start element is "RDF".
FormatRDF = "rdf"
// FormatRSS is reported when the first recognized XML start element is "rss".
FormatRSS = "rss"
// FormatAtom is reported when the first recognized XML start element is "feed".
FormatAtom = "atom"
// FormatJSON is reported when the first non-whitespace byte of the input is '{'.
FormatJSON = "json"
// FormatUnknown is reported when none of the other formats could be recognized.
FormatUnknown = "unknown"
)
// DetectFeedFormat tries to guess the feed format from input data.
//
// The reader is rewound to its start before inspection and again when the
// function returns; errors from Seek are ignored.
//
// The first result is one of the Format* constants. The second result is a
// version string: "0.3" or "1.0" for Atom feeds, and empty for everything else.
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
	r.Seek(0, io.SeekStart)
	defer r.Seek(0, io.SeekStart)

	// JSON is checked first because it only needs a peek at the leading bytes.
	looksLikeJSON, jsonErr := detectJSONFormat(r)
	if jsonErr == nil && looksLikeJSON {
		return FormatJSON, ""
	}

	// The JSON probe consumed some bytes, so start over for the XML scan.
	r.Seek(0, io.SeekStart)
	dec := rxml.NewXMLDecoder(r)

	// A nil token ends the scan; the decoder's error itself is deliberately
	// not inspected.
	for tok, _ := dec.Token(); tok != nil; tok, _ = dec.Token() {
		start, isStart := tok.(xml.StartElement)
		if !isStart {
			continue
		}

		switch start.Name.Local {
		case "rss":
			return FormatRSS, ""
		case "RDF":
			return FormatRDF, ""
		case "feed":
			// Atom defaults to 1.0 unless a version="0.3" attribute is present.
			atomVersion := "1.0"
			for _, a := range start.Attr {
				if a.Name.Local == "version" && a.Value == "0.3" {
					atomVersion = "0.3"
					break
				}
			}
			return FormatAtom, atomVersion
		}
	}

	return FormatUnknown, ""
}
// detectJSONFormat reports whether the first non-whitespace byte read from r
// is '{'.
//
// It reads r in small chunks, skipping bytes for which unicode.IsSpace is
// true, and stops at the first other byte. It returns (false, nil) when the
// input ends before any non-whitespace byte is seen, and (false, err) when a
// read fails with an error other than io.EOF before such a byte is found.
//
// The reader's position is not restored; callers are expected to seek
// themselves.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)

		// Process the bytes that were returned before looking at err: a reader
		// may return data together with an error. Ranging over buffer[:n] also
		// lets the compiler drop the per-iteration bounds check.
		for _, ch := range buffer[:n] {
			// Skip whitespace characters (space, tab, newline, carriage return, etc.)
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// First non-whitespace character determines if it's JSON
			return ch == '{', nil
		}

		if err != nil {
			if err == io.EOF {
				return false, nil // No non-whitespace content found
			}
			return false, err
		}

		// A short read does not mean EOF: io.Reader allows Read to return
		// fewer bytes than requested, so keep reading until an error is
		// reported. An empty read with a nil error is treated as "nothing
		// more to inspect" so a misbehaving reader cannot spin forever.
		if n == 0 {
			return false, nil
		}
	}
}