Mirror of https://github.com/miniflux/v2.git (synced 2025-09-15 18:57:04 +00:00)

Refactor HTTP Client and LocalizedError packages

parent 120aabfbce
commit 14e25ab9fe
104 changed files with 1277 additions and 10672 deletions
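The hunks below replace the old internal/http/client + internal/errors (LocalizedError) pair with a new internal/reader/fetcher package (RequestBuilder and ResponseHandler) and locale.LocalizedErrorWrapper. As orientation before the individual hunks, here is a minimal sketch of the new calling pattern, assembled from the handler changes further down; the wrapper function itself and its error handling are illustrative, not part of the commit:

package example

import (
    "fmt"

    "miniflux.app/v2/internal/config"
    "miniflux.app/v2/internal/reader/fetcher"
    "miniflux.app/v2/internal/reader/parser"
)

// fetchAndParse is a hypothetical helper that strings the new pieces together:
// build the request, wrap the response, surface failures as a LocalizedErrorWrapper,
// then hand the body to the refactored parser, which now returns a plain error.
func fetchAndParse(feedURL string) error {
    requestBuilder := fetcher.NewRequestBuilder()
    requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
    requestBuilder.WithProxy(config.Opts.HTTPClientProxy())

    responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(feedURL))
    defer responseHandler.Close()

    // LocalizedError() classifies transport failures and HTTP status codes,
    // replacing the old browser.Exec helper.
    if localizedError := responseHandler.LocalizedError(); localizedError != nil {
        return localizedError.Error()
    }

    responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
    if localizedError != nil {
        return localizedError.Error()
    }

    feed, err := parser.ParseFeed(responseHandler.EffectiveURL(), string(responseBody))
    if err != nil {
        return fmt.Errorf("example: unable to parse feed: %w", err)
    }

    _ = feed
    return nil
}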
@@ -6,9 +6,9 @@ package atom // import "miniflux.app/v2/internal/reader/atom"
import (
    "bytes"
    "encoding/xml"
    "fmt"
    "io"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/model"
    xml_decoder "miniflux.app/v2/internal/reader/xml"
)
@@ -18,7 +18,7 @@ type atomFeed interface {
}

// Parse returns a normalized feed struct from a Atom feed.
func Parse(baseURL string, r io.Reader) (*model.Feed, *errors.LocalizedError) {
func Parse(baseURL string, r io.Reader) (*model.Feed, error) {
    var buf bytes.Buffer
    tee := io.TeeReader(r, &buf)

@@ -29,10 +29,8 @@ func Parse(baseURL string, r io.Reader) (*model.Feed, *errors.LocalizedError) {
        rawFeed = new(atom10Feed)
    }

    decoder := xml_decoder.NewDecoder(&buf)
    err := decoder.Decode(rawFeed)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse Atom feed: %q", err)
    if err := xml_decoder.NewDecoder(&buf).Decode(rawFeed); err != nil {
        return nil, fmt.Errorf("atom: unable to parse feed: %w", err)
    }

    return rawFeed.Transform(baseURL), nil
@@ -1,54 +0,0 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package browser // import "miniflux.app/v2/internal/reader/browser"

import (
    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/http/client"
)

var (
    errRequestFailed    = "Unable to open this link: %v"
    errServerFailure    = "Unable to fetch this resource (Status Code = %d)"
    errEncoding         = "Unable to normalize encoding: %q"
    errEmptyFeed        = "This feed is empty"
    errResourceNotFound = "Resource not found (404), this feed doesn't exist anymore, check the feed URL"
    errNotAuthorized    = "You are not authorized to access this resource (invalid username/password)"
)

// Exec executes a HTTP request and handles errors.
func Exec(request *client.Client) (*client.Response, *errors.LocalizedError) {
    response, err := request.Get()
    if err != nil {
        if e, ok := err.(*errors.LocalizedError); ok {
            return nil, e
        }
        return nil, errors.NewLocalizedError(errRequestFailed, err)
    }

    if response.IsNotFound() {
        return nil, errors.NewLocalizedError(errResourceNotFound)
    }

    if response.IsNotAuthorized() {
        return nil, errors.NewLocalizedError(errNotAuthorized)
    }

    if response.HasServerFailure() {
        return nil, errors.NewLocalizedError(errServerFailure, response.StatusCode)
    }

    if response.StatusCode != 304 {
        // Content-Length = -1 when no Content-Length header is sent.
        if response.ContentLength == 0 {
            return nil, errors.NewLocalizedError(errEmptyFeed)
        }

        if err := response.EnsureUnicodeBody(); err != nil {
            return nil, errors.NewLocalizedError(errEncoding, err)
        }
    }

    return response, nil
}
internal/reader/fetcher/request_builder.go (new file, 168 lines)
@@ -0,0 +1,168 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package fetcher // import "miniflux.app/v2/internal/reader/fetcher"

import (
    "crypto/tls"
    "encoding/base64"
    "log/slog"
    "net"
    "net/http"
    "net/url"
    "time"
)

const (
    defaultHTTPClientTimeout     = 20
    defaultHTTPClientMaxBodySize = 15 * 1024 * 1024
)

type RequestBuilder struct {
    headers          http.Header
    clientProxyURL   string
    useClientProxy   bool
    clientTimeout    int
    withoutRedirects bool
    ignoreTLSErrors  bool
}

func NewRequestBuilder() *RequestBuilder {
    return &RequestBuilder{
        headers:       make(http.Header),
        clientTimeout: defaultHTTPClientTimeout,
    }
}

func (r *RequestBuilder) WithHeader(key, value string) *RequestBuilder {
    r.headers.Set(key, value)
    return r
}

func (r *RequestBuilder) WithETag(etag string) *RequestBuilder {
    if etag != "" {
        r.headers.Set("If-None-Match", etag)
    }
    return r
}

func (r *RequestBuilder) WithLastModified(lastModified string) *RequestBuilder {
    if lastModified != "" {
        r.headers.Set("If-Modified-Since", lastModified)
    }
    return r
}

func (r *RequestBuilder) WithUserAgent(userAgent string) *RequestBuilder {
    if userAgent != "" {
        r.headers.Set("User-Agent", userAgent)
    } else {
        r.headers.Del("User-Agent")
    }
    return r
}

func (r *RequestBuilder) WithCookie(cookie string) *RequestBuilder {
    if cookie != "" {
        r.headers.Set("Cookie", cookie)
    }
    return r
}

func (r *RequestBuilder) WithUsernameAndPassword(username, password string) *RequestBuilder {
    if username != "" && password != "" {
        r.headers.Set("Authorization", "Basic "+base64.StdEncoding.EncodeToString([]byte(username+":"+password)))
    }
    return r
}

func (r *RequestBuilder) WithProxy(proxyURL string) *RequestBuilder {
    r.clientProxyURL = proxyURL
    return r
}

func (r *RequestBuilder) UseProxy(value bool) *RequestBuilder {
    r.useClientProxy = value
    return r
}

func (r *RequestBuilder) WithTimeout(timeout int) *RequestBuilder {
    r.clientTimeout = timeout
    return r
}

func (r *RequestBuilder) WithoutRedirects() *RequestBuilder {
    r.withoutRedirects = true
    return r
}

func (r *RequestBuilder) IgnoreTLSErrors(value bool) *RequestBuilder {
    r.ignoreTLSErrors = value
    return r
}

func (r *RequestBuilder) ExecuteRequest(requestURL string) (*http.Response, error) {
    transport := &http.Transport{
        Proxy: http.ProxyFromEnvironment,
        DialContext: (&net.Dialer{
            // Default is 30s.
            Timeout: 10 * time.Second,

            // Default is 30s.
            KeepAlive: 15 * time.Second,
        }).DialContext,

        // Default is 100.
        MaxIdleConns: 50,

        // Default is 90s.
        IdleConnTimeout: 10 * time.Second,

        TLSClientConfig: &tls.Config{
            InsecureSkipVerify: r.ignoreTLSErrors,
        },
    }

    if r.useClientProxy && r.clientProxyURL != "" {
        if proxyURL, err := url.Parse(r.clientProxyURL); err != nil {
            slog.Warn("Unable to parse proxy URL",
                slog.String("proxy_url", r.clientProxyURL),
                slog.Any("error", err),
            )
        } else {
            transport.Proxy = http.ProxyURL(proxyURL)
        }
    }

    client := &http.Client{
        Timeout: time.Duration(r.clientTimeout) * time.Second,
    }

    if r.withoutRedirects {
        client.CheckRedirect = func(req *http.Request, via []*http.Request) error {
            return http.ErrUseLastResponse
        }
    }

    client.Transport = transport

    req, err := http.NewRequest("GET", requestURL, nil)
    if err != nil {
        return nil, err
    }

    req.Header = r.headers
    req.Header.Set("Accept", "*/*")
    req.Header.Set("Connection", "close")

    slog.Debug("Making outgoing request", slog.Group("request",
        slog.String("method", req.Method),
        slog.String("url", req.URL.String()),
        slog.Any("headers", req.Header),
        slog.Bool("without_redirects", r.withoutRedirects),
        slog.Bool("with_proxy", r.useClientProxy),
        slog.String("proxy_url", r.clientProxyURL),
    ))

    return client.Do(req)
}
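Every With*/Use*/Ignore* method returns the receiver, so callers can either configure the builder step by step (as the handler package does below) or chain the calls. A short, hypothetical usage sketch; the URL, credentials and proxy value are placeholders:

package main

import (
    "fmt"
    "io"

    "miniflux.app/v2/internal/reader/fetcher"
)

func main() {
    requestBuilder := fetcher.NewRequestBuilder().
        WithUserAgent("Mozilla/5.0 (compatible; example-bot)").
        WithUsernameAndPassword("user", "secret").
        WithProxy("http://127.0.0.1:8080").
        UseProxy(true).
        WithTimeout(30).
        // Well-known URL probing disables redirects so that a catch-all
        // home-page redirect is not mistaken for a feed (see tryWellKnownUrls below).
        WithoutRedirects()

    response, err := requestBuilder.ExecuteRequest("https://example.org/feed.xml")
    if err != nil {
        fmt.Println("request failed:", err)
        return
    }
    defer response.Body.Close()

    body, _ := io.ReadAll(response.Body)
    fmt.Printf("fetched %d bytes\n", len(body))
}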
internal/reader/fetcher/response_handler.go (new file, 147 lines)
@@ -0,0 +1,147 @@
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package fetcher // import "miniflux.app/v2/internal/reader/fetcher"

import (
    "crypto/x509"
    "errors"
    "fmt"
    "io"
    "net"
    "net/http"

    "miniflux.app/v2/internal/locale"
)

type ResponseHandler struct {
    httpResponse *http.Response
    clientErr    error
}

func NewResponseHandler(httpResponse *http.Response, clientErr error) *ResponseHandler {
    return &ResponseHandler{httpResponse: httpResponse, clientErr: clientErr}
}

func (r *ResponseHandler) EffectiveURL() string {
    return r.httpResponse.Request.URL.String()
}

func (r *ResponseHandler) ContentType() string {
    return r.httpResponse.Header.Get("Content-Type")
}

func (r *ResponseHandler) LastModified() string {
    // Ignore caching headers for feeds that do not want any cache.
    if r.httpResponse.Header.Get("Expires") == "0" {
        return ""
    }
    return r.httpResponse.Header.Get("Last-Modified")
}

func (r *ResponseHandler) ETag() string {
    // Ignore caching headers for feeds that do not want any cache.
    if r.httpResponse.Header.Get("Expires") == "0" {
        return ""
    }
    return r.httpResponse.Header.Get("ETag")
}

func (r *ResponseHandler) IsModified(lastEtagValue, lastModifiedValue string) bool {
    if r.httpResponse.StatusCode == http.StatusNotModified {
        return false
    }

    if r.ETag() != "" && r.ETag() == lastEtagValue {
        return false
    }

    if r.LastModified() != "" && r.LastModified() == lastModifiedValue {
        return false
    }

    return true
}

func (r *ResponseHandler) Close() {
    if r.httpResponse != nil && r.httpResponse.Body != nil && r.clientErr == nil {
        r.httpResponse.Body.Close()
    }
}

func (r *ResponseHandler) Body(maxBodySize int64) io.ReadCloser {
    return http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize)
}

func (r *ResponseHandler) ReadBody(maxBodySize int64) ([]byte, *locale.LocalizedErrorWrapper) {
    limitedReader := http.MaxBytesReader(nil, r.httpResponse.Body, maxBodySize)

    buffer, err := io.ReadAll(limitedReader)
    if err != nil && err != io.EOF {
        if err == io.ErrUnexpectedEOF {
            return nil, locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: response body too large: %w", err), "error.http_response_too_large")
        }

        return nil, locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: unable to read response body: %w", err), "error.http_body_read", err)
    }

    if len(buffer) == 0 {
        return nil, locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: empty response body"), "error.http_empty_response_body")
    }

    return buffer, nil
}

func (r *ResponseHandler) LocalizedError() *locale.LocalizedErrorWrapper {
    if r.clientErr != nil {
        switch r.clientErr.(type) {
        case x509.CertificateInvalidError, x509.UnknownAuthorityError, x509.HostnameError:
            return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.tls_error", r.clientErr.Error())
        case *net.OpError:
            return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.network_operation", r.clientErr.Error())
        case net.Error:
            networkErr := r.clientErr.(net.Error)
            if networkErr.Timeout() {
                return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.network_timeout", r.clientErr.Error())
            }
        }

        if errors.Is(r.clientErr, io.EOF) {
            return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.http_empty_response")
        }

        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: %w", r.clientErr), "error.http_client_error", r.clientErr.Error())
    }

    switch r.httpResponse.StatusCode {
    case http.StatusUnauthorized:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: access unauthorized (401 status code)"), "error.http_not_authorized")
    case http.StatusForbidden:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: access forbidden (403 status code)"), "error.http_forbidden")
    case http.StatusTooManyRequests:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: too many requests (429 status code)"), "error.http_too_many_requests")
    case http.StatusNotFound, http.StatusGone:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: resource not found (%d status code)", r.httpResponse.StatusCode), "error.http_resource_not_found")
    case http.StatusInternalServerError:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: remote server error (%d status code)", r.httpResponse.StatusCode), "error.http_internal_server_error")
    case http.StatusBadGateway:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: bad gateway (%d status code)", r.httpResponse.StatusCode), "error.http_bad_gateway")
    case http.StatusServiceUnavailable:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: service unavailable (%d status code)", r.httpResponse.StatusCode), "error.http_service_unavailable")
    case http.StatusGatewayTimeout:
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: gateway timeout (%d status code)", r.httpResponse.StatusCode), "error.http_gateway_timeout")
    }

    if r.httpResponse.StatusCode >= 400 {
        return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: unexpected status code (%d status code)", r.httpResponse.StatusCode), "error.http_unexpected_status_code", r.httpResponse.StatusCode)
    }

    if r.httpResponse.StatusCode != 304 {
        // Content-Length = -1 when no Content-Length header is sent.
        if r.httpResponse.ContentLength == 0 {
            return locale.NewLocalizedErrorWrapper(fmt.Errorf("fetcher: empty response body"), "error.http_empty_response_body")
        }
    }

    return nil
}
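ResponseHandler takes over what browser.Exec used to do, but instead of returning a pre-localized string it hands callers a locale.LocalizedErrorWrapper that wraps a plain Go error plus a translation key. The condensed sketch below shows how the handler package (next hunks) consumes it; the function itself is illustrative and not part of the commit:

package example

import (
    "miniflux.app/v2/internal/config"
    "miniflux.app/v2/internal/locale"
    "miniflux.app/v2/internal/model"
    "miniflux.app/v2/internal/reader/fetcher"
)

// refreshOnce condenses the fetch-and-cache-check half of RefreshFeed.
func refreshOnce(requestBuilder *fetcher.RequestBuilder, feed *model.Feed, userLanguage string) *locale.LocalizedErrorWrapper {
    responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(feed.FeedURL))
    defer responseHandler.Close()

    if localizedError := responseHandler.LocalizedError(); localizedError != nil {
        // The same wrapper is translated for display on the feed record
        // and returned unchanged to the caller.
        feed.WithTranslatedErrorMessage(localizedError.Translate(userLanguage))
        return localizedError
    }

    if !responseHandler.IsModified(feed.EtagHeader, feed.LastModifiedHeader) {
        // 304 Not Modified or matching ETag/Last-Modified validators.
        return nil
    }

    responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
    if localizedError != nil {
        return localizedError
    }

    _ = responseBody // parsing and storage are handled by the caller
    return nil
}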
|
@ -4,16 +4,15 @@
|
|||
package handler // import "miniflux.app/v2/internal/reader/handler"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/integration"
|
||||
"miniflux.app/v2/internal/locale"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/fetcher"
|
||||
"miniflux.app/v2/internal/reader/icon"
|
||||
"miniflux.app/v2/internal/reader/parser"
|
||||
"miniflux.app/v2/internal/reader/processor"
|
||||
|
@ -21,13 +20,13 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
errDuplicate = "This feed already exists (%s)"
|
||||
errNotFound = "Feed %d not found"
|
||||
errCategoryNotFound = "Category not found for this user"
|
||||
ErrCategoryNotFound = errors.New("fetcher: category not found")
|
||||
ErrFeedNotFound = errors.New("fetcher: feed not found")
|
||||
ErrDuplicatedFeed = errors.New("fetcher: duplicated feed")
|
||||
)
|
||||
|
||||
// CreateFeed fetch, parse and store a new feed.
|
||||
func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model.FeedCreationRequest) (*model.Feed, error) {
|
||||
func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model.FeedCreationRequest) (*model.Feed, *locale.LocalizedErrorWrapper) {
|
||||
slog.Debug("Begin feed creation process",
|
||||
slog.Int64("user_id", userID),
|
||||
slog.String("feed_url", feedCreationRequest.FeedURL),
|
||||
|
@ -35,35 +34,43 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
|
|||
|
||||
user, storeErr := store.UserByID(userID)
|
||||
if storeErr != nil {
|
||||
return nil, storeErr
|
||||
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
}
|
||||
|
||||
if !store.CategoryIDExists(userID, feedCreationRequest.CategoryID) {
|
||||
return nil, errors.NewLocalizedError(errCategoryNotFound)
|
||||
return nil, locale.NewLocalizedErrorWrapper(ErrCategoryNotFound, "error.category_not_found")
|
||||
}
|
||||
|
||||
request := client.NewClientWithConfig(feedCreationRequest.FeedURL, config.Opts)
|
||||
request.WithCredentials(feedCreationRequest.Username, feedCreationRequest.Password)
|
||||
request.WithUserAgent(feedCreationRequest.UserAgent)
|
||||
request.WithCookie(feedCreationRequest.Cookie)
|
||||
request.AllowSelfSignedCertificates = feedCreationRequest.AllowSelfSignedCertificates
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUsernameAndPassword(feedCreationRequest.Username, feedCreationRequest.Password)
|
||||
requestBuilder.WithUserAgent(feedCreationRequest.UserAgent)
|
||||
requestBuilder.WithCookie(feedCreationRequest.Cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(feedCreationRequest.FetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(feedCreationRequest.AllowSelfSignedCertificates)
|
||||
|
||||
if feedCreationRequest.FetchViaProxy {
|
||||
request.WithProxy()
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(feedCreationRequest.FeedURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to fetch feed", slog.String("feed_url", feedCreationRequest.FeedURL), slog.Any("error", localizedError.Error()))
|
||||
return nil, localizedError
|
||||
}
|
||||
|
||||
response, requestErr := browser.Exec(request)
|
||||
if requestErr != nil {
|
||||
return nil, requestErr
|
||||
responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
|
||||
if localizedError != nil {
|
||||
slog.Warn("Unable to fetch feed", slog.String("feed_url", feedCreationRequest.FeedURL), slog.Any("error", localizedError.Error()))
|
||||
return nil, localizedError
|
||||
}
|
||||
|
||||
if store.FeedURLExists(userID, response.EffectiveURL) {
|
||||
return nil, errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
||||
if store.FeedURLExists(userID, responseHandler.EffectiveURL()) {
|
||||
return nil, locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed")
|
||||
}
|
||||
|
||||
subscription, parseErr := parser.ParseFeed(response.EffectiveURL, response.BodyAsString())
|
||||
subscription, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), string(responseBody))
|
||||
if parseErr != nil {
|
||||
return nil, parseErr
|
||||
return nil, locale.NewLocalizedErrorWrapper(parseErr, "error.unable_to_parse_feed", parseErr)
|
||||
}
|
||||
|
||||
subscription.UserID = userID
|
||||
|
@ -81,14 +88,16 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
|
|||
subscription.BlocklistRules = feedCreationRequest.BlocklistRules
|
||||
subscription.KeeplistRules = feedCreationRequest.KeeplistRules
|
||||
subscription.UrlRewriteRules = feedCreationRequest.UrlRewriteRules
|
||||
subscription.EtagHeader = responseHandler.ETag()
|
||||
subscription.LastModifiedHeader = responseHandler.LastModified()
|
||||
subscription.FeedURL = responseHandler.EffectiveURL()
|
||||
subscription.WithCategoryID(feedCreationRequest.CategoryID)
|
||||
subscription.WithClientResponse(response)
|
||||
subscription.CheckedNow()
|
||||
|
||||
processor.ProcessFeedEntries(store, subscription, user, true)
|
||||
|
||||
if storeErr := store.CreateFeed(subscription); storeErr != nil {
|
||||
return nil, storeErr
|
||||
return nil, locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
}
|
||||
|
||||
slog.Debug("Created feed",
|
||||
|
@ -99,18 +108,16 @@ func CreateFeed(store *storage.Storage, userID int64, feedCreationRequest *model
|
|||
|
||||
checkFeedIcon(
|
||||
store,
|
||||
requestBuilder,
|
||||
subscription.ID,
|
||||
subscription.SiteURL,
|
||||
subscription.IconURL,
|
||||
feedCreationRequest.UserAgent,
|
||||
feedCreationRequest.FetchViaProxy,
|
||||
feedCreationRequest.AllowSelfSignedCertificates,
|
||||
)
|
||||
return subscription, nil
|
||||
}
|
||||
|
||||
// RefreshFeed refreshes a feed.
|
||||
func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool) error {
|
||||
func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool) *locale.LocalizedErrorWrapper {
|
||||
slog.Debug("Begin feed refresh process",
|
||||
slog.Int64("user_id", userID),
|
||||
slog.Int64("feed_id", feedID),
|
||||
|
@ -119,18 +126,16 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
|
||||
user, storeErr := store.UserByID(userID)
|
||||
if storeErr != nil {
|
||||
return storeErr
|
||||
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
}
|
||||
|
||||
printer := locale.NewPrinter(user.Language)
|
||||
|
||||
originalFeed, storeErr := store.FeedByID(userID, feedID)
|
||||
if storeErr != nil {
|
||||
return storeErr
|
||||
return locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
}
|
||||
|
||||
if originalFeed == nil {
|
||||
return errors.NewLocalizedError(errNotFound, feedID)
|
||||
return locale.NewLocalizedErrorWrapper(ErrFeedNotFound, "error.feed_not_found")
|
||||
}
|
||||
|
||||
weeklyEntryCount := 0
|
||||
|
@ -138,52 +143,62 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
var weeklyCountErr error
|
||||
weeklyEntryCount, weeklyCountErr = store.WeeklyFeedEntryCount(userID, feedID)
|
||||
if weeklyCountErr != nil {
|
||||
return weeklyCountErr
|
||||
return locale.NewLocalizedErrorWrapper(weeklyCountErr, "error.database_error", weeklyCountErr)
|
||||
}
|
||||
}
|
||||
|
||||
originalFeed.CheckedNow()
|
||||
originalFeed.ScheduleNextCheck(weeklyEntryCount)
|
||||
|
||||
request := client.NewClientWithConfig(originalFeed.FeedURL, config.Opts)
|
||||
request.WithCredentials(originalFeed.Username, originalFeed.Password)
|
||||
request.WithUserAgent(originalFeed.UserAgent)
|
||||
request.WithCookie(originalFeed.Cookie)
|
||||
request.AllowSelfSignedCertificates = originalFeed.AllowSelfSignedCertificates
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUsernameAndPassword(originalFeed.Username, originalFeed.Password)
|
||||
requestBuilder.WithUserAgent(originalFeed.UserAgent)
|
||||
requestBuilder.WithCookie(originalFeed.Cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(originalFeed.FetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(originalFeed.AllowSelfSignedCertificates)
|
||||
|
||||
if !originalFeed.IgnoreHTTPCache {
|
||||
request.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
|
||||
}
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(originalFeed.FeedURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if originalFeed.FetchViaProxy {
|
||||
request.WithProxy()
|
||||
}
|
||||
|
||||
response, requestErr := browser.Exec(request)
|
||||
if requestErr != nil {
|
||||
originalFeed.WithError(requestErr.Localize(printer))
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error()))
|
||||
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
|
||||
store.UpdateFeedError(originalFeed)
|
||||
return requestErr
|
||||
return localizedError
|
||||
}
|
||||
|
||||
if store.AnotherFeedURLExists(userID, originalFeed.ID, response.EffectiveURL) {
|
||||
storeErr := errors.NewLocalizedError(errDuplicate, response.EffectiveURL)
|
||||
originalFeed.WithError(storeErr.Error())
|
||||
if store.AnotherFeedURLExists(userID, originalFeed.ID, responseHandler.EffectiveURL()) {
|
||||
localizedError := locale.NewLocalizedErrorWrapper(ErrDuplicatedFeed, "error.duplicated_feed")
|
||||
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
|
||||
store.UpdateFeedError(originalFeed)
|
||||
return storeErr
|
||||
return localizedError
|
||||
}
|
||||
|
||||
if originalFeed.IgnoreHTTPCache || response.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
||||
if originalFeed.IgnoreHTTPCache || responseHandler.IsModified(originalFeed.EtagHeader, originalFeed.LastModifiedHeader) {
|
||||
slog.Debug("Feed modified",
|
||||
slog.Int64("user_id", userID),
|
||||
slog.Int64("feed_id", feedID),
|
||||
)
|
||||
|
||||
updatedFeed, parseErr := parser.ParseFeed(response.EffectiveURL, response.BodyAsString())
|
||||
responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
|
||||
if localizedError != nil {
|
||||
slog.Warn("Unable to fetch feed", slog.String("feed_url", originalFeed.FeedURL), slog.Any("error", localizedError.Error()))
|
||||
return localizedError
|
||||
}
|
||||
|
||||
updatedFeed, parseErr := parser.ParseFeed(responseHandler.EffectiveURL(), string(responseBody))
|
||||
if parseErr != nil {
|
||||
originalFeed.WithError(parseErr.Localize(printer))
|
||||
localizedError := locale.NewLocalizedErrorWrapper(parseErr, "error.unable_to_parse_feed")
|
||||
|
||||
if errors.Is(parseErr, parser.ErrFeedFormatNotDetected) {
|
||||
localizedError = locale.NewLocalizedErrorWrapper(parseErr, "error.feed_format_not_detected", parseErr)
|
||||
}
|
||||
|
||||
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
|
||||
store.UpdateFeedError(originalFeed)
|
||||
return parseErr
|
||||
return localizedError
|
||||
}
|
||||
|
||||
// If the feed has a TTL defined, we use it to make sure we don't check it too often.
|
||||
|
@ -215,9 +230,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
updateExistingEntries := forceRefresh || !originalFeed.Crawler
|
||||
newEntries, storeErr := store.RefreshFeedEntries(originalFeed.UserID, originalFeed.ID, originalFeed.Entries, updateExistingEntries)
|
||||
if storeErr != nil {
|
||||
originalFeed.WithError(storeErr.Error())
|
||||
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
|
||||
store.UpdateFeedError(originalFeed)
|
||||
return storeErr
|
||||
return localizedError
|
||||
}
|
||||
|
||||
userIntegrations, intErr := store.Integration(userID)
|
||||
|
@ -233,16 +249,15 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
|
||||
// We update caching headers only if the feed has been modified,
|
||||
// because some websites don't return the same headers when replying with a 304.
|
||||
originalFeed.WithClientResponse(response)
|
||||
originalFeed.EtagHeader = responseHandler.ETag()
|
||||
originalFeed.LastModifiedHeader = responseHandler.LastModified()
|
||||
|
||||
checkFeedIcon(
|
||||
store,
|
||||
requestBuilder,
|
||||
originalFeed.ID,
|
||||
originalFeed.SiteURL,
|
||||
updatedFeed.IconURL,
|
||||
originalFeed.UserAgent,
|
||||
originalFeed.FetchViaProxy,
|
||||
originalFeed.AllowSelfSignedCertificates,
|
||||
)
|
||||
} else {
|
||||
slog.Debug("Feed not modified",
|
||||
|
@ -254,17 +269,18 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool
|
|||
originalFeed.ResetErrorCounter()
|
||||
|
||||
if storeErr := store.UpdateFeed(originalFeed); storeErr != nil {
|
||||
originalFeed.WithError(storeErr.Error())
|
||||
localizedError := locale.NewLocalizedErrorWrapper(storeErr, "error.database_error", storeErr)
|
||||
originalFeed.WithTranslatedErrorMessage(localizedError.Translate(user.Language))
|
||||
store.UpdateFeedError(originalFeed)
|
||||
return storeErr
|
||||
return localizedError
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkFeedIcon(store *storage.Storage, feedID int64, websiteURL, feedIconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) {
|
||||
func checkFeedIcon(store *storage.Storage, requestBuilder *fetcher.RequestBuilder, feedID int64, websiteURL, feedIconURL string) {
|
||||
if !store.HasIcon(feedID) {
|
||||
iconFinder := icon.NewIconFinder(websiteURL, feedIconURL, userAgent, fetchViaProxy, allowSelfSignedCertificates)
|
||||
iconFinder := icon.NewIconFinder(requestBuilder, websiteURL, feedIconURL)
|
||||
if icon, err := iconFinder.FindIcon(); err != nil {
|
||||
slog.Debug("Unable to find feed icon",
|
||||
slog.Int64("feed_id", feedID),
|
||||
|
|
|
@ -13,28 +13,24 @@ import (
|
|||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/crypto"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/fetcher"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
type IconFinder struct {
|
||||
websiteURL string
|
||||
feedIconURL string
|
||||
userAgent string
|
||||
fetchViaProxy bool
|
||||
allowSelfSignedCertificates bool
|
||||
requestBuilder *fetcher.RequestBuilder
|
||||
websiteURL string
|
||||
feedIconURL string
|
||||
}
|
||||
|
||||
func NewIconFinder(websiteURL, feedIconURL, userAgent string, fetchViaProxy, allowSelfSignedCertificates bool) *IconFinder {
|
||||
func NewIconFinder(requestBuilder *fetcher.RequestBuilder, websiteURL, feedIconURL string) *IconFinder {
|
||||
return &IconFinder{
|
||||
websiteURL: websiteURL,
|
||||
feedIconURL: feedIconURL,
|
||||
userAgent: userAgent,
|
||||
fetchViaProxy: fetchViaProxy,
|
||||
allowSelfSignedCertificates: allowSelfSignedCertificates,
|
||||
requestBuilder: requestBuilder,
|
||||
websiteURL: websiteURL,
|
||||
feedIconURL: feedIconURL,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,12 +101,16 @@ func (f *IconFinder) FetchIconsFromHTMLDocument() (*model.Icon, error) {
|
|||
slog.String("website_url", f.websiteURL),
|
||||
)
|
||||
|
||||
documentBody, err := f.FetchRootDocument()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
rootURL := urllib.RootURL(f.websiteURL)
|
||||
|
||||
responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(rootURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download website index page: %w", localizedError.Error())
|
||||
}
|
||||
|
||||
iconURLs, err := findIconURLsFromHTMLDocument(documentBody)
|
||||
iconURLs, err := findIconURLsFromHTMLDocument(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -151,64 +151,28 @@ func (f *IconFinder) FetchIconsFromHTMLDocument() (*model.Icon, error) {
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (f *IconFinder) FetchRootDocument() (io.Reader, error) {
|
||||
rootURL := urllib.RootURL(f.websiteURL)
|
||||
|
||||
clt := client.NewClientWithConfig(rootURL, config.Opts)
|
||||
clt.WithUserAgent(f.userAgent)
|
||||
clt.AllowSelfSignedCertificates = f.allowSelfSignedCertificates
|
||||
|
||||
if f.fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download website index page: %v", err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("icon: unable to download website index page: status=%d", response.StatusCode)
|
||||
}
|
||||
|
||||
return response.Body, nil
|
||||
}
|
||||
|
||||
func (f *IconFinder) DownloadIcon(iconURL string) (*model.Icon, error) {
|
||||
slog.Debug("Downloading icon",
|
||||
slog.String("website_url", f.websiteURL),
|
||||
slog.String("icon_url", iconURL),
|
||||
)
|
||||
|
||||
clt := client.NewClientWithConfig(iconURL, config.Opts)
|
||||
clt.WithUserAgent(f.userAgent)
|
||||
clt.AllowSelfSignedCertificates = f.allowSelfSignedCertificates
|
||||
if f.fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
responseHandler := fetcher.NewResponseHandler(f.requestBuilder.ExecuteRequest(iconURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download website icon: %w", localizedError.Error())
|
||||
}
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to download icon %s: %v", iconURL, err)
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return nil, fmt.Errorf("icon: unable to download icon %s: status=%d", iconURL, response.StatusCode)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("icon: unable to read downloaded icon from %s: %v", iconURL, err)
|
||||
}
|
||||
|
||||
if len(body) == 0 {
|
||||
return nil, fmt.Errorf("icon: downloaded icon is empty, iconURL=%s", iconURL)
|
||||
responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
|
||||
if localizedError != nil {
|
||||
return nil, fmt.Errorf("icon: unable to read response body: %w", localizedError.Error())
|
||||
}
|
||||
|
||||
icon := &model.Icon{
|
||||
Hash: crypto.HashFromBytes(body),
|
||||
MimeType: response.ContentType,
|
||||
Content: body,
|
||||
Hash: crypto.HashFromBytes(responseBody),
|
||||
MimeType: responseHandler.ContentType(),
|
||||
Content: responseBody,
|
||||
}
|
||||
|
||||
return icon, nil
|
||||
|
|
|
@@ -5,18 +5,17 @@ package json // import "miniflux.app/v2/internal/reader/json"

import (
    "encoding/json"
    "fmt"
    "io"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/model"
)

// Parse returns a normalized feed struct from a JSON feed.
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
func Parse(baseURL string, data io.Reader) (*model.Feed, error) {
    feed := new(jsonFeed)
    decoder := json.NewDecoder(data)
    if err := decoder.Decode(&feed); err != nil {
        return nil, errors.NewLocalizedError("Unable to parse JSON Feed: %q", err)
    if err := json.NewDecoder(data).Decode(&feed); err != nil {
        return nil, fmt.Errorf("json: unable to parse feed: %w", err)
    }

    return feed.Transform(baseURL), nil
@@ -5,14 +5,14 @@ package opml // import "miniflux.app/v2/internal/reader/opml"

import (
    "encoding/xml"
    "fmt"
    "io"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/reader/encoding"
)

// Parse reads an OPML file and returns a SubcriptionList.
func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {
func Parse(data io.Reader) (SubcriptionList, error) {
    opmlDocument := NewOPMLDocument()
    decoder := xml.NewDecoder(data)
    decoder.Entity = xml.HTMLEntity
@@ -21,7 +21,7 @@ func Parse(data io.Reader) (SubcriptionList, *errors.LocalizedError) {

    err := decoder.Decode(opmlDocument)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse OPML file: %q", err)
        return nil, fmt.Errorf("opml: unable to parse document: %w", err)
    }

    return getSubscriptionsFromOutlines(opmlDocument.Outlines, ""), nil
@@ -4,9 +4,9 @@
package parser // import "miniflux.app/v2/internal/reader/parser"

import (
    "errors"
    "strings"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/model"
    "miniflux.app/v2/internal/reader/atom"
    "miniflux.app/v2/internal/reader/json"
@@ -14,8 +14,10 @@ import (
    "miniflux.app/v2/internal/reader/rss"
)

var ErrFeedFormatNotDetected = errors.New("parser: unable to detect feed format")

// ParseFeed analyzes the input data and returns a normalized feed object.
func ParseFeed(baseURL, data string) (*model.Feed, *errors.LocalizedError) {
func ParseFeed(baseURL, data string) (*model.Feed, error) {
    switch DetectFeedFormat(data) {
    case FormatAtom:
        return atom.Parse(baseURL, strings.NewReader(data))
@@ -26,6 +28,6 @@ func ParseFeed(baseURL, data string) (*model.Feed, *errors.LocalizedError) {
    case FormatRDF:
        return rdf.Parse(baseURL, strings.NewReader(data))
    default:
        return nil, errors.NewLocalizedError("Unsupported feed format")
        return nil, ErrFeedFormatNotDetected
    }
}
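Because ParseFeed now returns a plain error, a failed format detection is signalled through the exported sentinel ErrFeedFormatNotDetected, which callers test with errors.Is; RefreshFeed in the handler hunks above does exactly that to pick a more specific translation key. A minimal sketch (the wrapping function is illustrative):

package example

import (
    "errors"
    "fmt"

    "miniflux.app/v2/internal/reader/parser"
)

func parseOrExplain(baseURL, payload string) error {
    feed, err := parser.ParseFeed(baseURL, payload)
    if err != nil {
        if errors.Is(err, parser.ErrFeedFormatNotDetected) {
            // The payload is neither Atom, RSS, RDF nor JSON Feed.
            return fmt.Errorf("unsupported document at %s: %w", baseURL, err)
        }
        // The format was detected but the document could not be decoded.
        return err
    }

    _ = feed
    return nil
}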
|
|
@ -4,11 +4,7 @@
|
|||
package parser // import "miniflux.app/v2/internal/reader/parser"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
)
|
||||
|
||||
func TestParseAtom(t *testing.T) {
|
||||
|
@ -301,50 +297,3 @@ func TestParseEmptyFeed(t *testing.T) {
|
|||
t.Error("ParseFeed must returns an error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDifferentEncodingWithResponse(t *testing.T) {
|
||||
var unicodeTestCases = []struct {
|
||||
filename, contentType string
|
||||
index int
|
||||
title string
|
||||
}{
|
||||
// Arabic language encoded in UTF-8.
|
||||
{"urdu_UTF8.xml", "text/xml; charset=utf-8", 0, "امریکی عسکری امداد کی بندش کی وجوہات: انڈیا سے جنگ، جوہری پروگرام اور اب دہشت گردوں کی پشت پناہی"},
|
||||
|
||||
// Windows-1251 encoding and not charset in HTTP header.
|
||||
{"encoding_WINDOWS-1251.xml", "text/xml", 0, "Цитата #17703"},
|
||||
|
||||
// No encoding in XML, but defined in HTTP Content-Type header.
|
||||
{"no_encoding_ISO-8859-1.xml", "application/xml; charset=ISO-8859-1", 2, "La criminalité liée surtout à... l'ennui ?"},
|
||||
|
||||
// ISO-8859-1 encoding defined in XML and HTTP header.
|
||||
{"encoding_ISO-8859-1.xml", "application/rss+xml; charset=ISO-8859-1", 5, "Projekt Jedi: Microsoft will weiter mit US-Militär zusammenarbeiten"},
|
||||
|
||||
// UTF-8 encoding defined in RDF document and HTTP header.
|
||||
{"rdf_UTF8.xml", "application/rss+xml; charset=utf-8", 1, "Mega-Deal: IBM übernimmt Red Hat"},
|
||||
|
||||
// UTF-8 encoding defined only in RDF document.
|
||||
{"rdf_UTF8.xml", "application/rss+xml", 1, "Mega-Deal: IBM übernimmt Red Hat"},
|
||||
}
|
||||
|
||||
for _, tc := range unicodeTestCases {
|
||||
content, err := os.ReadFile("testdata/" + tc.filename)
|
||||
if err != nil {
|
||||
t.Fatalf(`Unable to read file %q: %v`, tc.filename, err)
|
||||
}
|
||||
|
||||
r := &client.Response{Body: bytes.NewReader(content), ContentType: tc.contentType}
|
||||
if encodingErr := r.EnsureUnicodeBody(); encodingErr != nil {
|
||||
t.Fatalf(`Encoding error for %q: %v`, tc.filename, encodingErr)
|
||||
}
|
||||
|
||||
feed, parseErr := ParseFeed("https://example.org/", r.BodyAsString())
|
||||
if parseErr != nil {
|
||||
t.Fatalf(`Parsing error for %q - %q: %v`, tc.filename, tc.contentType, parseErr)
|
||||
}
|
||||
|
||||
if feed.Entries[tc.index].Title != tc.title {
|
||||
t.Errorf(`Unexpected title, got %q instead of %q`, feed.Entries[tc.index].Title, tc.title)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,10 +12,9 @@ import (
|
|||
"time"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/metric"
|
||||
"miniflux.app/v2/internal/model"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/reader/fetcher"
|
||||
"miniflux.app/v2/internal/reader/readingtime"
|
||||
"miniflux.app/v2/internal/reader/rewrite"
|
||||
"miniflux.app/v2/internal/reader/sanitizer"
|
||||
|
@ -52,7 +51,7 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|||
continue
|
||||
}
|
||||
|
||||
url := getUrlFromEntry(feed, entry)
|
||||
websiteURL := getUrlFromEntry(feed, entry)
|
||||
entryIsNew := !store.EntryURLExists(feed.ID, entry.URL)
|
||||
if feed.Crawler && (entryIsNew || forceRefresh) {
|
||||
slog.Debug("Scraping entry",
|
||||
|
@ -64,13 +63,19 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|||
)
|
||||
|
||||
startTime := time.Now()
|
||||
content, scraperErr := scraper.Fetch(
|
||||
url,
|
||||
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUserAgent(feed.UserAgent)
|
||||
requestBuilder.WithCookie(feed.Cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(feed.FetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates)
|
||||
|
||||
content, scraperErr := scraper.ScrapeWebsite(
|
||||
requestBuilder,
|
||||
websiteURL,
|
||||
feed.ScraperRules,
|
||||
feed.UserAgent,
|
||||
feed.Cookie,
|
||||
feed.AllowSelfSignedCertificates,
|
||||
feed.FetchViaProxy,
|
||||
)
|
||||
|
||||
if config.Opts.HasMetricsCollector() {
|
||||
|
@ -96,10 +101,10 @@ func ProcessFeedEntries(store *storage.Storage, feed *model.Feed, user *model.Us
|
|||
}
|
||||
}
|
||||
|
||||
rewrite.Rewriter(url, entry, feed.RewriteRules)
|
||||
rewrite.Rewriter(websiteURL, entry, feed.RewriteRules)
|
||||
|
||||
// The sanitizer should always run at the end of the process to make sure unsafe HTML is filtered.
|
||||
entry.Content = sanitizer.Sanitize(url, entry.Content)
|
||||
entry.Content = sanitizer.Sanitize(websiteURL, entry.Content)
|
||||
|
||||
updateEntryReadingTime(store, feed, entry, entryIsNew, user)
|
||||
filteredEntries = append(filteredEntries, entry)
|
||||
|
@ -146,15 +151,20 @@ func isAllowedEntry(feed *model.Feed, entry *model.Entry) bool {
|
|||
// ProcessEntryWebPage downloads the entry web page and apply rewrite rules.
|
||||
func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User) error {
|
||||
startTime := time.Now()
|
||||
url := getUrlFromEntry(feed, entry)
|
||||
websiteURL := getUrlFromEntry(feed, entry)
|
||||
|
||||
content, scraperErr := scraper.Fetch(
|
||||
url,
|
||||
entry.Feed.ScraperRules,
|
||||
entry.Feed.UserAgent,
|
||||
entry.Feed.Cookie,
|
||||
feed.AllowSelfSignedCertificates,
|
||||
feed.FetchViaProxy,
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUserAgent(feed.UserAgent)
|
||||
requestBuilder.WithCookie(feed.Cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(feed.FetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(feed.AllowSelfSignedCertificates)
|
||||
|
||||
content, scraperErr := scraper.ScrapeWebsite(
|
||||
requestBuilder,
|
||||
websiteURL,
|
||||
feed.ScraperRules,
|
||||
)
|
||||
|
||||
if config.Opts.HasMetricsCollector() {
|
||||
|
@ -174,8 +184,8 @@ func ProcessEntryWebPage(feed *model.Feed, entry *model.Entry, user *model.User)
|
|||
entry.ReadingTime = readingtime.EstimateReadingTime(entry.Content, user.DefaultReadingSpeed, user.CJKReadingSpeed)
|
||||
}
|
||||
|
||||
rewrite.Rewriter(url, entry, entry.Feed.RewriteRules)
|
||||
entry.Content = sanitizer.Sanitize(url, entry.Content)
|
||||
rewrite.Rewriter(websiteURL, entry, entry.Feed.RewriteRules)
|
||||
entry.Content = sanitizer.Sanitize(websiteURL, entry.Content)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -270,14 +280,20 @@ func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
|
|||
return matches != nil
|
||||
}
|
||||
|
||||
func fetchYouTubeWatchTime(url string) (int, error) {
|
||||
clt := client.NewClientWithConfig(url, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return 0, browserErr
|
||||
func fetchYouTubeWatchTime(websiteURL string) (int, error) {
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to fetch YouTube page", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return 0, localizedError.Error()
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||
if docErr != nil {
|
||||
return 0, docErr
|
||||
}
|
||||
|
@ -295,14 +311,20 @@ func fetchYouTubeWatchTime(url string) (int, error) {
|
|||
return int(dur.Minutes()), nil
|
||||
}
|
||||
|
||||
func fetchOdyseeWatchTime(url string) (int, error) {
|
||||
clt := client.NewClientWithConfig(url, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
return 0, browserErr
|
||||
func fetchOdyseeWatchTime(websiteURL string) (int, error) {
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to fetch Odysee watch time", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return 0, localizedError.Error()
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||
if docErr != nil {
|
||||
return 0, docErr
|
||||
}
|
||||
|
|
|
@@ -4,20 +4,18 @@
package rdf // import "miniflux.app/v2/internal/reader/rdf"

import (
    "fmt"
    "io"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/model"
    "miniflux.app/v2/internal/reader/xml"
)

// Parse returns a normalized feed struct from a RDF feed.
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
func Parse(baseURL string, data io.Reader) (*model.Feed, error) {
    feed := new(rdfFeed)
    decoder := xml.NewDecoder(data)
    err := decoder.Decode(feed)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse RDF feed: %q", err)
    if err := xml.NewDecoder(data).Decode(feed); err != nil {
        return nil, fmt.Errorf("rdf: unable to parse feed: %w", err)
    }

    return feed.Transform(baseURL), nil
@@ -4,21 +4,18 @@
package rss // import "miniflux.app/v2/internal/reader/rss"

import (
    "fmt"
    "io"

    "miniflux.app/v2/internal/errors"
    "miniflux.app/v2/internal/model"
    "miniflux.app/v2/internal/reader/xml"
)

// Parse returns a normalized feed struct from a RSS feed.
func Parse(baseURL string, data io.Reader) (*model.Feed, *errors.LocalizedError) {
func Parse(baseURL string, data io.Reader) (*model.Feed, error) {
    feed := new(rssFeed)
    decoder := xml.NewDecoder(data)
    err := decoder.Decode(feed)
    if err != nil {
        return nil, errors.NewLocalizedError("Unable to parse RSS feed: %q", err)
    if err := xml.NewDecoder(data).Decode(feed); err != nil {
        return nil, fmt.Errorf("rss: unable to parse feed: %w", err)
    }

    return feed.Transform(baseURL), nil
}
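The parser packages (atom, json, opml, rdf, rss) now all wrap the decoder error with fmt.Errorf and the %w verb instead of formatting it into a LocalizedError string, so callers can still reach the underlying decoder error. A small, hypothetical illustration of what that enables; the sample payload is made up:

package example

import (
    "encoding/xml"
    "errors"
    "fmt"
    "strings"

    "miniflux.app/v2/internal/reader/rss"
)

func inspectParseError() {
    _, err := rss.Parse("https://example.org/", strings.NewReader("<rss><channel><title>broken"))
    if err == nil {
        return
    }

    // Because the parser wrapped the decoder error with %w, errors.As can
    // recover a concrete *xml.SyntaxError for logging or diagnostics.
    var syntaxErr *xml.SyntaxError
    if errors.As(err, &syntaxErr) {
        fmt.Printf("XML syntax error at line %d: %s\n", syntaxErr.Line, syntaxErr.Msg)
        return
    }

    fmt.Println("parse failed:", err)
}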
|
|
@ -4,67 +4,54 @@
|
|||
package scraper // import "miniflux.app/v2/internal/reader/scraper"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/reader/fetcher"
|
||||
"miniflux.app/v2/internal/reader/readability"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
||||
// Fetch downloads a web page and returns relevant contents.
|
||||
func Fetch(websiteURL, rules, userAgent string, cookie string, allowSelfSignedCertificates, useProxy bool) (string, error) {
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
if useProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
func ScrapeWebsite(requestBuilder *fetcher.RequestBuilder, websiteURL, rules string) (string, error) {
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
return "", err
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to scrape website", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return "", localizedError.Error()
|
||||
}
|
||||
|
||||
if response.HasServerFailure() {
|
||||
return "", errors.New("scraper: unable to download web page")
|
||||
}
|
||||
|
||||
if !isAllowedContentType(response.ContentType) {
|
||||
return "", fmt.Errorf("scraper: this resource is not a HTML document (%s)", response.ContentType)
|
||||
}
|
||||
|
||||
if err = response.EnsureUnicodeBody(); err != nil {
|
||||
return "", err
|
||||
if !isAllowedContentType(responseHandler.ContentType()) {
|
||||
return "", fmt.Errorf("scraper: this resource is not a HTML document (%s)", responseHandler.ContentType())
|
||||
}
|
||||
|
||||
// The entry URL could redirect somewhere else.
|
||||
sameSite := urllib.Domain(websiteURL) == urllib.Domain(response.EffectiveURL)
|
||||
websiteURL = response.EffectiveURL
|
||||
sameSite := urllib.Domain(websiteURL) == urllib.Domain(responseHandler.EffectiveURL())
|
||||
websiteURL = responseHandler.EffectiveURL()
|
||||
|
||||
if rules == "" {
|
||||
rules = getPredefinedScraperRules(websiteURL)
|
||||
}
|
||||
|
||||
var content string
|
||||
var err error
|
||||
|
||||
if sameSite && rules != "" {
|
||||
slog.Debug("Extracting content with custom rules",
|
||||
"url", websiteURL,
|
||||
"rules", rules,
|
||||
)
|
||||
content, err = scrapContent(response.Body, rules)
|
||||
content, err = findContentUsingCustomRules(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()), rules)
|
||||
} else {
|
||||
slog.Debug("Extracting content with readability",
|
||||
"url", websiteURL,
|
||||
)
|
||||
content, err = readability.ExtractContent(response.Body)
|
||||
content, err = readability.ExtractContent(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
@ -74,7 +61,7 @@ func Fetch(websiteURL, rules, userAgent string, cookie string, allowSelfSignedCe
|
|||
return content, nil
|
||||
}
|
||||
|
||||
func scrapContent(page io.Reader, rules string) (string, error) {
|
||||
func findContentUsingCustomRules(page io.Reader, rules string) (string, error) {
|
||||
document, err := goquery.NewDocumentFromReader(page)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
|
|
@ -58,7 +58,7 @@ func TestSelectorRules(t *testing.T) {
|
|||
t.Fatalf(`Unable to read file %q: %v`, filename, err)
|
||||
}
|
||||
|
||||
actualResult, err := scrapContent(bytes.NewReader(html), rule)
|
||||
actualResult, err := findContentUsingCustomRules(bytes.NewReader(html), rule)
|
||||
if err != nil {
|
||||
t.Fatalf(`Scraping error for %q - %q: %v`, filename, rule, err)
|
||||
}
|
||||
|
|
|
@ -4,16 +4,16 @@
|
|||
package subscription // import "miniflux.app/v2/internal/reader/subscription"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"miniflux.app/v2/internal/config"
|
||||
"miniflux.app/v2/internal/errors"
|
||||
"miniflux.app/v2/internal/http/client"
|
||||
"miniflux.app/v2/internal/integration/rssbridge"
|
||||
"miniflux.app/v2/internal/reader/browser"
|
||||
"miniflux.app/v2/internal/locale"
|
||||
"miniflux.app/v2/internal/reader/fetcher"
|
||||
"miniflux.app/v2/internal/reader/parser"
|
||||
"miniflux.app/v2/internal/urllib"
|
||||
|
||||
|
@ -21,18 +21,70 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
errUnreadableDoc = "Unable to analyze this page: %v"
|
||||
youtubeChannelRegex = regexp.MustCompile(`youtube\.com/channel/(.*)`)
|
||||
youtubeVideoRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
|
||||
)
|
||||
|
||||
// FindSubscriptions downloads and try to find one or more subscriptions from an URL.
|
||||
func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool, rssbridgeURL string) (Subscriptions, *errors.LocalizedError) {
|
||||
func FindSubscriptions(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool, rssbridgeURL string) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||
websiteURL = findYoutubeChannelFeed(websiteURL)
|
||||
websiteURL = parseYoutubeVideoPage(websiteURL)
|
||||
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUsernameAndPassword(username, password)
|
||||
requestBuilder.WithUserAgent(userAgent)
|
||||
requestBuilder.WithCookie(cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(fetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(allowSelfSignedCertificates)
|
||||
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return nil, localizedError
|
||||
}
|
||||
|
||||
responseBody, localizedError := responseHandler.ReadBody(config.Opts.HTTPClientMaxBodySize())
|
||||
if localizedError != nil {
|
||||
slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return nil, localizedError
|
||||
}
|
||||
|
||||
if format := parser.DetectFeedFormat(string(responseBody)); format != parser.FormatUnknown {
|
||||
var subscriptions Subscriptions
|
||||
subscriptions = append(subscriptions, &Subscription{
|
||||
Title: responseHandler.EffectiveURL(),
|
||||
URL: responseHandler.EffectiveURL(),
|
||||
Type: format,
|
||||
})
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
subscriptions, localizedError := parseWebPage(responseHandler.EffectiveURL(), bytes.NewReader(responseBody))
|
||||
if localizedError != nil || subscriptions != nil {
|
||||
return subscriptions, localizedError
|
||||
}
|
||||
|
||||
if rssbridgeURL != "" {
|
||||
slog.Debug("Trying to detect feeds using RSS-Bridge",
|
||||
slog.String("website_url", websiteURL),
|
||||
slog.String("rssbridge_url", rssbridgeURL),
|
||||
)
|
||||
|
||||
bridges, err := rssbridge.DetectBridges(rssbridgeURL, websiteURL)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError("RSS-Bridge: %v", err)
|
||||
return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_detect_rssbridge", err)
|
||||
}
|
||||
|
||||
slog.Debug("RSS-Bridge results",
|
||||
slog.String("website_url", websiteURL),
|
||||
slog.String("rssbridge_url", rssbridgeURL),
|
||||
slog.Int("nb_bridges", len(bridges)),
|
||||
)
|
||||
|
||||
if len(bridges) > 0 {
|
||||
var subscriptions Subscriptions
|
||||
for _, bridge := range bridges {
|
||||
|
@ -46,45 +98,10 @@ func FindSubscriptions(websiteURL, userAgent, cookie, username, password string,
|
|||
}
|
||||
}
|
||||
|
||||
websiteURL = findYoutubeChannelFeed(websiteURL)
|
||||
websiteURL = parseYoutubeVideoPage(websiteURL)
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
clt.AllowSelfSignedCertificates = allowSelfSignedCertificates
|
||||
|
||||
if fetchViaProxy {
|
||||
clt.WithProxy()
|
||||
}
|
||||
|
||||
response, err := browser.Exec(clt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
body := response.BodyAsString()
|
||||
if format := parser.DetectFeedFormat(body); format != parser.FormatUnknown {
|
||||
var subscriptions Subscriptions
|
||||
subscriptions = append(subscriptions, &Subscription{
|
||||
Title: response.EffectiveURL,
|
||||
URL: response.EffectiveURL,
|
||||
Type: format,
|
||||
})
|
||||
|
||||
return subscriptions, nil
|
||||
}
|
||||
|
||||
subscriptions, err := parseWebPage(response.EffectiveURL, strings.NewReader(body))
|
||||
if err != nil || subscriptions != nil {
|
||||
return subscriptions, err
|
||||
}
|
||||
|
||||
return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password)
|
||||
return tryWellKnownUrls(websiteURL, userAgent, cookie, username, password, fetchViaProxy, allowSelfSignedCertificates)
|
||||
}
|
||||
|
||||
func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.LocalizedError) {
|
||||
func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||
var subscriptions Subscriptions
|
||||
queries := map[string]string{
|
||||
"link[type='application/rss+xml']": "rss",
|
||||
|
@ -95,7 +112,7 @@ func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.Loc
|
|||
|
||||
doc, err := goquery.NewDocumentFromReader(data)
|
||||
if err != nil {
|
||||
return nil, errors.NewLocalizedError(errUnreadableDoc, err)
|
||||
return nil, locale.NewLocalizedErrorWrapper(err, "error.unable_to_parse_html_document", err)
|
||||
}
|
||||
|
||||
for query, kind := range queries {
|
||||
|
@ -140,13 +157,19 @@ func parseYoutubeVideoPage(websiteURL string) string {
|
|||
return websiteURL
|
||||
}
|
||||
|
||||
clt := client.NewClientWithConfig(websiteURL, config.Opts)
|
||||
response, browserErr := browser.Exec(clt)
|
||||
if browserErr != nil {
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(websiteURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
slog.Warn("Unable to find subscriptions", slog.String("website_url", websiteURL), slog.Any("error", localizedError.Error()))
|
||||
return websiteURL
|
||||
}
|
||||
|
||||
doc, docErr := goquery.NewDocumentFromReader(response.Body)
|
||||
doc, docErr := goquery.NewDocumentFromReader(responseHandler.Body(config.Opts.HTTPClientMaxBodySize()))
|
||||
if docErr != nil {
|
||||
return websiteURL
|
||||
}
|
||||
|
@ -158,7 +181,7 @@ func parseYoutubeVideoPage(websiteURL string) string {
|
|||
return websiteURL
|
||||
}
|
||||
|
||||
func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string) (Subscriptions, *errors.LocalizedError) {
|
||||
func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string, fetchViaProxy, allowSelfSignedCertificates bool) (Subscriptions, *locale.LocalizedErrorWrapper) {
|
||||
var subscriptions Subscriptions
|
||||
knownURLs := map[string]string{
|
||||
"atom.xml": "atom",
|
||||
|
@ -173,6 +196,7 @@ func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string)
|
|||
// Look for knownURLs in the root.
|
||||
websiteURLRoot,
|
||||
}
|
||||
|
||||
// Look for knownURLs in current subdirectory, such as 'example.com/blog/'.
|
||||
websiteURL, _ = urllib.AbsoluteURL(websiteURL, "./")
|
||||
if websiteURL != websiteURLRoot {
|
||||
|
@ -185,30 +209,33 @@ func tryWellKnownUrls(websiteURL, userAgent, cookie, username, password string)
|
|||
if err != nil {
|
||||
continue
|
||||
}
|
||||
clt := client.NewClientWithConfig(fullURL, config.Opts)
|
||||
clt.WithCredentials(username, password)
|
||||
clt.WithUserAgent(userAgent)
|
||||
clt.WithCookie(cookie)
|
||||
|
||||
requestBuilder := fetcher.NewRequestBuilder()
|
||||
requestBuilder.WithUsernameAndPassword(username, password)
|
||||
requestBuilder.WithUserAgent(userAgent)
|
||||
requestBuilder.WithCookie(cookie)
|
||||
requestBuilder.WithTimeout(config.Opts.HTTPClientTimeout())
|
||||
requestBuilder.WithProxy(config.Opts.HTTPClientProxy())
|
||||
requestBuilder.UseProxy(fetchViaProxy)
|
||||
requestBuilder.IgnoreTLSErrors(allowSelfSignedCertificates)
|
||||
|
||||
// Some websites redirects unknown URLs to the home page.
|
||||
// As result, the list of known URLs is returned to the subscription list.
|
||||
// We don't want the user to choose between invalid feed URLs.
|
||||
clt.WithoutRedirects()
|
||||
requestBuilder.WithoutRedirects()
|
||||
|
||||
response, err := clt.Get()
|
||||
if err != nil {
|
||||
responseHandler := fetcher.NewResponseHandler(requestBuilder.ExecuteRequest(fullURL))
|
||||
defer responseHandler.Close()
|
||||
|
||||
if localizedError := responseHandler.LocalizedError(); localizedError != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if response != nil && response.StatusCode == 200 {
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
subscription.Title = fullURL
|
||||
subscription.URL = fullURL
|
||||
if subscription.URL != "" {
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
}
|
||||
subscription := new(Subscription)
|
||||
subscription.Type = kind
|
||||
subscription.Title = fullURL
|
||||
subscription.URL = fullURL
|
||||
subscriptions = append(subscriptions, subscription)
|
||||
}
|
||||
}
|
||||
|
||||
|
|