2023-06-19 14:42:47 -07:00
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
2017-11-19 21:10:04 -08:00
2023-08-10 19:46:45 -07:00
package handler // import "miniflux.app/v2/internal/reader/handler"
2017-11-19 21:10:04 -08:00
import (
"fmt"
2017-11-20 17:12:37 -08:00
"time"
2023-08-10 19:46:45 -07:00
"miniflux.app/v2/internal/config"
"miniflux.app/v2/internal/errors"
"miniflux.app/v2/internal/http/client"
2023-09-08 22:45:17 -07:00
"miniflux.app/v2/internal/integration"
2023-08-10 19:46:45 -07:00
"miniflux.app/v2/internal/locale"
"miniflux.app/v2/internal/logger"
"miniflux.app/v2/internal/model"
"miniflux.app/v2/internal/reader/browser"
"miniflux.app/v2/internal/reader/icon"
"miniflux.app/v2/internal/reader/parser"
"miniflux.app/v2/internal/reader/processor"
"miniflux.app/v2/internal/storage"
"miniflux.app/v2/internal/timer"
2017-11-19 21:10:04 -08:00
)
// Message templates for the errors reported by the feed handlers.
// They are used as format strings for errors.NewLocalizedError, so they are
// declared as constants: nothing may reassign them at runtime, and the format
// strings stay compile-time constants for static analysis.
//
//   - errDuplicate expects the feed URL (%s).
//   - errNotFound expects the feed ID (%d).
//   - errCategoryNotFound takes no argument.
const (
	errDuplicate        = "This feed already exists (%s)"
	errNotFound         = "Feed %d not found"
	errCategoryNotFound = "Category not found for this user"
)
// CreateFeed fetch, parse and store a new feed.
2021-01-04 13:49:28 -08:00
func CreateFeed ( store * storage . Storage , userID int64 , feedCreationRequest * model . FeedCreationRequest ) ( * model . Feed , error ) {
defer timer . ExecutionTime ( time . Now ( ) , fmt . Sprintf ( "[CreateFeed] FeedURL=%s" , feedCreationRequest . FeedURL ) )
2017-11-19 21:10:04 -08:00
2021-08-30 16:53:05 +02:00
user , storeErr := store . UserByID ( userID )
if storeErr != nil {
return nil , storeErr
}
2021-01-04 13:49:28 -08:00
if ! store . CategoryIDExists ( userID , feedCreationRequest . CategoryID ) {
2017-11-24 22:29:20 -08:00
return nil , errors . NewLocalizedError ( errCategoryNotFound )
}
2021-01-04 13:49:28 -08:00
request := client . NewClientWithConfig ( feedCreationRequest . FeedURL , config . Opts )
request . WithCredentials ( feedCreationRequest . Username , feedCreationRequest . Password )
request . WithUserAgent ( feedCreationRequest . UserAgent )
2021-03-23 04:27:58 +01:00
request . WithCookie ( feedCreationRequest . Cookie )
2021-02-21 13:42:49 -08:00
request . AllowSelfSignedCertificates = feedCreationRequest . AllowSelfSignedCertificates
2020-09-10 14:28:54 +08:00
2021-01-04 13:49:28 -08:00
if feedCreationRequest . FetchViaProxy {
2020-09-10 14:28:54 +08:00
request . WithProxy ( )
}
2018-10-14 21:43:48 -07:00
response , requestErr := browser . Exec ( request )
if requestErr != nil {
return nil , requestErr
2018-01-04 18:32:36 -08:00
}
2021-01-04 13:49:28 -08:00
if store . FeedURLExists ( userID , response . EffectiveURL ) {
2017-11-19 21:10:04 -08:00
return nil , errors . NewLocalizedError ( errDuplicate , response . EffectiveURL )
}
2020-12-02 20:47:11 -08:00
subscription , parseErr := parser . ParseFeed ( response . EffectiveURL , response . BodyAsString ( ) )
2018-10-14 21:43:48 -07:00
if parseErr != nil {
return nil , parseErr
2017-11-20 17:12:37 -08:00
}
2021-01-04 13:49:28 -08:00
subscription . UserID = userID
subscription . UserAgent = feedCreationRequest . UserAgent
2021-03-23 04:27:58 +01:00
subscription . Cookie = feedCreationRequest . Cookie
2021-01-04 13:49:28 -08:00
subscription . Username = feedCreationRequest . Username
subscription . Password = feedCreationRequest . Password
subscription . Crawler = feedCreationRequest . Crawler
subscription . Disabled = feedCreationRequest . Disabled
subscription . IgnoreHTTPCache = feedCreationRequest . IgnoreHTTPCache
2021-02-21 13:42:49 -08:00
subscription . AllowSelfSignedCertificates = feedCreationRequest . AllowSelfSignedCertificates
2021-01-04 13:49:28 -08:00
subscription . FetchViaProxy = feedCreationRequest . FetchViaProxy
subscription . ScraperRules = feedCreationRequest . ScraperRules
subscription . RewriteRules = feedCreationRequest . RewriteRules
subscription . BlocklistRules = feedCreationRequest . BlocklistRules
subscription . KeeplistRules = feedCreationRequest . KeeplistRules
2022-07-12 06:12:26 +02:00
subscription . UrlRewriteRules = feedCreationRequest . UrlRewriteRules
2021-01-04 13:49:28 -08:00
subscription . WithCategoryID ( feedCreationRequest . CategoryID )
2018-10-14 21:43:48 -07:00
subscription . WithClientResponse ( response )
subscription . CheckedNow ( )
2017-11-19 21:10:04 -08:00
2023-08-08 14:12:41 +00:00
processor . ProcessFeedEntries ( store , subscription , user , true )
2017-12-11 22:16:32 -08:00
2021-01-02 16:33:41 -08:00
if storeErr := store . CreateFeed ( subscription ) ; storeErr != nil {
2018-10-14 21:43:48 -07:00
return nil , storeErr
2017-11-19 21:10:04 -08:00
}
2021-01-02 16:33:41 -08:00
logger . Debug ( "[CreateFeed] Feed saved with ID: %d" , subscription . ID )
2017-11-19 21:10:04 -08:00
2021-02-21 13:42:49 -08:00
checkFeedIcon (
store ,
subscription . ID ,
subscription . SiteURL ,
2023-06-04 18:01:59 -04:00
subscription . IconURL ,
2022-01-08 15:09:12 -08:00
feedCreationRequest . UserAgent ,
2021-02-21 13:42:49 -08:00
feedCreationRequest . FetchViaProxy ,
feedCreationRequest . AllowSelfSignedCertificates ,
)
2017-11-19 21:10:04 -08:00
return subscription , nil
}
2020-09-27 16:01:06 -07:00
// RefreshFeed refreshes a feed.
2023-08-08 14:12:41 +00:00
func RefreshFeed ( store * storage . Storage , userID , feedID int64 , forceRefresh bool ) error {
2021-01-02 16:33:41 -08:00
defer timer . ExecutionTime ( time . Now ( ) , fmt . Sprintf ( "[RefreshFeed] feedID=%d" , feedID ) )
2021-08-30 16:53:05 +02:00
user , storeErr := store . UserByID ( userID )
if storeErr != nil {
return storeErr
}
printer := locale . NewPrinter ( user . Language )
2017-11-19 21:10:04 -08:00
2021-01-02 16:33:41 -08:00
originalFeed , storeErr := store . FeedByID ( userID , feedID )
2018-10-14 21:43:48 -07:00
if storeErr != nil {
return storeErr
2017-11-19 21:10:04 -08:00
}
if originalFeed == nil {
return errors . NewLocalizedError ( errNotFound , feedID )
}
2020-05-25 14:59:15 -07:00
weeklyEntryCount := 0
if config . Opts . PollingScheduler ( ) == model . SchedulerEntryFrequency {
var weeklyCountErr error
2021-01-02 16:33:41 -08:00
weeklyEntryCount , weeklyCountErr = store . WeeklyFeedEntryCount ( userID , feedID )
2020-05-25 14:59:15 -07:00
if weeklyCountErr != nil {
return weeklyCountErr
}
2020-05-25 16:06:56 -05:00
}
2018-10-14 21:43:48 -07:00
originalFeed . CheckedNow ( )
2020-05-25 14:59:15 -07:00
originalFeed . ScheduleNextCheck ( weeklyEntryCount )
2018-02-08 18:16:54 -08:00
2020-09-27 14:29:48 -07:00
request := client . NewClientWithConfig ( originalFeed . FeedURL , config . Opts )
2018-10-14 21:43:48 -07:00
request . WithCredentials ( originalFeed . Username , originalFeed . Password )
request . WithUserAgent ( originalFeed . UserAgent )
2021-03-23 04:27:58 +01:00
request . WithCookie ( originalFeed . Cookie )
2021-02-21 13:42:49 -08:00
request . AllowSelfSignedCertificates = originalFeed . AllowSelfSignedCertificates
2020-06-05 21:50:59 -07:00
if ! originalFeed . IgnoreHTTPCache {
request . WithCacheHeaders ( originalFeed . EtagHeader , originalFeed . LastModifiedHeader )
}
2020-09-10 14:28:54 +08:00
if originalFeed . FetchViaProxy {
request . WithProxy ( )
}
2018-10-14 21:43:48 -07:00
response , requestErr := browser . Exec ( request )
if requestErr != nil {
originalFeed . WithError ( requestErr . Localize ( printer ) )
2021-01-02 16:33:41 -08:00
store . UpdateFeedError ( originalFeed )
2018-10-14 21:43:48 -07:00
return requestErr
2017-11-19 21:10:04 -08:00
}
2021-01-02 16:33:41 -08:00
if store . AnotherFeedURLExists ( userID , originalFeed . ID , response . EffectiveURL ) {
2020-09-20 23:29:51 -07:00
storeErr := errors . NewLocalizedError ( errDuplicate , response . EffectiveURL )
originalFeed . WithError ( storeErr . Error ( ) )
2021-01-02 16:33:41 -08:00
store . UpdateFeedError ( originalFeed )
2020-09-20 23:29:51 -07:00
return storeErr
}
2020-06-05 21:50:59 -07:00
if originalFeed . IgnoreHTTPCache || response . IsModified ( originalFeed . EtagHeader , originalFeed . LastModifiedHeader ) {
2021-01-02 16:33:41 -08:00
logger . Debug ( "[RefreshFeed] Feed #%d has been modified" , feedID )
2018-01-04 18:32:36 -08:00
2020-12-02 20:47:11 -08:00
updatedFeed , parseErr := parser . ParseFeed ( response . EffectiveURL , response . BodyAsString ( ) )
2018-02-27 21:08:32 -08:00
if parseErr != nil {
2018-10-14 21:43:48 -07:00
originalFeed . WithError ( parseErr . Localize ( printer ) )
2021-01-02 16:33:41 -08:00
store . UpdateFeedError ( originalFeed )
2018-10-14 21:43:48 -07:00
return parseErr
2017-11-19 21:10:04 -08:00
}
2018-10-14 22:33:19 -07:00
originalFeed . Entries = updatedFeed . Entries
2023-08-08 14:12:41 +00:00
processor . ProcessFeedEntries ( store , originalFeed , user , forceRefresh )
2017-12-11 22:16:32 -08:00
2023-08-08 14:12:41 +00:00
// We don't update existing entries when the crawler is enabled (we crawl only inexisting entries). Unless it is forced to refresh
updateExistingEntries := forceRefresh || ! originalFeed . Crawler
2023-09-08 22:45:17 -07:00
newEntries , storeErr := store . RefreshFeedEntries ( originalFeed . UserID , originalFeed . ID , originalFeed . Entries , updateExistingEntries )
if storeErr != nil {
2018-10-21 11:44:29 -07:00
originalFeed . WithError ( storeErr . Error ( ) )
2021-01-02 16:33:41 -08:00
store . UpdateFeedError ( originalFeed )
2018-10-14 21:43:48 -07:00
return storeErr
2017-11-19 21:10:04 -08:00
}
2023-09-08 22:45:17 -07:00
userIntegrations , intErr := store . Integration ( userID )
if intErr != nil {
logger . Error ( "[RefreshFeed] Fetching integrations for user %d failed: %v; the refresh process will go on, but no integrations will run this time." , userID , intErr )
} else if userIntegrations != nil && len ( newEntries ) > 0 {
go integration . PushEntries ( originalFeed , newEntries , userIntegrations )
}
2018-10-14 21:43:48 -07:00
// We update caching headers only if the feed has been modified,
// because some websites don't return the same headers when replying with a 304.
originalFeed . WithClientResponse ( response )
2023-09-08 22:45:17 -07:00
2021-02-21 13:42:49 -08:00
checkFeedIcon (
store ,
originalFeed . ID ,
originalFeed . SiteURL ,
2023-06-04 18:01:59 -04:00
updatedFeed . IconURL ,
2022-01-08 15:09:12 -08:00
originalFeed . UserAgent ,
2021-02-21 13:42:49 -08:00
originalFeed . FetchViaProxy ,
originalFeed . AllowSelfSignedCertificates ,
)
2017-11-19 21:10:04 -08:00
} else {
2021-01-02 16:33:41 -08:00
logger . Debug ( "[RefreshFeed] Feed #%d not modified" , feedID )
2017-11-19 21:10:04 -08:00
}
2018-10-14 21:43:48 -07:00
originalFeed . ResetErrorCounter ( )
2018-12-15 13:04:38 -08:00
2021-01-02 16:33:41 -08:00
if storeErr := store . UpdateFeed ( originalFeed ) ; storeErr != nil {
2018-12-15 13:04:38 -08:00
originalFeed . WithError ( storeErr . Error ( ) )
2021-01-02 16:33:41 -08:00
store . UpdateFeedError ( originalFeed )
2018-12-15 13:04:38 -08:00
return storeErr
}
return nil
2017-11-19 21:10:04 -08:00
}
2023-06-04 18:01:59 -04:00
func checkFeedIcon ( store * storage . Storage , feedID int64 , websiteURL , iconURL , userAgent string , fetchViaProxy , allowSelfSignedCertificates bool ) {
2018-10-14 21:43:48 -07:00
if ! store . HasIcon ( feedID ) {
2023-06-04 18:01:59 -04:00
icon , err := icon . FindIcon ( websiteURL , iconURL , userAgent , fetchViaProxy , allowSelfSignedCertificates )
2018-10-14 21:43:48 -07:00
if err != nil {
2021-01-02 16:33:41 -08:00
logger . Debug ( ` [CheckFeedIcon] %v (feedID=%d websiteURL=%s) ` , err , feedID , websiteURL )
2018-10-14 21:43:48 -07:00
} else if icon == nil {
2021-01-02 16:33:41 -08:00
logger . Debug ( ` [CheckFeedIcon] No icon found (feedID=%d websiteURL=%s) ` , feedID , websiteURL )
2018-10-14 21:43:48 -07:00
} else {
if err := store . CreateFeedIcon ( feedID , icon ) ; err != nil {
2021-01-02 16:33:41 -08:00
logger . Debug ( ` [CheckFeedIcon] %v (feedID=%d websiteURL=%s) ` , err , feedID , websiteURL )
2018-10-14 21:43:48 -07:00
}
}
}
2019-11-29 11:17:14 -08:00
}