From c45b51d1f8b96fce533466db6e1f71d1311dbf00 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Sun, 6 Apr 2025 16:18:41 -0700
Subject: [PATCH] feat: use `Cache-Control` max-age and `Expires` headers to calculate next check

---
 internal/reader/fetcher/response_handler.go | 46 ++++++++++++
 .../reader/fetcher/response_handler_test.go | 72 +++++++++++++++++++
 internal/reader/handler/handler.go          | 12 +++-
 3 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/internal/reader/fetcher/response_handler.go b/internal/reader/fetcher/response_handler.go
index b71db54f..5c9778b5 100644
--- a/internal/reader/fetcher/response_handler.go
+++ b/internal/reader/fetcher/response_handler.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
+	"math"
 	"net"
 	"net/http"
 	"net/url"
@@ -53,6 +54,51 @@ func (r *ResponseHandler) ETag() string {
 	return r.httpResponse.Header.Get("ETag")
 }
 
+// ExpiresInMinutes returns the number of minutes, rounded up, until the date
+// given by the Expires response header. It returns 0 when the header is
+// missing, is not a valid date, or refers to a date that has already passed.
+func (r *ResponseHandler) ExpiresInMinutes() int {
+	expiresHeaderValue := r.httpResponse.Header.Get("Expires")
+	if expiresHeaderValue == "" {
+		return 0
+	}
+
+	// http.ParseTime understands the three date formats allowed by HTTP/1.1.
+	// time.RFC1123 stays as a fallback for dates that name a zone other than GMT.
+	expiresAt, err := http.ParseTime(expiresHeaderValue)
+	if err != nil {
+		expiresAt, err = time.Parse(time.RFC1123, expiresHeaderValue)
+	}
+	if err != nil {
+		return 0
+	}
+
+	// An expiration date in the past must not yield a negative delay.
+	return max(0, int(math.Ceil(time.Until(expiresAt).Minutes())))
+}
+
+// CacheControlMaxAgeInMinutes returns the max-age directive of the
+// Cache-Control response header converted to minutes, rounded up. It returns 0
+// when the header is missing or holds no valid, non-negative max-age value.
+func (r *ResponseHandler) CacheControlMaxAgeInMinutes() int {
+	cacheControlHeaderValue := r.httpResponse.Header.Get("Cache-Control")
+	for _, directive := range strings.Split(cacheControlHeaderValue, ",") {
+		name, value, found := strings.Cut(directive, "=")
+		// Directive names are case-insensitive (RFC 9111, section 5.2).
+		if !found || !strings.EqualFold(strings.TrimSpace(name), "max-age") {
+			continue
+		}
+
+		// The argument may be sent as a token (max-age=60) or quoted (max-age="60").
+		maxAge, err := strconv.Atoi(strings.Trim(strings.TrimSpace(value), `"`))
+		if err != nil || maxAge < 0 {
+			continue
+		}
+		return int(math.Ceil(float64(maxAge) / 60))
+	}
+	return 0
+}
+
 func (r *ResponseHandler) ParseRetryDelay() int {
 	retryAfterHeaderValue := r.httpResponse.Header.Get("Retry-After")
 	if retryAfterHeaderValue != "" {
diff --git a/internal/reader/fetcher/response_handler_test.go b/internal/reader/fetcher/response_handler_test.go
index da1f7856..cc675506 100644
--- 
a/internal/reader/fetcher/response_handler_test.go
+++ b/internal/reader/fetcher/response_handler_test.go
@@ -102,3 +102,75 @@ func TestRetryDelay(t *testing.T) {
 		})
 	}
 }
+
+// TestExpiresInMinutes checks the delay derived from the Expires header for a
+// missing, a valid, and a malformed header value.
+func TestExpiresInMinutes(t *testing.T) {
+	var testCases = map[string]struct {
+		ExpiresHeader   string
+		ExpectedMinutes int
+	}{
+		"Empty header": {
+			ExpiresHeader:   "",
+			ExpectedMinutes: 0,
+		},
+		"Valid Expires header": {
+			// HTTP dates are expressed in GMT, whatever the local time zone is.
+			ExpiresHeader:   time.Now().UTC().Add(10 * time.Minute).Format(http.TimeFormat),
+			ExpectedMinutes: 10,
+		},
+		"Invalid Expires header": {
+			ExpiresHeader:   "invalid-date",
+			ExpectedMinutes: 0,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(tt *testing.T) {
+			header := http.Header{}
+			header.Add("Expires", tc.ExpiresHeader)
+			rh := ResponseHandler{httpResponse: &http.Response{Header: header}}
+			// Report through tt so that a failure is attributed to the subtest.
+			if got := rh.ExpiresInMinutes(); got != tc.ExpectedMinutes {
+				tt.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedMinutes, got, name)
+			}
+		})
+	}
+}
+
+// TestCacheControlMaxAgeInMinutes checks the delay derived from the max-age
+// directive of the Cache-Control header.
+func TestCacheControlMaxAgeInMinutes(t *testing.T) {
+	var testCases = map[string]struct {
+		CacheControlHeader string
+		ExpectedMinutes    int
+	}{
+		"Empty header": {
+			CacheControlHeader: "",
+			ExpectedMinutes:    0,
+		},
+		"Valid max-age": {
+			CacheControlHeader: "max-age=600",
+			ExpectedMinutes:    10,
+		},
+		"Invalid max-age": {
+			CacheControlHeader: "max-age=invalid",
+			ExpectedMinutes:    0,
+		},
+		"Multiple directives": {
+			// max-age is not the first directive of the list.
+			CacheControlHeader: "no-cache, max-age=300",
+			ExpectedMinutes:    5,
+		},
+	}
+	for name, tc := range testCases {
+		t.Run(name, func(tt *testing.T) {
+			header := http.Header{}
+			header.Add("Cache-Control", tc.CacheControlHeader)
+			rh := ResponseHandler{httpResponse: &http.Response{Header: header}}
+			// Report through tt so that a failure is attributed to the subtest.
+			if got := rh.CacheControlMaxAgeInMinutes(); got != tc.ExpectedMinutes {
+				tt.Errorf("Expected %d, got %d for scenario %q", tc.ExpectedMinutes, got, name)
+			}
+		})
+	}
+}
diff --git a/internal/reader/handler/handler.go b/internal/reader/handler/handler.go
index 1a70cdbd..4745b7c1 100644 --- a/internal/reader/handler/handler.go +++ b/internal/reader/handler/handler.go @@ -301,8 +301,12 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool return localizedError } - // If the feed has a TTL defined, we use it to make sure we don't check it too often. - refreshDelayInMinutes = updatedFeed.TTL + // Use the RSS TTL value, or the Cache-Control or Expires HTTP headers if available. + // Otherwise, we use the default value from the configuration (min interval parameter). + feedTTLValue := updatedFeed.TTL + cacheControlMaxAgeValue := responseHandler.CacheControlMaxAgeInMinutes() + expiresValue := responseHandler.ExpiresInMinutes() + refreshDelayInMinutes = max(feedTTLValue, cacheControlMaxAgeValue, expiresValue) // Set the next check at with updated arguments. originalFeed.ScheduleNextCheck(weeklyEntryCount, refreshDelayInMinutes) @@ -310,6 +314,10 @@ func RefreshFeed(store *storage.Storage, userID, feedID int64, forceRefresh bool slog.Debug("Updated next check date", slog.Int64("user_id", userID), slog.Int64("feed_id", feedID), + slog.String("feed_url", originalFeed.FeedURL), + slog.Int("feed_ttl_minutes", feedTTLValue), + slog.Int("cache_control_max_age_in_minutes", cacheControlMaxAgeValue), + slog.Int("expires_in_minutes", expiresValue), slog.Int("refresh_delay_in_minutes", refreshDelayInMinutes), slog.Time("new_next_check_at", originalFeed.NextCheckAt), )