From 5403ca09f6712503341d418028918052f74a3290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sun, 17 Aug 2025 17:30:33 -0700 Subject: [PATCH] feat(storage): add limit parameter to `ClearRemovedEntriesContent` Without the limit, this query is going to hang forever on large databases with millions of entries. --- internal/cli/cleanup_tasks.go | 2 +- internal/storage/entry.go | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/internal/cli/cleanup_tasks.go b/internal/cli/cleanup_tasks.go index 224e708e..4a71e3a6 100644 --- a/internal/cli/cleanup_tasks.go +++ b/internal/cli/cleanup_tasks.go @@ -54,7 +54,7 @@ func runCleanupTasks(store *storage.Storage) { slog.Int64("removed_entries_enclosures_deleted", enclosuresAffected)) } - if contentAffected, err := store.ClearRemovedEntriesContent(); err != nil { + if contentAffected, err := store.ClearRemovedEntriesContent(config.Opts.CleanupArchiveBatchSize()); err != nil { slog.Error("Unable to clear content from removed entries", slog.Any("error", err)) } else { slog.Info("Clearing content from removed entries completed", diff --git a/internal/storage/entry.go b/internal/storage/entry.go index 09a32083..ece466f2 100644 --- a/internal/storage/entry.go +++ b/internal/storage/entry.go @@ -297,7 +297,7 @@ func (s *Storage) DeleteRemovedEntriesEnclosures() (int64, error) { } // ClearRemovedEntriesContent clears the content fields of entries marked as "removed", keeping only their metadata. 
-func (s *Storage) ClearRemovedEntriesContent() (int64, error) { +func (s *Storage) ClearRemovedEntriesContent(limit int) (int64, error) { query := ` UPDATE entries @@ -305,12 +305,19 @@ func (s *Storage) ClearRemovedEntriesContent() (int64, error) { title='', content=NULL, url='', - author=NULL - WHERE - status=$1 AND content IS NOT NULL + author=NULL, + comments_url=NULL, + document_vectors=NULL + WHERE id IN ( + SELECT id + FROM entries + WHERE status = $1 AND content IS NOT NULL + ORDER BY id ASC + LIMIT $2 + ) ` - result, err := s.db.Exec(query, model.EntryStatusRemoved) + result, err := s.db.Exec(query, model.EntryStatusRemoved, limit) if err != nil { return 0, fmt.Errorf(`store: unable to clear content from removed entries: %v`, err) }