diff --git a/internal/cli/cleanup_tasks.go b/internal/cli/cleanup_tasks.go index 224e708e..4a71e3a6 100644 --- a/internal/cli/cleanup_tasks.go +++ b/internal/cli/cleanup_tasks.go @@ -54,7 +54,7 @@ func runCleanupTasks(store *storage.Storage) { slog.Int64("removed_entries_enclosures_deleted", enclosuresAffected)) } - if contentAffected, err := store.ClearRemovedEntriesContent(); err != nil { + if contentAffected, err := store.ClearRemovedEntriesContent(config.Opts.CleanupArchiveBatchSize()); err != nil { slog.Error("Unable to clear content from removed entries", slog.Any("error", err)) } else { slog.Info("Clearing content from removed entries completed", diff --git a/internal/storage/entry.go b/internal/storage/entry.go index 09a32083..ece466f2 100644 --- a/internal/storage/entry.go +++ b/internal/storage/entry.go @@ -297,7 +297,7 @@ func (s *Storage) DeleteRemovedEntriesEnclosures() (int64, error) { } // ClearRemovedEntriesContent clears the content fields of entries marked as "removed", keeping only their metadata. -func (s *Storage) ClearRemovedEntriesContent() (int64, error) { +func (s *Storage) ClearRemovedEntriesContent(limit int) (int64, error) { query := ` UPDATE entries @@ -305,12 +305,19 @@ func (s *Storage) ClearRemovedEntriesContent() (int64, error) { title='', content=NULL, url='', - author=NULL - WHERE - status=$1 AND content IS NOT NULL + author=NULL, + comments_url=NULL, + document_vectors=NULL + WHERE id IN ( + SELECT id + FROM entries + WHERE status = $1 AND content IS NOT NULL + ORDER BY id ASC + LIMIT $2 + ) ` - result, err := s.db.Exec(query, model.EntryStatusRemoved) + result, err := s.db.Exec(query, model.EntryStatusRemoved, limit) if err != nil { return 0, fmt.Errorf(`store: unable to clear content from removed entries: %v`, err) }