From 5c26e06780b61135a2de47e6963111ba1fcb83cd Mon Sep 17 00:00:00 2001 From: jvoisin Date: Thu, 14 Aug 2025 17:40:10 +0200 Subject: [PATCH] feat(entry): keep only metadata for removed entries This should significantly shrink the space taken by Miniflux's database: ```sql miniflux=# SELECT relname, pg_size_pretty(pg_total_relation_size(relname::regclass)) FROM pg_catalog.pg_statio_user_tables ORDER BY pg_total_relation_size(relname::regclass) DESC; relname | pg_size_pretty ----------------------+---------------- entries | 158 MB icons | 3312 kB enclosures | 1568 kB sessions | 1048 kB feeds | 288 kB feed_icons | 72 kB users | 64 kB user_sessions | 64 kB categories | 48 kB integrations | 32 kB api_keys | 32 kB webauthn_credentials | 24 kB schema_version | 16 kB acme_cache | 16 kB (14 rows) miniflux=# ``` This should close #3524 --- internal/cli/cleanup_tasks.go | 7 ++++++ internal/storage/entry.go | 45 ++++++++++++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/internal/cli/cleanup_tasks.go b/internal/cli/cleanup_tasks.go index adeb280b..ef071e3e 100644 --- a/internal/cli/cleanup_tasks.go +++ b/internal/cli/cleanup_tasks.go @@ -46,4 +46,11 @@ func runCleanupTasks(store *storage.Storage) { metric.ArchiveEntriesDuration.WithLabelValues(model.EntryStatusUnread).Observe(time.Since(startTime).Seconds()) } } + + if rowsAffected, err := store.DeleteContentRemovedEntries(); err != nil { + slog.Error("Unable to delete the content of removed entries", slog.Any("error", err)) + } else { + slog.Info("Deleting content of removed entries completed", + slog.Int64("removed_entries_content_removed", rowsAffected)) + } } diff --git a/internal/storage/entry.go b/internal/storage/entry.go index 9edc94c5..2b3c5dd1 100644 --- a/internal/storage/entry.go +++ b/internal/storage/entry.go @@ -258,8 +258,8 @@ func (s *Storage) GetReadTime(feedID int64, entryHash string) int { return result } -// cleanupEntries deletes from the database entries marked as 
"removed" and not visible anymore in the feed. -func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error { +// deleteRemovedNonexistentEntries deletes from the database entries marked as "removed" and not visible anymore in the feed. +func (s *Storage) deleteRemovedNonexistentEntries(feedID int64, entryHashes []string) error { query := ` DELETE FROM entries @@ -275,6 +275,45 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error { return nil } +// DeleteContentRemovedEntries deletes the content and corresponding enclosures +// of entries marked as "removed", and only keeps their metadata. +func (s *Storage) DeleteContentRemovedEntries() (int64, error) { + query := ` + DELETE FROM + enclosures + WHERE + enclosures.entry_id IN + (SELECT id FROM entries WHERE status=$1) + ` + if _, err := s.db.Exec(query, model.EntryStatusRemoved); err != nil { + return 0, fmt.Errorf(`store: unable to delete enclosures from removed entries: %v`, err) + } + + query = ` + UPDATE + entries + SET + title='', + content=NULL, + url='', + author=NULL + WHERE + status=$1 + ` + + result, err := s.db.Exec(query, model.EntryStatusRemoved) + if err != nil { + return 0, fmt.Errorf(`store: unable to delete removed entries: %v`, err) + } + + count, err := result.RowsAffected() + if err != nil { + return 0, fmt.Errorf(`store: unable to get the number of rows affected while deleting content from removed entries: %v`, err) + } + + return count, nil +} + // RefreshFeedEntries updates feed entries while refreshing a feed. 
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) { entryHashes := make([]string, 0, len(entries)) @@ -322,7 +361,7 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries } go func() { - if err := s.cleanupEntries(feedID, entryHashes); err != nil { + if err := s.deleteRemovedNonexistentEntries(feedID, entryHashes); err != nil { slog.Error("Unable to cleanup entries", slog.Int64("user_id", userID), slog.Int64("feed_id", feedID),