1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-08-26 18:21:01 +00:00

feat(entry): keep only metadata for removed entries

This should significantly shrink the space taken by Miniflux's database, most of which is used by the `entries` table:

```sql
miniflux=#
SELECT
  relname, pg_size_pretty(pg_total_relation_size(relname::regclass))
FROM
  pg_catalog.pg_statio_user_tables
ORDER BY
  pg_total_relation_size(relname::regclass)
DESC;

       relname        | pg_size_pretty
----------------------+----------------
 entries              | 158 MB
 icons                | 3312 kB
 enclosures           | 1568 kB
 sessions             | 1048 kB
 feeds                | 288 kB
 feed_icons           | 72 kB
 users                | 64 kB
 user_sessions        | 64 kB
 categories           | 48 kB
 integrations         | 32 kB
 api_keys             | 32 kB
 webauthn_credentials | 24 kB
 schema_version       | 16 kB
 acme_cache           | 16 kB
(14 rows)

miniflux=#
```

This should close #3524
This commit is contained in:
jvoisin 2025-08-14 17:40:10 +02:00 committed by Frédéric Guillot
parent 9e722839b5
commit 5c26e06780
2 changed files with 49 additions and 3 deletions

View file

@ -46,4 +46,11 @@ func runCleanupTasks(store *storage.Storage) {
metric.ArchiveEntriesDuration.WithLabelValues(model.EntryStatusUnread).Observe(time.Since(startTime).Seconds())
}
}
if rowsAffected, err := store.DeleteContentRemovedEntries(); err != nil {
slog.Error("Unable to delete the content of removed entries", slog.Any("error", err))
} else {
slog.Info("Deleting content of removed entries completed",
slog.Int64("removed_entries_content_removed", rowsAffected))
}
}

View file

@ -258,8 +258,8 @@ func (s *Storage) GetReadTime(feedID int64, entryHash string) int {
return result
}
// cleanupEntries deletes from the database entries marked as "removed" and not visible anymore in the feed.
func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
// deleteRemovedNonexistentEntries deletes from the database entries marked as "removed" and not visible anymore in the feed.
func (s *Storage) deleteRemovedNonexistentEntries(feedID int64, entryHashes []string) error {
query := `
DELETE FROM
entries
@ -275,6 +275,45 @@ func (s *Storage) cleanupEntries(feedID int64, entryHashes []string) error {
return nil
}
// DeleteContentRemovedEntries deletes the content and corresponding enclosures
// of entries marked as "removed", and only keeps their metadata.
//
// It returns the number of entries whose content was cleared by this call.
// Entries that are already cleared are skipped, so repeated runs do not
// rewrite the same rows and the returned count reflects actual work done.
func (s *Storage) DeleteContentRemovedEntries() (int64, error) {
	query := `
		DELETE FROM
			enclosures
		WHERE
			enclosures.entry_id IN
				(SELECT id FROM entries WHERE status=$1)
	`
	if _, err := s.db.Exec(query, model.EntryStatusRemoved); err != nil {
		return 0, fmt.Errorf(`store: unable to delete enclosures from removed entries: %w`, err)
	}

	// Restrict the update to rows that still hold something to clear. In
	// PostgreSQL an UPDATE writes a new row version even when the values do not
	// change, so touching already-cleared rows on every run would only create
	// dead tuples. IS DISTINCT FROM is used so that NULL values still match.
	query = `
		UPDATE
			entries
		SET
			title='',
			content=NULL,
			url='',
			author=NULL
		WHERE
			status=$1
			AND (
				title IS DISTINCT FROM ''
				OR content IS NOT NULL
				OR url IS DISTINCT FROM ''
				OR author IS NOT NULL
			)
	`
	result, err := s.db.Exec(query, model.EntryStatusRemoved)
	if err != nil {
		return 0, fmt.Errorf(`store: unable to delete content from removed entries: %w`, err)
	}

	count, err := result.RowsAffected()
	if err != nil {
		return 0, fmt.Errorf(`store: unable to get the number of rows affected while deleting content from removed entries: %w`, err)
	}

	return count, nil
}
// RefreshFeedEntries updates feed entries while refreshing a feed.
func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries, updateExistingEntries bool) (newEntries model.Entries, err error) {
entryHashes := make([]string, 0, len(entries))
@ -322,7 +361,7 @@ func (s *Storage) RefreshFeedEntries(userID, feedID int64, entries model.Entries
}
go func() {
if err := s.cleanupEntries(feedID, entryHashes); err != nil {
if err := s.deleteRemovedNonexistentEntries(feedID, entryHashes); err != nil {
slog.Error("Unable to cleanup entries",
slog.Int64("user_id", userID),
slog.Int64("feed_id", feedID),