From 86c58e11f62a1ad787da8e52e9f5444c0b9ceeb7 Mon Sep 17 00:00:00 2001 From: jvoisin Date: Wed, 18 Jun 2025 15:06:20 +0200 Subject: [PATCH] perf(reader): use a non-cryptographic hash when possible There is no need to use SHA256 everywhere, especially on small inputs where we don't care about its cryptographic properties. We're using FNV as it's the fastest available hash in Go's standard library, and we're picking its "a" version as it has slightly better avalanche characteristics, which are relevant for small inputs. This commit has the side-effect of invalidating all favicons saved in the database, which is desirable to benefit from the resize process implemented in 777d0dd2, as it didn't apply retroactively. We're also making use of hex.EncodeToString instead of fmt.Sprintf, as it's marginally faster. Note that we can't change the usage of sha256 for entry.Hash as it's used to deduplicate entries in the database. --- internal/crypto/crypto.go | 14 +++++++++----- internal/reader/atom/atom_03_adapter.go | 2 +- internal/reader/atom/atom_10_adapter.go | 2 +- internal/reader/json/adapter.go | 2 +- internal/reader/rdf/adapter.go | 2 +- internal/reader/rss/adapter.go | 6 +++--- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/internal/crypto/crypto.go b/internal/crypto/crypto.go index 329c86e7..79a838ab 100644 --- a/internal/crypto/crypto.go +++ b/internal/crypto/crypto.go @@ -10,18 +10,22 @@ import ( "crypto/subtle" "encoding/hex" "fmt" + "hash/fnv" "golang.org/x/crypto/bcrypt" ) -// HashFromBytes returns a SHA-256 checksum of the input. +// HashFromBytes returns a non-cryptographic checksum of the input. func HashFromBytes(value []byte) string { - return fmt.Sprintf("%x", sha256.Sum256(value)) + h := fnv.New128a() + h.Write(value) + return hex.EncodeToString(h.Sum(nil)) } -// Hash returns a SHA-256 checksum of a string. -func Hash(value string) string { - return HashFromBytes([]byte(value)) +// SHA256 returns a SHA-256 checksum of a string. 
+func SHA256(value string) string { + h := sha256.Sum256([]byte(value)) + return hex.EncodeToString(h[:]) } // GenerateRandomBytes returns random bytes. diff --git a/internal/reader/atom/atom_03_adapter.go b/internal/reader/atom/atom_03_adapter.go index 02d78ec8..daa877d8 100644 --- a/internal/reader/atom/atom_03_adapter.go +++ b/internal/reader/atom/atom_03_adapter.go @@ -103,7 +103,7 @@ func (a *Atom03Adapter) BuildFeed(baseURL string) *model.Feed { // Generate the entry hash. for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} { if value != "" { - entry.Hash = crypto.Hash(value) + entry.Hash = crypto.SHA256(value) break } } diff --git a/internal/reader/atom/atom_10_adapter.go b/internal/reader/atom/atom_10_adapter.go index ce80e814..6c7d4043 100644 --- a/internal/reader/atom/atom_10_adapter.go +++ b/internal/reader/atom/atom_10_adapter.go @@ -152,7 +152,7 @@ func (a *Atom10Adapter) populateEntries(siteURL string) model.Entries { // Generate the entry hash. for _, value := range []string{atomEntry.ID, atomEntry.Links.OriginalLink()} { if value != "" { - entry.Hash = crypto.Hash(value) + entry.Hash = crypto.SHA256(value) break } } diff --git a/internal/reader/json/adapter.go b/internal/reader/json/adapter.go index 89aa93ea..8d63e518 100644 --- a/internal/reader/json/adapter.go +++ b/internal/reader/json/adapter.go @@ -161,7 +161,7 @@ func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed { for _, value := range []string{item.ID, item.URL, item.ContentText + item.ContentHTML + item.Summary} { value = strings.TrimSpace(value) if value != "" { - entry.Hash = crypto.Hash(value) + entry.Hash = crypto.SHA256(value) break } } diff --git a/internal/reader/rdf/adapter.go b/internal/reader/rdf/adapter.go index 13d8f06f..9b5240c0 100644 --- a/internal/reader/rdf/adapter.go +++ b/internal/reader/rdf/adapter.go @@ -80,7 +80,7 @@ func (r *RDFAdapter) BuildFeed(baseURL string) *model.Feed { hashValue = item.Title + item.Description // Fallback to 
the title and description if the link is empty. } - entry.Hash = crypto.Hash(hashValue) + entry.Hash = crypto.SHA256(hashValue) // Populate the entry date. entry.Date = time.Now() diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go index 78dd156d..845b103b 100644 --- a/internal/reader/rss/adapter.go +++ b/internal/reader/rss/adapter.go @@ -104,11 +104,11 @@ func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed { // Generate the entry hash. switch { case item.GUID.Data != "": - entry.Hash = crypto.Hash(item.GUID.Data) + entry.Hash = crypto.SHA256(item.GUID.Data) case entryURL != "": - entry.Hash = crypto.Hash(entryURL) + entry.Hash = crypto.SHA256(entryURL) default: - entry.Hash = crypto.Hash(entry.Title + entry.Content) + entry.Hash = crypto.SHA256(entry.Title + entry.Content) } // Find CommentsURL if defined.