From 9a9a271b1f30e11e1c8e72a03eba3985474c7647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sat, 6 Feb 2021 14:01:41 -0800 Subject: [PATCH] Limit full-text search indexation to first 500K characters tsvector has a size limit of 1MB. See https://www.postgresql.org/docs/13/textsearch-limitations.html Input text is now truncated to avoid this error: "pq: string is too long for tsvector (1057834 bytes, max 1048575 bytes)" --- storage/entry.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/storage/entry.go b/storage/entry.go index deedb728..ed115d66 100644 --- a/storage/entry.go +++ b/storage/entry.go @@ -89,7 +89,7 @@ func (s *Storage) UpdateEntryContent(entry *model.Entry) error { UPDATE entries SET - document_vectors = setweight(to_tsvector(substring(coalesce(title, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce(content, '') for 1000000)), 'B') + document_vectors = setweight(to_tsvector(left(coalesce(title, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce(content, ''), 500000)), 'B') WHERE id=$1 AND user_id=$2 ` @@ -133,7 +133,7 @@ func (s *Storage) createEntry(tx *sql.Tx, entry *model.Entry) error { $9, $10, now(), - setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($6, '') for 1000000)), 'B') + setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($6, ''), 500000)), 'B') ) RETURNING id, status @@ -182,7 +182,7 @@ func (s *Storage) updateEntry(tx *sql.Tx, entry *model.Entry) error { content=$4, author=$5, reading_time=$6, - document_vectors = setweight(to_tsvector(substring(coalesce($1, '') for 1000000)), 'A') || setweight(to_tsvector(substring(coalesce($4, '') for 1000000)), 'B') + document_vectors = setweight(to_tsvector(left(coalesce($1, ''), 500000)), 'A') || setweight(to_tsvector(left(coalesce($4, ''), 500000)), 'B') WHERE user_id=$7 AND feed_id=$8 AND hash=$9 RETURNING