diff --git a/internal/reader/sanitizer/truncate.go b/internal/reader/sanitizer/truncate.go
index bac2b453..c6afdd75 100644
--- a/internal/reader/sanitizer/truncate.go
+++ b/internal/reader/sanitizer/truncate.go
@@ -9,8 +9,9 @@ func TruncateHTML(input string, max int) string {
 	text := StripTags(input)
-	text = strings.ReplaceAll(text, "\n", " ")
-	text = strings.ReplaceAll(text, "\t", " ")
-	text = strings.ReplaceAll(text, "  ", " ")
-	text = strings.TrimSpace(text)
+
+	// strings.Fields splits on every run of Unicode whitespace (spaces, tabs,
+	// newlines) and yields no empty fields, so a single Fields/Join pass both
+	// collapses inner whitespace and trims the ends.
+	text = strings.Join(strings.Fields(text), " ")
 
 	// Convert to runes to be safe with unicode
 	runes := []rune(text)
diff --git a/internal/reader/sanitizer/truncate_test.go b/internal/reader/sanitizer/truncate_test.go
index bb50f039..0cd7fcdb 100644
--- a/internal/reader/sanitizer/truncate_test.go
+++ b/internal/reader/sanitizer/truncate_test.go
@@ -62,3 +62,55 @@ func TestTruncateHTMLWithMultilineTextLowerThanLimit(t *testing.T) {
 		t.Errorf(`Wrong output: %q != %q`, expected, output)
 	}
 }
+
+// TestTruncateHTMLWithMultipleSpaces checks that TruncateHTML collapses runs of
+// spaces, tabs and newlines into single spaces, with and without truncation.
+func TestTruncateHTMLWithMultipleSpaces(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		maxLen   int
+		expected string
+	}{
+		{
+			name:     "multiple spaces",
+			input:    "hello    world   test",
+			maxLen:   20,
+			expected: "hello world test",
+		},
+		{
+			name:     "tabs and newlines",
+			input:    "hello\t\tworld\n\ntest",
+			maxLen:   20,
+			expected: "hello world test",
+		},
+		{
+			name:     "truncation with unicode",
+			input:    "hello world 你好",
+			maxLen:   11,
+			expected: "hello world…",
+		},
+		{
+			name:     "html stripping",
+			input:    "<p>hello    world   test</p>",
+			maxLen:   20,
+			expected: "hello world test",
+		},
+		{
+			name:     "no truncation needed",
+			input:    "hello world",
+			maxLen:   20,
+			expected: "hello world",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := TruncateHTML(tt.input, tt.maxLen)
+			if result != tt.expected {
+				t.Errorf("TruncateHTML(%q, %d) = %q, want %q",
+					tt.input, tt.maxLen, result, tt.expected)
+			}
+		})
+	}
+}