1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-06-27 16:36:00 +00:00

feat(sanitizer): improve text truncation with better space handling

This commit is contained in:
Frédéric Guillot 2025-02-06 21:17:10 -08:00
parent e777f12490
commit f2f60a8f73
2 changed files with 53 additions and 2 deletions

View file

@ -9,8 +9,9 @@ func TruncateHTML(input string, max int) string {
text := StripTags(input) text := StripTags(input)
text = strings.ReplaceAll(text, "\n", " ") text = strings.ReplaceAll(text, "\n", " ")
text = strings.ReplaceAll(text, "\t", " ") text = strings.ReplaceAll(text, "\t", " ")
text = strings.ReplaceAll(text, " ", " ")
text = strings.TrimSpace(text) // Collapse multiple spaces into a single space
text = strings.Join(strings.Fields(text), " ")
// Convert to runes to be safe with unicode // Convert to runes to be safe with unicode
runes := []rune(text) runes := []rune(text)

View file

@ -62,3 +62,53 @@ func TestTruncateHTMLWithMultilineTextLowerThanLimit(t *testing.T) {
t.Errorf(`Wrong output: %q != %q`, expected, output) t.Errorf(`Wrong output: %q != %q`, expected, output)
} }
} }
// TestTruncateHTMLWithMultipleSpaces checks that TruncateHTML normalizes
// whitespace: runs of spaces, tabs and newlines are expected to come out as
// single spaces, including when the input is also truncated or contains tags.
func TestTruncateHTMLWithMultipleSpaces(t *testing.T) {
	tests := []struct {
		name     string
		input    string
		maxLen   int
		expected string
	}{
		{
			// The input must contain runs of consecutive spaces, otherwise
			// this case does not exercise the collapsing behaviour at all.
			name:     "multiple spaces",
			input:    "hello    world   test",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			name:     "tabs and newlines",
			input:    "hello\t\tworld\n\ntest",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			// The text is longer than maxLen, so the output is expected to be
			// cut after maxLen characters and terminated with an ellipsis.
			name:     "truncation with unicode",
			input:    "hello world 你好",
			maxLen:   11,
			expected: "hello world…",
		},
		{
			name:     "html stripping",
			input:    "<p>hello <b>world</b> test</p>",
			maxLen:   20,
			expected: "hello world test",
		},
		{
			name:     "no truncation needed",
			input:    "hello world",
			maxLen:   20,
			expected: "hello world",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result := TruncateHTML(tt.input, tt.maxLen)
			if result != tt.expected {
				t.Errorf("TruncateHTML(%q, %d) = %q, want %q",
					tt.input, tt.maxLen, result, tt.expected)
			}
		})
	}
}