From 58178d90cbb502a3dcb992e619d8e6c44be4d0df Mon Sep 17 00:00:00 2001 From: jvoisin Date: Sat, 7 Dec 2024 23:03:56 +0100 Subject: [PATCH] Refactor Sanitize - Use `token.String()` instead of `html.EscapeString(token.Data)` - Refactor conditions to highlight their similarity, enabling further refactoring This refactoring brings to light at least one bug: `tagStack` is never emptied. --- internal/reader/sanitizer/sanitizer.go | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go index e8cff23f..3060e250 100644 --- a/internal/reader/sanitizer/sanitizer.go +++ b/internal/reader/sanitizer/sanitizer.go @@ -111,7 +111,7 @@ func Sanitize(baseURL, input string) string { continue } - buffer.WriteString(html.EscapeString(token.Data)) + buffer.WriteString(token.String()) case html.StartTagToken: parentTag = tagName @@ -121,36 +121,42 @@ func Sanitize(baseURL, input string) string { if isBlockedTag(tagName) || slices.ContainsFunc(token.Attr, func(attr html.Attribute) bool { return attr.Key == "hidden" }) { blockedStack = append(blockedStack, tagName) - } else if len(blockedStack) == 0 && isValidTag(tagName) { - attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) + continue + } + if len(blockedStack) == 0 && isValidTag(tagName) { + attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) if hasRequiredAttributes(tagName, attrNames) { if len(attrNames) > 0 { buffer.WriteString("<" + tagName + " " + htmlAttributes + ">") } else { - buffer.WriteString("<" + tagName + ">") + buffer.WriteString(token.String()) } tagStack = append(tagStack, tagName) } } case html.EndTagToken: - if len(blockedStack) > 0 && blockedStack[len(blockedStack)-1] == tagName { - blockedStack = blockedStack[:len(blockedStack)-1] - } else if len(blockedStack) == 0 && isValidTag(tagName) && slices.Contains(tagStack, tagName) { - buffer.WriteString("")
+ if len(blockedStack) == 0 { + if isValidTag(tagName) && slices.Contains(tagStack, tagName) { + buffer.WriteString(token.String()) + } + } else { + if blockedStack[len(blockedStack)-1] == tagName { + blockedStack = blockedStack[:len(blockedStack)-1] + } } case html.SelfClosingTagToken: if isPixelTracker(tagName, token.Attr) { continue } - if isValidTag(tagName) && len(blockedStack) == 0 { + if len(blockedStack) == 0 && isValidTag(tagName) { attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) if hasRequiredAttributes(tagName, attrNames) { if len(attrNames) > 0 { buffer.WriteString("<" + tagName + " " + htmlAttributes + "/>") } else { - buffer.WriteString("<" + tagName + "/>") + buffer.WriteString(token.String()) } } }