From d53fd17e10938b443719635199daf0f2b382e4ee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Mon, 9 Jun 2025 20:24:12 -0700
Subject: [PATCH] feat(sanitizer): validate MathML XML namespace

---
 internal/reader/sanitizer/sanitizer.go      |  5 +++++
 internal/reader/sanitizer/sanitizer_test.go | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go
index fab21fa9..5c0026f1 100644
--- a/internal/reader/sanitizer/sanitizer.go
+++ b/internal/reader/sanitizer/sanitizer.go
@@ -234,6 +234,11 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute, sa
 			continue
 		}
 
+		// Overwrite any non-MathML xmlns value on <math> with the MathML namespace URI.
+		if tagName == "math" && attribute.Key == "xmlns" && value != "http://www.w3.org/1998/Math/MathML" {
+			value = "http://www.w3.org/1998/Math/MathML"
+		}
+
 		if tagName == "img" && attribute.Key == "fetchpriority" {
 			if !isValidFetchPriorityValue(value) {
 				continue
diff --git a/internal/reader/sanitizer/sanitizer_test.go b/internal/reader/sanitizer/sanitizer_test.go
index afe2e928..0628ec9d 100644
--- a/internal/reader/sanitizer/sanitizer_test.go
+++ b/internal/reader/sanitizer/sanitizer_test.go
@@ -829,3 +829,15 @@ func TestMathML(t *testing.T) {
 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
 	}
 }
+
+// TestInvalidMathMLXMLNamespace checks that a <math> element carrying a
+// foreign xmlns value comes out with the MathML namespace URI instead.
+func TestInvalidMathMLXMLNamespace(t *testing.T) {
+	input := `<math xmlns="http://example.org"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	output := SanitizeHTMLWithDefaultOptions("http://example.org/", input)
+
+	if expected != output {
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}