From 828a4334db7861de849fd9b6ee361b54d50b0580 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?=
Date: Tue, 6 May 2025 21:09:57 -0700
Subject: [PATCH] fix(sanitizer): MathML tags are not fully supported by `golang.org/x/net/html`

See https://github.com/golang/net/blob/master/html/atom/gen.go and https://github.com/golang/net/blob/master/html/atom/table.go
---
 internal/reader/sanitizer/sanitizer.go      | 12 ++++++++++--
 internal/reader/sanitizer/sanitizer_test.go | 10 ++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/internal/reader/sanitizer/sanitizer.go b/internal/reader/sanitizer/sanitizer.go
index df917572..994f660e 100644
--- a/internal/reader/sanitizer/sanitizer.go
+++ b/internal/reader/sanitizer/sanitizer.go
@@ -82,7 +82,7 @@ var (
 		"annotation":     {},
 		"annotation-xml": {},
 		"maction":        {},
-		"math":           {},
+		"math":           {"xmlns"},
 		"merror":         {},
 		"mfrac":          {},
 		"mi":             {},
@@ -131,7 +131,15 @@ func Sanitize(baseURL, input string) string {
 		}
 
 		token := tokenizer.Token()
-		tagName := token.DataAtom.String()
+
+		// Note: MathML elements are not fully supported by golang.org/x/net/html.
+		// See https://github.com/golang/net/blob/master/html/atom/gen.go
+		// and https://github.com/golang/net/blob/master/html/atom/table.go
+		tagName := token.Data
+		if tagName == "" {
+			continue
+		}
+
 		switch token.Type {
 		case html.TextToken:
 			if len(blockedStack) > 0 {
diff --git a/internal/reader/sanitizer/sanitizer_test.go b/internal/reader/sanitizer/sanitizer_test.go
index d26dafc7..5cd49353 100644
--- a/internal/reader/sanitizer/sanitizer_test.go
+++ b/internal/reader/sanitizer/sanitizer_test.go
@@ -705,3 +705,13 @@ func TestAttributesAreStripped(t *testing.T) {
 		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
 	}
 }
+
+func TestMathML(t *testing.T) {
+	input := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	expected := `<math xmlns="http://www.w3.org/1998/Math/MathML"><msup><mi>x</mi><mn>2</mn></msup></math>`
+	output := Sanitize("http://example.org/", input)
+
+	if expected != output {
+		t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
+	}
+}