1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-10-10 19:32:06 +00:00

First commit

This commit is contained in:
Frédéric Guillot 2017-11-19 21:10:04 -08:00
commit 8ffb773f43
2121 changed files with 1118910 additions and 0 deletions

131
vendor/github.com/tdewolff/minify/html/buffer.go generated vendored Normal file
View file

@ -0,0 +1,131 @@
package html // import "github.com/tdewolff/minify/html"
import (
"github.com/tdewolff/parse"
"github.com/tdewolff/parse/html"
)
// Token is a single token unit with an attribute value (if given) and hash of the data.
// It is filled in by TokenBuffer.read from the lexer's current token.
type Token struct {
// Embedded token kind as returned by the lexer's Next call.
html.TokenType
// Hash of Text for attribute, start tag and end tag tokens; zero for all other tokens.
Hash html.Hash
// Data is the byte slice returned by the lexer's Next call for this token.
Data []byte
// Text is the value of the lexer's Text call for this token.
Text []byte
// AttrVal is the attribute value for attribute tokens, with surrounding quotes
// removed and whitespace trimmed; nil for all other tokens.
AttrVal []byte
// Traits is looked up in attrMap (attributes) or tagMap (tags) by Hash; zero otherwise.
Traits traits
}
// TokenBuffer is a buffer that allows for token look-ahead.
type TokenBuffer struct {
// l is the lexer that tokens are read from.
l *html.Lexer
// buf holds the tokens read ahead by Peek.
buf []Token
// pos is the index in buf of the next token Shift will return.
pos int
// attrBuffer is the result slice reused between calls to Attributes.
attrBuffer []*Token
}
// NewTokenBuffer returns a new TokenBuffer that reads its tokens from l.
// The look-ahead buffer starts out empty with room for eight tokens.
func NewTokenBuffer(l *html.Lexer) *TokenBuffer {
	z := new(TokenBuffer)
	z.l = l
	z.buf = make([]Token, 0, 8)
	return z
}
// read advances the lexer by one token and stores the result in t.
// Attribute tokens get their unquoted, trimmed value plus hash and traits from
// attrMap; start and end tags get hash and traits from tagMap; every other
// token has AttrVal, Hash and Traits cleared.
func (z *TokenBuffer) read(t *Token) {
	t.TokenType, t.Data = z.l.Next()
	t.Text = z.l.Text()

	switch t.TokenType {
	case html.AttributeToken:
		val := z.l.AttrVal()
		if n := len(val); n > 1 && (val[0] == '"' || val[0] == '\'') {
			// quotes will be readded in attribute loop if necessary
			val = parse.TrimWhitespace(val[1 : n-1])
		}
		t.AttrVal = val
		t.Hash = html.ToHash(t.Text)
		t.Traits = attrMap[t.Hash]
	case html.StartTagToken, html.EndTagToken:
		t.AttrVal = nil
		t.Hash = html.ToHash(t.Text)
		t.Traits = tagMap[t.Hash]
	default:
		t.AttrVal, t.Hash, t.Traits = nil, 0, 0
	}
}
// Peek returns the token pos positions ahead of the current position without
// consuming it, reading more tokens from the lexer (and possibly allocating a
// larger buffer) when needed.
// Once an error token has been buffered, peeking at or beyond it returns that
// error token.
func (z *TokenBuffer) Peek(pos int) *Token {
	// Fast path: the requested token is already buffered.
	if abs := z.pos + pos; abs < len(z.buf) {
		return &z.buf[abs]
	}

	// Nothing can be read past an error token; hand back the error itself.
	if n := len(z.buf); n > 0 && z.buf[n-1].TokenType == html.ErrorToken {
		return &z.buf[n-1]
	}

	unread := len(z.buf) - z.pos // buffered tokens not yet shifted
	need := pos + 1              // required peek length

	// Reuse the current backing array unless it is too small for comfort.
	next := z.buf
	if capacity := cap(z.buf); 2*need > capacity {
		next = make([]Token, 0, 2*capacity+need)
	}

	// Move the unread tokens to the front, then fill up to the needed length.
	copy(next[:unread], z.buf[z.pos:])
	next = next[:need]
	idx := pos
	for i := unread; i < need; i++ {
		z.read(&next[i])
		if next[i].TokenType == html.ErrorToken {
			// Stop at the first error and return it instead.
			next = next[:i+1]
			idx = i
			break
		}
	}
	z.pos, z.buf = 0, next
	return &z.buf[idx]
}
// Shift returns the next token and advances past it.
// When no buffered tokens remain, the token is read straight from the lexer
// into the first slot of the buffer's backing array and the position is left
// untouched.
func (z *TokenBuffer) Shift() *Token {
	if z.pos < len(z.buf) {
		tok := &z.buf[z.pos]
		z.pos++
		return tok
	}
	tok := &z.buf[:1][0]
	z.read(tok)
	return tok
}
// Attributes extracts the given attribute hashes from a tag.
// It peeks over the run of attribute tokens at the current position and
// returns, in the same order as hashes, pointers to the matching tokens or nil
// where an attribute is absent. The returned slice is reused by the next call.
func (z *TokenBuffer) Attributes(hashes ...html.Hash) []*Token {
	// Count the attribute tokens that follow; this also buffers them.
	count := 0
	for z.Peek(count).TokenType == html.AttributeToken {
		count++
	}

	// Size the reusable result slice and make sure every slot starts as nil.
	if want := len(hashes); want > cap(z.attrBuffer) {
		z.attrBuffer = make([]*Token, want)
	} else {
		z.attrBuffer = z.attrBuffer[:want]
		for k := range z.attrBuffer {
			z.attrBuffer[k] = nil
		}
	}

	attrs := z.buf[z.pos : z.pos+count]
	for i := range attrs {
		attr := &attrs[i]
		for j, hash := range hashes {
			if attr.Hash == hash {
				z.attrBuffer[j] = attr
			}
		}
	}
	return z.attrBuffer
}

37
vendor/github.com/tdewolff/minify/html/buffer_test.go generated vendored Normal file
View file

@ -0,0 +1,37 @@
package html // import "github.com/tdewolff/minify/html"
import (
"bytes"
"testing"
"github.com/tdewolff/parse/html"
"github.com/tdewolff/test"
)
// TestBuffer checks the look-ahead bookkeeping of TokenBuffer: Shift on an
// empty buffer must not move the position, Peek must grow the buffer without
// consuming tokens, and peeking past the end must keep returning the same
// error token.
func TestBuffer(t *testing.T) {
	input := `<p><a href="//url">text</a>text<!--comment--></p>`
	buf := NewTokenBuffer(html.NewLexer(bytes.NewBufferString(input)))

	first := buf.Shift()
	test.That(t, first.Hash == html.P, "first token is <p>")
	test.That(t, buf.pos == 0, "shift first token and restore position")
	test.That(t, len(buf.buf) == 0, "shift first token and restore length")

	test.That(t, buf.Peek(2).Hash == html.Href, "third token is href")
	test.That(t, buf.pos == 0, "don't change position after peeking")
	test.That(t, len(buf.buf) == 3, "two tokens after peeking")

	test.That(t, buf.Peek(8).Hash == html.P, "ninth token is <p>")
	test.That(t, buf.pos == 0, "don't change position after peeking")
	test.That(t, len(buf.buf) == 9, "nine tokens after peeking")

	test.That(t, buf.Peek(9).TokenType == html.ErrorToken, "tenth token is an error")
	test.That(t, buf.Peek(9) == buf.Peek(10), "tenth and eleventh tokens are EOF")
	test.That(t, len(buf.buf) == 10, "ten tokens after peeking")

	_ = buf.Shift()
	third := buf.Shift()
	test.That(t, third.Hash == html.A, "third token is <a>")
	test.That(t, buf.pos == 2, "don't change position after peeking")
}

463
vendor/github.com/tdewolff/minify/html/html.go generated vendored Normal file
View file

@ -0,0 +1,463 @@
// Package html minifies HTML5 following the specifications at http://www.w3.org/TR/html5/syntax.html.
package html // import "github.com/tdewolff/minify/html"
import (
"bytes"
"io"
"github.com/tdewolff/minify"
"github.com/tdewolff/parse"
"github.com/tdewolff/parse/buffer"
"github.com/tdewolff/parse/html"
)
// Byte-slice constants shared by the minifier: output fragments written to the
// writer, media types passed to MinifyMimetype, and URL scheme prefixes that
// attribute values are compared against.
var (
gtBytes = []byte(">")
isBytes = []byte("=")
spaceBytes = []byte(" ")
doctypeBytes = []byte("<!doctype html>")
jsMimeBytes = []byte("text/javascript")
cssMimeBytes = []byte("text/css")
htmlMimeBytes = []byte("text/html")
svgMimeBytes = []byte("image/svg+xml")
mathMimeBytes = []byte("application/mathml+xml")
dataSchemeBytes = []byte("data:")
jsSchemeBytes = []byte("javascript:")
httpBytes = []byte("http")
)
////////////////////////////////////////////////////////////////
// DefaultMinifier is the default minifier.
// It has every Keep* option disabled and is the one used by the package-level Minify function.
var DefaultMinifier = &Minifier{}
// Minifier is an HTML minifier.
// Each option, when set, disables one of the reductions Minify would otherwise apply.
type Minifier struct {
// KeepConditionalComments writes comments whose text starts with "[if " or
// equals "[endif]" instead of dropping them like other comments.
KeepConditionalComments bool
// KeepDefaultAttrVals keeps attributes whose value equals a known default
// (e.g. type=text/javascript on script).
KeepDefaultAttrVals bool
// KeepDocumentTags keeps attribute-less html, head and body tags.
KeepDocumentTags bool
// KeepEndTags keeps end tags that would otherwise be omitted (e.g. </li>, </td>, </p>).
KeepEndTags bool
// KeepWhitespace keeps whitespace around tags that would otherwise be trimmed.
KeepWhitespace bool
}
// Minify minifies HTML data, it reads from r and writes to w.
// It delegates to DefaultMinifier.Minify with the same arguments.
func Minify(m *minify.M, w io.Writer, r io.Reader, params map[string]string) error {
return DefaultMinifier.Minify(m, w, r, params)
}
// Minify minifies HTML data, it reads from r and writes to w.
//
// The input is tokenized and each token is written in its shortest form:
// comments are dropped (conditional comments are kept when
// KeepConditionalComments is set), whitespace in text is collapsed and trimmed
// around block tags, optional tags and default attribute values are omitted,
// and embedded CSS, JavaScript, SVG and MathML are passed to the minifiers
// registered on m for their media type. The params argument is ignored.
//
// It returns nil when the input ends with io.EOF, and otherwise the first
// error reported by the lexer, the writer or a nested minifier.
func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
	var rawTagHash html.Hash
	var rawTagMediatype []byte

	omitSpace := true // if true the next leading space is omitted
	inPre := false

	defaultScriptType := jsMimeBytes
	defaultScriptParams := map[string]string(nil)
	defaultStyleType := cssMimeBytes
	defaultStyleParams := map[string]string(nil)
	defaultInlineStyleParams := map[string]string{"inline": "1"}

	attrMinifyBuffer := buffer.NewWriter(make([]byte, 0, 64))
	attrByteBuffer := make([]byte, 0, 64)

	l := html.NewLexer(r)
	defer l.Restore()

	tb := NewTokenBuffer(l)
	for {
		// Copy the token: the buffer slot may be overwritten by later peeks and shifts.
		t := *tb.Shift()
	SWITCH:
		switch t.TokenType {
		case html.ErrorToken:
			if l.Err() == io.EOF {
				return nil
			}
			return l.Err()
		case html.DoctypeToken:
			if _, err := w.Write(doctypeBytes); err != nil {
				return err
			}
		case html.CommentToken:
			if o.KeepConditionalComments && len(t.Text) > 6 && (bytes.HasPrefix(t.Text, []byte("[if ")) || bytes.Equal(t.Text, []byte("[endif]"))) {
				// [if ...] is always 7 or more characters, [endif] is only encountered for downlevel-revealed
				// see https://msdn.microsoft.com/en-us/library/ms537512(v=vs.85).aspx#syntax
				begin := bytes.IndexByte(t.Data, '>') + 1
				end := len(t.Data) - len("<![endif]-->")
				// Only treat the comment as downlevel-hidden when it is well formed:
				// without the closing <![endif]--> (or when the first '>' lies inside
				// it) begin can exceed end and slicing the body would panic.
				if bytes.HasPrefix(t.Data, []byte("<!--[if ")) && bytes.HasSuffix(t.Data, []byte("<![endif]-->")) && begin <= end { // downlevel-hidden
					if _, err := w.Write(t.Data[:begin]); err != nil {
						return err
					}
					if err := o.Minify(m, w, buffer.NewReader(t.Data[begin:end]), nil); err != nil {
						return err
					}
					if _, err := w.Write(t.Data[end:]); err != nil {
						return err
					}
				} else if _, err := w.Write(t.Data); err != nil { // downlevel-revealed or malformed downlevel-hidden
					return err
				}
			}
		case html.SvgToken:
			if err := m.MinifyMimetype(svgMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
				if err != minify.ErrNotExist {
					return err
				} else if _, err := w.Write(t.Data); err != nil {
					return err
				}
			}
		case html.MathToken:
			if err := m.MinifyMimetype(mathMimeBytes, w, buffer.NewReader(t.Data), nil); err != nil {
				if err != minify.ErrNotExist {
					return err
				} else if _, err := w.Write(t.Data); err != nil {
					return err
				}
			}
		case html.TextToken:
			// CSS and JS minifiers for inline code
			if rawTagHash != 0 {
				if rawTagHash == html.Style || rawTagHash == html.Script || rawTagHash == html.Iframe {
					var mimetype []byte
					var params map[string]string
					if rawTagHash == html.Iframe {
						mimetype = htmlMimeBytes
					} else if len(rawTagMediatype) > 0 {
						mimetype, params = parse.Mediatype(rawTagMediatype)
					} else if rawTagHash == html.Script {
						mimetype = defaultScriptType
						params = defaultScriptParams
					} else if rawTagHash == html.Style {
						mimetype = defaultStyleType
						params = defaultStyleParams
					}
					if err := m.MinifyMimetype(mimetype, w, buffer.NewReader(t.Data), params); err != nil {
						if err != minify.ErrNotExist {
							return err
						} else if _, err := w.Write(t.Data); err != nil {
							return err
						}
					}
				} else if _, err := w.Write(t.Data); err != nil {
					return err
				}
			} else if inPre {
				if _, err := w.Write(t.Data); err != nil {
					return err
				}
			} else {
				t.Data = parse.ReplaceMultipleWhitespace(t.Data)

				// whitespace removal; trim left
				if omitSpace && len(t.Data) > 0 && (t.Data[0] == ' ' || t.Data[0] == '\n') {
					t.Data = t.Data[1:]
				}

				// whitespace removal; trim right
				omitSpace = false
				if len(t.Data) == 0 {
					omitSpace = true
				} else if t.Data[len(t.Data)-1] == ' ' || t.Data[len(t.Data)-1] == '\n' {
					omitSpace = true
					i := 0
					for {
						next := tb.Peek(i)
						// trim if EOF, text token with leading whitespace or block token
						if next.TokenType == html.ErrorToken {
							t.Data = t.Data[:len(t.Data)-1]
							omitSpace = false
							break
						} else if next.TokenType == html.TextToken {
							// this only happens when a comment, doctype or phrasing end tag (only for !o.KeepWhitespace) was in between
							// remove if the text token starts with a whitespace
							if len(next.Data) > 0 && parse.IsWhitespace(next.Data[0]) {
								t.Data = t.Data[:len(t.Data)-1]
								omitSpace = false
							}
							break
						} else if next.TokenType == html.StartTagToken || next.TokenType == html.EndTagToken {
							if o.KeepWhitespace {
								break
							}
							// remove when followed up by a block tag
							if next.Traits&nonPhrasingTag != 0 {
								t.Data = t.Data[:len(t.Data)-1]
								omitSpace = false
								break
							} else if next.TokenType == html.StartTagToken {
								break
							}
						}
						i++
					}
				}

				if _, err := w.Write(t.Data); err != nil {
					return err
				}
			}
		case html.StartTagToken, html.EndTagToken:
			rawTagHash = 0
			hasAttributes := false
			if t.TokenType == html.StartTagToken {
				if next := tb.Peek(0); next.TokenType == html.AttributeToken {
					hasAttributes = true
				}
				if t.Traits&rawTag != 0 {
					// ignore empty script and style tags
					if !hasAttributes && (t.Hash == html.Script || t.Hash == html.Style) {
						if next := tb.Peek(1); next.TokenType == html.EndTagToken {
							tb.Shift()
							tb.Shift()
							break
						}
					}
					rawTagHash = t.Hash
					rawTagMediatype = nil
				}
			} else if t.Hash == html.Template {
				omitSpace = true // EndTagToken
			}

			if t.Hash == html.Pre {
				inPre = t.TokenType == html.StartTagToken
			}

			// remove superfluous tags, except for html, head and body tags when KeepDocumentTags is set
			if !hasAttributes && (!o.KeepDocumentTags && (t.Hash == html.Html || t.Hash == html.Head || t.Hash == html.Body) || t.Hash == html.Colgroup) {
				break
			} else if t.TokenType == html.EndTagToken {
				if !o.KeepEndTags {
					if t.Hash == html.Thead || t.Hash == html.Tbody || t.Hash == html.Tfoot || t.Hash == html.Tr || t.Hash == html.Th || t.Hash == html.Td ||
						t.Hash == html.Optgroup || t.Hash == html.Option || t.Hash == html.Dd || t.Hash == html.Dt ||
						t.Hash == html.Li || t.Hash == html.Rb || t.Hash == html.Rt || t.Hash == html.Rtc || t.Hash == html.Rp {
						break
					} else if t.Hash == html.P {
						i := 0
						for {
							next := tb.Peek(i)
							i++
							// continue if text token is empty or whitespace
							if next.TokenType == html.TextToken && parse.IsAllWhitespace(next.Data) {
								continue
							}
							if next.TokenType == html.ErrorToken || next.TokenType == html.EndTagToken && next.Traits&keepPTag == 0 || next.TokenType == html.StartTagToken && next.Traits&omitPTag != 0 {
								break SWITCH // omit p end tag
							}
							break
						}
					}
				}

				if o.KeepWhitespace || t.Traits&objectTag != 0 {
					omitSpace = false
				} else if t.Traits&nonPhrasingTag != 0 {
					omitSpace = true // omit spaces after block elements
				}

				// Cut anything between the tag name and the closing '>' (e.g. "</span >").
				if len(t.Data) > 3+len(t.Text) {
					t.Data[2+len(t.Text)] = '>'
					t.Data = t.Data[:3+len(t.Text)]
				}
				if _, err := w.Write(t.Data); err != nil {
					return err
				}
				break
			}

			if o.KeepWhitespace || t.Traits&objectTag != 0 {
				omitSpace = false
			} else if t.Traits&nonPhrasingTag != 0 {
				omitSpace = true // omit spaces after block elements
			}

			if _, err := w.Write(t.Data); err != nil {
				return err
			}

			if hasAttributes {
				if t.Hash == html.Meta {
					attrs := tb.Attributes(html.Content, html.Http_Equiv, html.Charset, html.Name)
					if content := attrs[0]; content != nil {
						if httpEquiv := attrs[1]; httpEquiv != nil {
							content.AttrVal = minify.ContentType(content.AttrVal)
							if charset := attrs[2]; charset == nil && parse.EqualFold(httpEquiv.AttrVal, []byte("content-type")) && bytes.Equal(content.AttrVal, []byte("text/html;charset=utf-8")) {
								// Rewrite to the shorter <meta charset=utf-8> form.
								httpEquiv.Text = nil
								content.Text = []byte("charset")
								content.Hash = html.Charset
								content.AttrVal = []byte("utf-8")
							} else if parse.EqualFold(httpEquiv.AttrVal, []byte("content-style-type")) {
								defaultStyleType, defaultStyleParams = parse.Mediatype(content.AttrVal)
								// Inline styles get their own copy of the parameters so that
								// the "inline" marker does not leak into the parameters used
								// for the contents of <style> elements.
								defaultInlineStyleParams = make(map[string]string, len(defaultStyleParams)+1)
								for k, v := range defaultStyleParams {
									defaultInlineStyleParams[k] = v
								}
								defaultInlineStyleParams["inline"] = "1"
							} else if parse.EqualFold(httpEquiv.AttrVal, []byte("content-script-type")) {
								defaultScriptType, defaultScriptParams = parse.Mediatype(content.AttrVal)
							}
						}
						if name := attrs[3]; name != nil {
							if parse.EqualFold(name.AttrVal, []byte("keywords")) {
								content.AttrVal = bytes.Replace(content.AttrVal, []byte(", "), []byte(","), -1)
							} else if parse.EqualFold(name.AttrVal, []byte("viewport")) {
								content.AttrVal = bytes.Replace(content.AttrVal, []byte(" "), []byte(""), -1)
								// Minify every number that follows an '=' in place.
								for i := 0; i < len(content.AttrVal); i++ {
									if content.AttrVal[i] == '=' && i+2 < len(content.AttrVal) {
										i++
										if n := parse.Number(content.AttrVal[i:]); n > 0 {
											minNum := minify.Number(content.AttrVal[i:i+n], -1)
											if len(minNum) < n {
												copy(content.AttrVal[i:i+len(minNum)], minNum)
												copy(content.AttrVal[i+len(minNum):], content.AttrVal[i+n:])
												content.AttrVal = content.AttrVal[:len(content.AttrVal)+len(minNum)-n]
											}
											i += len(minNum)
										}
										i-- // mitigate for-loop increase
									}
								}
							}
						}
					}
				} else if t.Hash == html.Script {
					// charset is dropped when the script also has a src attribute
					attrs := tb.Attributes(html.Src, html.Charset)
					if attrs[0] != nil && attrs[1] != nil {
						attrs[1].Text = nil
					}
				}

				// write attributes
				htmlEqualIdName := false
				for {
					attr := *tb.Shift()
					if attr.TokenType != html.AttributeToken {
						break
					} else if attr.Text == nil {
						continue // removed attribute
					}

					// on <a>, name is dropped when it equals id
					if t.Hash == html.A && (attr.Hash == html.Id || attr.Hash == html.Name) {
						if attr.Hash == html.Id {
							if name := tb.Attributes(html.Name)[0]; name != nil && bytes.Equal(attr.AttrVal, name.AttrVal) {
								htmlEqualIdName = true
							}
						} else if htmlEqualIdName {
							continue
						} else if id := tb.Attributes(html.Id)[0]; id != nil && bytes.Equal(id.AttrVal, attr.AttrVal) {
							continue
						}
					}

					val := attr.AttrVal
					if len(val) == 0 && (attr.Hash == html.Class ||
						attr.Hash == html.Dir ||
						attr.Hash == html.Id ||
						attr.Hash == html.Lang ||
						attr.Hash == html.Name ||
						attr.Hash == html.Title ||
						attr.Hash == html.Action && t.Hash == html.Form ||
						attr.Hash == html.Value && t.Hash == html.Input) {
						continue // omit empty attribute values
					}
					if attr.Traits&caselessAttr != 0 {
						val = parse.ToLower(val)
						if attr.Hash == html.Enctype || attr.Hash == html.Codetype || attr.Hash == html.Accept || attr.Hash == html.Type && (t.Hash == html.A || t.Hash == html.Link || t.Hash == html.Object || t.Hash == html.Param || t.Hash == html.Script || t.Hash == html.Style || t.Hash == html.Source) {
							val = minify.ContentType(val)
						}
					}
					if rawTagHash != 0 && attr.Hash == html.Type {
						// remembered for minifying the raw tag's text content
						rawTagMediatype = parse.Copy(val)
					}

					// default attribute values can be omitted
					if !o.KeepDefaultAttrVals && (attr.Hash == html.Type && (t.Hash == html.Script && bytes.Equal(val, []byte("text/javascript")) ||
						t.Hash == html.Style && bytes.Equal(val, []byte("text/css")) ||
						t.Hash == html.Link && bytes.Equal(val, []byte("text/css")) ||
						t.Hash == html.Input && bytes.Equal(val, []byte("text")) ||
						t.Hash == html.Button && bytes.Equal(val, []byte("submit"))) ||
						attr.Hash == html.Language && t.Hash == html.Script ||
						attr.Hash == html.Method && bytes.Equal(val, []byte("get")) ||
						attr.Hash == html.Enctype && bytes.Equal(val, []byte("application/x-www-form-urlencoded")) ||
						attr.Hash == html.Colspan && bytes.Equal(val, []byte("1")) ||
						attr.Hash == html.Rowspan && bytes.Equal(val, []byte("1")) ||
						attr.Hash == html.Shape && bytes.Equal(val, []byte("rect")) ||
						attr.Hash == html.Span && bytes.Equal(val, []byte("1")) ||
						attr.Hash == html.Clear && bytes.Equal(val, []byte("none")) ||
						attr.Hash == html.Frameborder && bytes.Equal(val, []byte("1")) ||
						attr.Hash == html.Scrolling && bytes.Equal(val, []byte("auto")) ||
						attr.Hash == html.Valuetype && bytes.Equal(val, []byte("data")) ||
						attr.Hash == html.Media && t.Hash == html.Style && bytes.Equal(val, []byte("all"))) {
						continue
					}

					// CSS and JS minifiers for attribute inline code
					if attr.Hash == html.Style {
						attrMinifyBuffer.Reset()
						if err := m.MinifyMimetype(defaultStyleType, attrMinifyBuffer, buffer.NewReader(val), defaultInlineStyleParams); err == nil {
							val = attrMinifyBuffer.Bytes()
						} else if err != minify.ErrNotExist {
							return err
						}
						if len(val) == 0 {
							continue
						}
					} else if len(attr.Text) > 2 && attr.Text[0] == 'o' && attr.Text[1] == 'n' {
						if len(val) >= 11 && parse.EqualFold(val[:11], jsSchemeBytes) {
							val = val[11:]
						}
						attrMinifyBuffer.Reset()
						if err := m.MinifyMimetype(defaultScriptType, attrMinifyBuffer, buffer.NewReader(val), defaultScriptParams); err == nil {
							val = attrMinifyBuffer.Bytes()
						} else if err != minify.ErrNotExist {
							return err
						}
						if len(val) == 0 {
							continue
						}
					} else if len(val) > 5 && attr.Traits&urlAttr != 0 { // anchors are already handled
						if parse.EqualFold(val[:4], httpBytes) {
							// strip the scheme when it matches that of m.URL, else lowercase it
							if val[4] == ':' {
								if m.URL != nil && m.URL.Scheme == "http" {
									val = val[5:]
								} else {
									parse.ToLower(val[:4])
								}
							} else if (val[4] == 's' || val[4] == 'S') && val[5] == ':' {
								if m.URL != nil && m.URL.Scheme == "https" {
									val = val[6:]
								} else {
									parse.ToLower(val[:5])
								}
							}
						} else if parse.EqualFold(val[:5], dataSchemeBytes) {
							val = minify.DataURI(m, val)
						}
					}

					if _, err := w.Write(spaceBytes); err != nil {
						return err
					}
					if _, err := w.Write(attr.Text); err != nil {
						return err
					}
					if len(val) > 0 && attr.Traits&booleanAttr == 0 {
						if _, err := w.Write(isBytes); err != nil {
							return err
						}
						// no quotes if possible, else prefer single or double depending on which occurs more often in value
						val = html.EscapeAttrVal(&attrByteBuffer, attr.AttrVal, val)
						if _, err := w.Write(val); err != nil {
							return err
						}
					}
				}
			}
			if _, err := w.Write(gtBytes); err != nil {
				return err
			}
		}
	}
}

408
vendor/github.com/tdewolff/minify/html/html_test.go generated vendored Normal file
View file

@ -0,0 +1,408 @@
package html // import "github.com/tdewolff/minify/html"
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net/url"
"os"
"regexp"
"testing"
"github.com/tdewolff/minify"
"github.com/tdewolff/minify/css"
"github.com/tdewolff/minify/js"
"github.com/tdewolff/minify/json"
"github.com/tdewolff/minify/svg"
"github.com/tdewolff/minify/xml"
"github.com/tdewolff/test"
)
// TestHTML runs the default HTML minifier over a table of inputs and compares
// the output with the expected minified form. CSS and JavaScript are handled
// by pass-through minifiers so only the HTML transformations are exercised.
func TestHTML(t *testing.T) {
	cases := []struct {
		html     string
		expected string
	}{
		{`html`, `html`},
		{`<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN" "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">`, `<!doctype html>`},
		{`<!-- comment -->`, ``},
		{`<style><!--\ncss\n--></style>`, `<style><!--\ncss\n--></style>`},
		{`<style>&</style>`, `<style>&</style>`},
		{`<html><head></head><body>x</body></html>`, `x`},
		{`<meta http-equiv="content-type" content="text/html; charset=utf-8">`, `<meta charset=utf-8>`},
		{`<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`, `<meta charset=utf-8>`},
		{`<meta name="keywords" content="a, b">`, `<meta name=keywords content=a,b>`},
		{`<meta name="viewport" content="width = 996" />`, `<meta name=viewport content="width=996">`},
		{`<span attr="test"></span>`, `<span attr=test></span>`},
		{`<span attr='test&apos;test'></span>`, `<span attr="test'test"></span>`},
		{`<span attr="test&quot;test"></span>`, `<span attr='test"test'></span>`},
		{`<span attr='test""&apos;&amp;test'></span>`, `<span attr='test""&#39;&amp;test'></span>`},
		{`<span attr="test/test"></span>`, `<span attr=test/test></span>`},
		{`<span>&amp;</span>`, `<span>&amp;</span>`},
		{`<span clear=none method=GET></span>`, `<span></span>`},
		{`<span onload="javascript:x;"></span>`, `<span onload=x;></span>`},
		{`<span selected="selected"></span>`, `<span selected></span>`},
		{`<noscript><html><img id="x"></noscript>`, `<noscript><img id=x></noscript>`},
		{`<body id="main"></body>`, `<body id=main>`},
		{`<link href="data:text/plain, data">`, `<link href=data:,+data>`},
		{`<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`, `<svg width="100" height="100"><circle cx="50" cy="50" r="40" stroke="green" stroke-width="4" fill="yellow" /></svg>`},
		{`</span >`, `</span>`},
		{`<meta name=viewport content="width=0.1, initial-scale=1.0 , maximum-scale=1000">`, `<meta name=viewport content="width=.1,initial-scale=1,maximum-scale=1e3">`},
		{`<br/>`, `<br>`},

		// increase coverage
		{`<script style="css">js</script>`, `<script style=css>js</script>`},
		{`<script type="application/javascript">js</script>`, `<script type=application/javascript>js</script>`},
		{`<meta http-equiv="content-type" content="text/plain, text/html">`, `<meta http-equiv=content-type content=text/plain,text/html>`},
		{`<meta http-equiv="content-style-type" content="text/less">`, `<meta http-equiv=content-style-type content=text/less>`},
		{`<meta http-equiv="content-style-type" content="text/less; charset=utf-8">`, `<meta http-equiv=content-style-type content="text/less;charset=utf-8">`},
		{`<meta http-equiv="content-script-type" content="application/js">`, `<meta http-equiv=content-script-type content=application/js>`},
		{`<span attr=""></span>`, `<span attr></span>`},
		{`<code>x</code>`, `<code>x</code>`},
		{`<p></p><p></p>`, `<p><p>`},
		{`<ul><li></li> <li></li></ul>`, `<ul><li><li></ul>`},
		{`<p></p><a></a>`, `<p></p><a></a>`},
		{`<p></p>x<a></a>`, `<p></p>x<a></a>`},
		{`<span style=>`, `<span>`},
		{`<button onclick=>`, `<button>`},

		// whitespace
		{`cats and dogs `, `cats and dogs`},
		{` <div> <i> test </i> <b> test </b> </div> `, `<div><i>test</i> <b>test</b></div>`},
		{`<strong>x </strong>y`, `<strong>x </strong>y`},
		{`<strong>x </strong> y`, `<strong>x</strong> y`},
		{"<strong>x </strong>\ny", "<strong>x</strong>\ny"},
		{`<p>x </p>y`, `<p>x</p>y`},
		{`x <p>y</p>`, `x<p>y`},
		{` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
		{`<p>x<br> y`, `<p>x<br>y`},
		{`<p>x </b> <b> y`, `<p>x</b> <b>y`},
		{`a <code></code> b`, `a <code></code>b`},
		{`a <code>code</code> b`, `a <code>code</code> b`},
		{`a <code> code </code> b`, `a <code>code</code> b`},
		{`a <script>script</script> b`, `a <script>script</script>b`},
		{"text\n<!--comment-->\ntext", "text\ntext"},
		{"abc\n</body>\ndef", "abc\ndef"},
		{"<x>\n<!--y-->\n</x>", "<x></x>"},
		{"a <template> b </template> c", "a <template>b</template>c"},

		// from HTML Minifier
		{`<DIV TITLE="blah">boo</DIV>`, `<div title=blah>boo</div>`},
		{"<p title\n\n\t =\n \"bar\">foo</p>", `<p title=bar>foo`},
		{`<p class=" foo ">foo bar baz</p>`, `<p class=foo>foo bar baz`},
		{`<input maxlength=" 5 ">`, `<input maxlength=5>`},
		{`<input type="text">`, `<input>`},
		{`<form method="get">`, `<form>`},
		{`<script language="Javascript">alert(1)</script>`, `<script>alert(1)</script>`},
		{`<script></script>`, ``},
		{`<p onclick=" JavaScript: x">x</p>`, `<p onclick=" x">x`},
		{`<span Selected="selected"></span>`, `<span selected></span>`},
		{`<table><thead><tr><th>foo</th><th>bar</th></tr></thead><tfoot><tr><th>baz</th><th>qux</th></tr></tfoot><tbody><tr><td>boo</td><td>moo</td></tr></tbody></table>`,
			`<table><thead><tr><th>foo<th>bar<tfoot><tr><th>baz<th>qux<tbody><tr><td>boo<td>moo</table>`},
		{`<select><option>foo</option><option>bar</option></select>`, `<select><option>foo<option>bar</select>`},
		{`<meta name="keywords" content="A, B">`, `<meta name=keywords content=A,B>`},
		{`<iframe><html> <p> x </p> </html></iframe>`, `<iframe><p>x</iframe>`},
		{`<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`, `<math> &int;_a_^b^{f(x)<over>1+x} dx </math>`},
		{`<script language="x" charset="x" src="y"></script>`, `<script src=y></script>`},
		{`<style media="all">x</style>`, `<style>x</style>`},
		{`<a id="abc" name="abc">y</a>`, `<a id=abc>y</a>`},
		{`<a id="" value="">y</a>`, `<a value>y</a>`},

		// from Kangax html-minifier
		{`<span style="font-family:&quot;Helvetica Neue&quot;,&quot;Helvetica&quot;,Helvetica,Arial,sans-serif">text</span>`, `<span style='font-family:"Helvetica Neue","Helvetica",Helvetica,Arial,sans-serif'>text</span>`},

		// go-fuzz
		{`<meta e t n content=ful><a b`, `<meta e t n content=ful><a b>`},
		{`<img alt=a'b="">`, `<img alt='a&#39;b=""'>`},
		{`</b`, `</b`},

		// bugs
		{`<p>text</p><br>text`, `<p>text</p><br>text`},                         // #122
		{`text <img> text`, `text <img> text`},                                 // #89
		{`text <progress></progress> text`, `text <progress></progress> text`}, // #89
		{`<pre> <x> a b </x> </pre>`, `<pre> <x> a b </x> </pre>`},             // #82
		{`<svg id="1"></svg>`, `<svg id="1"></svg>`},                           // #67
	}

	// passthrough copies its input unchanged; it stands in for the CSS and JS minifiers.
	passthrough := func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
		_, err := io.Copy(w, r)
		return err
	}

	m := minify.New()
	m.AddFunc("text/html", Minify)
	m.AddFunc("text/css", passthrough)
	m.AddFunc("text/javascript", passthrough)

	for _, tc := range cases {
		t.Run(tc.html, func(t *testing.T) {
			in := bytes.NewBufferString(tc.html)
			out := new(bytes.Buffer)
			err := Minify(m, out, in, nil)
			test.Minify(t, tc.html, err, out.String(), tc.expected)
		})
	}
}
// TestHTMLKeepEndTags verifies that optional end tags are preserved when the
// KeepEndTags option is set.
func TestHTMLKeepEndTags(t *testing.T) {
	cases := []struct {
		html     string
		expected string
	}{
		{`<p></p><p></p>`, `<p></p><p></p>`},
		{`<ul><li></li><li></li></ul>`, `<ul><li></li><li></li></ul>`},
	}

	m := minify.New()
	minifier := &Minifier{KeepEndTags: true}
	for _, tc := range cases {
		t.Run(tc.html, func(t *testing.T) {
			in := bytes.NewBufferString(tc.html)
			out := new(bytes.Buffer)
			err := minifier.Minify(m, out, in, nil)
			test.Minify(t, tc.html, err, out.String(), tc.expected)
		})
	}
}
// TestHTMLKeepConditionalComments verifies that conditional comments survive
// minification, with their contents minified, when KeepConditionalComments is set.
func TestHTMLKeepConditionalComments(t *testing.T) {
	cases := []struct {
		html     string
		expected string
	}{
		{`<!--[if IE 6]> <b> </b> <![endif]-->`, `<!--[if IE 6]><b></b><![endif]-->`},
		{`<![if IE 6]> <b> </b> <![endif]>`, `<![if IE 6]><b></b><![endif]>`},
	}

	m := minify.New()
	minifier := &Minifier{KeepConditionalComments: true}
	for _, tc := range cases {
		t.Run(tc.html, func(t *testing.T) {
			in := bytes.NewBufferString(tc.html)
			out := new(bytes.Buffer)
			err := minifier.Minify(m, out, in, nil)
			test.Minify(t, tc.html, err, out.String(), tc.expected)
		})
	}
}
// TestHTMLKeepWhitespace verifies the whitespace handling of the minifier when
// the KeepWhitespace option is set.
func TestHTMLKeepWhitespace(t *testing.T) {
	cases := []struct {
		html     string
		expected string
	}{
		{`cats and dogs `, `cats and dogs`},
		{` <div> <i> test </i> <b> test </b> </div> `, `<div> <i> test </i> <b> test </b> </div>`},
		{`<strong>x </strong>y`, `<strong>x </strong>y`},
		{`<strong>x </strong> y`, `<strong>x </strong> y`},
		{"<strong>x </strong>\ny", "<strong>x </strong>\ny"},
		{`<p>x </p>y`, `<p>x </p>y`},
		{`x <p>y</p>`, `x <p>y`},
		{` <!doctype html> <!--comment--> <html> <body><p></p></body></html> `, `<!doctype html><p>`}, // spaces before html and at the start of html are dropped
		{`<p>x<br> y`, `<p>x<br> y`},
		{`<p>x </b> <b> y`, `<p>x </b> <b> y`},
		{`a <code>code</code> b`, `a <code>code</code> b`},
		{`a <code></code> b`, `a <code></code> b`},
		{`a <script>script</script> b`, `a <script>script</script> b`},
		{"text\n<!--comment-->\ntext", "text\ntext"},
		{"text\n<!--comment-->text<!--comment--> text", "text\ntext text"},
		{"abc\n</body>\ndef", "abc\ndef"},
		{"<x>\n<!--y-->\n</x>", "<x>\n</x>"},
		{"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
	}

	m := minify.New()
	minifier := &Minifier{KeepWhitespace: true}
	for _, tc := range cases {
		t.Run(tc.html, func(t *testing.T) {
			in := bytes.NewBufferString(tc.html)
			out := new(bytes.Buffer)
			err := minifier.Minify(m, out, in, nil)
			test.Minify(t, tc.html, err, out.String(), tc.expected)
		})
	}
}
// TestHTMLURL verifies that the scheme of URL attribute values is stripped
// only when it matches the scheme of the base URL configured on the minifier.
func TestHTMLURL(t *testing.T) {
	cases := []struct {
		url      string
		html     string
		expected string
	}{
		{`http://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
		{`https://example.com/`, `<a href=http://example.com/>link</a>`, `<a href=http://example.com/>link</a>`},
		{`http://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=https://example.com/>link</a>`},
		{`https://example.com/`, `<a href=https://example.com/>link</a>`, `<a href=//example.com/>link</a>`},
		{`http://example.com/`, `<a href=" http://example.com ">x</a>`, `<a href=//example.com>x</a>`},
		{`http://example.com/`, `<link rel="stylesheet" type="text/css" href="http://example.com">`, `<link rel=stylesheet href=//example.com>`},
		{`http://example.com/`, `<!doctype html> <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en"> <head profile="http://dublincore.org/documents/dcq-html/"> <!-- Barlesque 2.75.0 --> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />`,
			`<!doctype html><html xmlns=//www.w3.org/1999/xhtml xml:lang=en><head profile=//dublincore.org/documents/dcq-html/><meta charset=utf-8>`},
		{`http://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
		{`https://example.com/`, `<html xmlns="http://www.w3.org/1999/xhtml"></html>`, `<html xmlns=http://www.w3.org/1999/xhtml>`},
		{`http://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=https://www.w3.org/1999/xhtml>`},
		{`https://example.com/`, `<html xmlns="https://www.w3.org/1999/xhtml"></html>`, `<html xmlns=//www.w3.org/1999/xhtml>`},
	}

	m := minify.New()
	m.AddFunc("text/html", Minify)
	for _, tc := range cases {
		t.Run(tc.url, func(t *testing.T) {
			in := bytes.NewBufferString(tc.html)
			out := new(bytes.Buffer)
			// the table only holds well-formed URLs, so the parse error is not checked
			m.URL, _ = url.Parse(tc.url)
			err := Minify(m, out, in, nil)
			test.Minify(t, tc.html, err, out.String(), tc.expected)
		})
	}
}
// TestSpecialTagClosing verifies that a </script> end tag inside a <style>
// element is passed to the CSS minifier as style content and does not close
// the element.
func TestSpecialTagClosing(t *testing.T) {
	// cssCheck asserts that it receives exactly "</script>" and echoes it back.
	cssCheck := func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
		content, err := ioutil.ReadAll(r)
		test.Error(t, err, nil)
		test.String(t, string(content), "</script>")
		_, err = w.Write(content)
		return err
	}

	m := minify.New()
	m.AddFunc("text/html", Minify)
	m.AddFunc("text/css", cssCheck)

	src := `<style></script></style>`
	out := new(bytes.Buffer)
	err := Minify(m, out, bytes.NewBufferString(src), nil)
	test.Minify(t, src, err, out.String(), src)
}
func TestReaderErrors(t *testing.T) {
r := test.NewErrorReader(0)
w := &bytes.Buffer{}
m := minify.New()
err := Minify(m, w, r, nil)
test.T(t, err, test.ErrPlain, "return error at first read")
}
// TestWriterErrors verifies that a write error is propagated by the minifier.
// For every input, each value in n is passed to test.NewErrorWriter to obtain
// a failing writer, and the minifier must return test.ErrPlain.
func TestWriterErrors(t *testing.T) {
	errorTests := []struct {
		html string
		n    []int
	}{
		{`<!doctype>`, []int{0}},
		{`text`, []int{0}},
		{`<foo attr=val>`, []int{0, 1, 2, 3, 4, 5}},
		{`</foo>`, []int{0}},
		{`<style>x</style>`, []int{2}},
		{`<textarea>x</textarea>`, []int{2}},
		{`<code>x</code>`, []int{2}},
		{`<pre>x</pre>`, []int{2}},
		{`<svg>x</svg>`, []int{0}},
		{`<math>x</math>`, []int{0}},
		{`<!--[if IE 6]> text <![endif]-->`, []int{0, 1, 2}},
		{`<![if IE 6]> text <![endif]>`, []int{0}},
	}

	m := minify.New()
	m.Add("text/html", &Minifier{
		KeepConditionalComments: true,
	})

	for _, tt := range errorTests {
		for _, n := range tt.n {
			// Name the subtest after the current write index, not the whole slice,
			// so that a failure identifies which write was expected to fail.
			t.Run(fmt.Sprint(tt.html, " ", n), func(t *testing.T) {
				r := bytes.NewBufferString(tt.html)
				w := test.NewErrorWriter(n)
				err := m.Minify("text/html", w, r)
				test.T(t, err, test.ErrPlain)
			})
		}
	}
}
func TestMinifyErrors(t *testing.T) {
errorTests := []struct {
html string
err error
}{
{`<style>abc</style>`, test.ErrPlain},
{`<path style="abc"/>`, test.ErrPlain},
{`<path onclick="abc"/>`, test.ErrPlain},
{`<svg></svg>`, test.ErrPlain},
{`<math></math>`, test.ErrPlain},
}
m := minify.New()
m.AddFunc("text/css", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
return test.ErrPlain
})
m.AddFunc("text/javascript", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
return test.ErrPlain
})
m.AddFunc("image/svg+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
return test.ErrPlain
})
m.AddFunc("application/mathml+xml", func(_ *minify.M, w io.Writer, r io.Reader, _ map[string]string) error {
return test.ErrPlain
})
for _, tt := range errorTests {
t.Run(tt.html, func(t *testing.T) {
r := bytes.NewBufferString(tt.html)
w := &bytes.Buffer{}
err := Minify(m, w, r, nil)
test.T(t, err, tt.err)
})
}
}
////////////////////////////////////////////////////////////////
// ExampleMinify registers minifiers for HTML, CSS, JavaScript and SVG by
// exact media type and for JSON and XML by media-type pattern, then minifies
// standard input as HTML to standard output.
func ExampleMinify() {
	jsonType := regexp.MustCompile("[/+]json$")
	xmlType := regexp.MustCompile("[/+]xml$")

	m := minify.New()
	m.AddFunc("text/html", Minify)
	m.AddFunc("text/css", css.Minify)
	m.AddFunc("text/javascript", js.Minify)
	m.AddFunc("image/svg+xml", svg.Minify)
	m.AddFuncRegexp(jsonType, json.Minify)
	m.AddFuncRegexp(xmlType, xml.Minify)

	// Set the base URL so that link locations are minified too. The parse
	// error is discarded.
	m.URL, _ = url.Parse("https://www.example.com/")

	err := m.Minify("text/html", os.Stdout, os.Stdin)
	if err != nil {
		panic(err)
	}
}
func ExampleMinify_options() {
m := minify.New()
m.Add("text/html", &Minifier{
KeepDefaultAttrVals: true,
KeepWhitespace: true,
})
if err := m.Minify("text/html", os.Stdout, os.Stdin); err != nil {
panic(err)
}
}
// ExampleMinify_reader wraps an in-memory HTML document in a minifying
// reader and copies the result to standard output.
func ExampleMinify_reader() {
	m := minify.New()
	m.Add("text/html", &Minifier{})

	src := bytes.NewReader([]byte("<html><body><h1>Example</h1></body></html>"))
	_, err := io.Copy(os.Stdout, m.Reader("text/html", src))
	if err != nil {
		panic(err)
	}
	// Output: <h1>Example</h1>
}
// ExampleMinify_writer wraps standard output in a minifying writer, writes
// an HTML document to it and closes the writer.
//
// Errors from Write and Close are checked and cause a panic, in line with
// the other examples, rather than being silently dropped.
func ExampleMinify_writer() {
	m := minify.New()
	m.Add("text/html", &Minifier{})

	w := m.Writer("text/html", os.Stdout)
	if _, err := w.Write([]byte("<html><body><h1>Example</h1></body></html>")); err != nil {
		panic(err)
	}
	// NOTE(review): Close presumably completes the minification and flushes
	// the result to os.Stdout — confirm against the minify package.
	if err := w.Close(); err != nil {
		panic(err)
	}
	// Output: <h1>Example</h1>
}

187
vendor/github.com/tdewolff/minify/html/table.go generated vendored Normal file
View file

@ -0,0 +1,187 @@
package html // import "github.com/tdewolff/minify/html"
import "github.com/tdewolff/parse/html"
// traits is a set of bit flags attached to a tag or attribute. It is stored
// in Token.Traits and looked up through tagMap and attrMap.
type traits uint8
// Trait flags. Each constant occupies its own bit so that several can be
// combined with | in a single map entry. The eight flags use every bit of the
// underlying uint8; adding another flag requires widening traits.
//
// rawTag, nonPhrasingTag, objectTag, omitPTag and keepPTag are used only in
// tagMap; booleanAttr, caselessAttr and urlAttr are used only in attrMap.
const (
rawTag traits = 1 << iota
nonPhrasingTag
objectTag
booleanAttr
caselessAttr
urlAttr
omitPTag // omit p end tag if it is followed by this start tag
keepPTag // keep p end tag if it is followed by this end tag
)
// tagMap associates the hash of a tag name with the traits that apply to
// that tag. TokenBuffer.read consults it for start-tag and end-tag tokens;
// a tag that is not listed yields the zero traits value (no flags set).
var tagMap = map[html.Hash]traits{
html.A: keepPTag,
html.Address: nonPhrasingTag | omitPTag,
html.Article: nonPhrasingTag | omitPTag,
html.Aside: nonPhrasingTag | omitPTag,
html.Audio: objectTag | keepPTag,
html.Blockquote: nonPhrasingTag | omitPTag,
html.Body: nonPhrasingTag,
html.Br: nonPhrasingTag,
html.Button: objectTag,
html.Canvas: objectTag,
html.Caption: nonPhrasingTag,
html.Col: nonPhrasingTag,
html.Colgroup: nonPhrasingTag,
html.Dd: nonPhrasingTag,
html.Del: keepPTag,
html.Details: omitPTag,
html.Div: nonPhrasingTag | omitPTag,
html.Dl: nonPhrasingTag | omitPTag,
html.Dt: nonPhrasingTag,
html.Embed: nonPhrasingTag,
html.Fieldset: nonPhrasingTag | omitPTag,
html.Figcaption: nonPhrasingTag | omitPTag,
html.Figure: nonPhrasingTag | omitPTag,
html.Footer: nonPhrasingTag | omitPTag,
html.Form: nonPhrasingTag | omitPTag,
html.H1: nonPhrasingTag | omitPTag,
html.H2: nonPhrasingTag | omitPTag,
html.H3: nonPhrasingTag | omitPTag,
html.H4: nonPhrasingTag | omitPTag,
html.H5: nonPhrasingTag | omitPTag,
html.H6: nonPhrasingTag | omitPTag,
html.Head: nonPhrasingTag,
html.Header: nonPhrasingTag | omitPTag,
html.Hgroup: nonPhrasingTag,
html.Hr: nonPhrasingTag | omitPTag,
html.Html: nonPhrasingTag,
html.Iframe: rawTag | objectTag,
html.Img: objectTag,
html.Input: objectTag,
html.Ins: keepPTag,
html.Keygen: objectTag,
html.Li: nonPhrasingTag,
html.Main: nonPhrasingTag | omitPTag,
html.Map: keepPTag,
html.Math: rawTag,
html.Menu: omitPTag,
html.Meta: nonPhrasingTag,
html.Meter: objectTag,
html.Nav: nonPhrasingTag | omitPTag,
html.Noscript: nonPhrasingTag | keepPTag,
html.Object: objectTag,
html.Ol: nonPhrasingTag | omitPTag,
html.Output: nonPhrasingTag,
html.P: nonPhrasingTag | omitPTag,
html.Picture: objectTag,
html.Pre: nonPhrasingTag | omitPTag,
html.Progress: objectTag,
html.Q: objectTag,
html.Script: rawTag,
html.Section: nonPhrasingTag | omitPTag,
html.Select: objectTag,
html.Style: rawTag | nonPhrasingTag,
html.Svg: rawTag | objectTag,
html.Table: nonPhrasingTag | omitPTag,
html.Tbody: nonPhrasingTag,
html.Td: nonPhrasingTag,
html.Textarea: rawTag | objectTag,
html.Tfoot: nonPhrasingTag,
html.Th: nonPhrasingTag,
html.Thead: nonPhrasingTag,
html.Title: nonPhrasingTag,
html.Tr: nonPhrasingTag,
html.Ul: nonPhrasingTag | omitPTag,
html.Video: objectTag | keepPTag,
}
// attrMap associates the hash of an attribute name with its traits.
// TokenBuffer.read consults it for attribute tokens; an attribute that is
// not listed yields the zero traits value (no flags set). Every entry carries
// exactly one of booleanAttr, caselessAttr or urlAttr.
var attrMap = map[html.Hash]traits{
html.Accept: caselessAttr,
html.Accept_Charset: caselessAttr,
html.Action: urlAttr,
html.Align: caselessAttr,
html.Alink: caselessAttr,
html.Allowfullscreen: booleanAttr,
html.Async: booleanAttr,
html.Autofocus: booleanAttr,
html.Autoplay: booleanAttr,
html.Axis: caselessAttr,
html.Background: urlAttr,
html.Bgcolor: caselessAttr,
html.Charset: caselessAttr,
html.Checked: booleanAttr,
html.Cite: urlAttr,
html.Classid: urlAttr,
html.Clear: caselessAttr,
html.Codebase: urlAttr,
html.Codetype: caselessAttr,
html.Color: caselessAttr,
html.Compact: booleanAttr,
html.Controls: booleanAttr,
html.Data: urlAttr,
html.Declare: booleanAttr,
html.Default: booleanAttr,
html.DefaultChecked: booleanAttr,
html.DefaultMuted: booleanAttr,
html.DefaultSelected: booleanAttr,
html.Defer: booleanAttr,
html.Dir: caselessAttr,
html.Disabled: booleanAttr,
html.Draggable: booleanAttr,
html.Enabled: booleanAttr,
html.Enctype: caselessAttr,
html.Face: caselessAttr,
html.Formaction: urlAttr,
html.Formnovalidate: booleanAttr,
html.Frame: caselessAttr,
html.Hidden: booleanAttr,
html.Href: urlAttr,
html.Hreflang: caselessAttr,
html.Http_Equiv: caselessAttr,
html.Icon: urlAttr,
html.Inert: booleanAttr,
html.Ismap: booleanAttr,
html.Itemscope: booleanAttr,
html.Lang: caselessAttr,
html.Language: caselessAttr,
html.Link: caselessAttr,
html.Longdesc: urlAttr,
html.Manifest: urlAttr,
html.Media: caselessAttr,
html.Method: caselessAttr,
html.Multiple: booleanAttr,
html.Muted: booleanAttr,
html.Nohref: booleanAttr,
html.Noresize: booleanAttr,
html.Noshade: booleanAttr,
html.Novalidate: booleanAttr,
html.Nowrap: booleanAttr,
html.Open: booleanAttr,
html.Pauseonexit: booleanAttr,
html.Poster: urlAttr,
html.Profile: urlAttr,
html.Readonly: booleanAttr,
html.Rel: caselessAttr,
html.Required: booleanAttr,
html.Rev: caselessAttr,
html.Reversed: booleanAttr,
html.Rules: caselessAttr,
html.Scope: caselessAttr,
html.Scoped: booleanAttr,
html.Scrolling: caselessAttr,
html.Seamless: booleanAttr,
html.Selected: booleanAttr,
html.Shape: caselessAttr,
html.Sortable: booleanAttr,
html.Src: urlAttr,
html.Target: caselessAttr,
html.Text: caselessAttr,
html.Translate: booleanAttr,
html.Truespeed: booleanAttr,
html.Type: caselessAttr,
html.Typemustmatch: booleanAttr,
html.Undeterminate: booleanAttr,
html.Usemap: urlAttr,
html.Valign: caselessAttr,
html.Valuetype: caselessAttr,
html.Vlink: caselessAttr,
html.Visible: booleanAttr,
html.Xmlns: urlAttr,
}