1
0
Fork 0
mirror of https://github.com/miniflux/v2.git synced 2025-07-27 17:28:38 +00:00

Add FeedIcon API call and update dependencies

This commit is contained in:
Frédéric Guillot 2017-12-16 11:25:18 -08:00
parent 231ebf2daa
commit 27196589fb
262 changed files with 83830 additions and 30061 deletions

View file

@ -8,7 +8,7 @@
---
Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are high performance and streaming, which implies O(n).
Minify is a minifier package written in [Go][1]. It provides HTML5, CSS3, JS, JSON, SVG and XML minifiers and an interface to implement any other minifier. Minification is the process of removing bytes from a file (such as whitespace) without changing its output and therefore shrinking its size and speeding up transmission over the internet and possibly parsing. The implemented minifiers are designed for high performance.
The core functionality associates mimetypes with minification functions, allowing embedded resources (like CSS or JS within HTML files) to be minified as well. Users can add new implementations that are triggered based on a mimetype (or pattern), or redirect to an external command (like ClosureCompiler, UglifyCSS, ...).
@ -100,51 +100,52 @@ The benchmarks directory contains a number of standardized samples used to compa
```
name time/op
CSS/sample_bootstrap.css-4 3.05ms ± 1%
CSS/sample_gumby.css-4 4.25ms ± 1%
HTML/sample_amazon.html-4 3.33ms ± 0%
HTML/sample_bbc.html-4 1.39ms ± 7%
HTML/sample_blogpost.html-4 222µs ± 1%
HTML/sample_es6.html-4 18.0ms ± 1%
HTML/sample_stackoverflow.html-4 3.08ms ± 1%
HTML/sample_wikipedia.html-4 6.06ms ± 1%
JS/sample_ace.js-4 9.92ms ± 1%
JS/sample_dot.js-4 91.4µs ± 4%
JS/sample_jquery.js-4 4.00ms ± 1%
JS/sample_jqueryui.js-4 7.93ms ± 0%
JS/sample_moment.js-4 1.46ms ± 1%
JSON/sample_large.json-4 5.07ms ± 4%
JSON/sample_testsuite.json-4 2.96ms ± 0%
JSON/sample_twitter.json-4 11.3µs ± 0%
SVG/sample_arctic.svg-4 64.7ms ± 0%
SVG/sample_gopher.svg-4 227µs ± 0%
SVG/sample_usa.svg-4 35.9ms ± 6%
XML/sample_books.xml-4 48.1µs ± 4%
XML/sample_catalog.xml-4 20.2µs ± 0%
XML/sample_omg.xml-4 9.02ms ± 0%
CSS/sample_bootstrap.css-4 2.26ms ± 0%
CSS/sample_gumby.css-4 2.92ms ± 1%
HTML/sample_amazon.html-4 2.33ms ± 2%
HTML/sample_bbc.html-4 1.02ms ± 1%
HTML/sample_blogpost.html-4 171µs ± 2%
HTML/sample_es6.html-4 14.5ms ± 0%
HTML/sample_stackoverflow.html-4 2.41ms ± 1%
HTML/sample_wikipedia.html-4 4.76ms ± 0%
JS/sample_ace.js-4 7.41ms ± 0%
JS/sample_dot.js-4 63.7µs ± 0%
JS/sample_jquery.js-4 2.99ms ± 0%
JS/sample_jqueryui.js-4 5.92ms ± 2%
JS/sample_moment.js-4 1.09ms ± 1%
JSON/sample_large.json-4 2.95ms ± 0%
JSON/sample_testsuite.json-4 1.51ms ± 1%
JSON/sample_twitter.json-4 6.75µs ± 1%
SVG/sample_arctic.svg-4 62.3ms ± 1%
SVG/sample_gopher.svg-4 218µs ± 0%
SVG/sample_usa.svg-4 33.1ms ± 3%
XML/sample_books.xml-4 36.2µs ± 0%
XML/sample_catalog.xml-4 14.9µs ± 0%
XML/sample_omg.xml-4 6.31ms ± 1%
name speed
CSS/sample_bootstrap.css-4 45.0MB/s ± 1%
CSS/sample_gumby.css-4 43.8MB/s ± 1%
HTML/sample_amazon.html-4 142MB/s ± 0%
HTML/sample_bbc.html-4 83.0MB/s ± 7%
HTML/sample_blogpost.html-4 94.5MB/s ± 1%
HTML/sample_es6.html-4 56.8MB/s ± 1%
HTML/sample_stackoverflow.html-4 66.7MB/s ± 1%
HTML/sample_wikipedia.html-4 73.5MB/s ± 1%
JS/sample_ace.js-4 64.9MB/s ± 1%
JS/sample_dot.js-4 56.4MB/s ± 4%
JS/sample_jquery.js-4 61.8MB/s ± 1%
JS/sample_jqueryui.js-4 59.2MB/s ± 0%
JS/sample_moment.js-4 67.8MB/s ± 1%
JSON/sample_large.json-4 150MB/s ± 4%
JSON/sample_testsuite.json-4 233MB/s ± 0%
JSON/sample_twitter.json-4 134MB/s ± 0%
SVG/sample_arctic.svg-4 22.7MB/s ± 0%
SVG/sample_gopher.svg-4 25.6MB/s ± 0%
SVG/sample_usa.svg-4 28.6MB/s ± 6%
XML/sample_books.xml-4 92.1MB/s ± 4%
XML/sample_catalog.xml-4 95.6MB/s ± 0%
CSS/sample_bootstrap.css-4 60.8MB/s ± 0%
CSS/sample_gumby.css-4 63.9MB/s ± 1%
HTML/sample_amazon.html-4 203MB/s ± 2%
HTML/sample_bbc.html-4 113MB/s ± 1%
HTML/sample_blogpost.html-4 123MB/s ± 2%
HTML/sample_es6.html-4 70.7MB/s ± 0%
HTML/sample_stackoverflow.html-4 85.2MB/s ± 1%
HTML/sample_wikipedia.html-4 93.6MB/s ± 0%
JS/sample_ace.js-4 86.9MB/s ± 0%
JS/sample_dot.js-4 81.0MB/s ± 0%
JS/sample_jquery.js-4 82.8MB/s ± 0%
JS/sample_jqueryui.js-4 79.3MB/s ± 2%
JS/sample_moment.js-4 91.2MB/s ± 1%
JSON/sample_large.json-4 258MB/s ± 0%
JSON/sample_testsuite.json-4 457MB/s ± 1%
JSON/sample_twitter.json-4 226MB/s ± 1%
SVG/sample_arctic.svg-4 23.6MB/s ± 1%
SVG/sample_gopher.svg-4 26.7MB/s ± 0%
SVG/sample_usa.svg-4 30.9MB/s ± 3%
XML/sample_books.xml-4 122MB/s ± 0%
XML/sample_catalog.xml-4 130MB/s ± 0%
XML/sample_omg.xml-4 180MB/s ± 1%
```
## HTML

View file

@ -116,6 +116,11 @@ func TestHTML(t *testing.T) {
{`<meta e t n content=ful><a b`, `<meta e t n content=ful><a b>`},
{`<img alt=a'b="">`, `<img alt='a&#39;b=""'>`},
{`</b`, `</b`},
{`<title></`, `<title></`},
{`<svg <`, `<svg <`},
{`<svg "`, `<svg "`},
{`<svg></`, `<svg></`},
{`<script><!--<`, `<script><!--<`},
// bugs
{`<p>text</p><br>text`, `<p>text</p><br>text`}, // #122

View file

@ -40,6 +40,9 @@ func TestJS(t *testing.T) {
{"false\n\"string\"", "false\n\"string\""}, // #109
{"`\n", "`"}, // go fuzz
{"a\n~b", "a\n~b"}, // #132
// go-fuzz
{`/\`, `/\`},
}
m := minify.New()

View file

@ -43,6 +43,9 @@ func (o *Minifier) Minify(m *minify.M, w io.Writer, r io.Reader, _ map[string]st
for {
t := *tb.Shift()
if t.TokenType == xml.CDATAToken {
if len(t.Text) == 0 {
continue
}
if text, useText := xml.EscapeCDATAVal(&attrByteBuffer, t.Text); useText {
t.TokenType = xml.TextToken
t.Data = text

View file

@ -39,7 +39,10 @@ func TestXML(t *testing.T) {
{"<style>lala{color:red}</style>", "<style>lala{color:red}</style>"},
{`cats and dogs `, `cats and dogs`},
{`</0`, `</0`}, // go fuzz
// go fuzz
{`</0`, `</0`},
{`<!DOCTYPE`, `<!DOCTYPE`},
{`<![CDATA[`, ``},
}
m := minify.New()

View file

@ -81,9 +81,14 @@ func (z *Lexer) Restore() {
// Err returns the error returned from io.Reader or io.EOF when the end has been reached.
// It is shorthand for PeekErr(0), i.e. the error state at the current position.
func (z *Lexer) Err() error {
	return z.PeekErr(0)
}
// PeekErr returns the error at position pos. When pos is zero, this is the same as calling Err().
func (z *Lexer) PeekErr(pos int) error {
if z.err != nil {
return z.err
} else if z.pos >= len(z.buf)-1 {
} else if z.pos+pos >= len(z.buf)-1 {
return io.EOF
}
return nil

View file

@ -174,7 +174,8 @@ func TestParseError(t *testing.T) {
if tt.col == 0 {
test.T(t, p.Err(), io.EOF)
} else if perr, ok := p.Err().(*parse.Error); ok {
test.T(t, perr.Col, tt.col)
_, col, _ := perr.Position()
test.T(t, col, tt.col)
} else {
test.Fail(t, "bad error:", p.Err())
}

View file

@ -7,29 +7,43 @@ import (
"github.com/tdewolff/parse/buffer"
)
// Error is a parsing error returned by parser. It contains a message and an offset at which the error occurred.
type Error struct {
Message string
Line int
Col int
Context string
r io.Reader
Offset int
line int
column int
context string
}
// NewError creates a new error
func NewError(msg string, r io.Reader, offset int) *Error {
line, col, context, _ := Position(r, offset)
return &Error{
msg,
line,
col,
context,
Message: msg,
r: r,
Offset: offset,
}
}
// NewErrorLexer creates a new error from a *buffer.Lexer.
// The error's reader is built over the lexer's bytes (l.Bytes()) and its
// offset is taken from l.Offset(); both are handed to NewError with msg.
func NewErrorLexer(msg string, l *buffer.Lexer) *Error {
	return NewError(msg, buffer.NewReader(l.Bytes()), l.Offset())
}
func (e *Error) Error() string {
return fmt.Sprintf("parse error:%d:%d: %s\n%s", e.Line, e.Col, e.Message, e.Context)
// Position re-parses the file to determine the line, column, and context of the error.
// Context is the entire line at which the error occurred.
//
// The values are stored on the error after the first lookup and reused while the
// stored line is non-zero. The error value returned by the package-level Position
// function is discarded.
func (e *Error) Position() (int, int, string) {
	if e.line != 0 {
		// Already resolved by an earlier call.
		return e.line, e.column, e.context
	}
	e.line, e.column, e.context, _ = Position(e.r, e.Offset)
	return e.line, e.column, e.context
}
// Error returns the error string, containing the context and line + column number.
// The position is obtained through Position(), so it is computed on demand.
func (e *Error) Error() string {
	ln, col, ctx := e.Position()
	return fmt.Sprintf("parse error:%d:%d: %s\n%s", ln, col, e.Message, ctx)
}

View file

@ -79,10 +79,10 @@ func NewLexer(r io.Reader) *Lexer {
// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
func (l *Lexer) Err() error {
if err := l.r.Err(); err != nil {
return err
if l.err != nil {
return l.err
}
return l.err
return l.r.Err()
}
// Restore restores the NULL byte at the end of the buffer.
@ -103,8 +103,7 @@ func (l *Lexer) Next() (TokenType, []byte) {
}
break
}
if c == 0 {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
if c == 0 && l.r.Err() != nil {
return ErrorToken, nil
} else if c != '>' && (c != '/' || l.r.Peek(1) != '>') {
return AttributeToken, l.shiftAttribute()
@ -133,13 +132,16 @@ func (l *Lexer) Next() (TokenType, []byte) {
c = l.r.Peek(0)
if c == '<' {
c = l.r.Peek(1)
isEndTag := c == '/' && l.r.Peek(2) != '>' && (l.r.Peek(2) != 0 || l.r.PeekErr(2) == nil)
if l.r.Pos() > 0 {
if c == '/' && l.r.Peek(2) != 0 || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
if isEndTag || 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '!' || c == '?' {
// return currently buffered texttoken so that we can return tag next iteration
return TextToken, l.r.Shift()
}
} else if c == '/' && l.r.Peek(2) != 0 {
} else if isEndTag {
l.r.Move(2)
if c = l.r.Peek(0); c != '>' && !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
// only endtags that are not followed by > or EOF arrive here
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
return CommentToken, l.shiftBogusComment()
}
return EndTagToken, l.shiftEndTag()
@ -154,11 +156,10 @@ func (l *Lexer) Next() (TokenType, []byte) {
l.r.Move(1)
return CommentToken, l.shiftBogusComment()
}
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
if l.r.Pos() > 0 {
return TextToken, l.r.Shift()
}
l.err = parse.NewErrorLexer("unexpected null character", l.r)
return ErrorToken, nil
}
l.r.Move(1)
@ -182,7 +183,7 @@ func (l *Lexer) AttrVal() []byte {
func (l *Lexer) shiftRawText() []byte {
if l.rawTag == Plaintext {
for {
if l.r.Peek(0) == 0 {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
return l.r.Shift()
}
l.r.Move(1)
@ -237,15 +238,16 @@ func (l *Lexer) shiftRawText() []byte {
inScript = false
}
}
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
return l.r.Shift()
} else {
l.r.Move(1)
}
l.r.Move(1)
}
} else {
l.r.Move(1)
}
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
return l.r.Shift()
} else {
l.r.Move(1)
@ -258,7 +260,7 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
if l.at('-', '-') {
l.r.Move(2)
for {
if l.r.Peek(0) == 0 {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
return CommentToken, l.r.Shift()
} else if l.at('-', '-', '>') {
l.text = l.r.Lexeme()[4:]
@ -274,7 +276,7 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
} else if l.at('[', 'C', 'D', 'A', 'T', 'A', '[') {
l.r.Move(7)
for {
if l.r.Peek(0) == 0 {
if l.r.Peek(0) == 0 && l.r.Err() != nil {
return TextToken, l.r.Shift()
} else if l.at(']', ']', '>') {
l.r.Move(3)
@ -289,7 +291,7 @@ func (l *Lexer) readMarkup() (TokenType, []byte) {
l.r.Move(1)
}
for {
if c := l.r.Peek(0); c == '>' || c == 0 {
if c := l.r.Peek(0); c == '>' || c == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[9:]
if c == '>' {
l.r.Move(1)
@ -310,7 +312,7 @@ func (l *Lexer) shiftBogusComment() []byte {
l.text = l.r.Lexeme()[2:]
l.r.Move(1)
return l.r.Shift()
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[2:]
return l.r.Shift()
}
@ -320,19 +322,25 @@ func (l *Lexer) shiftBogusComment() []byte {
func (l *Lexer) shiftStartTag() (TokenType, []byte) {
for {
if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
if c := l.r.Peek(0); c == ' ' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
break
}
l.r.Move(1)
}
l.text = parse.ToLower(l.r.Lexeme()[1:])
if h := ToHash(l.text); h == Textarea || h == Title || h == Style || h == Xmp || h == Iframe || h == Script || h == Plaintext || h == Svg || h == Math {
if h == Svg {
if h == Svg || h == Math {
data := l.shiftXml(h)
if l.err != nil {
return ErrorToken, nil
}
l.inTag = false
return SvgToken, l.shiftXml(h)
} else if h == Math {
l.inTag = false
return MathToken, l.shiftXml(h)
if h == Svg {
return SvgToken, data
} else {
return MathToken, data
}
}
l.rawTag = h
}
@ -343,7 +351,7 @@ func (l *Lexer) shiftAttribute() []byte {
nameStart := l.r.Pos()
var c byte
for { // attribute name state
if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
if c = l.r.Peek(0); c == ' ' || c == '=' || c == '>' || c == '/' && l.r.Peek(1) == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
break
}
l.r.Move(1)
@ -374,14 +382,14 @@ func (l *Lexer) shiftAttribute() []byte {
if c == delim {
l.r.Move(1)
break
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
break
}
l.r.Move(1)
}
} else { // attribute value unquoted state
for {
if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 {
if c := l.r.Peek(0); c == ' ' || c == '>' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0 && l.r.Err() != nil {
break
}
l.r.Move(1)
@ -403,7 +411,7 @@ func (l *Lexer) shiftEndTag() []byte {
l.text = l.r.Lexeme()[2:]
l.r.Move(1)
break
} else if c == 0 {
} else if c == 0 && l.r.Err() != nil {
l.text = l.r.Lexeme()[2:]
break
}
@ -422,6 +430,8 @@ func (l *Lexer) shiftEndTag() []byte {
return parse.ToLower(l.r.Shift())
}
// shiftXml parses the content of a svg or math tag according to the XML 1.1 specifications, including the tag itself.
// So far we have already parsed `<svg` or `<math`.
func (l *Lexer) shiftXml(rawTag Hash) []byte {
inQuote := false
for {
@ -429,26 +439,26 @@ func (l *Lexer) shiftXml(rawTag Hash) []byte {
if c == '"' {
inQuote = !inQuote
l.r.Move(1)
} else if c == '<' && !inQuote {
if l.r.Peek(1) == '/' {
mark := l.r.Pos()
l.r.Move(2)
for {
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
break
}
l.r.Move(1)
}
if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
} else if c == '<' && !inQuote && l.r.Peek(1) == '/' {
mark := l.r.Pos()
l.r.Move(2)
for {
if c = l.r.Peek(0); !('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') {
break
}
} else {
l.r.Move(1)
}
if h := ToHash(parse.ToLower(parse.Copy(l.r.Lexeme()[mark+2:]))); h == rawTag { // copy so that ToLower doesn't change the case of the underlying slice
break
}
} else if c == 0 {
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
}
return l.r.Shift()
} else {
l.r.Move(1)
}
l.r.Move(1)
}
for {
@ -457,7 +467,10 @@ func (l *Lexer) shiftXml(rawTag Hash) []byte {
l.r.Move(1)
break
} else if c == 0 {
break
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
}
return l.r.Shift()
}
l.r.Move(1)
}

View file

@ -63,8 +63,22 @@ func TestTokens(t *testing.T) {
{"<script><!--", TTs{StartTagToken, StartTagCloseToken, TextToken}},
{"<script><!--var x='<script></script>';-->", TTs{StartTagToken, StartTagCloseToken, TextToken}},
// NULL
{"foo\x00bar", TTs{TextToken}},
{"<\x00foo>", TTs{TextToken}},
{"<foo\x00>", TTs{StartTagToken, StartTagCloseToken}},
{"</\x00bogus>", TTs{CommentToken}},
{"</foo\x00>", TTs{EndTagToken}},
{"<plaintext>\x00</plaintext>", TTs{StartTagToken, StartTagCloseToken, TextToken}},
{"<script>\x00</script>", TTs{StartTagToken, StartTagCloseToken, TextToken, EndTagToken}},
{"<!--\x00-->", TTs{CommentToken}},
{"<![CDATA[\x00]]>", TTs{TextToken}},
{"<!doctype\x00>", TTs{DoctypeToken}},
{"<?bogus\x00>", TTs{CommentToken}},
{"<?bogus\x00>", TTs{CommentToken}},
// go-fuzz
{"</>", TTs{EndTagToken}},
{"</>", TTs{TextToken}},
}
for _, tt := range tokenTests {
t.Run(tt.html, func(t *testing.T) {
@ -135,6 +149,11 @@ func TestAttributes(t *testing.T) {
{"<foo x", []string{"x", ""}},
{"<foo x=", []string{"x", ""}},
{"<foo x='", []string{"x", "'"}},
// NULL
{"<foo \x00>", []string{"\x00", ""}},
{"<foo \x00=\x00>", []string{"\x00", "\x00"}},
{"<foo \x00='\x00'>", []string{"\x00", "'\x00'"}},
}
for _, tt := range attributeTests {
t.Run(tt.attr, func(t *testing.T) {
@ -164,7 +183,8 @@ func TestErrors(t *testing.T) {
html string
col int
}{
{"a\x00b", 2},
{"<svg>\x00</svg>", 6},
{"<svg></svg\x00>", 11},
}
for _, tt := range errorTests {
t.Run(tt.html, func(t *testing.T) {
@ -175,7 +195,8 @@ func TestErrors(t *testing.T) {
if tt.col == 0 {
test.T(t, l.Err(), io.EOF)
} else if perr, ok := l.Err().(*parse.Error); ok {
test.T(t, perr.Col, tt.col)
_, col, _ := perr.Position()
test.T(t, col, tt.col)
} else {
test.Fail(t, "bad error:", l.Err())
}

View file

@ -599,6 +599,8 @@ func (l *Lexer) consumeRegexpToken() bool {
if l.consumeLineTerminator() {
l.r.Rewind(mark)
return false
} else if l.r.Peek(0) == 0 {
return true
}
} else if l.consumeLineTerminator() {
l.r.Rewind(mark)

View file

@ -99,10 +99,10 @@ func NewParser(r io.Reader) *Parser {
// Err returns the error encountered during tokenization, this is often io.EOF but also other errors can be returned.
func (p *Parser) Err() error {
if err := p.r.Err(); err != nil {
return err
if p.err != nil {
return p.err
}
return p.err
return p.r.Err()
}
// Restore restores the NULL byte at the end of the buffer.

View file

@ -93,7 +93,8 @@ func TestGrammarsError(t *testing.T) {
if tt.col == 0 {
test.T(t, p.Err(), io.EOF)
} else if perr, ok := p.Err().(*parse.Error); ok {
test.T(t, perr.Col, tt.col)
_, col, _ := perr.Position()
test.T(t, col, tt.col)
} else {
test.Fail(t, "bad error:", p.Err())
}

View file

@ -81,11 +81,10 @@ func NewLexer(r io.Reader) *Lexer {
// Err returns the error encountered during lexing, this is often io.EOF but also other errors can be returned.
func (l *Lexer) Err() error {
err := l.r.Err()
if err != nil {
return err
if l.err != nil {
return l.err
}
return l.err
return l.r.Err()
}
// Restore restores the NULL byte at the end of the buffer.
@ -107,7 +106,9 @@ func (l *Lexer) Next() (TokenType, []byte) {
break
}
if c == 0 {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
}
return ErrorToken, nil
} else if c != '>' && (c != '/' && c != '?' || l.r.Peek(1) != '>') {
return AttributeToken, l.shiftAttribute()
@ -148,7 +149,7 @@ func (l *Lexer) Next() (TokenType, []byte) {
l.r.Move(7)
return CDATAToken, l.shiftCDATAText()
} else if l.at('D', 'O', 'C', 'T', 'Y', 'P', 'E') {
l.r.Move(8)
l.r.Move(7)
return DOCTYPEToken, l.shiftDOCTYPEText()
}
l.r.Move(-2)
@ -164,7 +165,9 @@ func (l *Lexer) Next() (TokenType, []byte) {
if l.r.Pos() > 0 {
return TextToken, l.r.Shift()
}
l.err = parse.NewErrorLexer("unexpected null character", l.r)
if l.r.Err() == nil {
l.err = parse.NewErrorLexer("unexpected null character", l.r)
}
return ErrorToken, nil
}
l.r.Move(1)

View file

@ -155,6 +155,7 @@ func TestErrors(t *testing.T) {
col int
}{
{"a\x00b", 2},
{"<a\x00>", 3},
}
for _, tt := range errorTests {
t.Run(tt.xml, func(t *testing.T) {
@ -165,7 +166,8 @@ func TestErrors(t *testing.T) {
if tt.col == 0 {
test.T(t, l.Err(), io.EOF)
} else if perr, ok := l.Err().(*parse.Error); ok {
test.T(t, perr.Col, tt.col)
_, col, _ := perr.Position()
test.T(t, col, tt.col)
} else {
test.Fail(t, "bad error:", l.Err())
}