| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | // Copyright 2019 The Gitea Authors. All rights reserved. | 
					
						
							|  |  |  | // Use of this source code is governed by a MIT-style | 
					
						
							|  |  |  | // license that can be found in the LICENSE file. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package mdstripper | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"code.gitea.io/gitea/modules/log" | 
					
						
							|  |  |  | 	"code.gitea.io/gitea/modules/markup/common" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"github.com/yuin/goldmark" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/ast" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/extension" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/parser" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/renderer" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/renderer/html" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/text" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | type stripRenderer struct { | 
					
						
							|  |  |  | 	links []string | 
					
						
							|  |  |  | 	empty bool | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { | 
					
						
							|  |  |  | 	return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { | 
					
						
							|  |  |  | 		if !entering { | 
					
						
							|  |  |  | 			return ast.WalkContinue, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		switch v := n.(type) { | 
					
						
							|  |  |  | 		case *ast.Text: | 
					
						
							|  |  |  | 			if !v.IsRaw() { | 
					
						
							|  |  |  | 				_, prevSibIsText := n.PreviousSibling().(*ast.Text) | 
					
						
							|  |  |  | 				coalesce := prevSibIsText | 
					
						
							|  |  |  | 				r.processString( | 
					
						
							|  |  |  | 					w, | 
					
						
							|  |  |  | 					v.Text(source), | 
					
						
							|  |  |  | 					coalesce) | 
					
						
							|  |  |  | 				if v.SoftLineBreak() { | 
					
						
							|  |  |  | 					r.doubleSpace(w) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return ast.WalkContinue, nil | 
					
						
							|  |  |  | 		case *ast.Link: | 
					
						
							|  |  |  | 			r.processLink(w, v.Destination) | 
					
						
							|  |  |  | 			return ast.WalkSkipChildren, nil | 
					
						
							|  |  |  | 		case *ast.AutoLink: | 
					
						
							|  |  |  | 			r.processLink(w, v.URL(source)) | 
					
						
							|  |  |  | 			return ast.WalkSkipChildren, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return ast.WalkContinue, nil | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	}) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) doubleSpace(w io.Writer) { | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	if !r.empty { | 
					
						
							|  |  |  | 		_, _ = w.Write([]byte{'\n'}) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	// Always break-up words | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	if !coalesce { | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 		r.doubleSpace(w) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	_, _ = w.Write(text) | 
					
						
							|  |  |  | 	r.empty = false | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) processLink(w io.Writer, link []byte) { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	// Links are processed out of band | 
					
						
							|  |  |  | 	r.links = append(r.links, string(link)) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // GetLinks returns the list of link data collected while parsing | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) GetLinks() []string { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	return r.links | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | // AddOptions adds given option to this renderer. | 
					
						
							|  |  |  | func (r *stripRenderer) AddOptions(...renderer.Option) { | 
					
						
							|  |  |  | 	// no-op | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // StripMarkdown parses markdown content by removing all markup and code blocks | 
					
						
							|  |  |  | //	in order to extract links and other references | 
					
						
							|  |  |  | func StripMarkdown(rawBytes []byte) (string, []string) { | 
					
						
							|  |  |  | 	buf, links := StripMarkdownBytes(rawBytes) | 
					
						
							|  |  |  | 	return string(buf), links | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | var stripParser parser.Parser | 
					
						
							|  |  |  | var once = sync.Once{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // StripMarkdownBytes parses markdown content by removing all markup and code blocks | 
					
						
							|  |  |  | //	in order to extract links and other references | 
					
						
							|  |  |  | func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { | 
					
						
							|  |  |  | 	once.Do(func() { | 
					
						
							|  |  |  | 		gdMarkdown := goldmark.New( | 
					
						
							|  |  |  | 			goldmark.WithExtensions(extension.Table, | 
					
						
							|  |  |  | 				extension.Strikethrough, | 
					
						
							|  |  |  | 				extension.TaskList, | 
					
						
							|  |  |  | 				extension.DefinitionList, | 
					
						
							|  |  |  | 				common.FootnoteExtension, | 
					
						
							|  |  |  | 				common.Linkify, | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 			goldmark.WithParserOptions( | 
					
						
							|  |  |  | 				parser.WithAttribute(), | 
					
						
							|  |  |  | 				parser.WithAutoHeadingID(), | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 			goldmark.WithRendererOptions( | 
					
						
							|  |  |  | 				html.WithUnsafe(), | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 		) | 
					
						
							|  |  |  | 		stripParser = gdMarkdown.Parser() | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	stripper := &stripRenderer{ | 
					
						
							|  |  |  | 		links: make([]string, 0, 10), | 
					
						
							|  |  |  | 		empty: true, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	reader := text.NewReader(rawBytes) | 
					
						
							|  |  |  | 	doc := stripParser.Parse(reader) | 
					
						
							|  |  |  | 	var buf bytes.Buffer | 
					
						
							|  |  |  | 	if err := stripper.Render(&buf, rawBytes, doc); err != nil { | 
					
						
							|  |  |  | 		log.Error("Unable to strip: %v", err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return buf.Bytes(), stripper.GetLinks() | 
					
						
							|  |  |  | } |