| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | // Copyright 2019 The Gitea Authors. All rights reserved. | 
					
						
							| 
									
										
										
										
											2022-11-27 13:20:29 -05:00
										 |  |  | // SPDX-License-Identifier: MIT | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							|  |  |  | package mdstripper | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							| 
									
										
										
										
											2021-11-17 20:34:35 +08:00
										 |  |  | 	"io" | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | 	"net/url" | 
					
						
							|  |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"sync" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"code.gitea.io/gitea/modules/log" | 
					
						
							|  |  |  | 	"code.gitea.io/gitea/modules/markup/common" | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | 	"code.gitea.io/gitea/modules/setting" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	"github.com/yuin/goldmark" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/ast" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/extension" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/parser" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/renderer" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/renderer/html" | 
					
						
							|  |  |  | 	"github.com/yuin/goldmark/text" | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
// giteaHostInit makes sure giteaHost is assigned only once (see getGiteaHost);
// giteaHost caches the URL obtained by parsing setting.AppURL.
var (
	giteaHostInit sync.Once
	giteaHost     *url.URL
)
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
// stripRenderer walks a parsed markdown document, writing its plain text to a
// writer and collecting the links it encounters along the way.
type stripRenderer struct {
	localhost *url.URL // URL of this instance; processAutoLink compares auto links against its host and path
	links     []string // links gathered while rendering, to be processed out of band
	empty     bool     // true until processString has written text; doubleSpace writes nothing while it is set
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error { | 
					
						
							|  |  |  | 	return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) { | 
					
						
							|  |  |  | 		if !entering { | 
					
						
							|  |  |  | 			return ast.WalkContinue, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		switch v := n.(type) { | 
					
						
							|  |  |  | 		case *ast.Text: | 
					
						
							|  |  |  | 			if !v.IsRaw() { | 
					
						
							|  |  |  | 				_, prevSibIsText := n.PreviousSibling().(*ast.Text) | 
					
						
							|  |  |  | 				coalesce := prevSibIsText | 
					
						
							|  |  |  | 				r.processString( | 
					
						
							|  |  |  | 					w, | 
					
						
							|  |  |  | 					v.Text(source), | 
					
						
							|  |  |  | 					coalesce) | 
					
						
							|  |  |  | 				if v.SoftLineBreak() { | 
					
						
							|  |  |  | 					r.doubleSpace(w) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return ast.WalkContinue, nil | 
					
						
							|  |  |  | 		case *ast.Link: | 
					
						
							|  |  |  | 			r.processLink(w, v.Destination) | 
					
						
							|  |  |  | 			return ast.WalkSkipChildren, nil | 
					
						
							|  |  |  | 		case *ast.AutoLink: | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | 			// This could be a reference to an issue or pull - if so convert it | 
					
						
							|  |  |  | 			r.processAutoLink(w, v.URL(source)) | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 			return ast.WalkSkipChildren, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return ast.WalkContinue, nil | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	}) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) doubleSpace(w io.Writer) { | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	if !r.empty { | 
					
						
							|  |  |  | 		_, _ = w.Write([]byte{'\n'}) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	// Always break-up words | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	if !coalesce { | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 		r.doubleSpace(w) | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 	_, _ = w.Write(text) | 
					
						
							|  |  |  | 	r.empty = false | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2019-10-31 03:06:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | // ProcessAutoLinks to detect and handle links to issues and pulls | 
					
						
							|  |  |  | func (r *stripRenderer) processAutoLink(w io.Writer, link []byte) { | 
					
						
							|  |  |  | 	linkStr := string(link) | 
					
						
							|  |  |  | 	u, err := url.Parse(linkStr) | 
					
						
							|  |  |  | 	if err != nil { | 
					
						
							|  |  |  | 		// Process out of band | 
					
						
							|  |  |  | 		r.links = append(r.links, linkStr) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Note: we're not attempting to match the URL scheme (http/https) | 
					
						
							|  |  |  | 	host := strings.ToLower(u.Host) | 
					
						
							|  |  |  | 	if host != "" && host != strings.ToLower(r.localhost.Host) { | 
					
						
							|  |  |  | 		// Process out of band | 
					
						
							|  |  |  | 		r.links = append(r.links, linkStr) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// We want: /user/repo/issues/3 | 
					
						
							|  |  |  | 	parts := strings.Split(strings.TrimPrefix(u.EscapedPath(), r.localhost.EscapedPath()), "/") | 
					
						
							|  |  |  | 	if len(parts) != 5 || parts[0] != "" { | 
					
						
							|  |  |  | 		// Process out of band | 
					
						
							|  |  |  | 		r.links = append(r.links, linkStr) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	var sep string | 
					
						
							|  |  |  | 	if parts[3] == "issues" { | 
					
						
							|  |  |  | 		sep = "#" | 
					
						
							|  |  |  | 	} else if parts[3] == "pulls" { | 
					
						
							|  |  |  | 		sep = "!" | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		// Process out of band | 
					
						
							|  |  |  | 		r.links = append(r.links, linkStr) | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	_, _ = w.Write([]byte(parts[1])) | 
					
						
							|  |  |  | 	_, _ = w.Write([]byte("/")) | 
					
						
							|  |  |  | 	_, _ = w.Write([]byte(parts[2])) | 
					
						
							|  |  |  | 	_, _ = w.Write([]byte(sep)) | 
					
						
							|  |  |  | 	_, _ = w.Write([]byte(parts[4])) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) processLink(w io.Writer, link []byte) { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	// Links are processed out of band | 
					
						
							|  |  |  | 	r.links = append(r.links, string(link)) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // GetLinks returns the list of link data collected while parsing | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func (r *stripRenderer) GetLinks() []string { | 
					
						
							| 
									
										
										
										
											2019-10-13 19:29:10 -03:00
										 |  |  | 	return r.links | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | // AddOptions adds given option to this renderer. | 
					
						
							|  |  |  | func (r *stripRenderer) AddOptions(...renderer.Option) { | 
					
						
							|  |  |  | 	// no-op | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // StripMarkdown parses markdown content by removing all markup and code blocks | 
					
						
							| 
									
										
										
										
											2022-08-30 21:15:45 -05:00
										 |  |  | // in order to extract links and other references | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func StripMarkdown(rawBytes []byte) (string, []string) { | 
					
						
							|  |  |  | 	buf, links := StripMarkdownBytes(rawBytes) | 
					
						
							|  |  |  | 	return string(buf), links | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-20 18:46:10 +01:00
										 |  |  | var ( | 
					
						
							|  |  |  | 	stripParser parser.Parser | 
					
						
							|  |  |  | 	once        = sync.Once{} | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | // StripMarkdownBytes parses markdown content by removing all markup and code blocks | 
					
						
							| 
									
										
										
										
											2022-08-30 21:15:45 -05:00
										 |  |  | // in order to extract links and other references | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) { | 
					
						
							|  |  |  | 	once.Do(func() { | 
					
						
							|  |  |  | 		gdMarkdown := goldmark.New( | 
					
						
							|  |  |  | 			goldmark.WithExtensions(extension.Table, | 
					
						
							|  |  |  | 				extension.Strikethrough, | 
					
						
							|  |  |  | 				extension.TaskList, | 
					
						
							|  |  |  | 				extension.DefinitionList, | 
					
						
							|  |  |  | 				common.FootnoteExtension, | 
					
						
							|  |  |  | 				common.Linkify, | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 			goldmark.WithParserOptions( | 
					
						
							|  |  |  | 				parser.WithAttribute(), | 
					
						
							|  |  |  | 				parser.WithAutoHeadingID(), | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 			goldmark.WithRendererOptions( | 
					
						
							|  |  |  | 				html.WithUnsafe(), | 
					
						
							|  |  |  | 			), | 
					
						
							|  |  |  | 		) | 
					
						
							|  |  |  | 		stripParser = gdMarkdown.Parser() | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	stripper := &stripRenderer{ | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | 		localhost: getGiteaHost(), | 
					
						
							|  |  |  | 		links:     make([]string, 0, 10), | 
					
						
							|  |  |  | 		empty:     true, | 
					
						
							| 
									
										
										
										
											2019-12-31 01:53:28 +00:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	reader := text.NewReader(rawBytes) | 
					
						
							|  |  |  | 	doc := stripParser.Parse(reader) | 
					
						
							|  |  |  | 	var buf bytes.Buffer | 
					
						
							|  |  |  | 	if err := stripper.Render(&buf, rawBytes, doc); err != nil { | 
					
						
							|  |  |  | 		log.Error("Unable to strip: %v", err) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return buf.Bytes(), stripper.GetLinks() | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2020-08-06 20:20:05 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information | 
					
						
							|  |  |  | func getGiteaHost() *url.URL { | 
					
						
							|  |  |  | 	giteaHostInit.Do(func() { | 
					
						
							|  |  |  | 		var err error | 
					
						
							|  |  |  | 		if giteaHost, err = url.Parse(setting.AppURL); err != nil { | 
					
						
							|  |  |  | 			giteaHost = &url.URL{} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	}) | 
					
						
							|  |  |  | 	return giteaHost | 
					
						
							|  |  |  | } |