updating github.com/russross/blackfriday to v1.5.2

2019-06-14 11:29:24 -04:00
parent c8051af3b9
commit caba257fc9
21 changed files with 536 additions and 414 deletions
--- a/vendor/github.com/russross/blackfriday/block.go
+++ b/vendor/github.com/russross/blackfriday/block.go
@@ -15,8 +15,8 @@ package blackfriday

 import (
 	"bytes"
-
-	"github.com/shurcooL/sanitized_anchor_name"
+	"strings"
+	"unicode"
 )

 // Parse block-level data.
@@ -93,7 +93,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {

 		// fenced code block:
 		//
-		// ``` go
+		// ``` go info string here
 		// func fact(n int) int {
 		//     if n <= 1 {
 		//         return n
@@ -102,7 +102,7 @@ func (p *parser) block(out *bytes.Buffer, data []byte) {
 		// }
 		// ```
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if i := p.fencedCode(out, data, true); i > 0 {
+			if i := p.fencedCodeBlock(out, data, true); i > 0 {
 				data = data[i:]
 				continue
 			}
@@ -243,7 +243,7 @@ func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 	}
 	if end > i {
 		if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
-			id = sanitized_anchor_name.Create(string(data[i:end]))
+			id = SanitizedAnchorName(string(data[i:end]))
 		}
 		work := func() bool {
 			p.inline(out, data[i:end])
@@ -320,6 +320,11 @@ func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 			return size
 		}

+		// check for HTML CDATA
+		if size := p.htmlCDATA(out, data, doRender); size > 0 {
+			return size
+		}
+
 		// no special case recognized
 		return 0
 	}
@@ -397,12 +402,10 @@ func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 	return i
 }

-// HTML comment, lax form
-func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
-	i := p.inlineHtmlComment(out, data)
-	// needs to end with a blank line
-	if j := p.isEmpty(data[i:]); j > 0 {
-		size := i + j
+func (p *parser) renderHTMLBlock(out *bytes.Buffer, data []byte, start int, doRender bool) int {
+	// html block needs to end with a blank line
+	if i := p.isEmpty(data[start:]); i > 0 {
+		size := start + i
 		if doRender {
 			// trim trailing newlines
 			end := size
@@ -416,6 +419,35 @@ func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int
 	return 0
 }

+// HTML comment, lax form
+func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
+	i := p.inlineHTMLComment(out, data)
+	return p.renderHTMLBlock(out, data, i, doRender)
+}
+
+// HTML CDATA section
+func (p *parser) htmlCDATA(out *bytes.Buffer, data []byte, doRender bool) int {
+	const cdataTag = "<![cdata["
+	const cdataTagLen = len(cdataTag)
+	if len(data) < cdataTagLen+1 {
+		return 0
+	}
+	if !bytes.Equal(bytes.ToLower(data[:cdataTagLen]), []byte(cdataTag)) {
+		return 0
+	}
+	i := cdataTagLen
+	// scan for the end-of-CDATA marker "]]>", across lines if necessary
+	for i < len(data) && !(data[i-2] == ']' && data[i-1] == ']' && data[i] == '>') {
+		i++
+	}
+	i++
+	// no end-of-CDATA marker, or no data after it
+	if i >= len(data) {
+		return 0
+	}
+	return p.renderHTMLBlock(out, data, i, doRender)
+}
+
 // HR, which is the only self-closing block tag considered
 func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
@@ -432,19 +464,7 @@ func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 	}

 	if data[i] == '>' {
-		i++
-		if j := p.isEmpty(data[i:]); j > 0 {
-			size := i + j
-			if doRender {
-				// trim newlines
-				end := size
-				for end > 0 && data[end-1] == '\n' {
-					end--
-				}
-				p.r.BlockHtml(out, data[:end])
-			}
-			return size
-		}
+		return p.renderHTMLBlock(out, data, i+1, doRender)
 	}

 	return 0
@@ -495,7 +515,7 @@ func (p *parser) htmlFindEnd(tag string, data []byte) int {
 	return i + skip
 }

-func (p *parser) isEmpty(data []byte) int {
+func (*parser) isEmpty(data []byte) int {
 	// it is okay to call isEmpty on an empty buffer
 	if len(data) == 0 {
 		return 0
@@ -510,7 +530,7 @@ func (p *parser) isEmpty(data []byte) int {
 	return i + 1
 }

-func (p *parser) isHRule(data []byte) bool {
+func (*parser) isHRule(data []byte) bool {
 	i := 0

 	// skip up to three spaces
@@ -539,21 +559,24 @@ func (p *parser) isHRule(data []byte) bool {
 	return n >= 3
 }

-func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (skip int, marker string) {
+// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
+// and returns the end index if so, or 0 otherwise. It also returns the marker found.
+// If info is not nil, it gets set to the info string specified in the fence line.
+// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
+func isFenceLine(data []byte, info *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 	i, size := 0, 0
-	skip = 0

 	// skip up to three spaces
 	for i < len(data) && i < 3 && data[i] == ' ' {
 		i++
 	}
-	if i >= len(data) {
-		return
-	}

 	// check for the marker characters: ~ or `
+	if i >= len(data) {
+		return 0, ""
+	}
 	if data[i] != '~' && data[i] != '`' {
-		return
+		return 0, ""
 	}

 	c := data[i]
@@ -564,79 +587,84 @@ func (p *parser) isFencedCode(data []byte, syntax **string, oldmarker string) (s
 		i++
 	}

-	if i >= len(data) {
-		return
-	}
-
 	// the marker char must occur at least 3 times
 	if size < 3 {
-		return
+		return 0, ""
 	}
 	marker = string(data[i-size : i])

 	// if this is the end marker, it must match the beginning marker
 	if oldmarker != "" && marker != oldmarker {
-		return
+		return 0, ""
 	}

-	if syntax != nil {
-		syn := 0
+	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
+	// into one, always get the info string, and discard it if the caller doesn't care.
+	if info != nil {
+		infoLength := 0
 		i = skipChar(data, i, ' ')

 		if i >= len(data) {
-			return
+			if newlineOptional && i == len(data) {
+				return i, marker
+			}
+			return 0, ""
 		}

-		syntaxStart := i
+		infoStart := i

 		if data[i] == '{' {
 			i++
-			syntaxStart++
+			infoStart++

 			for i < len(data) && data[i] != '}' && data[i] != '\n' {
-				syn++
+				infoLength++
 				i++
 			}

 			if i >= len(data) || data[i] != '}' {
-				return
+				return 0, ""
 			}

 			// strip all whitespace at the beginning and the end
 			// of the {} block
-			for syn > 0 && isspace(data[syntaxStart]) {
-				syntaxStart++
-				syn--
+			for infoLength > 0 && isspace(data[infoStart]) {
+				infoStart++
+				infoLength--
 			}

-			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
-				syn--
+			for infoLength > 0 && isspace(data[infoStart+infoLength-1]) {
+				infoLength--
 			}

 			i++
 		} else {
-			for i < len(data) && !isspace(data[i]) {
-				syn++
+			for i < len(data) && !isverticalspace(data[i]) {
+				infoLength++
 				i++
 			}
 		}

-		language := string(data[syntaxStart : syntaxStart+syn])
-		*syntax = &language
+		*info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength]))
 	}

 	i = skipChar(data, i, ' ')
 	if i >= len(data) || data[i] != '\n' {
-		return
+		if newlineOptional && i == len(data) {
+			return i, marker
+		}
+		return 0, ""
 	}

-	skip = i + 1
-	return
+	return i + 1, marker // Take newline into account.
 }

-func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
-	var lang *string
-	beg, marker := p.isFencedCode(data, &lang, "")
+// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
+// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
+// If doRender is true, a final newline is mandatory to recognize the fenced code block.
+func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
+	var infoString string
+	beg, marker := isFenceLine(data, &infoString, "", false)
 	if beg == 0 || beg >= len(data) {
 		return 0
 	}
@@ -647,7 +675,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		// safe to assume beg < len(data)

 		// check for the end of the code block
-		fenceEnd, _ := p.isFencedCode(data[beg:], nil, marker)
+		newlineOptional := !doRender
+		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 		if fenceEnd != 0 {
 			beg += fenceEnd
 			break
@@ -668,13 +697,8 @@ func (p *parser) fencedCode(out *bytes.Buffer, data []byte, doRender bool) int {
 		beg = end
 	}

-	syntax := ""
-	if lang != nil {
-		syntax = *lang
-	}
-
 	if doRender {
-		p.r.BlockCode(out, work.Bytes(), syntax)
+		p.r.BlockCode(out, work.Bytes(), infoString)
 	}

 	return beg
@@ -914,7 +938,7 @@ func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 		// irregardless of any contents inside it
 		for data[end] != '\n' {
 			if p.flags&EXTENSION_FENCED_CODE != 0 {
-				if i := p.fencedCode(out, data[end:], false); i > 0 {
+				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
 					// -1 to compensate for the extra end++ after the loop:
 					end += i - 1
 					break
@@ -1119,6 +1143,7 @@ func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
 	// process the following lines
 	containsBlankLine := false
 	sublist := 0
+	codeBlockMarker := ""

 gatherlines:
 	for line < len(data) {
@@ -1133,6 +1158,7 @@ gatherlines:
 		// and move on to the next line
 		if p.isEmpty(data[line:i]) > 0 {
 			containsBlankLine = true
+			raw.Write(data[line:i])
 			line = i
 			continue
 		}
@@ -1145,6 +1171,28 @@ gatherlines:

 		chunk := data[line+indent : i]

+		if p.flags&EXTENSION_FENCED_CODE != 0 {
+			// determine if in or out of codeblock
+			// if in codeblock, ignore normal list processing
+			_, marker := isFenceLine(chunk, nil, codeBlockMarker, false)
+			if marker != "" {
+				if codeBlockMarker == "" {
+					// start of codeblock
+					codeBlockMarker = marker
+				} else {
+					// end of codeblock.
+					*flags |= LIST_ITEM_CONTAINS_BLOCK
+					codeBlockMarker = ""
+				}
+			}
+			// we are in a codeblock, write line, and continue
+			if codeBlockMarker != "" || marker != "" {
+				raw.Write(data[line+indent : i])
+				line = i
+				continue gatherlines
+			}
+		}
+
 		// evaluate how this line fits in
 		switch {
 		// is this a nested list item?
@@ -1153,6 +1201,14 @@ gatherlines:
 			p.dliPrefix(chunk) > 0:

 			if containsBlankLine {
+				// end the list if the type changed after a blank line
+				if indent <= itemIndent &&
+					((*flags&LIST_TYPE_ORDERED != 0 && p.uliPrefix(chunk) > 0) ||
+						(*flags&LIST_TYPE_ORDERED == 0 && p.oliPrefix(chunk) > 0)) {
+
+					*flags |= LIST_ITEM_END_OF_LIST
+					break gatherlines
+				}
 				*flags |= LIST_ITEM_CONTAINS_BLOCK
 			}

@@ -1200,17 +1256,10 @@ gatherlines:

 		// a blank line means this should be parsed as a block
 		case containsBlankLine:
-			raw.WriteByte('\n')
 			*flags |= LIST_ITEM_CONTAINS_BLOCK
 		}

-		// if this line was preceeded by one or more blanks,
-		// re-introduce the blank into the buffer
-		if containsBlankLine {
-			containsBlankLine = false
-			raw.WriteByte('\n')
-
-		}
+		containsBlankLine = false

 		// add the line into the working buffer without prefix
 		raw.Write(data[line+indent : i])
@@ -1218,6 +1267,12 @@ gatherlines:
 		line = i
 	}

+	// If reached end of data, the Renderer.ListItem call we're going to make below
+	// is definitely the last in the list.
+	if line >= len(data) {
+		*flags |= LIST_ITEM_END_OF_LIST
+	}
+
 	rawBytes := raw.Bytes()

 	// render the contents of the list item
@@ -1332,7 +1387,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {

 				id := ""
 				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
-					id = sanitized_anchor_name.Create(string(data[prev:eol]))
+					id = SanitizedAnchorName(string(data[prev:eol]))
 				}

 				p.r.Header(out, work, level, id)
@@ -1362,7 +1417,7 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {

 		// if there's a fenced code block, paragraph is over
 		if p.flags&EXTENSION_FENCED_CODE != 0 {
-			if p.fencedCode(out, current, false) > 0 {
+			if p.fencedCodeBlock(out, current, false) > 0 {
 				p.renderParagraph(out, data[:i])
 				return i
 			}
@@ -1396,3 +1451,24 @@ func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
 	p.renderParagraph(out, data[:i])
 	return i
 }
+
+// SanitizedAnchorName returns a sanitized anchor name for the given text.
+//
+// It implements the algorithm specified in the package comment.
+func SanitizedAnchorName(text string) string {
+	var anchorName []rune
+	futureDash := false
+	for _, r := range text {
+		switch {
+		case unicode.IsLetter(r) || unicode.IsNumber(r):
+			if futureDash && len(anchorName) > 0 {
+				anchorName = append(anchorName, '-')
+			}
+			futureDash = false
+			anchorName = append(anchorName, unicode.ToLower(r))
+		default:
+			futureDash = true
+		}
+	}
+	return string(anchorName)
+}