
* Changes to make vendored packages accept new home. * Fix go2idl to import vendored packages.
481 lines
11 KiB
Go
481 lines
11 KiB
Go
// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// Godoc comment extraction and comment -> HTML formatting.
|
|
|
|
package doc
|
|
|
|
import (
|
|
"io"
|
|
"regexp"
|
|
"strings"
|
|
"text/template" // for HTMLEscape
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Typographic double quotes written in place of `` and '' when nice
// output is requested.
var (
	ldquo = []byte("“") // replaces ``
	rdquo = []byte("”") // replaces ''
)

// commentEscape writes text to w with HTML special characters escaped.
// When nice is true, each pair of adjacent backquotes (``) is replaced by
// ldquo and each pair of adjacent single quotes ('') by rdquo; the
// replacement bytes themselves are written without further escaping.
func commentEscape(w io.Writer, text string, nice bool) {
	start := 0 // index of the first byte of text not yet written
	if nice {
		for pos := 0; pos+1 < len(text); pos++ {
			c := text[pos]
			if c != text[pos+1] || (c != '`' && c != '\'') {
				continue
			}
			// Flush everything before the quote pair, then the replacement.
			template.HTMLEscape(w, []byte(text[start:pos]))
			if c == '`' {
				w.Write(ldquo)
			} else {
				w.Write(rdquo)
			}
			pos++ // step over the second character of the pair
			start = pos + 1
		}
	}
	template.HTMLEscape(w, []byte(text[start:]))
}
|
|
|
|
// Regular-expression source fragments used to find identifiers and URLs
// in comment text.
const (
	// identRx is the regexp for Go identifiers.
	identRx = `[\pL_][\pL_0-9]*`

	// The following fragments are combined into urlRx, the regexp for URLs.
	protocol = `https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero`
	hostPart = `[a-zA-Z0-9_@\-]+`
	filePart = `[a-zA-Z0-9_?%#~&/\-+=()]+` // parentheses may not be matching; see pairedParensPrefixLen

	// urlRx is, in order: the protocol and "://" (e.g. http://), the host
	// parts with an optional trailing slash (e.g. www.google.com:8080/),
	// and one or more file parts separated by ':', '.' or ','.
	urlRx = `(` + protocol + `)://` +
		hostPart + `([.:]` + hostPart + `)*/?` +
		filePart + `([:.,]` + filePart + `)*`
)

// matchRx matches either a URL (reported in the first parenthesized
// submatch) or a Go identifier.
var matchRx = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
|
|
|
|
// HTML fragments written around the pieces of a formatted comment.
var (
	// link: html_a + escaped URL + html_aq + link text + html_enda
	html_a    = []byte("<a href=\"")
	html_aq   = []byte("\">")
	html_enda = []byte(`</a>`)

	// italics
	html_i    = []byte(`<i>`)
	html_endi = []byte(`</i>`)

	// paragraph
	html_p    = []byte("<p>\n")
	html_endp = []byte("</p>\n")

	// preformatted block
	html_pre    = []byte(`<pre>`)
	html_endpre = []byte("</pre>\n")

	// heading: html_h + anchor id + html_hq + heading text + html_endh
	html_h    = []byte("<h3 id=\"")
	html_hq   = []byte("\">")
	html_endh = []byte("</h3>\n")
)
|
|
|
|
// pairedParensPrefixLen returns the length of the longest prefix of s
// in which every parenthesis is paired: the prefix stops before a ')'
// that has no matching '(' and before an outermost '(' that is never
// closed.
func pairedParensPrefixLen(s string) int {
	depth := 0    // number of currently open parentheses
	end := len(s) // result if the scan finishes without an unmatched ')'
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == '(' {
			if depth == 0 {
				// Tentative cut-off in case this group is never closed.
				end = pos
			}
			depth++
			continue
		}
		if s[pos] != ')' {
			continue
		}
		depth--
		if depth < 0 {
			// Closing parenthesis without an opening one.
			return pos
		}
		if depth == 0 {
			// Outermost group closed; the whole string is a candidate again.
			end = len(s)
		}
	}
	return end
}
|
|
|
|
// Emphasize and escape a line of text for HTML. URLs are converted into links;
|
|
// if the URL also appears in the words map, the link is taken from the map (if
|
|
// the corresponding map value is the empty string, the URL is not converted
|
|
// into a link). Go identifiers that appear in the words map are italicized; if
|
|
// the corresponding map value is not the empty string, it is considered a URL
|
|
// and the word is converted into a link. If nice is set, the remaining text's
|
|
// appearance is improved where it makes sense (e.g., `` is turned into “
|
|
// and '' into ”).
|
|
func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
|
|
for {
|
|
m := matchRx.FindStringSubmatchIndex(line)
|
|
if m == nil {
|
|
break
|
|
}
|
|
// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
|
|
|
|
// write text before match
|
|
commentEscape(w, line[0:m[0]], nice)
|
|
|
|
// adjust match if necessary
|
|
match := line[m[0]:m[1]]
|
|
if n := pairedParensPrefixLen(match); n < len(match) {
|
|
// match contains unpaired parentheses (rare);
|
|
// redo matching with shortened line for correct indices
|
|
m = matchRx.FindStringSubmatchIndex(line[:m[0]+n])
|
|
match = match[:n]
|
|
}
|
|
|
|
// analyze match
|
|
url := ""
|
|
italics := false
|
|
if words != nil {
|
|
url, italics = words[match]
|
|
}
|
|
if m[2] >= 0 {
|
|
// match against first parenthesized sub-regexp; must be match against urlRx
|
|
if !italics {
|
|
// no alternative URL in words list, use match instead
|
|
url = match
|
|
}
|
|
italics = false // don't italicize URLs
|
|
}
|
|
|
|
// write match
|
|
if len(url) > 0 {
|
|
w.Write(html_a)
|
|
template.HTMLEscape(w, []byte(url))
|
|
w.Write(html_aq)
|
|
}
|
|
if italics {
|
|
w.Write(html_i)
|
|
}
|
|
commentEscape(w, match, nice)
|
|
if italics {
|
|
w.Write(html_endi)
|
|
}
|
|
if len(url) > 0 {
|
|
w.Write(html_enda)
|
|
}
|
|
|
|
// advance
|
|
line = line[m[1]:]
|
|
}
|
|
commentEscape(w, line, nice)
|
|
}
|
|
|
|
// indentLen returns the number of leading space and tab bytes in s.
func indentLen(s string) int {
	for pos := 0; pos < len(s); pos++ {
		if c := s[pos]; c != ' ' && c != '\t' {
			return pos
		}
	}
	// s consists only of spaces and tabs (or is empty).
	return len(s)
}
|
|
|
|
// isBlank reports whether s is empty or consists of a single newline.
// Lines containing only spaces or tabs are not considered blank.
func isBlank(s string) bool {
	return s == "" || s == "\n"
}
|
|
|
|
// commonPrefix returns the longest prefix, compared byte by byte,
// that a and b have in common.
func commonPrefix(a, b string) string {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for pos := 0; pos < limit; pos++ {
		if a[pos] != b[pos] {
			return a[:pos]
		}
	}
	// One string is a prefix of the other.
	return a[:limit]
}
|
|
|
|
func unindent(block []string) {
|
|
if len(block) == 0 {
|
|
return
|
|
}
|
|
|
|
// compute maximum common white prefix
|
|
prefix := block[0][0:indentLen(block[0])]
|
|
for _, line := range block {
|
|
if !isBlank(line) {
|
|
prefix = commonPrefix(prefix, line[0:indentLen(line)])
|
|
}
|
|
}
|
|
n := len(prefix)
|
|
|
|
// remove
|
|
for i, line := range block {
|
|
if !isBlank(line) {
|
|
block[i] = line[n:]
|
|
}
|
|
}
|
|
}
|
|
|
|
// heading returns line with surrounding white space removed if it passes
// as a section heading; otherwise it returns the empty string.
//
// To pass, the trimmed line must start with an uppercase letter, end in a
// letter or digit, contain none of a fixed set of punctuation characters,
// and use "'" only in a possessive "'s" that ends the line or is followed
// by a space.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if line == "" {
		return ""
	}

	// The first character has to be an uppercase letter.
	first, _ := utf8.DecodeRuneInString(line)
	if !(unicode.IsLetter(first) && unicode.IsUpper(first)) {
		return ""
	}

	// The last character has to be a letter or a digit.
	last, _ := utf8.DecodeLastRuneInString(line)
	if !(unicode.IsLetter(last) || unicode.IsDigit(last)) {
		return ""
	}

	// Reject lines containing any illegal character.
	if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
		return ""
	}

	// Every "'" must introduce a possessive "'s".
	for rest := line; ; {
		q := strings.IndexRune(rest, '\'')
		if q < 0 {
			break
		}
		after := rest[q+1:] // text following the quote
		if after == "" || after[0] != 's' || (len(after) > 1 && after[1] != ' ') {
			return "" // not followed by "s "
		}
		rest = after[1:]
	}

	return line
}
|
|
|
|
type (
	// op identifies the kind of a block.
	op int

	// block is one unit of a parsed comment: its kind together with the
	// lines that belong to it.
	block struct {
		op    op
		lines []string
	}
)

// The kinds of block.
const (
	opPara op = iota // paragraph
	opHead           // heading
	opPre            // preformatted text
)
|
|
|
|
// nonAlphaNumRx matches any single character other than an ASCII letter
// or digit.
var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

// anchorID returns the HTML id for the heading text line: every character
// that is not an ASCII letter or digit is replaced by "_", and the result
// is prefixed with "hdr-".
func anchorID(line string) string {
	// The prefix avoids conflicts with IDs used for package symbols.
	const prefix = "hdr-"
	sanitized := nonAlphaNumRx.ReplaceAllString(line, "_")
	return prefix + sanitized
}
|
|
|
|
// ToHTML converts comment text to formatted HTML.
|
|
// The comment was prepared by DocReader,
|
|
// so it is known not to have leading, trailing blank lines
|
|
// nor to have trailing spaces at the end of lines.
|
|
// The comment markers have already been removed.
|
|
//
|
|
// Each span of unindented non-blank lines is converted into
|
|
// a single paragraph. There is one exception to the rule: a span that
|
|
// consists of a single line, is followed by another paragraph span,
|
|
// begins with a capital letter, and contains no punctuation
|
|
// is formatted as a heading.
|
|
//
|
|
// A span of indented lines is converted into a <pre> block,
|
|
// with the common indent prefix removed.
|
|
//
|
|
// URLs in the comment text are converted into links; if the URL also appears
|
|
// in the words map, the link is taken from the map (if the corresponding map
|
|
// value is the empty string, the URL is not converted into a link).
|
|
//
|
|
// Go identifiers that appear in the words map are italicized; if the corresponding
|
|
// map value is not the empty string, it is considered a URL and the word is converted
|
|
// into a link.
|
|
func ToHTML(w io.Writer, text string, words map[string]string) {
|
|
for _, b := range blocks(text) {
|
|
switch b.op {
|
|
case opPara:
|
|
w.Write(html_p)
|
|
for _, line := range b.lines {
|
|
emphasize(w, line, words, true)
|
|
}
|
|
w.Write(html_endp)
|
|
case opHead:
|
|
w.Write(html_h)
|
|
id := ""
|
|
for _, line := range b.lines {
|
|
if id == "" {
|
|
id = anchorID(line)
|
|
w.Write([]byte(id))
|
|
w.Write(html_hq)
|
|
}
|
|
commentEscape(w, line, true)
|
|
}
|
|
if id == "" {
|
|
w.Write(html_hq)
|
|
}
|
|
w.Write(html_endh)
|
|
case opPre:
|
|
w.Write(html_pre)
|
|
for _, line := range b.lines {
|
|
emphasize(w, line, nil, false)
|
|
}
|
|
w.Write(html_endpre)
|
|
}
|
|
}
|
|
}
|
|
|
|
func blocks(text string) []block {
|
|
var (
|
|
out []block
|
|
para []string
|
|
|
|
lastWasBlank = false
|
|
lastWasHeading = false
|
|
)
|
|
|
|
close := func() {
|
|
if para != nil {
|
|
out = append(out, block{opPara, para})
|
|
para = nil
|
|
}
|
|
}
|
|
|
|
lines := strings.SplitAfter(text, "\n")
|
|
unindent(lines)
|
|
for i := 0; i < len(lines); {
|
|
line := lines[i]
|
|
if isBlank(line) {
|
|
// close paragraph
|
|
close()
|
|
i++
|
|
lastWasBlank = true
|
|
continue
|
|
}
|
|
if indentLen(line) > 0 {
|
|
// close paragraph
|
|
close()
|
|
|
|
// count indented or blank lines
|
|
j := i + 1
|
|
for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
|
|
j++
|
|
}
|
|
// but not trailing blank lines
|
|
for j > i && isBlank(lines[j-1]) {
|
|
j--
|
|
}
|
|
pre := lines[i:j]
|
|
i = j
|
|
|
|
unindent(pre)
|
|
|
|
// put those lines in a pre block
|
|
out = append(out, block{opPre, pre})
|
|
lastWasHeading = false
|
|
continue
|
|
}
|
|
|
|
if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
|
|
isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
|
|
// current line is non-blank, surrounded by blank lines
|
|
// and the next non-blank line is not indented: this
|
|
// might be a heading.
|
|
if head := heading(line); head != "" {
|
|
close()
|
|
out = append(out, block{opHead, []string{head}})
|
|
i += 2
|
|
lastWasHeading = true
|
|
continue
|
|
}
|
|
}
|
|
|
|
// open paragraph
|
|
lastWasBlank = false
|
|
lastWasHeading = false
|
|
para = append(para, lines[i])
|
|
i++
|
|
}
|
|
close()
|
|
|
|
return out
|
|
}
|
|
|
|
// ToText prepares comment text for presentation in textual output.
|
|
// It wraps paragraphs of text to width or fewer Unicode code points
|
|
// and then prefixes each line with the indent. In preformatted sections
|
|
// (such as program text), it prefixes each non-blank line with preIndent.
|
|
func ToText(w io.Writer, text string, indent, preIndent string, width int) {
|
|
l := lineWrapper{
|
|
out: w,
|
|
width: width,
|
|
indent: indent,
|
|
}
|
|
for _, b := range blocks(text) {
|
|
switch b.op {
|
|
case opPara:
|
|
// l.write will add leading newline if required
|
|
for _, line := range b.lines {
|
|
l.write(line)
|
|
}
|
|
l.flush()
|
|
case opHead:
|
|
w.Write(nl)
|
|
for _, line := range b.lines {
|
|
l.write(line + "\n")
|
|
}
|
|
l.flush()
|
|
case opPre:
|
|
w.Write(nl)
|
|
for _, line := range b.lines {
|
|
if isBlank(line) {
|
|
w.Write([]byte("\n"))
|
|
} else {
|
|
w.Write([]byte(preIndent))
|
|
w.Write([]byte(line))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// lineWrapper writes words to out, breaking lines so that the words and
// the single spaces between them do not exceed width code points per line
// (a single word longer than width still gets a line of its own). The
// indent is written at the start of every line and is not counted.
type lineWrapper struct {
	out       io.Writer
	printed   bool   // whether write has been called before
	width     int    // maximum number of code points per line, excluding indent
	indent    string // written at the beginning of each line
	n         int    // code points written on the current line, excluding indent
	pendSpace int    // number of spaces (0 or 1) to write before the next word
}

var (
	nl    = []byte("\n")
	space = []byte(" ")
)

// write appends the white-space separated words of text to the output,
// starting a new line whenever the next word does not fit. If earlier
// output was written and the current line is empty, an empty line is
// written first to separate paragraphs.
func (l *lineWrapper) write(text string) {
	if l.printed && l.n == 0 {
		l.out.Write(nl) // blank line before new paragraph
	}
	l.printed = true

	for _, word := range strings.Fields(text) {
		size := utf8.RuneCountInString(word)
		// Break the line if the word, with its leading space, would overflow.
		if l.n > 0 && l.n+l.pendSpace+size > l.width {
			l.out.Write(nl)
			l.n, l.pendSpace = 0, 0
		}
		if l.n == 0 {
			l.out.Write([]byte(l.indent))
		}
		l.out.Write(space[:l.pendSpace])
		l.out.Write([]byte(word))
		l.n += l.pendSpace + size
		l.pendSpace = 1
	}
}

// flush terminates the current line, if anything has been written on it.
func (l *lineWrapper) flush() {
	if l.n == 0 {
		return
	}
	l.out.Write(nl)
	l.n, l.pendSpace = 0, 0
}
|