Adds a link fixer/checker to mungedocs.

Links that don't work yet can be prefixed with "TODO:" to avoid the check.
This commit is contained in:
Daniel Smith 2015-07-09 16:52:03 -07:00
parent affba42a05
commit c4aab16b04
4 changed files with 189 additions and 36 deletions

143
cmd/mungedocs/links.go Normal file
View File

@ -0,0 +1,143 @@
/*
Copyright 2015 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
"fmt"
"net/url"
"os"
"path"
"regexp"
"strings"
)
var (
// Finds markdown links of the form [foo](bar "alt-text").
linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)
// Splits the link target into link target and alt-text.
altTextRE = regexp.MustCompile(`(.*)( ".*")`)
)
// checkLinks assumes fileBytes has links in markdown syntax, and verifies that
// any relative links actually point to files that exist.
func checkLinks(filePath string, fileBytes []byte) ([]byte, error) {
dir := path.Dir(filePath)
errors := []string{}
output := linkRE.ReplaceAllFunc(fileBytes, func(in []byte) (out []byte) {
match := linkRE.FindSubmatch(in)
// match[0] is the entire expression; [1] is the visible text and [2] is the link text.
visibleText := string(match[1])
linkText := string(match[2])
altText := ""
if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
linkText = parts[1]
altText = parts[2]
}
// clean up some random garbage I found in our docs.
linkText = strings.Trim(linkText, " ")
linkText = strings.Trim(linkText, "\n")
linkText = strings.Trim(linkText, " ")
u, err := url.Parse(linkText)
if err != nil {
errors = append(
errors,
fmt.Sprintf("%v, link %q is unparsable: %v", filePath, linkText, err),
)
return in
}
if u.Host != "" {
// We only care about relative links.
return in
}
suggestedVisibleText := visibleText
if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
if !targetExists {
errors = append(
errors,
fmt.Sprintf("%v, %q: target not found\n", filePath, linkText),
)
}
u.Path = newPath
// Make the visible text show the absolute path if it's
// not nested in or beneath the current directory.
if strings.HasPrefix(u.Path, "..") {
suggestedVisibleText = makeRepoRelative(path.Join(dir, u.Path))
} else {
suggestedVisibleText = u.Path
}
if unescaped, err := url.QueryUnescape(u.String()); err != nil {
// Remove %28 type stuff, be nice to humans.
// And don't fight with the toc generator.
linkText = unescaped
} else {
linkText = u.String()
}
}
// If the current visible text is trying to be a file name, use
// the correct file name.
if (strings.Contains(visibleText, ".md") || strings.Contains(visibleText, "/")) && !strings.ContainsAny(visibleText, ` '"`+"`") {
visibleText = suggestedVisibleText
}
return []byte(fmt.Sprintf("[%s](%s)", visibleText, linkText+altText))
})
err := error(nil)
if len(errors) != 0 {
err = fmt.Errorf("%s", strings.Join(errors, "\n"))
}
return output, err
}
func makeRepoRelative(path string) string {
parts := strings.Split(path, "github.com/GoogleCloudPlatform/kubernetes/")
if len(parts) > 1 {
// Take out anything that is specific to the local filesystem.
return parts[1]
}
return path
}
func checkPath(filePath, linkPath string) (newPath string, ok bool) {
dir := path.Dir(filePath)
if strings.HasPrefix(linkPath, "/") {
if !strings.HasPrefix(linkPath, "/GoogleCloudPlatform") {
// Any absolute paths that aren't relative to github.com are wrong.
// Try to fix.
linkPath = linkPath[1:]
}
}
newPath = linkPath
for i := 0; i < 5; i++ {
// The file must exist.
target := path.Join(dir, newPath)
if info, err := os.Stat(target); err == nil {
if info.IsDir() {
return newPath + "/", true
}
return newPath, true
}
newPath = path.Join("..", newPath)
}
return linkPath, false
}

View File

@ -33,52 +33,62 @@ var (
rootDir = flag.String("root-dir", "", "Root directory containing documents to be processed.")
ErrChangesNeeded = errors.New("mungedocs: changes required")
// TODO: allow selection from command line. (e.g., just check links in the examples directory.)
mungesToMake = munges{
munger(updateTOC),
munger(checkLinks),
}
)
func visitAndVerify(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, false)
}
// Munger processes a document, returning an updated document xor an error.
// Munger is NOT allowed to mutate 'before', if changes are needed it must copy
// data into a new byte array.
type munger func(filePath string, before []byte) (after []byte, err error)
func visitAndChange(path string, i os.FileInfo, e error) error {
return visitAndChangeOrVerify(path, i, e, true)
type munges []munger
type fileProcessor struct {
// Which munge functions should we call?
munges munges
// Are we allowed to make changes?
verifyOnly bool
}
// Either change a file or verify that it needs no changes (according to modify argument)
func visitAndChangeOrVerify(path string, i os.FileInfo, e error, modify bool) error {
func (f fileProcessor) visit(path string, i os.FileInfo, e error) error {
if !strings.HasSuffix(path, ".md") {
return nil
}
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()
before, err := ioutil.ReadAll(file)
fileBytes, err := ioutil.ReadFile(path)
if err != nil {
return err
}
after, err := updateTOC(before)
if err != nil {
return err
}
if modify {
// Write out new file with any changes.
if !bytes.Equal(after, before) {
file.Close()
ioutil.WriteFile(path, after, 0644)
modificationsMade := false
for _, munge := range f.munges {
after, err := munge(path, fileBytes)
if err != nil {
return err
}
} else {
// Just verify that there are no changes.
if !bytes.Equal(after, before) {
return ErrChangesNeeded
if !modificationsMade {
if !bytes.Equal(after, fileBytes) {
modificationsMade = true
if f.verifyOnly {
// We're not allowed to make changes.
return ErrChangesNeeded
}
}
}
fileBytes = after
}
// TODO(erictune): more types of passes, such as:
// Linkify terms
// Verify links point to files.
// Write out new file with any changes.
if modificationsMade {
ioutil.WriteFile(path, fileBytes, 0644)
}
return nil
}
@ -91,6 +101,11 @@ func main() {
os.Exit(1)
}
fp := fileProcessor{
munges: mungesToMake,
verifyOnly: *verify,
}
// For each markdown file under source docs root, process the doc.
// If any error occurs, will exit with failure.
// If verify is true, then status is 0 for no changes needed, 1 for changes needed
@ -98,12 +113,7 @@ func main() {
// If verify is false, then status is 0 if changes successfully made or no changes needed,
// 1 if changes were needed but require human intervention, and >1 for an unexpected
// error during processing.
var err error
if *verify {
err = filepath.Walk(*rootDir, visitAndVerify)
} else {
err = filepath.Walk(*rootDir, visitAndChange)
}
err := filepath.Walk(*rootDir, fp.visit)
if err != nil {
if err == ErrChangesNeeded {
if *verify {

View File

@ -30,7 +30,7 @@ import (
// the ToC, thereby updating any previously inserted ToC.
//
// TODO(erictune): put this in own package with tests
func updateTOC(markdown []byte) ([]byte, error) {
func updateTOC(filePath string, markdown []byte) ([]byte, error) {
toc, err := buildTOC(markdown)
if err != nil {
return nil, err

View File

@ -92,7 +92,7 @@ func Test_updateTOC(t *testing.T) {
"# Title\nLorem ipsum \n**table of contents**\n<!-- BEGIN GENERATED TOC -->\n- [Title](#title)\n - [Section Heading](#section-heading)\n\n<!-- END GENERATED TOC -->\n## Section Heading\ndolor sit amet\n"},
}
for _, c := range cases {
actual, err := updateTOC([]byte(c.in))
actual, err := updateTOC("filename.md", []byte(c.in))
assert.NoError(t, err)
if c.out != string(actual) {
t.Errorf("Expected TOC '%v' but got '%v'", c.out, string(actual))