mount/mountinfo_linux: parser speed up

The mountinfo parser implemented via `fmt.Sscanf()` is slower than one using `strings.Split()` and `strconv.Atoi()`. This rewrite speeds parsing up by about a factor of 8; here are the results from `go test -bench`:

> BenchmarkParsingScanf-4 300 22294112 ns/op
> BenchmarkParsingSplit-4 3000 2780703 ns/op

I tried other approaches, such as using `fmt.Sscanf()` for the first three (integer) fields and `strings.Split()` for the rest, but that slows things down considerably:

> BenchmarkParsingMixed-4 1000 8827058 ns/op

Note that the old code uses `fmt.Sscanf()` first, then a linear search for the '-' field, then a split for the last 3 fields. The new code relies on a single split.

In addition, the new code is more future-proof, as it skips any extra optional fields before the separator (currently there are none).

I have also added more comments to aid future development.

Finally, the test data is fixed so that it has no whitespace before the first field.

Based on a similar change in Moby, https://github.com/moby/moby/pull/36091

[v2: remove no-op break statement to silence staticcheck]

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
2018-05-03 11:06:09 -07:00
parent d8aef117f9
commit 8eec9259e6
2 changed files with 124 additions and 99 deletions
--- a/mount/mountinfo_linux.go
+++ b/mount/mountinfo_linux.go
@@ -23,27 +23,10 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"strconv"
 	"strings"
 )

-const (
-	/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
-	   (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
-
-	   (1) mount ID:  unique identifier of the mount (may be reused after umount)
-	   (2) parent ID:  ID of parent (or of self for the top of the mount tree)
-	   (3) major:minor:  value of st_dev for files on filesystem
-	   (4) root:  root of the mount within the filesystem
-	   (5) mount point:  mount point relative to the process's root
-	   (6) mount options:  per mount options
-	   (7) optional fields:  zero or more fields of the form "tag[:value]"
-	   (8) separator:  marks the end of the optional fields
-	   (9) filesystem type:  name of filesystem of the form "type[.subtype]"
-	   (10) mount source:  filesystem specific information or "none"
-	   (11) super options:  per super block options*/
-	mountinfoFormat = "%d %d %d:%d %s %s %s %s"
-)
-
 // Self retrieves a list of mounts for the current running process.
 func Self() ([]Info, error) {
 	f, err := os.Open("/proc/self/mountinfo")
@@ -56,41 +39,83 @@ func Self() ([]Info, error) {
 }

 func parseInfoFile(r io.Reader) ([]Info, error) {
-	var (
-		s   = bufio.NewScanner(r)
-		out = []Info{}
-	)
+	s := bufio.NewScanner(r)
+	out := []Info{}

 	for s.Scan() {
 		if err := s.Err(); err != nil {
 			return nil, err
 		}

-		var (
-			p              = Info{}
-			text           = s.Text()
-			optionalFields string
-		)
+		/*
+		   36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+		   (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+		   (1) mount ID:  unique identifier of the mount (may be reused after umount)
+		   (2) parent ID:  ID of parent (or of self for the top of the mount tree)
+		   (3) major:minor:  value of st_dev for files on filesystem
+		   (4) root:  root of the mount within the filesystem
+		   (5) mount point:  mount point relative to the process's root
+		   (6) mount options:  per mount options
+		   (7) optional fields:  zero or more fields of the form "tag[:value]"
+		   (8) separator:  marks the end of the optional fields
+		   (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+		   (10) mount source:  filesystem specific information or "none"
+		   (11) super options:  per super block options
+		*/

-		if _, err := fmt.Sscanf(text, mountinfoFormat,
-			&p.ID, &p.Parent, &p.Major, &p.Minor,
-			&p.Root, &p.Mountpoint, &p.Options, &optionalFields); err != nil {
-			return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
+		text := s.Text()
+		fields := strings.Split(text, " ")
+		numFields := len(fields)
+		if numFields < 10 {
+			// should be at least 10 fields
+			return nil, fmt.Errorf("Parsing '%s' failed: not enough fields (%d)", text, numFields)
 		}
-		// Safe as mountinfo encodes mountpoints with spaces as \040.
-		index := strings.Index(text, " - ")
-		postSeparatorFields := strings.Fields(text[index+3:])
-		if len(postSeparatorFields) < 3 {
-			return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+		p := Info{}
+		// ignore any numbers parsing errors, as there should not be any
+		p.ID, _ = strconv.Atoi(fields[0])
+		p.Parent, _ = strconv.Atoi(fields[1])
+		mm := strings.Split(fields[2], ":")
+		if len(mm) != 2 {
+			return nil, fmt.Errorf("Parsing '%s' failed: unexpected minor:major pair %s", text, mm)
 		}
+		p.Major, _ = strconv.Atoi(mm[0])
+		p.Minor, _ = strconv.Atoi(mm[1])

-		if optionalFields != "-" {
-			p.Optional = optionalFields
-		}
+		p.Root = fields[3]
+		p.Mountpoint = fields[4]
+		p.Options = fields[5]
+
+		// zero or more optional fields, then a separator (-)
+		i := 6
+		for ; i < numFields && fields[i] != "-"; i++ {
+			switch i {
+			case 6:
+				p.Optional = fields[6]
+			default:
+				/* NOTE there might be more optional fields before the separator
+				   such as fields[7]...fields[N] (where N < separatorIndex),
+				   although as of Linux kernel 4.15 the only known ones are
+				   mount propagation flags in fields[6]. The correct
+				   behavior is to ignore any unknown optional fields.
+				*/
+			}
+		}
+		if i == numFields {
+			return nil, fmt.Errorf("Parsing '%s' failed: missing separator ('-')", text)
+		}
+		// There should be 3 fields after the separator...
+		if i+4 > numFields {
+			return nil, fmt.Errorf("Parsing '%s' failed: not enough fields after a separator", text)
+		}
+		// ... but in Linux <= 3.9 mounting a cifs with spaces in a share name
+		// (like "//serv/My Documents") _may_ end up having a space in the last field
+		// of mountinfo (like "unc=//serv/My Documents"). Since kernel 3.10-rc1, cifs
+		// option unc= is ignored,  so a space should not appear. In here we ignore
+		// those "extra" fields caused by extra spaces.
+		p.FSType = fields[i+1]
+		p.Source = fields[i+2]
+		p.VFSOptions = fields[i+3]

-		p.FSType = postSeparatorFields[0]
-		p.Source = postSeparatorFields[1]
-		p.VFSOptions = strings.Join(postSeparatorFields[2:], " ")
 		out = append(out, p)
 	}
 	return out, nil