Merge pull request #995 from stevvooe/filter-syntax
filters: add package for filter syntax
This commit is contained in:
commit
ebf4620206
15
filters/adaptor.go
Normal file
15
filters/adaptor.go
Normal file
@ -0,0 +1,15 @@
|
||||
package filters
|
||||
|
||||
// Adaptor specifies the mapping of fieldpaths to a type. For the given field
// path, the value and whether it is present should be returned. The mapping of
// the fieldpath to a field is deferred to the adaptor implementation, but
// should generally follow protobuf field path/mask semantics.
type Adaptor interface {
	Field(fieldpath []string) (value string, present bool)
}

// AdapterFunc adapts a plain function to the Adaptor interface, allowing
// implementation-specific resolution of fieldpaths.
type AdapterFunc func(fieldpath []string) (string, bool)

// Field invokes fn for the given fieldpath, satisfying Adaptor.
func (fn AdapterFunc) Field(fieldpath []string) (string, bool) {
	return fn(fieldpath)
}
|
155
filters/filter.go
Normal file
155
filters/filter.go
Normal file
@ -0,0 +1,155 @@
|
||||
// Package filters defines a syntax and parser that can be used for the
|
||||
// filtration of items across the containerd API. The core is built on the
|
||||
// concept of protobuf field paths, with quoting. Several operators allow the
|
||||
// user to flexibly select items based on field presence, equality, inequality
|
||||
// and regular expressions. Flexible adaptors support working with any type.
|
||||
//
|
||||
// The syntax is fairly familiar, if you've used container ecosystem
|
||||
// projects. At the core, we base it on the concept of protobuf field
|
||||
// paths, augmenting with the ability to quote portions of the field path
|
||||
// to match arbitrary labels. These "selectors" come in the following
|
||||
// syntax:
|
||||
//
|
||||
// ```
|
||||
// <fieldpath>[<operator><value>]
|
||||
// ```
|
||||
//
|
||||
// A basic example is as follows:
|
||||
//
|
||||
// ```
|
||||
// name==foo
|
||||
// ```
|
||||
//
|
||||
// This would match all objects that have a field `name` with the value
|
||||
// `foo`. If we only want to test if the field is present, we can omit the
|
||||
// operator. This is most useful for matching labels in containerd. The
|
||||
// following will match objects that have the field "labels" and have the
|
||||
// label "foo" defined:
|
||||
//
|
||||
// ```
|
||||
// labels.foo
|
||||
// ```
|
||||
//
|
||||
// We also allow for quoting of parts of the field path to allow matching
|
||||
// of arbitrary items:
|
||||
//
|
||||
// ```
|
||||
// labels."very complex label"==something
|
||||
// ```
|
||||
//
|
||||
// We also define `!=` and `~=` as operators. The `!=` will match all
|
||||
// objects that don't match the value for a field and `~=` will compile the
|
||||
// target value as a regular expression and match the field value against that.
|
||||
//
|
||||
// Selectors can be combined using a comma, such that the resulting
|
||||
// selector will require all selectors are matched for the object to match.
|
||||
// The following example will match objects that are named `foo` and have
|
||||
// the label `bar`:
|
||||
//
|
||||
// ```
|
||||
// name==foo,labels.bar
|
||||
// ```
|
||||
//
|
||||
package filters
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
|
||||
"github.com/containerd/containerd/log"
|
||||
)
|
||||
|
||||
// Filter matches objects exposed through an Adaptor.
type Filter interface {
	Match(adaptor Adaptor) bool
}

// FilterFunc adapts a plain function to the Filter interface.
type FilterFunc func(Adaptor) bool

// Match invokes fn against the adaptor, satisfying Filter.
func (fn FilterFunc) Match(adaptor Adaptor) bool {
	return fn(adaptor)
}

// Always is a filter that matches every object; Parse returns it for the
// empty input.
var Always FilterFunc = func(adaptor Adaptor) bool {
	return true
}
|
||||
|
||||
type Any []Filter
|
||||
|
||||
func (m Any) Match(adaptor Adaptor) bool {
|
||||
for _, m := range m {
|
||||
if m.Match(adaptor) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type All []Filter
|
||||
|
||||
func (m All) Match(adaptor Adaptor) bool {
|
||||
for _, m := range m {
|
||||
if !m.Match(adaptor) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// operator enumerates the comparison operations a selector can perform.
type operator int

const (
	// operatorPresent matches when the field exists, regardless of value.
	operatorPresent operator = iota
	// operatorEqual matches when the field value equals the target exactly.
	operatorEqual
	// operatorNotEqual matches when the field value differs from the target.
	operatorNotEqual
	// operatorMatches treats the target as a regular expression and matches
	// the field value against it.
	operatorMatches
)

// String returns the concrete filter syntax for the operator ("?" for
// presence), or "unknown" for an out-of-range value.
func (op operator) String() string {
	switch op {
	case operatorPresent:
		return "?"
	case operatorEqual:
		return "=="
	case operatorNotEqual:
		return "!="
	case operatorMatches:
		return "~="
	}

	return "unknown"
}
|
||||
|
||||
type selector struct {
|
||||
fieldpath []string
|
||||
operator operator
|
||||
value string
|
||||
re *regexp.Regexp
|
||||
}
|
||||
|
||||
func (m selector) Match(adaptor Adaptor) bool {
|
||||
value, present := adaptor.Field(m.fieldpath)
|
||||
|
||||
switch m.operator {
|
||||
case operatorPresent:
|
||||
return present
|
||||
case operatorEqual:
|
||||
return present && value == m.value
|
||||
case operatorNotEqual:
|
||||
return value != m.value
|
||||
case operatorMatches:
|
||||
if m.re == nil {
|
||||
r, err := regexp.Compile(m.value)
|
||||
if err != nil {
|
||||
log.L.Errorf("error compiling regexp %q", m.value)
|
||||
return false
|
||||
}
|
||||
|
||||
m.re = r
|
||||
}
|
||||
|
||||
return m.re.MatchString(value)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
246
filters/filter_test.go
Normal file
246
filters/filter_test.go
Normal file
@ -0,0 +1,246 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFilters parses each filter expression and runs it over a small corpus
// of cEntry values via an example adaptor, checking both the matched subset
// and the exact error strings for invalid inputs.
func TestFilters(t *testing.T) {
	// cEntry is the concrete type the adaptor below maps into fieldpaths.
	type cEntry struct {
		Name   string
		Other  string
		Labels map[string]string
	}

	corpusS := []cEntry{
		{
			Name: "foo",
			Labels: map[string]string{
				"foo": "true",
			},
		},
		{
			Name: "bar",
		},
		{
			Name: "foo",
			Labels: map[string]string{
				"foo":                "present",
				"more complex label": "present",
			},
		},
		{
			Name: "bar",
			Labels: map[string]string{
				"bar": "true",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \"": "present",
			},
		},
		{
			Name: "fooer",
			Labels: map[string]string{
				"more complex label with \\ and \".post": "present",
			},
		},
		{
			Name:  "baz",
			Other: "too complex, yo",
		},
		{
			Name:  "bazo",
			Other: "abc",
		},
	}

	// Matching operates over []interface{} to mirror how callers adapt
	// arbitrary objects.
	var corpus []interface{}
	for _, entry := range corpusS {
		corpus = append(corpus, entry)
	}

	// adapt shows an example of how to build an adaptor function for a type.
	adapt := func(o interface{}) Adaptor {
		obj := o.(cEntry)
		return AdapterFunc(func(fieldpath []string) (string, bool) {
			switch fieldpath[0] {
			case "name":
				return obj.Name, len(obj.Name) > 0
			case "other":
				return obj.Other, len(obj.Other) > 0
			case "labels":
				// Remaining fieldpath components address a single label key;
				// quoted components may themselves contain dots.
				value, ok := obj.Labels[strings.Join(fieldpath[1:], ".")]
				return value, ok
			}

			return "", false
		})
	}

	for _, testcase := range []struct {
		name      string
		input     string
		expected  []interface{}
		errString string
	}{
		{
			name:     "Empty",
			input:    "",
			expected: corpus,
		},
		{
			name:     "Present",
			input:    "name",
			expected: corpus,
		},
		{
			name:  "LabelPresent",
			input: "labels.foo",
			expected: []interface{}{
				corpus[0],
				corpus[2],
			},
		},
		{
			name:  "LabelValue",
			input: "labels.foo==true",
			expected: []interface{}{
				corpus[0],
			},
		},
		{
			name:  "Name",
			input: "name==bar",
			expected: []interface{}{
				corpus[1],
				corpus[3],
			},
		},
		{
			name:  "NameNotEqual",
			input: "name!=bar",
			expected: []interface{}{
				corpus[0],
				corpus[2],
				corpus[4],
				corpus[5],
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "NameAndLabelPresent",
			input: "name==bar,labels.bar",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			name:  "QuotedValue",
			input: "other==\"too complex, yo\"",
			expected: []interface{}{
				corpus[6],
			},
		},
		{
			name:  "RegexpValue",
			input: "other~=[abc]+,name!=foo",
			expected: []interface{}{
				corpus[6],
				corpus[7],
			},
		},
		{
			name:  "NameAndLabelValue",
			input: "name==bar,labels.bar==true",
			expected: []interface{}{
				corpus[3],
			},
		},
		{
			// expected left nil: no entry should match.
			name:  "NameAndLabelValueNoMatch",
			input: "name==bar,labels.bar==wrong",
		},
		{
			name:  "LabelQuotedFieldPathPresent",
			input: `name==foo,labels."more complex label"`,
			expected: []interface{}{
				corpus[2],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuoted",
			input: `labels."more complex label with \\ and \""==present`,
			expected: []interface{}{
				corpus[4],
			},
		},
		{
			name:  "LabelQuotedFieldPathPresentWithQuotedEmbed",
			input: `labels."more complex label with \\ and \"".post==present`,
			expected: []interface{}{
				corpus[5],
			},
		},
		{
			name:      "LabelQuotedFieldPathPresentWithQuotedEmbedInvalid",
			input:     `labels.?"more complex label with \\ and \"".post==present`,
			errString: `filters: parse error: [labels. >|?|< "more complex label with \\ and \"".post==present]: expected field or quoted`,
		},
		{
			name:      "TrailingComma",
			input:     "name==foo,",
			errString: `filters: parse error: [name==foo,]: expected field or quoted`,
		},
		{
			name:      "TrailingFieldSeparator",
			input:     "labels.",
			errString: `filters: parse error: [labels.]: expected field or quoted`,
		},
		{
			name:      "MissingValue",
			input:     "image~=,id?=?fbaq",
			errString: `filters: parse error: [image~= >|,|< id?=?fbaq]: expected value or quoted`,
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			t.Logf("testcase: %q", testcase.input)
			filter, err := Parse(testcase.input)
			if testcase.errString != "" {
				// Error cases assert on the exact rendered message, including
				// the >|…|< position marker.
				if err == nil {
					t.Fatalf("expected an error, but received nil")
				}
				if err.Error() != testcase.errString {
					t.Fatalf("error %v != %v", err, testcase.errString)
				}

				return
			} else {
				if err != nil {
					t.Fatal(err)
				}
			}

			if filter == nil {
				t.Fatal("filter should not be nil")
			}

			t.Log("filter", filter)
			var results []interface{}
			for _, item := range corpus {
				adaptor := adapt(item)
				if filter.Match(adaptor) {
					results = append(results, item)
				}
			}

			if !reflect.DeepEqual(results, testcase.expected) {
				t.Fatalf("%q: %#v != %#v", testcase.input, results, testcase.expected)
			}
		})
	}
}
|
239
filters/parser.go
Normal file
239
filters/parser.go
Normal file
@ -0,0 +1,239 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
/*
|
||||
Parse the strings into a filter that may be used with an adaptor.
|
||||
|
||||
The filter is made up of zero or more selectors.
|
||||
|
||||
The format is a comma separated list of expressions, in the form of
|
||||
`<fieldpath><op><value>`, known as selectors. All selectors must match the
|
||||
target object for the filter to be true.
|
||||
|
||||
We define the operators "==" for equality, "!=" for not equal and "~=" for a
|
||||
regular expression. If the operator and value are not present, the matcher will
|
||||
test for the presence of a value, as defined by the target object.
|
||||
|
||||
The formal grammar is as follows:
|
||||
|
||||
selectors := selector ("," selector)*
|
||||
selector := fieldpath (operator value)
|
||||
fieldpath := field ('.' field)*
|
||||
field := quoted | [A-Za-z] [A-Za-z0-9_]+
|
||||
operator := "==" | "!=" | "~="
|
||||
value := quoted | [^\s,]+
|
||||
quoted := <go string syntax>
|
||||
|
||||
*/
|
||||
func Parse(s string) (Filter, error) {
|
||||
// special case empty to match all
|
||||
if s == "" {
|
||||
return Always, nil
|
||||
}
|
||||
|
||||
p := parser{input: s}
|
||||
return p.parse()
|
||||
}
|
||||
|
||||
type parser struct {
|
||||
input string
|
||||
scanner scanner
|
||||
}
|
||||
|
||||
func (p *parser) parse() (Filter, error) {
|
||||
p.scanner.init(p.input)
|
||||
|
||||
ss, err := p.selectors()
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "filters")
|
||||
}
|
||||
|
||||
return ss, nil
|
||||
}
|
||||
|
||||
func (p *parser) selectors() (Filter, error) {
|
||||
s, err := p.selector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ss := All{s}
|
||||
|
||||
loop:
|
||||
for {
|
||||
tok := p.scanner.peek()
|
||||
switch tok {
|
||||
case ',':
|
||||
pos, tok, _ := p.scanner.scan()
|
||||
if tok != tokenSelectorSeparator {
|
||||
return nil, p.mkerr(pos, "expected a separator")
|
||||
}
|
||||
|
||||
s, err := p.selector()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ss = append(ss, s)
|
||||
case tokenEOF:
|
||||
break loop
|
||||
default:
|
||||
panic("unconsumed input")
|
||||
}
|
||||
}
|
||||
|
||||
return ss, nil
|
||||
}
|
||||
|
||||
func (p *parser) selector() (selector, error) {
|
||||
fieldpath, err := p.fieldpath()
|
||||
if err != nil {
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
switch p.scanner.peek() {
|
||||
case tokenSelectorSeparator, tokenEOF:
|
||||
return selector{
|
||||
fieldpath: fieldpath,
|
||||
operator: operatorPresent,
|
||||
}, nil
|
||||
}
|
||||
|
||||
op, err := p.operator()
|
||||
if err != nil {
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
value, err := p.value()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return selector{}, io.ErrUnexpectedEOF
|
||||
}
|
||||
return selector{}, err
|
||||
}
|
||||
|
||||
return selector{
|
||||
fieldpath: fieldpath,
|
||||
value: value,
|
||||
operator: op,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// fieldpath parses a dot-separated sequence of field components, returning
// each component unquoted.
func (p *parser) fieldpath() ([]string, error) {
	f, err := p.field()
	if err != nil {
		return nil, err
	}

	fs := []string{f}
loop:
	for {
		tok := p.scanner.peek() // lookahead to consume field separator

		switch tok {
		case '.':
			pos, tok, _ := p.scanner.scan() // consume separator
			if tok != tokenFieldSeparator {
				return nil, p.mkerr(pos, "expected a field separator (`.`)")
			}

			f, err := p.field()
			if err != nil {
				return nil, err
			}

			fs = append(fs, f)
		default:
			// let the layer above handle the other bad cases.
			break loop
		}
	}

	return fs, nil
}
|
||||
|
||||
func (p *parser) field() (string, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
switch tok {
|
||||
case tokenField:
|
||||
return s, nil
|
||||
case tokenQuoted:
|
||||
return p.unquote(pos, s)
|
||||
}
|
||||
|
||||
return "", p.mkerr(pos, "expected field or quoted")
|
||||
}
|
||||
|
||||
func (p *parser) operator() (operator, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
switch tok {
|
||||
case tokenOperator:
|
||||
switch s {
|
||||
case "==":
|
||||
return operatorEqual, nil
|
||||
case "!=":
|
||||
return operatorNotEqual, nil
|
||||
case "~=":
|
||||
return operatorMatches, nil
|
||||
default:
|
||||
return 0, p.mkerr(pos, "unsupported operator %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, p.mkerr(pos, `expected an operator ("=="|"!="|"~=")`)
|
||||
}
|
||||
|
||||
func (p *parser) value() (string, error) {
|
||||
pos, tok, s := p.scanner.scan()
|
||||
|
||||
switch tok {
|
||||
case tokenValue, tokenField:
|
||||
return s, nil
|
||||
case tokenQuoted:
|
||||
return p.unquote(pos, s)
|
||||
}
|
||||
|
||||
return "", p.mkerr(pos, "expected value or quoted")
|
||||
}
|
||||
|
||||
func (p *parser) unquote(pos int, s string) (string, error) {
|
||||
uq, err := strconv.Unquote(s)
|
||||
if err != nil {
|
||||
return "", p.mkerr(pos, "unquoting failed: %v", err)
|
||||
}
|
||||
|
||||
return uq, nil
|
||||
}
|
||||
|
||||
// parseError carries the original input, the offending byte position and a
// message, rendering the input with a >|…|< marker around the bad spot.
type parseError struct {
	input string
	pos   int
	msg   string
}

// Error renders the input with the character at pos highlighted, or the
// whole input when pos points past the end.
func (pe parseError) Error() string {
	if pe.pos >= len(pe.input) {
		return fmt.Sprintf("[%s]: %v", pe.input, pe.msg)
	}

	before := pe.input[:pe.pos]
	location := pe.input[pe.pos : pe.pos+1] // need to handle end
	after := pe.input[pe.pos+1:]

	return fmt.Sprintf("[%s >|%s|< %s]: %v", before, location, after, pe.msg)
}
|
||||
|
||||
// mkerr builds a positioned parseError against the full input, wrapped with
// a "parse error" prefix.
func (p *parser) mkerr(pos int, format string, args ...interface{}) error {
	return errors.Wrap(parseError{
		input: p.input,
		pos:   pos,
		msg:   fmt.Sprintf(format, args...),
	}, "parse error")
}
|
279
filters/scanner.go
Normal file
279
filters/scanner.go
Normal file
@ -0,0 +1,279 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Scanner tokens. These are negative so they can never collide with valid
// input runes, which scan returns directly as their own token values.
const (
	tokenEOF = -(iota + 1)
	tokenQuoted
	tokenValue
	tokenField
	tokenFieldSeparator
	tokenOperator
	tokenSelectorSeparator
	tokenIllegal
)

// token is either one of the negative constants above or a raw input rune.
type token rune

// String returns a human-readable name for scanner tokens, or the rune
// itself for raw runes.
func (t token) String() string {
	switch t {
	case tokenEOF:
		return "EOF"
	case tokenQuoted:
		return "Quoted"
	case tokenValue:
		return "Value"
	case tokenField:
		return "Field"
	case tokenOperator:
		return "Operator"
	case tokenFieldSeparator:
		return "FieldSeparator"
	case tokenSelectorSeparator:
		return "SelectorSeparator"
	case tokenIllegal:
		return "Illegal"
	}

	return string(t)
}

// GoString implements fmt.GoStringer for %#v formatting in tests.
func (t token) GoString() string {
	return "token" + t.String()
}
|
||||
|
||||
// scanner tokenizes a filter expression. pos/ppos are byte offsets into
// input bounding the current rune.
type scanner struct {
	input string
	pos   int
	ppos  int // bounds the current rune in the string
	value bool
}

// init resets the scanner to the start of input.
func (s *scanner) init(input string) {
	s.input = input
	s.pos = 0
	s.ppos = 0
}

// next advances one rune and returns it, or tokenEOF at end of input and
// tokenIllegal for invalid UTF-8 or a NUL byte.
func (s *scanner) next() rune {
	if s.pos >= len(s.input) {
		return tokenEOF
	}
	s.pos = s.ppos

	r, w := utf8.DecodeRuneInString(s.input[s.ppos:])
	s.ppos += w
	if r == utf8.RuneError {
		if w > 0 {
			return tokenIllegal
		} else {
			return tokenEOF
		}
	}

	if r == 0 {
		// NUL is never legal input.
		return tokenIllegal
	}

	return r
}

// peek returns the next rune without consuming it by saving and restoring
// the scanner position.
func (s *scanner) peek() rune {
	pos := s.pos
	ppos := s.ppos
	ch := s.next()
	s.pos = pos
	s.ppos = ppos
	return ch
}

// scan returns the position, token and raw text of the next token,
// skipping whitespace between tokens. After an operator, the value flag
// switches classification so the right-hand side scans as a value rather
// than a field.
func (s *scanner) scan() (int, token, string) {
	var (
		ch  = s.next()
		pos = s.pos
	)

chomp:
	switch {
	case ch == tokenEOF:
	case ch == tokenIllegal:
	case isQuoteRune(ch):
		s.scanString(ch)
		return pos, tokenQuoted, s.input[pos:s.ppos]
	case ch == ',':
		return pos, tokenSelectorSeparator, s.input[pos:s.ppos]
	case ch == '.':
		return pos, tokenFieldSeparator, s.input[pos:s.ppos]
	case isOperatorRune(ch):
		s.scanOperator()
		s.value = true
		return pos, tokenOperator, s.input[pos:s.ppos]
	case unicode.IsSpace(ch):
		// chomp whitespace and re-classify from the next rune
		ch = s.next()
		pos = s.pos
		goto chomp
	case s.value:
		s.scanValue()

		// TODO(stevvooe): We can get rid of the value flag by having a
		// scanUnquoted that accumulates characters. If it is a legal field,
		// then we return a field token. The parser can then treat fields as
		// values. This will allow the default case here to just scan value or
		// field.
		s.value = false
		return pos, tokenValue, s.input[pos:s.ppos]
	case isFieldRune(ch):
		s.scanField()
		return pos, tokenField, s.input[pos:s.ppos]
	}

	// EOF/illegal fall through here with empty text.
	return s.pos, token(ch), ""
}
|
||||
|
||||
func (s *scanner) scanField() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
if !isFieldRune(ch) {
|
||||
break
|
||||
}
|
||||
s.next()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) scanOperator() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
switch ch {
|
||||
case '=', '!', '~':
|
||||
s.next()
|
||||
default:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (s *scanner) scanValue() {
|
||||
for {
|
||||
ch := s.peek()
|
||||
if !isValueRune(ch) {
|
||||
break
|
||||
}
|
||||
s.next()
|
||||
}
|
||||
}
|
||||
|
||||
// scanString consumes a quoted literal, including escape sequences, up to
// and including the closing quote rune.
func (s *scanner) scanString(quote rune) {
	ch := s.next() // read character after quote
	for ch != quote {
		if ch == '\n' || ch < 0 {
			s.error("literal not terminated")
			return
		}
		if ch == '\\' {
			ch = s.scanEscape(quote)
		} else {
			ch = s.next()
		}
	}
	return
}

// scanEscape consumes one Go-style escape sequence following a backslash
// and returns the first rune after it.
func (s *scanner) scanEscape(quote rune) rune {
	ch := s.next() // read character after '/'
	switch ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		// nothing to do
		ch = s.next()
	case '0', '1', '2', '3', '4', '5', '6', '7':
		ch = s.scanDigits(ch, 8, 3)
	case 'x':
		ch = s.scanDigits(s.next(), 16, 2)
	case 'u':
		ch = s.scanDigits(s.next(), 16, 4)
	case 'U':
		ch = s.scanDigits(s.next(), 16, 8)
	default:
		s.error("illegal char escape")
	}
	return ch
}

// scanDigits consumes up to n digits in the given base, returning the first
// rune past the run; reports an error when fewer than n digits are present.
func (s *scanner) scanDigits(ch rune, base, n int) rune {
	for n > 0 && digitVal(ch) < base {
		ch = s.next()
		n--
	}
	if n > 0 {
		s.error("illegal char escape")
	}
	return ch
}

// error reports a scan error.
// TODO: currently this only prints ("error fixme") to stdout; scan errors
// should be surfaced to the parser/caller instead.
func (s *scanner) error(msg string) {
	fmt.Println("error fixme", msg)
}
|
||||
|
||||
// digitVal returns the numeric value of ch as a digit in bases up to 16,
// or 16 (larger than any legal digit value) when ch is not a hex digit.
func digitVal(ch rune) int {
	switch {
	case ch >= '0' && ch <= '9':
		return int(ch - '0')
	case ch >= 'a' && ch <= 'f':
		return int(ch-'a') + 10
	case ch >= 'A' && ch <= 'F':
		return int(ch-'A') + 10
	default:
		return 16 // larger than any legal digit val
	}
}

// isFieldRune reports whether r may appear in an unquoted field name.
func isFieldRune(r rune) bool {
	return r == '_' || isAlphaRune(r) || isDigitRune(r)
}

// isAlphaRune reports whether r is an ASCII letter.
func isAlphaRune(r rune) bool {
	return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
}

// isDigitRune reports whether r is an ASCII digit.
func isDigitRune(r rune) bool {
	return '0' <= r && r <= '9'
}

// isOperatorRune reports whether r can start or continue an operator.
func isOperatorRune(r rune) bool {
	return r == '=' || r == '!' || r == '~'
}

// isQuoteRune reports whether r opens a quoted literal.
func isQuoteRune(r rune) bool {
	return r == '"' // maybe add single quoting?
}

// isSeparatorRune reports whether r separates selectors or field components.
func isSeparatorRune(r rune) bool {
	return r == ',' || r == '.'
}

// isValueRune reports whether r may appear in an unquoted value: any
// printable rune except the selector separator and whitespace.
func isValueRune(r rune) bool {
	if r == ',' || unicode.IsSpace(r) {
		return false
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsNumber(r) ||
		unicode.IsGraphic(r) || unicode.IsPunct(r)
}
|
236
filters/scanner_test.go
Normal file
236
filters/scanner_test.go
Normal file
@ -0,0 +1,236 @@
|
||||
package filters
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// tokenResult captures one expected scan result (position, token kind and
// raw text) for the table tests below.
type tokenResult struct {
	pos   int
	token token
	text  string
}

// String renders the result in the same shape as the test-table literals,
// making failure messages easy to paste back into a testcase.
func (tr tokenResult) String() string {
	return fmt.Sprintf("{pos: %v, token: %v, text: %q}", tr.pos, tr.token, tr.text)
}
|
||||
|
||||
// TestScanner feeds each input through the scanner and compares the full
// token stream (position, token kind, raw text) against expectations.
func TestScanner(t *testing.T) {

	for _, testcase := range []struct {
		name     string
		input    string
		expected []tokenResult
	}{
		{
			name:  "Field",
			input: "name",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithOperators",
			input: "name==value,foo!=bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSelectorSeparator, text: ","},
				{pos: 12, token: tokenField, text: "foo"},
				{pos: 15, token: tokenOperator, text: "!="},
				{pos: 17, token: tokenValue, text: "bar"},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "SelectorsWithFieldPaths",
			input: "name==value,labels.foo=value,other.bar~=match",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "=="},
				{pos: 6, token: tokenValue, text: "value"},
				{pos: 11, token: tokenSelectorSeparator, text: ","},
				{pos: 12, token: tokenField, text: "labels"},
				{pos: 18, token: tokenFieldSeparator, text: "."},
				{pos: 19, token: tokenField, text: "foo"},
				{pos: 22, token: tokenOperator, text: "="},
				{pos: 23, token: tokenValue, text: "value"},
				{pos: 28, token: tokenSelectorSeparator, text: ","},
				{pos: 29, token: tokenField, text: "other"},
				{pos: 34, token: tokenFieldSeparator, text: "."},
				{pos: 35, token: tokenField, text: "bar"},
				{pos: 38, token: tokenOperator, text: "~="},
				{pos: 40, token: tokenValue, text: "match"},
				{pos: 45, token: tokenEOF},
			},
		},
		{
			name:  "RegexpValue",
			input: "name~=[abc]+,foo=test",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]+"},
				{pos: 12, token: tokenSelectorSeparator, text: ","},
				{pos: 13, token: tokenField, text: "foo"},
				{pos: 16, token: tokenOperator, text: "="},
				{pos: 17, token: tokenValue, text: "test"},
				{pos: 21, token: tokenEOF},
			},
		},
		{
			name:  "RegexpEscapedValue",
			input: `name~=[abc]\+,foo=test`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "[abc]\\+"},
				{pos: 13, token: tokenSelectorSeparator, text: ","},
				{pos: 14, token: tokenField, text: "foo"},
				{pos: 17, token: tokenOperator, text: "="},
				{pos: 18, token: tokenValue, text: "test"},
				{pos: 22, token: tokenEOF},
			},
		},
		{
			// multibyte rune: positions are byte offsets, not rune counts.
			name:  "Cowsay",
			input: "name~=牛,labels.moo=true",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenValue, text: "牛"},
				{pos: 9, token: tokenSelectorSeparator, text: ","},
				{pos: 10, token: tokenField, text: "labels"},
				{pos: 16, token: tokenFieldSeparator, text: "."},
				{pos: 17, token: tokenField, text: "moo"},
				{pos: 20, token: tokenOperator, text: "="},
				{pos: 21, token: tokenValue, text: "true"},
				{pos: 25, token: tokenEOF},
			},
		},
		{
			// quoted text is returned raw, including the surrounding quotes.
			name:  "Escapes",
			input: `name~="asdf\n\tfooo"`,
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "name"},
				{pos: 4, token: tokenOperator, text: "~="},
				{pos: 6, token: tokenQuoted, text: "\"asdf\\n\\tfooo\""},
				{pos: 20, token: tokenEOF},
			},
		},
		{
			name:  "NullInput",
			input: "foo\x00bar",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 3, token: tokenIllegal},
				{pos: 4, token: tokenField, text: "bar"},
				{pos: 7, token: tokenEOF},
			},
		},
		{
			name:  "SpacesChomped",
			input: "foo = bar ",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "foo"},
				{pos: 4, token: tokenOperator, text: "="},
				{pos: 6, token: tokenValue, text: "bar"},
				{pos: 13, token: tokenEOF},
			},
		},
		{
			name:  "PartialInput",
			input: "interrupted=",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "interrupted"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenEOF},
			},
		},
		{
			name:  "DoubleValue",
			input: "doublevalue=value value",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "doublevalue"},
				{pos: 11, token: tokenOperator, text: "="},
				{pos: 12, token: tokenValue, text: "value"},
				{pos: 18, token: tokenField, text: "value"},
				{pos: 23, token: tokenEOF},
			},
		},
		{
			name:  "LeadingWithQuoted",
			input: `"leading quote".postquote==value`,
			expected: []tokenResult{
				{pos: 0, token: tokenQuoted, text: "\"leading quote\""},
				{pos: 15, token: tokenFieldSeparator, text: "."},
				{pos: 16, token: tokenField, text: "postquote"},
				{pos: 25, token: tokenOperator, text: "=="},
				{pos: 27, token: tokenValue, text: "value"},
				{pos: 32, token: tokenEOF},
			},
		},
		{
			name:  "MissingValue",
			input: "input==,id?=ff",
			expected: []tokenResult{
				{pos: 0, token: tokenField, text: "input"},
				{pos: 5, token: tokenOperator, text: "=="},
				{pos: 7, token: tokenSelectorSeparator, text: ","},
				{pos: 8, token: tokenValue, text: "id?=ff"},
				{pos: 14, token: tokenEOF},
			},
		},
	} {
		t.Run(testcase.name, func(t *testing.T) {
			var sc scanner
			sc.init(testcase.input)
			t.Logf("scan %q", testcase.input)

			// If you leave the expected empty, the test case will just print
			// out the token stream, which you can paste into the testcase when
			// adding new cases.
			if len(testcase.expected) == 0 {
				fmt.Println("Name", testcase.name)
			}

			for i := 0; ; i++ {
				pos, tok, s := sc.scan()
				t.Log("token", pos, tok, strconv.Quote(s))
				if len(testcase.expected) == 0 {
					if len(s) > 0 {
						fmt.Printf("{pos: %v, token: %#v, text: %q},\n", pos, tok, s)
					} else {
						fmt.Printf("{pos: %v, token: %#v},\n", pos, tok)
					}
				} else {
					tokv := tokenResult{pos: pos, token: tok, text: s}
					if i >= len(testcase.expected) {
						t.Fatalf("too many tokens parsed")
					}

					if tokv != testcase.expected[i] {
						t.Fatalf("token unexpected: %v != %v", tokv, testcase.expected[i])
					}
				}

				if tok == tokenEOF {
					break
				}
			}

			// make sure we've eof'd
			_, tok, _ := sc.scan()
			if tok != tokenEOF {
				t.Fatal("must consume all input")
			}

			if len(testcase.expected) == 0 {
				t.Fatal("must define expected tokens")
			}
		})
	}
}
|
Loading…
Reference in New Issue
Block a user